diff --git a/[refs] b/[refs] index 999710b2f0d3..43e8de8bcdc1 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 6a0ed91e361a93ee1efb4c20c4967024ed2a8dd7 +refs/heads/master: b247bbf1da69ce376aa1ceb8057331214589e366 diff --git a/trunk/Documentation/lguest/Makefile b/trunk/Documentation/lguest/Makefile index c0b7a4556390..31e794ef5f98 100644 --- a/trunk/Documentation/lguest/Makefile +++ b/trunk/Documentation/lguest/Makefile @@ -13,9 +13,7 @@ LGUEST_GUEST_TOP := ($(CONFIG_PAGE_OFFSET) - 0x08000000) CFLAGS:=-Wall -Wmissing-declarations -Wmissing-prototypes -O3 -Wl,-T,lguest.lds LDLIBS:=-lz -# Removing this works for some versions of ld.so (eg. Ubuntu Feisty) and -# not others (eg. FC7). -LDFLAGS+=-static + all: lguest.lds lguest # The linker script on x86 is so complex the only way of creating one diff --git a/trunk/Documentation/sched-design-CFS.txt b/trunk/Documentation/sched-design-CFS.txt index 84901e7c0508..16feebb7bdc0 100644 --- a/trunk/Documentation/sched-design-CFS.txt +++ b/trunk/Documentation/sched-design-CFS.txt @@ -83,7 +83,7 @@ Some implementation details: CFS uses nanosecond granularity accounting and does not rely on any jiffies or other HZ detail. Thus the CFS scheduler has no notion of 'timeslices' and has no heuristics whatsoever. There is only one - central tunable (you have to switch on CONFIG_SCHED_DEBUG): + central tunable: /proc/sys/kernel/sched_granularity_ns diff --git a/trunk/Documentation/sched-nice-design.txt b/trunk/Documentation/sched-nice-design.txt deleted file mode 100644 index e2bae5a577e3..000000000000 --- a/trunk/Documentation/sched-nice-design.txt +++ /dev/null @@ -1,108 +0,0 @@ -This document explains the thinking about the revamped and streamlined -nice-levels implementation in the new Linux scheduler. - -Nice levels were always pretty weak under Linux and people continuously -pestered us to make nice +19 tasks use up much less CPU time. - -Unfortunately that was not that easy to implement under the old -scheduler, (otherwise we'd have done it long ago) because nice level -support was historically coupled to timeslice length, and timeslice -units were driven by the HZ tick, so the smallest timeslice was 1/HZ. - -In the O(1) scheduler (in 2003) we changed negative nice levels to be -much stronger than they were before in 2.4 (and people were happy about -that change), and we also intentionally calibrated the linear timeslice -rule so that nice +19 level would be _exactly_ 1 jiffy. To better -understand it, the timeslice graph went like this (cheesy ASCII art -alert!): - - - A - \ | [timeslice length] - \ | - \ | - \ | - \ | - \|___100msecs - |^ . _ - | ^ . _ - | ^ . _ - -*----------------------------------*-----> [nice level] - -20 | +19 - | - | - -So that if someone wanted to really renice tasks, +19 would give a much -bigger hit than the normal linear rule would do. (The solution of -changing the ABI to extend priorities was discarded early on.) - -This approach worked to some degree for some time, but later on with -HZ=1000 it caused 1 jiffy to be 1 msec, which meant 0.1% CPU usage which -we felt to be a bit excessive. Excessive _not_ because it's too small of -a CPU utilization, but because it causes too frequent (once per -millisec) rescheduling. (and would thus trash the cache, etc. Remember, -this was long ago when hardware was weaker and caches were smaller, and -people were running number crunching apps at nice +19.) 
- -So for HZ=1000 we changed nice +19 to 5msecs, because that felt like the -right minimal granularity - and this translates to 5% CPU utilization. -But the fundamental HZ-sensitive property for nice+19 still remained, -and we never got a single complaint about nice +19 being too _weak_ in -terms of CPU utilization, we only got complaints about it (still) being -too _strong_ :-) - -To sum it up: we always wanted to make nice levels more consistent, but -within the constraints of HZ and jiffies and their nasty design level -coupling to timeslices and granularity it was not really viable. - -The second (less frequent but still periodically occuring) complaint -about Linux's nice level support was its assymetry around the origo -(which you can see demonstrated in the picture above), or more -accurately: the fact that nice level behavior depended on the _absolute_ -nice level as well, while the nice API itself is fundamentally -"relative": - - int nice(int inc); - - asmlinkage long sys_nice(int increment) - -(the first one is the glibc API, the second one is the syscall API.) -Note that the 'inc' is relative to the current nice level. Tools like -bash's "nice" command mirror this relative API. - -With the old scheduler, if you for example started a niced task with +1 -and another task with +2, the CPU split between the two tasks would -depend on the nice level of the parent shell - if it was at nice -10 the -CPU split was different than if it was at +5 or +10. - -A third complaint against Linux's nice level support was that negative -nice levels were not 'punchy enough', so lots of people had to resort to -run audio (and other multimedia) apps under RT priorities such as -SCHED_FIFO. But this caused other problems: SCHED_FIFO is not starvation -proof, and a buggy SCHED_FIFO app can also lock up the system for good. - -The new scheduler in v2.6.23 addresses all three types of complaints: - -To address the first complaint (of nice levels being not "punchy" -enough), the scheduler was decoupled from 'time slice' and HZ concepts -(and granularity was made a separate concept from nice levels) and thus -it was possible to implement better and more consistent nice +19 -support: with the new scheduler nice +19 tasks get a HZ-independent -1.5%, instead of the variable 3%-5%-9% range they got in the old -scheduler. - -To address the second complaint (of nice levels not being consistent), -the new scheduler makes nice(1) have the same CPU utilization effect on -tasks, regardless of their absolute nice levels. So on the new -scheduler, running a nice +10 and a nice 11 task has the same CPU -utilization "split" between them as running a nice -5 and a nice -4 -task. (one will get 55% of the CPU, the other 45%.) That is why nice -levels were changed to be "multiplicative" (or exponential) - that way -it does not matter which nice level you start out from, the 'relative -result' will always be the same. - -The third complaint (of negative nice levels not being "punchy" enough -and forcing audio apps to run under the more dangerous SCHED_FIFO -scheduling policy) is addressed by the new scheduler almost -automatically: stronger negative nice levels are an automatic -side-effect of the recalibrated dynamic range of nice levels. 
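The deleted document above says nice levels were made "multiplicative (or exponential)", so only the relative nice difference matters, with adjacent levels splitting the CPU roughly 55%/45%. The sketch below is not kernel code and is not from this patch; it is a minimal user-space illustration of that rule, assuming a per-level weight factor of 1.25 (the 55/45 example in the text corresponds to a factor of about 1.22).

    /*
     * Minimal sketch of the "multiplicative" nice rule described above.
     * The per-level factor of 1.25 is an assumption for illustration.
     */
    #include <math.h>
    #include <stdio.h>

    static double nice_weight(int nice, double ratio)
    {
            /* Each nice level scales the weight by a constant factor,
             * so only the relative nice difference matters. */
            return pow(ratio, -nice);
    }

    static void print_split(int a, int b, double ratio)
    {
            double wa = nice_weight(a, ratio);
            double wb = nice_weight(b, ratio);

            printf("nice %+d vs %+d: %.1f%% / %.1f%%\n", a, b,
                   100.0 * wa / (wa + wb), 100.0 * wb / (wa + wb));
    }

    int main(void)
    {
            const double ratio = 1.25;      /* assumed per-level factor */

            /* Same relative difference, same split, as the text claims: */
            print_split(10, 11, ratio);     /* ~55.6% / 44.4% */
            print_split(-5, -4, ratio);     /* ~55.6% / 44.4% */
            return 0;
    }

With any constant per-level factor the split between nice n and n+1 is identical for every n, which is exactly the consistency property the document argues the new scheduler provides.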
diff --git a/trunk/arch/sparc/kernel/prom.c b/trunk/arch/sparc/kernel/prom.c index cd4fb79aa3a8..39fbd3c8ab0b 100644 --- a/trunk/arch/sparc/kernel/prom.c +++ b/trunk/arch/sparc/kernel/prom.c @@ -102,21 +102,6 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len } EXPORT_SYMBOL(of_set_property); -int of_find_in_proplist(const char *list, const char *match, int len) -{ - while (len > 0) { - int l; - - if (!strcmp(list, match)) - return 1; - l = strlen(list) + 1; - list += l; - len -= l; - } - return 0; -} -EXPORT_SYMBOL(of_find_in_proplist); - static unsigned int prom_early_allocated; static void * __init prom_early_alloc(unsigned long size) diff --git a/trunk/arch/sparc64/kernel/cpu.c b/trunk/arch/sparc64/kernel/cpu.c index e43db73f2b91..7eb81d3954d9 100644 --- a/trunk/arch/sparc64/kernel/cpu.c +++ b/trunk/arch/sparc64/kernel/cpu.c @@ -1,7 +1,7 @@ /* cpu.c: Dinky routines to look for the kind of Sparc cpu * we are on. * - * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net) + * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) */ #include @@ -13,7 +13,6 @@ #include #include #include -#include DEFINE_PER_CPU(cpuinfo_sparc, __cpu_data) = { 0 }; @@ -62,40 +61,21 @@ struct cpu_iu_info linux_sparc_chips[] = { #define NSPARCCHIPS ARRAY_SIZE(linux_sparc_chips) -char *sparc_cpu_type; -char *sparc_fpu_type; +char *sparc_cpu_type = "cpu-oops"; +char *sparc_fpu_type = "fpu-oops"; unsigned int fsr_storage; -static void __init sun4v_cpu_probe(void) -{ - switch (sun4v_chip_type) { - case SUN4V_CHIP_NIAGARA1: - sparc_cpu_type = "UltraSparc T1 (Niagara)"; - sparc_fpu_type = "UltraSparc T1 integrated FPU"; - break; - - case SUN4V_CHIP_NIAGARA2: - sparc_cpu_type = "UltraSparc T2 (Niagara2)"; - sparc_fpu_type = "UltraSparc T2 integrated FPU"; - break; - - default: - printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", - prom_cpu_compatible); - sparc_cpu_type = "Unknown SUN4V CPU"; - sparc_fpu_type = "Unknown SUN4V FPU"; - break; - } -} - void __init cpu_probe(void) { unsigned long ver, fpu_vers, manuf, impl, fprs; int i; - if (tlb_type == hypervisor) - return sun4v_cpu_probe(); + if (tlb_type == hypervisor) { + sparc_cpu_type = "UltraSparc T1 (Niagara)"; + sparc_fpu_type = "UltraSparc T1 integrated FPU"; + return; + } fprs = fprs_read(); fprs_write(FPRS_FEF); diff --git a/trunk/arch/sparc64/kernel/head.S b/trunk/arch/sparc64/kernel/head.S index ac18bd8e273f..9dbd833d79d6 100644 --- a/trunk/arch/sparc64/kernel/head.S +++ b/trunk/arch/sparc64/kernel/head.S @@ -97,8 +97,7 @@ sparc64_boot: .globl prom_map_name, prom_unmap_name, prom_mmu_ihandle_cache .globl prom_boot_mapped_pc, prom_boot_mapping_mode .globl prom_boot_mapping_phys_high, prom_boot_mapping_phys_low - .globl prom_compatible_name, prom_cpu_path, prom_cpu_compatible - .globl is_sun4v, sun4v_chip_type + .globl is_sun4v prom_peer_name: .asciz "peer" prom_compatible_name: @@ -107,8 +106,6 @@ prom_finddev_name: .asciz "finddevice" prom_chosen_path: .asciz "/chosen" -prom_cpu_path: - .asciz "/cpu" prom_getprop_name: .asciz "getprop" prom_mmu_name: @@ -123,13 +120,9 @@ prom_unmap_name: .asciz "unmap" prom_sun4v_name: .asciz "sun4v" -prom_niagara_prefix: - .asciz "SUNW,UltraSPARC-T" .align 4 prom_root_compatible: .skip 64 -prom_cpu_compatible: - .skip 64 prom_root_node: .word 0 prom_mmu_ihandle_cache: @@ -145,8 +138,6 @@ prom_boot_mapping_phys_low: .xword 0 is_sun4v: .word 0 -sun4v_chip_type: - .word SUN4V_CHIP_INVALID 1: rd %pc, %l0 @@ -305,13 +296,13 @@ sun4v_chip_type: sethi %hi(prom_sun4v_name), 
%g7 or %g7, %lo(prom_sun4v_name), %g7 mov 5, %g3 -90: ldub [%g7], %g2 +1: ldub [%g7], %g2 ldub [%g1], %g4 cmp %g2, %g4 - bne,pn %icc, 80f + bne,pn %icc, 2f add %g7, 1, %g7 subcc %g3, 1, %g3 - bne,pt %xcc, 90b + bne,pt %xcc, 1b add %g1, 1, %g1 sethi %hi(is_sun4v), %g1 @@ -319,80 +310,7 @@ sun4v_chip_type: mov 1, %g7 stw %g7, [%g1] - /* cpu_node = prom_finddevice("/cpu") */ - mov (1b - prom_finddev_name), %l1 - mov (1b - prom_cpu_path), %l2 - sub %l0, %l1, %l1 - sub %l0, %l2, %l2 - sub %sp, (192 + 128), %sp - - stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "finddevice" - mov 1, %l3 - stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 1 - stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 - stx %l2, [%sp + 2047 + 128 + 0x18] ! arg1, "/cpu" - stx %g0, [%sp + 2047 + 128 + 0x20] ! ret1 - call %l7 - add %sp, (2047 + 128), %o0 ! argument array - - ldx [%sp + 2047 + 128 + 0x20], %l4 ! cpu device node - - mov (1b - prom_getprop_name), %l1 - mov (1b - prom_compatible_name), %l2 - mov (1b - prom_cpu_compatible), %l5 - sub %l0, %l1, %l1 - sub %l0, %l2, %l2 - sub %l0, %l5, %l5 - - /* prom_getproperty(cpu_node, "compatible", - * &prom_cpu_compatible, 64) - */ - stx %l1, [%sp + 2047 + 128 + 0x00] ! service, "getprop" - mov 4, %l3 - stx %l3, [%sp + 2047 + 128 + 0x08] ! num_args, 4 - mov 1, %l3 - stx %l3, [%sp + 2047 + 128 + 0x10] ! num_rets, 1 - stx %l4, [%sp + 2047 + 128 + 0x18] ! arg1, cpu_node - stx %l2, [%sp + 2047 + 128 + 0x20] ! arg2, "compatible" - stx %l5, [%sp + 2047 + 128 + 0x28] ! arg3, &prom_cpu_compatible - mov 64, %l3 - stx %l3, [%sp + 2047 + 128 + 0x30] ! arg4, size - stx %g0, [%sp + 2047 + 128 + 0x38] ! ret1 - call %l7 - add %sp, (2047 + 128), %o0 ! argument array - - add %sp, (192 + 128), %sp - - sethi %hi(prom_cpu_compatible), %g1 - or %g1, %lo(prom_cpu_compatible), %g1 - sethi %hi(prom_niagara_prefix), %g7 - or %g7, %lo(prom_niagara_prefix), %g7 - mov 17, %g3 -90: ldub [%g7], %g2 - ldub [%g1], %g4 - cmp %g2, %g4 - bne,pn %icc, 4f - add %g7, 1, %g7 - subcc %g3, 1, %g3 - bne,pt %xcc, 90b - add %g1, 1, %g1 - - sethi %hi(prom_cpu_compatible), %g1 - or %g1, %lo(prom_cpu_compatible), %g1 - ldub [%g1 + 17], %g2 - cmp %g2, '1' - be,pt %xcc, 5f - mov SUN4V_CHIP_NIAGARA1, %g4 - cmp %g2, '2' - be,pt %xcc, 5f - mov SUN4V_CHIP_NIAGARA2, %g4 -4: - mov SUN4V_CHIP_UNKNOWN, %g4 -5: sethi %hi(sun4v_chip_type), %g2 - or %g2, %lo(sun4v_chip_type), %g2 - stw %g4, [%g2] - -80: +2: BRANCH_IF_SUN4V(g1, jump_to_sun4u_init) BRANCH_IF_CHEETAH_BASE(g1,g7,cheetah_boot) BRANCH_IF_CHEETAH_PLUS_OR_FOLLOWON(g1,g7,cheetah_plus_boot) @@ -496,24 +414,6 @@ niagara_tlb_fixup: stw %g2, [%g1 + %lo(tlb_type)] /* Patch copy/clear ops. */ - sethi %hi(sun4v_chip_type), %g1 - lduw [%g1 + %lo(sun4v_chip_type)], %g1 - cmp %g1, SUN4V_CHIP_NIAGARA1 - be,pt %xcc, niagara_patch - cmp %g1, SUN4V_CHIP_NIAGARA2 - be,pt %xcc, niagara_patch - nop - - call generic_patch_copyops - nop - call generic_patch_bzero - nop - call generic_patch_pageops - nop - - ba,a,pt %xcc, 80f - -niagara_patch: call niagara_patch_copyops nop call niagara_patch_bzero @@ -521,7 +421,6 @@ niagara_patch: call niagara_patch_pageops nop -80: /* Patch TLB/cache ops. 
*/ call hypervisor_patch_cachetlbops nop diff --git a/trunk/arch/sparc64/kernel/hvtramp.S b/trunk/arch/sparc64/kernel/hvtramp.S index b692e044a463..a55c252e18cc 100644 --- a/trunk/arch/sparc64/kernel/hvtramp.S +++ b/trunk/arch/sparc64/kernel/hvtramp.S @@ -115,8 +115,11 @@ hv_cpu_startup: call hard_smp_processor_id nop - call sun4v_register_mondo_queues - nop + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 + call sun4v_init_mondo_queues + mov 1, %o3 call init_cur_cpu_trap mov %g6, %o0 diff --git a/trunk/arch/sparc64/kernel/irq.c b/trunk/arch/sparc64/kernel/irq.c index 384abf410cf0..db31bf6b42db 100644 --- a/trunk/arch/sparc64/kernel/irq.c +++ b/trunk/arch/sparc64/kernel/irq.c @@ -929,7 +929,7 @@ static void __cpuinit register_one_mondo(unsigned long paddr, unsigned long type } } -void __cpuinit sun4v_register_mondo_queues(int this_cpu) +static void __cpuinit sun4v_register_mondo_queues(int this_cpu) { struct trap_per_cpu *tb = &trap_block[this_cpu]; @@ -943,10 +943,20 @@ void __cpuinit sun4v_register_mondo_queues(int this_cpu) tb->nonresum_qmask); } -static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask) +static void __cpuinit alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem) { unsigned long size = PAGE_ALIGN(qmask + 1); - void *p = __alloc_bootmem_low(size, size, 0); + unsigned long order = get_order(size); + void *p = NULL; + + if (use_bootmem) { + p = __alloc_bootmem_low(size, size, 0); + } else { + struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order); + if (page) + p = page_address(page); + } + if (!p) { prom_printf("SUN4V: Error, cannot allocate mondo queue.\n"); prom_halt(); @@ -955,10 +965,19 @@ static void __init alloc_one_mondo(unsigned long *pa_ptr, unsigned long qmask) *pa_ptr = __pa(p); } -static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask) +static void __cpuinit alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask, int use_bootmem) { unsigned long size = PAGE_ALIGN(qmask + 1); - void *p = __alloc_bootmem_low(size, size, 0); + unsigned long order = get_order(size); + void *p = NULL; + + if (use_bootmem) { + p = __alloc_bootmem_low(size, size, 0); + } else { + struct page *page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, order); + if (page) + p = page_address(page); + } if (!p) { prom_printf("SUN4V: Error, cannot allocate kbuf page.\n"); @@ -968,14 +987,18 @@ static void __init alloc_one_kbuf(unsigned long *pa_ptr, unsigned long qmask) *pa_ptr = __pa(p); } -static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) +static void __cpuinit init_cpu_send_mondo_info(struct trap_per_cpu *tb, int use_bootmem) { #ifdef CONFIG_SMP void *page; BUILD_BUG_ON((NR_CPUS * sizeof(u16)) > (PAGE_SIZE - 64)); - page = alloc_bootmem_low_pages(PAGE_SIZE); + if (use_bootmem) + page = alloc_bootmem_low_pages(PAGE_SIZE); + else + page = (void *) get_zeroed_page(GFP_ATOMIC); + if (!page) { prom_printf("SUN4V: Error, cannot allocate cpu mondo page.\n"); prom_halt(); @@ -986,27 +1009,30 @@ static void __init init_cpu_send_mondo_info(struct trap_per_cpu *tb) #endif } -/* Allocate mondo and error queues for all possible cpus. */ -static void __init sun4v_init_mondo_queues(void) +/* Allocate and register the mondo and error queues for this cpu. 
*/ +void __cpuinit sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load) { - int cpu; - - for_each_possible_cpu(cpu) { - struct trap_per_cpu *tb = &trap_block[cpu]; + struct trap_per_cpu *tb = &trap_block[cpu]; - alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask); - alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask); - alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask); - alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask); - alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask); - alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, - tb->nonresum_qmask); + if (alloc) { + alloc_one_mondo(&tb->cpu_mondo_pa, tb->cpu_mondo_qmask, use_bootmem); + alloc_one_mondo(&tb->dev_mondo_pa, tb->dev_mondo_qmask, use_bootmem); + alloc_one_mondo(&tb->resum_mondo_pa, tb->resum_qmask, use_bootmem); + alloc_one_kbuf(&tb->resum_kernel_buf_pa, tb->resum_qmask, use_bootmem); + alloc_one_mondo(&tb->nonresum_mondo_pa, tb->nonresum_qmask, use_bootmem); + alloc_one_kbuf(&tb->nonresum_kernel_buf_pa, tb->nonresum_qmask, use_bootmem); - init_cpu_send_mondo_info(tb); + init_cpu_send_mondo_info(tb, use_bootmem); } - /* Load up the boot cpu's entries. */ - sun4v_register_mondo_queues(hard_smp_processor_id()); + if (load) { + if (cpu != hard_smp_processor_id()) { + prom_printf("SUN4V: init mondo on cpu %d not %d\n", + cpu, hard_smp_processor_id()); + prom_halt(); + } + sun4v_register_mondo_queues(cpu); + } } static struct irqaction timer_irq_action = { @@ -1021,7 +1047,7 @@ void __init init_IRQ(void) memset(&ivector_table[0], 0, sizeof(ivector_table)); if (tlb_type == hypervisor) - sun4v_init_mondo_queues(); + sun4v_init_mondo_queues(1, hard_smp_processor_id(), 1, 1); /* We need to clear any IRQ's pending in the soft interrupt * registers, a spurious one could be left around from the diff --git a/trunk/arch/sparc64/kernel/mdesc.c b/trunk/arch/sparc64/kernel/mdesc.c index 95059c2ec414..cce4d0ddf5d5 100644 --- a/trunk/arch/sparc64/kernel/mdesc.c +++ b/trunk/arch/sparc64/kernel/mdesc.c @@ -568,6 +568,20 @@ static void __init report_platform_properties(void) mdesc_release(hp); } +static int inline find_in_proplist(const char *list, const char *match, int len) +{ + while (len > 0) { + int l; + + if (!strcmp(list, match)) + return 1; + l = strlen(list) + 1; + list += l; + len -= l; + } + return 0; +} + static void __devinit fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_handle *hp, u64 mp) @@ -582,10 +596,10 @@ static void __devinit fill_in_one_cache(cpuinfo_sparc *c, switch (*level) { case 1: - if (of_find_in_proplist(type, "instn", type_len)) { + if (find_in_proplist(type, "instn", type_len)) { c->icache_size = *size; c->icache_line_size = *line_size; - } else if (of_find_in_proplist(type, "data", type_len)) { + } else if (find_in_proplist(type, "data", type_len)) { c->dcache_size = *size; c->dcache_line_size = *line_size; } @@ -663,7 +677,7 @@ static void __devinit set_core_ids(struct mdesc_handle *hp) continue; type = mdesc_get_property(hp, mp, "type", &len); - if (!of_find_in_proplist(type, "instn", len)) + if (!find_in_proplist(type, "instn", len)) continue; mark_core_ids(hp, mp, idx); @@ -704,8 +718,8 @@ static void __devinit __set_proc_ids(struct mdesc_handle *hp, int len; type = mdesc_get_property(hp, mp, "type", &len); - if (!of_find_in_proplist(type, "int", len) && - !of_find_in_proplist(type, "integer", len)) + if (!find_in_proplist(type, "int", len) && + !find_in_proplist(type, "integer", len)) continue; mark_proc_ids(hp, mp, idx); diff --git a/trunk/arch/sparc64/kernel/prom.c 
b/trunk/arch/sparc64/kernel/prom.c index d1a78c976cef..f4e0a9ad9be3 100644 --- a/trunk/arch/sparc64/kernel/prom.c +++ b/trunk/arch/sparc64/kernel/prom.c @@ -107,21 +107,6 @@ int of_set_property(struct device_node *dp, const char *name, void *val, int len } EXPORT_SYMBOL(of_set_property); -int of_find_in_proplist(const char *list, const char *match, int len) -{ - while (len > 0) { - int l; - - if (!strcmp(list, match)) - return 1; - l = strlen(list) + 1; - list += l; - len -= l; - } - return 0; -} -EXPORT_SYMBOL(of_find_in_proplist); - static unsigned int prom_early_allocated; static void * __init prom_early_alloc(unsigned long size) diff --git a/trunk/arch/sparc64/kernel/smp.c b/trunk/arch/sparc64/kernel/smp.c index b84c49e3697c..b448d33321c6 100644 --- a/trunk/arch/sparc64/kernel/smp.c +++ b/trunk/arch/sparc64/kernel/smp.c @@ -334,6 +334,8 @@ static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg) } #endif +extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); + extern unsigned long sparc64_cpu_startup; /* The OBP cpu startup callback truncates the 3rd arg cookie to @@ -357,6 +359,9 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu) cpu_new_thread = task_thread_info(p); if (tlb_type == hypervisor) { + /* Alloc the mondo queues, cpu will load them. */ + sun4v_init_mondo_queues(0, cpu, 1, 0); + #if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU) if (ldom_domaining_enabled) ldom_startcpu_cpuid(cpu, diff --git a/trunk/arch/sparc64/kernel/sparc64_ksyms.c b/trunk/arch/sparc64/kernel/sparc64_ksyms.c index 23fad7ebdd0d..d270c2f0be0f 100644 --- a/trunk/arch/sparc64/kernel/sparc64_ksyms.c +++ b/trunk/arch/sparc64/kernel/sparc64_ksyms.c @@ -168,7 +168,6 @@ EXPORT_SYMBOL(change_bit); EXPORT_SYMBOL(__flushw_user); EXPORT_SYMBOL(tlb_type); -EXPORT_SYMBOL(sun4v_chip_type); EXPORT_SYMBOL(get_fb_unmapped_area); EXPORT_SYMBOL(flush_icache_range); diff --git a/trunk/arch/sparc64/kernel/trampoline.S b/trunk/arch/sparc64/kernel/trampoline.S index 9448595f9063..a4dc01a3d238 100644 --- a/trunk/arch/sparc64/kernel/trampoline.S +++ b/trunk/arch/sparc64/kernel/trampoline.S @@ -366,8 +366,11 @@ after_lock_tlb: call hard_smp_processor_id nop - call sun4v_register_mondo_queues - nop + mov %o0, %o1 + mov 0, %o0 + mov 0, %o2 + call sun4v_init_mondo_queues + mov 1, %o3 1: call init_cur_cpu_trap ldx [%l0], %o0 diff --git a/trunk/arch/sparc64/kernel/vio.c b/trunk/arch/sparc64/kernel/vio.c index 1550ac5673da..3685daf5157f 100644 --- a/trunk/arch/sparc64/kernel/vio.c +++ b/trunk/arch/sparc64/kernel/vio.c @@ -16,6 +16,21 @@ #include #include +static inline int find_in_proplist(const char *list, const char *match, + int len) +{ + while (len > 0) { + int l; + + if (!strcmp(list, match)) + return 1; + l = strlen(list) + 1; + list += l; + len -= l; + } + return 0; +} + static const struct vio_device_id *vio_match_device( const struct vio_device_id *matches, const struct vio_dev *dev) @@ -34,7 +49,7 @@ static const struct vio_device_id *vio_match_device( if (matches->compat[0]) { match &= len && - of_find_in_proplist(compat, matches->compat, len); + find_in_proplist(compat, matches->compat, len); } if (match) return matches; @@ -391,7 +406,7 @@ static int __init vio_init(void) "property\n"); goto out_release; } - if (!of_find_in_proplist(compat, channel_devices_compat, len)) { + if (!find_in_proplist(compat, channel_devices_compat, len)) { printk(KERN_ERR "VIO: Channel devices node lacks (%s) " "compat entry.\n", channel_devices_compat); goto out_release; diff 
--git a/trunk/arch/sparc64/lib/GENbzero.S b/trunk/arch/sparc64/lib/GENbzero.S deleted file mode 100644 index f9c71d64eba1..000000000000 --- a/trunk/arch/sparc64/lib/GENbzero.S +++ /dev/null @@ -1,160 +0,0 @@ -/* GENbzero.S: Generic sparc64 memset/clear_user. - * - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ -#include - -#define EX_ST(x,y) \ -98: x,y; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov %o1, %o0; \ - .section __ex_table; \ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; - - .align 32 - .text - - .globl GENmemset - .type GENmemset, #function -GENmemset: /* %o0=buf, %o1=pat, %o2=len */ - and %o1, 0xff, %o3 - mov %o2, %o1 - sllx %o3, 8, %g1 - or %g1, %o3, %o2 - sllx %o2, 16, %g1 - or %g1, %o2, %o2 - sllx %o2, 32, %g1 - ba,pt %xcc, 1f - or %g1, %o2, %o2 - - .globl GENbzero - .type GENbzero, #function -GENbzero: - clr %o2 -1: brz,pn %o1, GENbzero_return - mov %o0, %o3 - - /* %o5: saved %asi, restored at GENbzero_done - * %o4: store %asi to use - */ - rd %asi, %o5 - mov ASI_P, %o4 - wr %o4, 0x0, %asi - -GENbzero_from_clear_user: - cmp %o1, 15 - bl,pn %icc, GENbzero_tiny - andcc %o0, 0x7, %g1 - be,pt %xcc, 2f - mov 8, %g2 - sub %g2, %g1, %g1 - sub %o1, %g1, %o1 -1: EX_ST(stba %o2, [%o0 + 0x00] %asi) - subcc %g1, 1, %g1 - bne,pt %xcc, 1b - add %o0, 1, %o0 -2: cmp %o1, 128 - bl,pn %icc, GENbzero_medium - andcc %o0, (64 - 1), %g1 - be,pt %xcc, GENbzero_pre_loop - mov 64, %g2 - sub %g2, %g1, %g1 - sub %o1, %g1, %o1 -1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) - subcc %g1, 8, %g1 - bne,pt %xcc, 1b - add %o0, 8, %o0 - -GENbzero_pre_loop: - andn %o1, (64 - 1), %g1 - sub %o1, %g1, %o1 -GENbzero_loop: - EX_ST(stxa %o2, [%o0 + 0x00] %asi) - EX_ST(stxa %o2, [%o0 + 0x08] %asi) - EX_ST(stxa %o2, [%o0 + 0x10] %asi) - EX_ST(stxa %o2, [%o0 + 0x18] %asi) - EX_ST(stxa %o2, [%o0 + 0x20] %asi) - EX_ST(stxa %o2, [%o0 + 0x28] %asi) - EX_ST(stxa %o2, [%o0 + 0x30] %asi) - EX_ST(stxa %o2, [%o0 + 0x38] %asi) - subcc %g1, 64, %g1 - bne,pt %xcc, GENbzero_loop - add %o0, 64, %o0 - - membar #Sync - wr %o4, 0x0, %asi - brz,pn %o1, GENbzero_done -GENbzero_medium: - andncc %o1, 0x7, %g1 - be,pn %xcc, 2f - sub %o1, %g1, %o1 -1: EX_ST(stxa %o2, [%o0 + 0x00] %asi) - subcc %g1, 8, %g1 - bne,pt %xcc, 1b - add %o0, 8, %o0 -2: brz,pt %o1, GENbzero_done - nop - -GENbzero_tiny: -1: EX_ST(stba %o2, [%o0 + 0x00] %asi) - subcc %o1, 1, %o1 - bne,pt %icc, 1b - add %o0, 1, %o0 - - /* fallthrough */ - -GENbzero_done: - wr %o5, 0x0, %asi - -GENbzero_return: - retl - mov %o3, %o0 - .size GENbzero, .-GENbzero - .size GENmemset, .-GENmemset - - .globl GENclear_user - .type GENclear_user, #function -GENclear_user: /* %o0=buf, %o1=len */ - rd %asi, %o5 - brz,pn %o1, GENbzero_done - clr %o3 - cmp %o5, ASI_AIUS - bne,pn %icc, GENbzero - clr %o2 - ba,pt %xcc, GENbzero_from_clear_user - mov ASI_AIUS, %o4 - .size GENclear_user, .-GENclear_user - -#define BRANCH_ALWAYS 0x10680000 -#define NOP 0x01000000 -#define GEN_DO_PATCH(OLD, NEW) \ - sethi %hi(NEW), %g1; \ - or %g1, %lo(NEW), %g1; \ - sethi %hi(OLD), %g2; \ - or %g2, %lo(OLD), %g2; \ - sub %g1, %g2, %g1; \ - sethi %hi(BRANCH_ALWAYS), %g3; \ - sll %g1, 11, %g1; \ - srl %g1, 11 + 2, %g1; \ - or %g3, %lo(BRANCH_ALWAYS), %g3; \ - or %g3, %g1, %g3; \ - stw %g3, [%g2]; \ - sethi %hi(NOP), %g3; \ - or %g3, %lo(NOP), %g3; \ - stw %g3, [%g2 + 0x4]; \ - flush %g2; - - .globl generic_patch_bzero - .type generic_patch_bzero,#function -generic_patch_bzero: - GEN_DO_PATCH(memset, GENmemset) - GEN_DO_PATCH(__bzero, GENbzero) - GEN_DO_PATCH(__clear_user, GENclear_user) - retl 
- nop - .size generic_patch_bzero,.-generic_patch_bzero diff --git a/trunk/arch/sparc64/lib/GENcopy_from_user.S b/trunk/arch/sparc64/lib/GENcopy_from_user.S deleted file mode 100644 index 2b9df99e87f9..000000000000 --- a/trunk/arch/sparc64/lib/GENcopy_from_user.S +++ /dev/null @@ -1,34 +0,0 @@ -/* GENcopy_from_user.S: Generic sparc64 copy from userspace. - * - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ - -#define EX_LD(x) \ -98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ - .section __ex_table,"a";\ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; - -#ifndef ASI_AIUS -#define ASI_AIUS 0x11 -#endif - -#define FUNC_NAME GENcopy_from_user -#define LOAD(type,addr,dest) type##a [addr] ASI_AIUS, dest -#define EX_RETVAL(x) 0 - -#ifdef __KERNEL__ -#define PREAMBLE \ - rd %asi, %g1; \ - cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ - nop -#endif - -#include "GENmemcpy.S" diff --git a/trunk/arch/sparc64/lib/GENcopy_to_user.S b/trunk/arch/sparc64/lib/GENcopy_to_user.S deleted file mode 100644 index bb3f7084daf9..000000000000 --- a/trunk/arch/sparc64/lib/GENcopy_to_user.S +++ /dev/null @@ -1,38 +0,0 @@ -/* GENcopy_to_user.S: Generic sparc64 copy to userspace. - * - * Copyright (C) 2007 David S. Miller (davem@davemloft.net) - */ - -#define EX_ST(x) \ -98: x; \ - .section .fixup; \ - .align 4; \ -99: retl; \ - mov 1, %o0; \ - .section __ex_table,"a";\ - .align 4; \ - .word 98b, 99b; \ - .text; \ - .align 4; - -#ifndef ASI_AIUS -#define ASI_AIUS 0x11 -#endif - -#define FUNC_NAME GENcopy_to_user -#define STORE(type,src,addr) type##a src, [addr] ASI_AIUS -#define EX_RETVAL(x) 0 - -#ifdef __KERNEL__ - /* Writing to %asi is _expensive_ so we hardcode it. - * Reading %asi to check for KERNEL_DS is comparatively - * cheap. - */ -#define PREAMBLE \ - rd %asi, %g1; \ - cmp %g1, ASI_AIUS; \ - bne,pn %icc, memcpy_user_stub; \ - nop -#endif - -#include "GENmemcpy.S" diff --git a/trunk/arch/sparc64/lib/GENmemcpy.S b/trunk/arch/sparc64/lib/GENmemcpy.S deleted file mode 100644 index 89358ee94851..000000000000 --- a/trunk/arch/sparc64/lib/GENmemcpy.S +++ /dev/null @@ -1,121 +0,0 @@ -/* GENmemcpy.S: Generic sparc64 memcpy. - * - * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) - */ - -#ifdef __KERNEL__ -#define GLOBAL_SPARE %g7 -#else -#define GLOBAL_SPARE %g5 -#endif - -#ifndef EX_LD -#define EX_LD(x) x -#endif - -#ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x -#endif - -#ifndef LOAD -#define LOAD(type,addr,dest) type [addr], dest -#endif - -#ifndef STORE -#define STORE(type,src,addr) type src, [addr] -#endif - -#ifndef FUNC_NAME -#define FUNC_NAME GENmemcpy -#endif - -#ifndef PREAMBLE -#define PREAMBLE -#endif - -#ifndef XCC -#define XCC xcc -#endif - - .register %g2,#scratch - .register %g3,#scratch - - .text - .align 64 - - .globl FUNC_NAME - .type FUNC_NAME,#function -FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ - srlx %o2, 31, %g2 - cmp %g2, 0 - tne %XCC, 5 - PREAMBLE - mov %o0, GLOBAL_SPARE - - cmp %o2, 0 - be,pn %XCC, 85f - or %o0, %o1, %o3 - cmp %o2, 16 - blu,a,pn %XCC, 80f - or %o3, %o2, %o3 - - xor %o0, %o1, %o4 - andcc %o4, 0x7, %g0 - bne,a,pn %XCC, 90f - sub %o0, %o1, %o3 - - and %o0, 0x7, %o4 - sub %o4, 0x8, %o4 - sub %g0, %o4, %o4 - sub %o2, %o4, %o2 -1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) - add %o1, 1, %o1 - bne,pt %XCC, 1b - add %o0, 1, %o0 - - andn %o2, 0x7, %g1 - sub %o2, %g1, %o2 -1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) - add %o1, 0x8, %o1 - bne,pt %XCC, 1b - add %o0, 0x8, %o0 - - brz,pt %o2, 85f - sub %o0, %o1, %o3 - ba,a,pt %XCC, 90f - - .align 64 -80: /* 0 < len <= 16 */ - andcc %o3, 0x3, %g0 - bne,pn %XCC, 90f - sub %o0, %o1, %o3 - -1: - subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) - bgu,pt %XCC, 1b - add %o1, 4, %o1 - -85: retl - mov EX_RETVAL(GLOBAL_SPARE), %o0 - - .align 32 -90: - subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) - bgu,pt %XCC, 90b - add %o1, 1, %o1 - retl - mov EX_RETVAL(GLOBAL_SPARE), %o0 - - .size FUNC_NAME, .-FUNC_NAME diff --git a/trunk/arch/sparc64/lib/GENpage.S b/trunk/arch/sparc64/lib/GENpage.S deleted file mode 100644 index 2ef9d05f21bc..000000000000 --- a/trunk/arch/sparc64/lib/GENpage.S +++ /dev/null @@ -1,77 +0,0 @@ -/* GENpage.S: Generic clear and copy page. 
- * - * Copyright (C) 2007 (davem@davemloft.net) - */ -#include - - .text - .align 32 - -GENcopy_user_page: - set PAGE_SIZE, %g7 -1: ldx [%o1 + 0x00], %o2 - ldx [%o1 + 0x08], %o3 - ldx [%o1 + 0x10], %o4 - ldx [%o1 + 0x18], %o5 - stx %o2, [%o0 + 0x00] - stx %o3, [%o0 + 0x08] - stx %o4, [%o0 + 0x10] - stx %o5, [%o0 + 0x18] - ldx [%o1 + 0x20], %o2 - ldx [%o1 + 0x28], %o3 - ldx [%o1 + 0x30], %o4 - ldx [%o1 + 0x38], %o5 - stx %o2, [%o0 + 0x20] - stx %o3, [%o0 + 0x28] - stx %o4, [%o0 + 0x30] - stx %o5, [%o0 + 0x38] - subcc %g7, 64, %g7 - add %o1, 64, %o1 - bne,pt %xcc, 1b - add %o0, 64, %o0 - retl - nop - -GENclear_page: -GENclear_user_page: - set PAGE_SIZE, %g7 -1: stx %g0, [%o0 + 0x00] - stx %g0, [%o0 + 0x08] - stx %g0, [%o0 + 0x10] - stx %g0, [%o0 + 0x18] - stx %g0, [%o0 + 0x20] - stx %g0, [%o0 + 0x28] - stx %g0, [%o0 + 0x30] - stx %g0, [%o0 + 0x38] - subcc %g7, 64, %g7 - bne,pt %xcc, 1b - add %o0, 64, %o0 - -#define BRANCH_ALWAYS 0x10680000 -#define NOP 0x01000000 -#define GEN_DO_PATCH(OLD, NEW) \ - sethi %hi(NEW), %g1; \ - or %g1, %lo(NEW), %g1; \ - sethi %hi(OLD), %g2; \ - or %g2, %lo(OLD), %g2; \ - sub %g1, %g2, %g1; \ - sethi %hi(BRANCH_ALWAYS), %g3; \ - sll %g1, 11, %g1; \ - srl %g1, 11 + 2, %g1; \ - or %g3, %lo(BRANCH_ALWAYS), %g3; \ - or %g3, %g1, %g3; \ - stw %g3, [%g2]; \ - sethi %hi(NOP), %g3; \ - or %g3, %lo(NOP), %g3; \ - stw %g3, [%g2 + 0x4]; \ - flush %g2; - - .globl generic_patch_pageops - .type generic_patch_pageops,#function -generic_patch_pageops: - GEN_DO_PATCH(copy_user_page, GENcopy_user_page) - GEN_DO_PATCH(_clear_page, GENclear_page) - GEN_DO_PATCH(clear_user_page, GENclear_user_page) - retl - nop - .size generic_patch_pageops,.-generic_patch_pageops diff --git a/trunk/arch/sparc64/lib/GENpatch.S b/trunk/arch/sparc64/lib/GENpatch.S deleted file mode 100644 index fab9e89f16bd..000000000000 --- a/trunk/arch/sparc64/lib/GENpatch.S +++ /dev/null @@ -1,33 +0,0 @@ -/* GENpatch.S: Patch Ultra-I routines with generic variant. - * - * Copyright (C) 2007 David S. Miller - */ - -#define BRANCH_ALWAYS 0x10680000 -#define NOP 0x01000000 -#define GEN_DO_PATCH(OLD, NEW) \ - sethi %hi(NEW), %g1; \ - or %g1, %lo(NEW), %g1; \ - sethi %hi(OLD), %g2; \ - or %g2, %lo(OLD), %g2; \ - sub %g1, %g2, %g1; \ - sethi %hi(BRANCH_ALWAYS), %g3; \ - sll %g1, 11, %g1; \ - srl %g1, 11 + 2, %g1; \ - or %g3, %lo(BRANCH_ALWAYS), %g3; \ - or %g3, %g1, %g3; \ - stw %g3, [%g2]; \ - sethi %hi(NOP), %g3; \ - or %g3, %lo(NOP), %g3; \ - stw %g3, [%g2 + 0x4]; \ - flush %g2; - - .globl generic_patch_copyops - .type generic_patch_copyops,#function -generic_patch_copyops: - GEN_DO_PATCH(memcpy, GENmemcpy) - GEN_DO_PATCH(___copy_from_user, GENcopy_from_user) - GEN_DO_PATCH(___copy_to_user, GENcopy_to_user) - retl - nop - .size generic_patch_copyops,.-generic_patch_copyops diff --git a/trunk/arch/sparc64/lib/Makefile b/trunk/arch/sparc64/lib/Makefile index f95fbfa3eeb8..c4a6d6e7d03c 100644 --- a/trunk/arch/sparc64/lib/Makefile +++ b/trunk/arch/sparc64/lib/Makefile @@ -1,4 +1,4 @@ -# +# $Id: Makefile,v 1.25 2000/12/14 22:57:25 davem Exp $ # Makefile for Sparc64 library files.. 
# @@ -13,8 +13,6 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \ U3memcpy.o U3copy_from_user.o U3copy_to_user.o U3patch.o \ NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ NGpage.o NGbzero.o \ - GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o GENpatch.o \ - GENpage.o GENbzero.o \ copy_in_user.o user_fixup.o memmove.o \ mcount.o ipcsum.o rwsem.o xor.o diff --git a/trunk/drivers/lguest/core.c b/trunk/drivers/lguest/core.c index 4a315f08a567..0a46e8837d9a 100644 --- a/trunk/drivers/lguest/core.c +++ b/trunk/drivers/lguest/core.c @@ -453,11 +453,6 @@ static void run_guest_once(struct lguest *lg, struct lguest_pages *pages) * lguest_pages". */ copy_in_guest_info(lg, pages); - /* Set the trap number to 256 (impossible value). If we fault while - * switching to the Guest (bad segment registers or bug), this will - * cause us to abort the Guest. */ - lg->regs->trapnum = 256; - /* Now: we push the "eflags" register on the stack, then do an "lcall". * This is how we change from using the kernel code segment to using * the dedicated lguest code segment, as well as jumping into the diff --git a/trunk/drivers/lguest/interrupts_and_traps.c b/trunk/drivers/lguest/interrupts_and_traps.c index 49aa55577d0d..49787e964a0d 100644 --- a/trunk/drivers/lguest/interrupts_and_traps.c +++ b/trunk/drivers/lguest/interrupts_and_traps.c @@ -195,16 +195,13 @@ static int has_err(unsigned int trap) /* deliver_trap() returns true if it could deliver the trap. */ int deliver_trap(struct lguest *lg, unsigned int num) { - /* Trap numbers are always 8 bit, but we set an impossible trap number - * for traps inside the Switcher, so check that here. */ - if (num >= ARRAY_SIZE(lg->idt)) - return 0; + u32 lo = lg->idt[num].a, hi = lg->idt[num].b; /* Early on the Guest hasn't set the IDT entries (or maybe it put a * bogus one in): if we fail here, the Guest will be killed. */ - if (!idt_present(lg->idt[num].a, lg->idt[num].b)) + if (!idt_present(lo, hi)) return 0; - set_guest_interrupt(lg, lg->idt[num].a, lg->idt[num].b, has_err(num)); + set_guest_interrupt(lg, lo, hi, has_err(num)); return 1; } diff --git a/trunk/drivers/lguest/lguest.c b/trunk/drivers/lguest/lguest.c index 524beea7fb19..1bc1546c7fd0 100644 --- a/trunk/drivers/lguest/lguest.c +++ b/trunk/drivers/lguest/lguest.c @@ -323,12 +323,9 @@ static void lguest_write_gdt_entry(struct desc_struct *dt, * __thread variables). So we have a hypercall specifically for this case. */ static void lguest_load_tls(struct thread_struct *t, unsigned int cpu) { - /* There's one problem which normal hardware doesn't have: the Host - * can't handle us removing entries we're currently using. So we clear - * the GS register here: if it's needed it'll be reloaded anyway. */ - loadsegment(gs, 0); lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); } +/*:*/ /*G:038 That's enough excitement for now, back to ploughing through each of * the paravirt_ops (we're about 1/3 of the way through). @@ -690,8 +687,7 @@ static struct clocksource lguest_clock = { .rating = 400, .read = lguest_clock_read, .mask = CLOCKSOURCE_MASK(64), - .mult = 1 << 22, - .shift = 22, + .mult = 1, }; /* The "scheduler clock" is just our real clock, adjusted to start at zero */ @@ -774,6 +770,7 @@ static void lguest_time_init(void) * way, the "rating" is initialized so high that it's always chosen * over any other clocksource. 
*/ if (lguest_data.tsc_khz) { + lguest_clock.shift = 22; lguest_clock.mult = clocksource_khz2mult(lguest_data.tsc_khz, lguest_clock.shift); lguest_clock.flags = CLOCK_SOURCE_IS_CONTINUOUS; diff --git a/trunk/drivers/lguest/segments.c b/trunk/drivers/lguest/segments.c index 9b81119f46e9..f675a41a80da 100644 --- a/trunk/drivers/lguest/segments.c +++ b/trunk/drivers/lguest/segments.c @@ -43,6 +43,22 @@ * begin. */ +/* Is the descriptor the Guest wants us to put in OK? + * + * The flag which Intel says must be zero: must be zero. The descriptor must + * be present, (this is actually checked earlier but is here for thorougness), + * and the descriptor type must be 1 (a memory segment). */ +static int desc_ok(const struct desc_struct *gdt) +{ + return ((gdt->b & 0x00209000) == 0x00009000); +} + +/* Is the segment present? (Otherwise it can't be used by the Guest). */ +static int segment_present(const struct desc_struct *gdt) +{ + return gdt->b & 0x8000; +} + /* There are several entries we don't let the Guest set. The TSS entry is the * "Task State Segment" which controls all kinds of delicate things. The * LGUEST_CS and LGUEST_DS entries are reserved for the Switcher, and the @@ -55,11 +71,37 @@ static int ignored_gdt(unsigned int num) || num == GDT_ENTRY_DOUBLEFAULT_TSS); } -/*H:610 Once the GDT has been changed, we fix the new entries up a little. We - * don't care if they're invalid: the worst that can happen is a General - * Protection Fault in the Switcher when it restores a Guest segment register - * which tries to use that entry. Then we kill the Guest for causing such a - * mess: the message will be "unhandled trap 256". */ +/* If the Guest asks us to remove an entry from the GDT, we have to be careful. + * If one of the segment registers is pointing at that entry the Switcher will + * crash when it tries to reload the segment registers for the Guest. + * + * It doesn't make much sense for the Guest to try to remove its own code, data + * or stack segments while they're in use: assume that's a Guest bug. If it's + * one of the lesser segment registers using the removed entry, we simply set + * that register to 0 (unusable). */ +static void check_segment_use(struct lguest *lg, unsigned int desc) +{ + /* GDT entries are 8 bytes long, so we divide to get the index and + * ignore the bottom bits. */ + if (lg->regs->gs / 8 == desc) + lg->regs->gs = 0; + if (lg->regs->fs / 8 == desc) + lg->regs->fs = 0; + if (lg->regs->es / 8 == desc) + lg->regs->es = 0; + if (lg->regs->ds / 8 == desc + || lg->regs->cs / 8 == desc + || lg->regs->ss / 8 == desc) + kill_guest(lg, "Removed live GDT entry %u", desc); +} +/*:*/ +/*M:009 We wouldn't need to check for removal of in-use segments if we handled + * faults in the Switcher. However, it's probably not a worthwhile + * optimization. :*/ + +/*H:610 Once the GDT has been changed, we look through the changed entries and + * see if they're OK. If not, we'll call kill_guest() and the Guest will never + * get to use the invalid entries. */ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) { unsigned int i; @@ -70,6 +112,16 @@ static void fixup_gdt_table(struct lguest *lg, unsigned start, unsigned end) if (ignored_gdt(i)) continue; + /* We could fault in switch_to_guest if they are using + * a removed segment. 
*/ + if (!segment_present(&lg->gdt[i])) { + check_segment_use(lg, i); + continue; + } + + if (!desc_ok(&lg->gdt[i])) + kill_guest(lg, "Bad GDT descriptor %i", i); + /* Segment descriptors contain a privilege level: the Guest is * sometimes careless and leaves this as 0, even though it's * running at privilege level 1. If so, we fix it here. */ diff --git a/trunk/drivers/lguest/switcher.S b/trunk/drivers/lguest/switcher.S index 7c9c230cc845..d418179ea6b5 100644 --- a/trunk/drivers/lguest/switcher.S +++ b/trunk/drivers/lguest/switcher.S @@ -47,7 +47,6 @@ // Down here in the depths of assembler code. #include #include -#include #include "lg.h" // We mark the start of the code to copy @@ -183,15 +182,13 @@ ENTRY(switch_to_guest) movl $(LGUEST_DS), %eax; \ movl %eax, %ds; \ /* So where are we? Which CPU, which struct? \ - * The stack is our clue: our TSS starts \ - * It at the end of "struct lguest_pages". \ - * Or we may have stumbled while restoring \ - * Our Guest segment regs while in switch_to_guest, \ - * The fault pushed atop that part-unwound stack. \ - * If we round the stack down to the page start \ - * We're at the start of "struct lguest_pages". */ \ + * The stack is our clue: our TSS sets \ + * It at the end of "struct lguest_pages" \ + * And we then pushed and pushed and pushed Guest regs: \ + * Now stack points atop the "struct lguest_regs". \ + * Subtract that offset, and we find our struct. */ \ movl %esp, %eax; \ - andl $(~(1 << PAGE_SHIFT - 1)), %eax; \ + subl $LGUEST_PAGES_regs, %eax; \ /* Save our trap number: the switch will obscure it \ * (The Guest regs are not mapped here in the Host) \ * %ebx holds it safe for deliver_to_host */ \ diff --git a/trunk/drivers/mmc/card/queue.c b/trunk/drivers/mmc/card/queue.c index b0abc7d92805..c9a289c6c139 100644 --- a/trunk/drivers/mmc/card/queue.c +++ b/trunk/drivers/mmc/card/queue.c @@ -117,6 +117,7 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock struct mmc_host *host = card->host; u64 limit = BLK_BOUNCE_HIGH; int ret; + unsigned int bouncesz; if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask) limit = *mmc_dev(host)->dma_mask; @@ -133,8 +134,6 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock #ifdef CONFIG_MMC_BLOCK_BOUNCE if (host->max_hw_segs == 1) { - unsigned int bouncesz; - bouncesz = MMC_QUEUE_BOUNCESZ; if (bouncesz > host->max_req_size) @@ -157,14 +156,14 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock GFP_KERNEL); if (!mq->sg) { ret = -ENOMEM; - goto cleanup_queue; + goto free_bounce_buf; } mq->bounce_sg = kmalloc(sizeof(struct scatterlist) * bouncesz / 512, GFP_KERNEL); if (!mq->bounce_sg) { ret = -ENOMEM; - goto cleanup_queue; + goto free_sg; } } } @@ -198,13 +197,14 @@ int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card, spinlock_t *lock if (mq->bounce_sg) kfree(mq->bounce_sg); mq->bounce_sg = NULL; - cleanup_queue: - if (mq->sg) - kfree(mq->sg); + free_sg: + kfree(mq->sg); mq->sg = NULL; + free_bounce_buf: if (mq->bounce_buf) kfree(mq->bounce_buf); mq->bounce_buf = NULL; + cleanup_queue: blk_cleanup_queue(mq->queue); return ret; } diff --git a/trunk/drivers/mmc/host/at91_mci.c b/trunk/drivers/mmc/host/at91_mci.c index bfebd2fa7ada..62564ccde03a 100644 --- a/trunk/drivers/mmc/host/at91_mci.c +++ b/trunk/drivers/mmc/host/at91_mci.c @@ -83,7 +83,7 @@ #define AT91_MCI_ERRORS (AT91_MCI_RINDE | AT91_MCI_RDIRE | AT91_MCI_RCRCE \ | AT91_MCI_RENDE | AT91_MCI_RTOE | AT91_MCI_DCRCE \ - | AT91_MCI_DTOE | 
AT91_MCI_OVRE | AT91_MCI_UNRE) + | AT91_MCI_DTOE | AT91_MCI_OVRE | AT91_MCI_UNRE) #define at91_mci_read(host, reg) __raw_readl((host)->baseaddr + (reg)) #define at91_mci_write(host, reg, val) __raw_writel((val), (host)->baseaddr + (reg)) @@ -676,15 +676,15 @@ static irqreturn_t at91_mci_irq(int irq, void *devid) int_status = at91_mci_read(host, AT91_MCI_SR); int_mask = at91_mci_read(host, AT91_MCI_IMR); - + pr_debug("MCI irq: status = %08X, %08X, %08X\n", int_status, int_mask, int_status & int_mask); - + int_status = int_status & int_mask; if (int_status & AT91_MCI_ERRORS) { completed = 1; - + if (int_status & AT91_MCI_UNRE) pr_debug("MMC: Underrun error\n"); if (int_status & AT91_MCI_OVRE) diff --git a/trunk/drivers/mmc/host/wbsd.c b/trunk/drivers/mmc/host/wbsd.c index 9bf2a877113b..e0c9808fd424 100644 --- a/trunk/drivers/mmc/host/wbsd.c +++ b/trunk/drivers/mmc/host/wbsd.c @@ -1266,7 +1266,7 @@ static int __devinit wbsd_alloc_mmc(struct device *dev) return 0; } -static void wbsd_free_mmc(struct device *dev) +static void __devexit wbsd_free_mmc(struct device *dev) { struct mmc_host *mmc; struct wbsd_host *host; @@ -1358,7 +1358,7 @@ static int __devinit wbsd_request_region(struct wbsd_host *host, int base) return 0; } -static void wbsd_release_regions(struct wbsd_host *host) +static void __devexit wbsd_release_regions(struct wbsd_host *host) { if (host->base) release_region(host->base, 8); @@ -1434,7 +1434,7 @@ static void __devinit wbsd_request_dma(struct wbsd_host *host, int dma) "Falling back on FIFO.\n", dma); } -static void wbsd_release_dma(struct wbsd_host *host) +static void __devexit wbsd_release_dma(struct wbsd_host *host) { if (host->dma_addr) { dma_unmap_single(mmc_dev(host->mmc), host->dma_addr, @@ -1484,7 +1484,7 @@ static int __devinit wbsd_request_irq(struct wbsd_host *host, int irq) return 0; } -static void wbsd_release_irq(struct wbsd_host *host) +static void __devexit wbsd_release_irq(struct wbsd_host *host) { if (!host->irq) return; @@ -1535,7 +1535,7 @@ static int __devinit wbsd_request_resources(struct wbsd_host *host, * Release all resources for the host. 
*/ -static void wbsd_release_resources(struct wbsd_host *host) +static void __devexit wbsd_release_resources(struct wbsd_host *host) { wbsd_release_dma(host); wbsd_release_irq(host); diff --git a/trunk/drivers/net/atl1/atl1_main.c b/trunk/drivers/net/atl1/atl1_main.c index 3c1984ecf36c..56f6389a300e 100644 --- a/trunk/drivers/net/atl1/atl1_main.c +++ b/trunk/drivers/net/atl1/atl1_main.c @@ -1704,8 +1704,10 @@ static int atl1_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } } - if (!spin_trylock_irqsave(&adapter->lock, flags)) { + local_irq_save(flags); + if (!spin_trylock(&adapter->lock)) { /* Can't get lock - tell upper layer to requeue */ + local_irq_restore(flags); dev_printk(KERN_DEBUG, &adapter->pdev->dev, "tx locked\n"); return NETDEV_TX_LOCKED; } diff --git a/trunk/drivers/net/ehea/ehea.h b/trunk/drivers/net/ehea/ehea.h index d67f97bfa3a4..8ee2c2c86b42 100644 --- a/trunk/drivers/net/ehea/ehea.h +++ b/trunk/drivers/net/ehea/ehea.h @@ -39,7 +39,7 @@ #include #define DRV_NAME "ehea" -#define DRV_VERSION "EHEA_0073" +#define DRV_VERSION "EHEA_0072" /* eHEA capability flags */ #define DLPAR_PORT_ADD_REM 1 diff --git a/trunk/drivers/net/ehea/ehea_main.c b/trunk/drivers/net/ehea/ehea_main.c index 9756211e83ce..58702f54c3fb 100644 --- a/trunk/drivers/net/ehea/ehea_main.c +++ b/trunk/drivers/net/ehea/ehea_main.c @@ -1326,6 +1326,7 @@ static void write_swqe2_TSO(struct sk_buff *skb, u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0]; int skb_data_size = skb->len - skb->data_len; int headersize; + u64 tmp_addr; /* Packet is TCP with TSO enabled */ swqe->tx_control |= EHEA_SWQE_TSO; @@ -1346,8 +1347,9 @@ static void write_swqe2_TSO(struct sk_buff *skb, /* set sg1entry data */ sg1entry->l_key = lkey; sg1entry->len = skb_data_size - headersize; - sg1entry->vaddr = - ehea_map_vaddr(skb->data + headersize); + + tmp_addr = (u64)(skb->data + headersize); + sg1entry->vaddr = ehea_map_vaddr(tmp_addr); swqe->descriptors++; } } else @@ -1360,6 +1362,7 @@ static void write_swqe2_nonTSO(struct sk_buff *skb, int skb_data_size = skb->len - skb->data_len; u8 *imm_data = &swqe->u.immdata_desc.immediate_data[0]; struct ehea_vsgentry *sg1entry = &swqe->u.immdata_desc.sg_entry; + u64 tmp_addr; /* Packet is any nonTSO type * @@ -1376,8 +1379,8 @@ static void write_swqe2_nonTSO(struct sk_buff *skb, /* copy sg1entry data */ sg1entry->l_key = lkey; sg1entry->len = skb_data_size - SWQE2_MAX_IMM; - sg1entry->vaddr = - ehea_map_vaddr(skb->data + SWQE2_MAX_IMM); + tmp_addr = (u64)(skb->data + SWQE2_MAX_IMM); + sg1entry->vaddr = ehea_map_vaddr(tmp_addr); swqe->descriptors++; } } else { @@ -1392,6 +1395,7 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, struct ehea_vsgentry *sg_list, *sg1entry, *sgentry; skb_frag_t *frag; int nfrags, sg1entry_contains_frag_data, i; + u64 tmp_addr; nfrags = skb_shinfo(skb)->nr_frags; sg1entry = &swqe->u.immdata_desc.sg_entry; @@ -1413,9 +1417,9 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, /* copy sg1entry data */ sg1entry->l_key = lkey; sg1entry->len = frag->size; - sg1entry->vaddr = - ehea_map_vaddr(page_address(frag->page) - + frag->page_offset); + tmp_addr = (u64)(page_address(frag->page) + + frag->page_offset); + sg1entry->vaddr = ehea_map_vaddr(tmp_addr); swqe->descriptors++; sg1entry_contains_frag_data = 1; } @@ -1427,9 +1431,10 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, sgentry->l_key = lkey; sgentry->len = frag->size; - sgentry->vaddr = - 
ehea_map_vaddr(page_address(frag->page) - + frag->page_offset); + + tmp_addr = (u64)(page_address(frag->page) + + frag->page_offset); + sgentry->vaddr = ehea_map_vaddr(tmp_addr); swqe->descriptors++; } } @@ -2160,18 +2165,24 @@ static int ehea_clean_all_portres(struct ehea_port *port) return ret; } -static void ehea_remove_adapter_mr(struct ehea_adapter *adapter) +static void ehea_remove_adapter_mr (struct ehea_adapter *adapter) { - if (adapter->active_ports) - return; + int i; + + for (i=0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) + return; ehea_rem_mr(&adapter->mr); } -static int ehea_add_adapter_mr(struct ehea_adapter *adapter) +static int ehea_add_adapter_mr (struct ehea_adapter *adapter) { - if (adapter->active_ports) - return 0; + int i; + + for (i=0; i < EHEA_MAX_PORTS; i++) + if (adapter->port[i]) + return 0; return ehea_reg_kernel_mr(adapter, &adapter->mr); } @@ -3088,7 +3099,6 @@ int __init ehea_module_init(void) static void __exit ehea_module_exit(void) { - destroy_workqueue(ehea_driver_wq); driver_remove_file(&ehea_driver.driver, &driver_attr_capabilities); ibmebus_unregister_driver(&ehea_driver); ehea_destroy_busmap(); diff --git a/trunk/drivers/net/ibmveth.c b/trunk/drivers/net/ibmveth.c index acba90f1638e..d96eb7229548 100644 --- a/trunk/drivers/net/ibmveth.c +++ b/trunk/drivers/net/ibmveth.c @@ -963,7 +963,7 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_ { int rc, i; struct net_device *netdev; - struct ibmveth_adapter *adapter; + struct ibmveth_adapter *adapter = NULL; unsigned char *mac_addr_p; unsigned int *mcastFilterSize_p; @@ -997,6 +997,7 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_ SET_MODULE_OWNER(netdev); adapter = netdev->priv; + memset(adapter, 0, sizeof(adapter)); dev->dev.driver_data = netdev; adapter->vdev = dev; @@ -1279,28 +1280,24 @@ const char * buf, size_t count) int i; /* Make sure there is a buffer pool with buffers that can hold a packet of the size of the MTU */ - for (i = 0; i < IbmVethNumBufferPools; i++) { + for(i = 0; irx_buff_pool[i]) continue; if (!adapter->rx_buff_pool[i].active) continue; - if (mtu <= adapter->rx_buff_pool[i].buff_size) - break; + if (mtu < adapter->rx_buff_pool[i].buff_size) { + pool->active = 0; + h_free_logical_lan_buffer(adapter-> + vdev-> + unit_address, + pool-> + buff_size); + } } - - if (i == IbmVethNumBufferPools) { + if (pool->active) { ibmveth_error_printk("no active pool >= MTU\n"); return -EPERM; } - - pool->active = 0; - if (netif_running(netdev)) { - adapter->pool_config = 1; - ibmveth_close(netdev); - adapter->pool_config = 0; - if ((rc = ibmveth_open(netdev))) - return rc; - } } } else if (attr == &veth_num_attr) { if (value <= 0 || value > IBMVETH_MAX_POOL_COUNT) diff --git a/trunk/drivers/net/ibmveth.h b/trunk/drivers/net/ibmveth.h index 72cc15a6cab7..bb69ccae8ace 100644 --- a/trunk/drivers/net/ibmveth.h +++ b/trunk/drivers/net/ibmveth.h @@ -73,6 +73,9 @@ static inline long h_send_logical_lan(unsigned long unit_address, #define h_change_logical_lan_mac(ua, mac) \ plpar_hcall_norets(H_CHANGE_LOGICAL_LAN_MAC, ua, mac) +#define h_free_logical_lan_buffer(ua, bufsize) \ + plpar_hcall_norets(H_FREE_LOGICAL_LAN_BUFFER, ua, bufsize) + #define IbmVethNumBufferPools 5 #define IBMVETH_BUFF_OH 22 /* Overhead: 14 ethernet header + 8 opaque handle */ #define IBMVETH_MAX_MTU 68 diff --git a/trunk/drivers/net/phy/phy.c b/trunk/drivers/net/phy/phy.c index e323efd4ed18..f71dab347667 100644 --- a/trunk/drivers/net/phy/phy.c +++ 
@@ -261,7 +261,7 @@ void phy_sanitize_settings(struct phy_device *phydev)
 
 	/* Sanitize settings based on PHY capabilities */
 	if ((features & SUPPORTED_Autoneg) == 0)
-		phydev->autoneg = AUTONEG_DISABLE;
+		phydev->autoneg = 0;
 
 	idx = phy_find_valid(phy_find_setting(phydev->speed, phydev->duplex),
 			features);
@@ -374,7 +374,7 @@ int phy_mii_ioctl(struct phy_device *phydev,
 		if (mii_data->phy_id == phydev->addr) {
 			switch(mii_data->reg_num) {
 			case MII_BMCR:
-				if ((val & (BMCR_RESET|BMCR_ANENABLE)) == 0)
+				if (val & (BMCR_RESET|BMCR_ANENABLE))
 					phydev->autoneg = AUTONEG_DISABLE;
 				else
 					phydev->autoneg = AUTONEG_ENABLE;
diff --git a/trunk/drivers/net/r8169.c b/trunk/drivers/net/r8169.c
index b85ab4a8f2a3..c9333b9dd51a 100644
--- a/trunk/drivers/net/r8169.c
+++ b/trunk/drivers/net/r8169.c
@@ -725,12 +725,6 @@ static int rtl8169_set_speed_xmii(struct net_device *dev,
 		auto_nego |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
 
-	if (tp->mac_version == RTL_GIGA_MAC_VER_12) {
-		/* Vendor specific (0x1f) and reserved (0x0e) MII registers. */
-		mdio_write(ioaddr, 0x1f, 0x0000);
-		mdio_write(ioaddr, 0x0e, 0x0000);
-	}
-
 	tp->phy_auto_nego_reg = auto_nego;
 	tp->phy_1000_ctrl_reg = giga_ctrl;
 
@@ -2766,16 +2760,14 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 			rtl8169_check_link_status(dev, tp, ioaddr);
 
 #ifdef CONFIG_R8169_NAPI
-		if (status & tp->napi_event) {
-			RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
-			tp->intr_mask = ~tp->napi_event;
-
-			if (likely(netif_rx_schedule_prep(dev)))
-				__netif_rx_schedule(dev);
-			else if (netif_msg_intr(tp)) {
-				printk(KERN_INFO "%s: interrupt %04x in poll\n",
-				       dev->name, status);
-			}
+		RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
+		tp->intr_mask = ~tp->napi_event;
+
+		if (likely(netif_rx_schedule_prep(dev)))
+			__netif_rx_schedule(dev);
+		else if (netif_msg_intr(tp)) {
+			printk(KERN_INFO "%s: interrupt %04x taken in poll\n",
+			       dev->name, status);
 		}
 		break;
 #else
diff --git a/trunk/drivers/net/sis190.c b/trunk/drivers/net/sis190.c
index d470b19c0810..ec2ad9f0efa2 100644
--- a/trunk/drivers/net/sis190.c
+++ b/trunk/drivers/net/sis190.c
@@ -1593,9 +1593,6 @@ static int __devinit sis190_get_mac_addr_from_apc(struct pci_dev *pdev,
 		  pci_name(pdev));
 
 	isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0965, NULL);
-	if (!isa_bridge)
-		isa_bridge = pci_get_device(PCI_VENDOR_ID_SI, 0x0966, NULL);
-
 	if (!isa_bridge) {
 		net_probe(tp, KERN_INFO "%s: Can not find ISA bridge.\n",
 			  pci_name(pdev));
diff --git a/trunk/drivers/net/smc91x.h b/trunk/drivers/net/smc91x.h
index 6ff3a1627af8..f8429449dc1e 100644
--- a/trunk/drivers/net/smc91x.h
+++ b/trunk/drivers/net/smc91x.h
@@ -299,7 +299,7 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 
 #define SMC_CAN_USE_8BIT	1
 #define SMC_CAN_USE_16BIT	1
-#define SMC_CAN_USE_32BIT	0
+#define SMC_CAN_USE_32BIT	1
 
 #define SMC_inb(a, r)		inb((a) + (r))
 #define SMC_inw(a, r)		inw((a) + (r))
@@ -310,6 +310,8 @@ SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 
 #endif /* BOARDS */
 
+#define set_irq_type(irq, type) do {} while (0)
+
 #elif defined(CONFIG_M32R)
 
 #define SMC_CAN_USE_8BIT	0
diff --git a/trunk/drivers/net/ucc_geth_ethtool.c b/trunk/drivers/net/ucc_geth_ethtool.c
index 64bef7c12365..a8994c7b8583 100644
--- a/trunk/drivers/net/ucc_geth_ethtool.c
+++ b/trunk/drivers/net/ucc_geth_ethtool.c
@@ -379,6 +379,7 @@ static const struct ethtool_ops uec_ethtool_ops = {
 	.get_stats_count = uec_get_stats_count,
 	.get_strings = uec_get_strings,
 	.get_ethtool_stats = uec_get_ethtool_stats,
+	.get_perm_addr = ethtool_op_get_perm_addr,
 };
 
 void uec_set_ethtool_ops(struct net_device *netdev)
diff --git a/trunk/drivers/net/ucc_geth_mii.c b/trunk/drivers/net/ucc_geth_mii.c
index 6c257b88ce51..5f8c2d30a328 100644
--- a/trunk/drivers/net/ucc_geth_mii.c
+++ b/trunk/drivers/net/ucc_geth_mii.c
@@ -272,8 +272,7 @@ int __init uec_mdio_init(void)
 	return of_register_platform_driver(&uec_mdio_driver);
 }
 
-/* called from __init ucc_geth_init, therefore can not be __exit */
-void uec_mdio_exit(void)
+void __exit uec_mdio_exit(void)
 {
 	of_unregister_platform_driver(&uec_mdio_driver);
 }
diff --git a/trunk/drivers/net/wireless/bcm43xx/bcm43xx_phy.c b/trunk/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
index b37f1e348700..d779199c30d0 100644
--- a/trunk/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
+++ b/trunk/drivers/net/wireless/bcm43xx/bcm43xx_phy.c
@@ -1638,7 +1638,7 @@ void bcm43xx_phy_set_baseband_attenuation(struct bcm43xx_private *bcm,
 		return;
 	}
 
-	if (phy->analog > 1) {
+	if (phy->analog == 1) {
 		value = bcm43xx_phy_read(bcm, 0x0060) & ~0x003C;
 		value |= (baseband_attenuation << 2) & 0x003C;
 	} else {
diff --git a/trunk/drivers/net/wireless/rtl8187_dev.c b/trunk/drivers/net/wireless/rtl8187_dev.c
index e61c6d5ba1a9..cea85894b7f2 100644
--- a/trunk/drivers/net/wireless/rtl8187_dev.c
+++ b/trunk/drivers/net/wireless/rtl8187_dev.c
@@ -466,7 +466,7 @@ static int rtl8187_add_interface(struct ieee80211_hw *dev,
 		return -EOPNOTSUPP;
 	}
 
-	priv->hwaddr = conf->mac_addr ? conf->mac_addr : dev->wiphy->perm_addr;
+	priv->hwaddr = conf->mac_addr;
 
 	return 0;
 }
diff --git a/trunk/drivers/net/wireless/zd1211rw/zd_mac.c b/trunk/drivers/net/wireless/zd1211rw/zd_mac.c
index 26869d107e52..f6c487aa8246 100644
--- a/trunk/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/trunk/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -822,7 +822,7 @@ static void cs_set_control(struct zd_mac *mac, struct zd_ctrlset *cs,
 		cs->control |= ZD_CS_MULTICAST;
 
 	/* PS-POLL */
-	if (ftype == IEEE80211_FTYPE_CTL && stype == IEEE80211_STYPE_PSPOLL)
+	if (stype == IEEE80211_STYPE_PSPOLL)
 		cs->control |= ZD_CS_PS_POLL_FRAME;
 
 	/* Unicast data frames over the threshold should have RTS */
diff --git a/trunk/fs/compat_ioctl.c b/trunk/fs/compat_ioctl.c
index a6c9078af124..2bc1428d621c 100644
--- a/trunk/fs/compat_ioctl.c
+++ b/trunk/fs/compat_ioctl.c
@@ -3161,9 +3161,12 @@ COMPATIBLE_IOCTL(SIOCSIWSENS)
 COMPATIBLE_IOCTL(SIOCGIWSENS)
 COMPATIBLE_IOCTL(SIOCSIWRANGE)
 COMPATIBLE_IOCTL(SIOCSIWPRIV)
+COMPATIBLE_IOCTL(SIOCGIWPRIV)
 COMPATIBLE_IOCTL(SIOCSIWSTATS)
+COMPATIBLE_IOCTL(SIOCGIWSTATS)
 COMPATIBLE_IOCTL(SIOCSIWAP)
 COMPATIBLE_IOCTL(SIOCGIWAP)
+COMPATIBLE_IOCTL(SIOCSIWSCAN)
 COMPATIBLE_IOCTL(SIOCSIWRATE)
 COMPATIBLE_IOCTL(SIOCGIWRATE)
 COMPATIBLE_IOCTL(SIOCSIWRTS)
diff --git a/trunk/include/asm-sparc/prom.h b/trunk/include/asm-sparc/prom.h
index 71f2a1998324..350676c589f9 100644
--- a/trunk/include/asm-sparc/prom.h
+++ b/trunk/include/asm-sparc/prom.h
@@ -67,7 +67,6 @@ extern int of_set_property(struct device_node *node, const char *name, void *val
 extern int of_getintprop_default(struct device_node *np,
				 const char *name,
				 int def);
-extern int of_find_in_proplist(const char *list, const char *match, int len);
 
 extern void prom_build_devicetree(void);
 
diff --git a/trunk/include/asm-sparc64/oplib.h b/trunk/include/asm-sparc64/oplib.h
index 86dc5c018a19..3f23c5dc5f21 100644
--- a/trunk/include/asm-sparc64/oplib.h
+++ b/trunk/include/asm-sparc64/oplib.h
@@ -1,7 +1,8 @@
-/* oplib.h: Describes the interface and available routines in the
+/* $Id: oplib.h,v 1.14 2001/12/19 00:29:51 davem Exp $
+ * oplib.h: Describes the interface and available routines in the
  * Linux Prom library.
  *
- * Copyright (C) 1995, 2007 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
  * Copyright (C) 1996 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
  */
@@ -30,10 +31,8 @@ extern int prom_chosen_node;
 extern const char prom_peer_name[];
 extern const char prom_compatible_name[];
 extern const char prom_root_compatible[];
-extern const char prom_cpu_compatible[];
 extern const char prom_finddev_name[];
 extern const char prom_chosen_path[];
-extern const char prom_cpu_path[];
 extern const char prom_getprop_name[];
 extern const char prom_mmu_name[];
 extern const char prom_callmethod_name[];
diff --git a/trunk/include/asm-sparc64/prom.h b/trunk/include/asm-sparc64/prom.h
index 07843f9f05df..31dcb92fbae0 100644
--- a/trunk/include/asm-sparc64/prom.h
+++ b/trunk/include/asm-sparc64/prom.h
@@ -76,7 +76,6 @@ extern int of_set_property(struct device_node *node, const char *name, void *val
 extern int of_getintprop_default(struct device_node *np,
				 const char *name,
				 int def);
-extern int of_find_in_proplist(const char *list, const char *match, int len);
 
 extern void prom_build_devicetree(void);
 
diff --git a/trunk/include/asm-sparc64/spitfire.h b/trunk/include/asm-sparc64/spitfire.h
index cf7807813e85..23ad8a7987ad 100644
--- a/trunk/include/asm-sparc64/spitfire.h
+++ b/trunk/include/asm-sparc64/spitfire.h
@@ -38,11 +38,6 @@
 
 #define L1DCACHE_SIZE 0x4000
 
-#define SUN4V_CHIP_INVALID	0x00
-#define SUN4V_CHIP_NIAGARA1	0x01
-#define SUN4V_CHIP_NIAGARA2	0x02
-#define SUN4V_CHIP_UNKNOWN	0xff
-
 #ifndef __ASSEMBLY__
 
 enum ultra_tlb_layout {
@@ -54,8 +49,6 @@ enum ultra_tlb_layout {
 
 extern enum ultra_tlb_layout tlb_type;
 
-extern int sun4v_chip_type;
-
 extern int cheetah_pcache_forced_on;
 extern void cheetah_enable_pcache(void);
 
diff --git a/trunk/include/asm-sparc64/xor.h b/trunk/include/asm-sparc64/xor.h
index a0233884fc94..8ce3f1813e28 100644
--- a/trunk/include/asm-sparc64/xor.h
+++ b/trunk/include/asm-sparc64/xor.h
@@ -63,8 +63,4 @@ static struct xor_block_template xor_block_niagara = {
 
 /* For VIS for everything except Niagara. */
 #define XOR_SELECT_TEMPLATE(FASTEST) \
-	((tlb_type == hypervisor && \
-	  (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \
-	   sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \
-	 &xor_block_niagara : \
-	 &xor_block_VIS)
+	(tlb_type == hypervisor ? &xor_block_niagara : &xor_block_VIS)
diff --git a/trunk/include/linux/kernel.h b/trunk/include/linux/kernel.h
index b4f5b81b4257..4300bb462d29 100644
--- a/trunk/include/linux/kernel.h
+++ b/trunk/include/linux/kernel.h
@@ -224,7 +224,7 @@ extern void hex_dump_to_buffer(const void *buf, size_t len,
			char *linebuf, size_t linebuflen, bool ascii);
 extern void print_hex_dump(const char *level, const char *prefix_str,
				int prefix_type, int rowsize, int groupsize,
-				const void *buf, size_t len, bool ascii);
+				void *buf, size_t len, bool ascii);
 extern void print_hex_dump_bytes(const char *prefix_str, int prefix_type,
			void *buf, size_t len);
 #define hex_asc(x)	"0123456789abcdef"[x]
diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h
index 682ef87da6eb..17249fae5014 100644
--- a/trunk/include/linux/sched.h
+++ b/trunk/include/linux/sched.h
@@ -139,7 +139,7 @@ struct cfs_rq;
 extern void proc_sched_show_task(struct task_struct *p, struct seq_file *m);
 extern void proc_sched_set_task(struct task_struct *p);
 extern void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now);
 #else
 static inline void
 proc_sched_show_task(struct task_struct *p, struct seq_file *m)
@@ -149,7 +149,7 @@ static inline void proc_sched_set_task(struct task_struct *p)
 {
 }
 static inline void
-print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now)
 {
 }
 #endif
@@ -855,24 +855,26 @@ struct sched_domain;
 struct sched_class {
 	struct sched_class *next;
 
-	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
-	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+	void (*enqueue_task) (struct rq *rq, struct task_struct *p,
+			      int wakeup, u64 now);
+	void (*dequeue_task) (struct rq *rq, struct task_struct *p,
+			      int sleep, u64 now);
 	void (*yield_task) (struct rq *rq, struct task_struct *p);
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
 
-	struct task_struct * (*pick_next_task) (struct rq *rq);
-	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+	struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
+	void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
 
-	unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+	int (*load_balance) (struct rq *this_rq, int this_cpu,
			struct rq *busiest,
			unsigned long max_nr_move, unsigned long max_load_move,
			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, int *this_best_prio);
+			int *all_pinned, unsigned long *total_load_moved);
 
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
-	void (*task_new) (struct rq *rq, struct task_struct *p);
+	void (*task_new) (struct rq *rq, struct task_struct *p, u64 now);
 };
 
 struct load_weight {
diff --git a/trunk/include/net/netfilter/ipv4/nf_conntrack_ipv4.h b/trunk/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
index 9bf059817aec..7a671603fca6 100644
--- a/trunk/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
+++ b/trunk/include/net/netfilter/ipv4/nf_conntrack_ipv4.h
@@ -21,6 +21,4 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp;
 extern int nf_conntrack_ipv4_compat_init(void);
 extern void nf_conntrack_ipv4_compat_fini(void);
 
-extern void need_ipv4_conntrack(void);
-
 #endif /*_NF_CONNTRACK_IPV4_H*/
diff --git a/trunk/kernel/auditsc.c b/trunk/kernel/auditsc.c
index 3401293359e8..a777d3761416 100644
--- a/trunk/kernel/auditsc.c +++ b/trunk/kernel/auditsc.c @@ -1992,19 +1992,19 @@ int __audit_signal_info(int sig, struct task_struct *t) extern uid_t audit_sig_uid; extern u32 audit_sig_sid; - if (audit_pid && t->tgid == audit_pid) { - if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1) { - audit_sig_pid = tsk->pid; - if (ctx) - audit_sig_uid = ctx->loginuid; - else - audit_sig_uid = tsk->uid; - selinux_get_task_sid(tsk, &audit_sig_sid); - } - if (!audit_signals || audit_dummy_context()) - return 0; + if (audit_pid && t->tgid == audit_pid && + (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1)) { + audit_sig_pid = tsk->pid; + if (ctx) + audit_sig_uid = ctx->loginuid; + else + audit_sig_uid = tsk->uid; + selinux_get_task_sid(tsk, &audit_sig_sid); } + if (!audit_signals) /* audit_context checked in wrapper */ + return 0; + /* optimize the common case by putting first signal recipient directly * in audit_context */ if (!ctx->target_pid) { diff --git a/trunk/kernel/irq/resend.c b/trunk/kernel/irq/resend.c index 5bfeaed7e487..c38272746887 100644 --- a/trunk/kernel/irq/resend.c +++ b/trunk/kernel/irq/resend.c @@ -62,6 +62,15 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq) */ desc->chip->enable(irq); + /* + * Temporary hack to figure out more about the problem, which + * is causing the ancient network cards to die. + */ + if (desc->handle_irq != handle_edge_irq) { + WARN_ON_ONCE(1); + return; + } + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { desc->status = (status & ~IRQ_PENDING) | IRQ_REPLAY; diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index b0afd8db1396..72bb9483d949 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -318,19 +318,15 @@ static inline int cpu_of(struct rq *rq) } /* - * Update the per-runqueue clock, as finegrained as the platform can give - * us, but without assuming monotonicity, etc.: + * Per-runqueue clock, as finegrained as the platform can give us: */ -static void __update_rq_clock(struct rq *rq) +static unsigned long long __rq_clock(struct rq *rq) { u64 prev_raw = rq->prev_clock_raw; u64 now = sched_clock(); s64 delta = now - prev_raw; u64 clock = rq->clock; -#ifdef CONFIG_SCHED_DEBUG - WARN_ON_ONCE(cpu_of(rq) != smp_processor_id()); -#endif /* * Protect against sched_clock() occasionally going backwards: */ @@ -353,12 +349,18 @@ static void __update_rq_clock(struct rq *rq) rq->prev_clock_raw = now; rq->clock = clock; + + return clock; } -static void update_rq_clock(struct rq *rq) +static inline unsigned long long rq_clock(struct rq *rq) { - if (likely(smp_processor_id() == cpu_of(rq))) - __update_rq_clock(rq); + int this_cpu = smp_processor_id(); + + if (this_cpu == cpu_of(rq)) + return __rq_clock(rq); + + return rq->clock; } /* @@ -384,12 +386,9 @@ unsigned long long cpu_clock(int cpu) { unsigned long long now; unsigned long flags; - struct rq *rq; local_irq_save(flags); - rq = cpu_rq(cpu); - update_rq_clock(rq); - now = rq->clock; + now = rq_clock(cpu_rq(cpu)); local_irq_restore(flags); return now; @@ -638,11 +637,6 @@ static u64 div64_likely32(u64 divident, unsigned long divisor) #define WMULT_SHIFT 32 -/* - * Shift right and round: - */ -#define RSR(x, y) (((x) + (1UL << ((y) - 1))) >> (y)) - static unsigned long calc_delta_mine(unsigned long delta_exec, unsigned long weight, struct load_weight *lw) @@ -650,17 +644,18 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight, u64 tmp; if (unlikely(!lw->inv_weight)) - lw->inv_weight = (WMULT_CONST - lw->weight/2) / lw->weight + 1; + lw->inv_weight 
= WMULT_CONST / lw->weight; tmp = (u64)delta_exec * weight; /* * Check whether we'd overflow the 64-bit multiplication: */ - if (unlikely(tmp > WMULT_CONST)) - tmp = RSR(RSR(tmp, WMULT_SHIFT/2) * lw->inv_weight, - WMULT_SHIFT/2); - else - tmp = RSR(tmp * lw->inv_weight, WMULT_SHIFT); + if (unlikely(tmp > WMULT_CONST)) { + tmp = ((tmp >> WMULT_SHIFT/2) * lw->inv_weight) + >> (WMULT_SHIFT/2); + } else { + tmp = (tmp * lw->inv_weight) >> WMULT_SHIFT; + } return (unsigned long)min(tmp, (u64)(unsigned long)LONG_MAX); } @@ -708,14 +703,11 @@ static void update_load_sub(struct load_weight *lw, unsigned long dec) * the relative distance between them is ~25%.) */ static const int prio_to_weight[40] = { - /* -20 */ 88761, 71755, 56483, 46273, 36291, - /* -15 */ 29154, 23254, 18705, 14949, 11916, - /* -10 */ 9548, 7620, 6100, 4904, 3906, - /* -5 */ 3121, 2501, 1991, 1586, 1277, - /* 0 */ 1024, 820, 655, 526, 423, - /* 5 */ 335, 272, 215, 172, 137, - /* 10 */ 110, 87, 70, 56, 45, - /* 15 */ 36, 29, 23, 18, 15, +/* -20 */ 88818, 71054, 56843, 45475, 36380, 29104, 23283, 18626, 14901, 11921, +/* -10 */ 9537, 7629, 6103, 4883, 3906, 3125, 2500, 2000, 1600, 1280, +/* 0 */ NICE_0_LOAD /* 1024 */, +/* 1 */ 819, 655, 524, 419, 336, 268, 215, 172, 137, +/* 10 */ 110, 87, 70, 56, 45, 36, 29, 23, 18, 15, }; /* @@ -726,14 +718,14 @@ static const int prio_to_weight[40] = { * into multiplications: */ static const u32 prio_to_wmult[40] = { - /* -20 */ 48388, 59856, 76040, 92818, 118348, - /* -15 */ 147320, 184698, 229616, 287308, 360437, - /* -10 */ 449829, 563644, 704093, 875809, 1099582, - /* -5 */ 1376151, 1717300, 2157191, 2708050, 3363326, - /* 0 */ 4194304, 5237765, 6557202, 8165337, 10153587, - /* 5 */ 12820798, 15790321, 19976592, 24970740, 31350126, - /* 10 */ 39045157, 49367440, 61356676, 76695844, 95443717, - /* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, +/* -20 */ 48356, 60446, 75558, 94446, 118058, +/* -15 */ 147573, 184467, 230589, 288233, 360285, +/* -10 */ 450347, 562979, 703746, 879575, 1099582, +/* -5 */ 1374389, 1717986, 2147483, 2684354, 3355443, +/* 0 */ 4194304, 5244160, 6557201, 8196502, 10250518, +/* 5 */ 12782640, 16025997, 19976592, 24970740, 31350126, +/* 10 */ 39045157, 49367440, 61356675, 76695844, 95443717, +/* 15 */ 119304647, 148102320, 186737708, 238609294, 286331153, }; static void activate_task(struct rq *rq, struct task_struct *p, int wakeup); @@ -753,7 +745,8 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int *this_best_prio, struct rq_iterator *iterator); + int this_best_prio, int best_prio, int best_prio_seen, + struct rq_iterator *iterator); #include "sched_stats.h" #include "sched_rt.c" @@ -789,14 +782,14 @@ static void __update_curr_load(struct rq *rq, struct load_stat *ls) * This function is called /before/ updating rq->ls.load * and when switching tasks. */ -static void update_curr_load(struct rq *rq) +static void update_curr_load(struct rq *rq, u64 now) { struct load_stat *ls = &rq->ls; u64 start; start = ls->load_update_start; - ls->load_update_start = rq->clock; - ls->delta_stat += rq->clock - start; + ls->load_update_start = now; + ls->delta_stat += now - start; /* * Stagger updates to ls->delta_fair. Very frequent updates * can be expensive. 
@@ -805,28 +798,30 @@ static void update_curr_load(struct rq *rq) __update_curr_load(rq, ls); } -static inline void inc_load(struct rq *rq, const struct task_struct *p) +static inline void +inc_load(struct rq *rq, const struct task_struct *p, u64 now) { - update_curr_load(rq); + update_curr_load(rq, now); update_load_add(&rq->ls.load, p->se.load.weight); } -static inline void dec_load(struct rq *rq, const struct task_struct *p) +static inline void +dec_load(struct rq *rq, const struct task_struct *p, u64 now) { - update_curr_load(rq); + update_curr_load(rq, now); update_load_sub(&rq->ls.load, p->se.load.weight); } -static void inc_nr_running(struct task_struct *p, struct rq *rq) +static void inc_nr_running(struct task_struct *p, struct rq *rq, u64 now) { rq->nr_running++; - inc_load(rq, p); + inc_load(rq, p, now); } -static void dec_nr_running(struct task_struct *p, struct rq *rq) +static void dec_nr_running(struct task_struct *p, struct rq *rq, u64 now) { rq->nr_running--; - dec_load(rq, p); + dec_load(rq, p, now); } static void set_load_weight(struct task_struct *p) @@ -853,16 +848,18 @@ static void set_load_weight(struct task_struct *p) p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO]; } -static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, u64 now) { sched_info_queued(p); - p->sched_class->enqueue_task(rq, p, wakeup); + p->sched_class->enqueue_task(rq, p, wakeup, now); p->se.on_rq = 1; } -static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) +static void +dequeue_task(struct rq *rq, struct task_struct *p, int sleep, u64 now) { - p->sched_class->dequeue_task(rq, p, sleep); + p->sched_class->dequeue_task(rq, p, sleep, now); p->se.on_rq = 0; } @@ -917,11 +914,13 @@ static int effective_prio(struct task_struct *p) */ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) { + u64 now = rq_clock(rq); + if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - enqueue_task(rq, p, wakeup); - inc_nr_running(p, rq); + enqueue_task(rq, p, wakeup, now); + inc_nr_running(p, rq, now); } /* @@ -929,13 +928,13 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup) */ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) { - update_rq_clock(rq); + u64 now = rq_clock(rq); if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible--; - enqueue_task(rq, p, 0); - inc_nr_running(p, rq); + enqueue_task(rq, p, 0, now); + inc_nr_running(p, rq, now); } /* @@ -943,11 +942,13 @@ static inline void activate_idle_task(struct task_struct *p, struct rq *rq) */ static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep) { + u64 now = rq_clock(rq); + if (p->state == TASK_UNINTERRUPTIBLE) rq->nr_uninterruptible++; - dequeue_task(rq, p, sleep); - dec_nr_running(p, rq); + dequeue_task(rq, p, sleep, now); + dec_nr_running(p, rq, now); } /** @@ -1515,7 +1516,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) out_activate: #endif /* CONFIG_SMP */ - update_rq_clock(rq); activate_task(rq, p, 1); /* * Sync wakeups (i.e. 
those types of wakeups where the waker @@ -1647,11 +1647,12 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) unsigned long flags; struct rq *rq; int this_cpu; + u64 now; rq = task_rq_lock(p, &flags); BUG_ON(p->state != TASK_RUNNING); this_cpu = smp_processor_id(); /* parent's CPU */ - update_rq_clock(rq); + now = rq_clock(rq); p->prio = effective_prio(p); @@ -1665,8 +1666,8 @@ void fastcall wake_up_new_task(struct task_struct *p, unsigned long clone_flags) * Let the scheduling class do new task startup * management (if any): */ - p->sched_class->task_new(rq, p); - inc_nr_running(p, rq); + p->sched_class->task_new(rq, p, now); + inc_nr_running(p, rq, now); } check_preempt_curr(rq, p); task_rq_unlock(rq, &flags); @@ -1953,6 +1954,7 @@ static void update_cpu_load(struct rq *this_rq) unsigned long total_load = this_rq->ls.load.weight; unsigned long this_load = total_load; struct load_stat *ls = &this_rq->ls; + u64 now = __rq_clock(this_rq); int i, scale; this_rq->nr_load_updates++; @@ -1960,7 +1962,7 @@ static void update_cpu_load(struct rq *this_rq) goto do_avg; /* Update delta_fair/delta_exec fields first */ - update_curr_load(this_rq); + update_curr_load(this_rq, now); fair_delta64 = ls->delta_fair + 1; ls->delta_fair = 0; @@ -1968,8 +1970,8 @@ static void update_cpu_load(struct rq *this_rq) exec_delta64 = ls->delta_exec + 1; ls->delta_exec = 0; - sample_interval64 = this_rq->clock - ls->load_update_last; - ls->load_update_last = this_rq->clock; + sample_interval64 = now - ls->load_update_last; + ls->load_update_last = now; if ((s64)sample_interval64 < (s64)TICK_NSEC) sample_interval64 = TICK_NSEC; @@ -2024,8 +2026,6 @@ static void double_rq_lock(struct rq *rq1, struct rq *rq2) spin_lock(&rq1->lock); } } - update_rq_clock(rq1); - update_rq_clock(rq2); } /* @@ -2166,7 +2166,8 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int *this_best_prio, struct rq_iterator *iterator) + int this_best_prio, int best_prio, int best_prio_seen, + struct rq_iterator *iterator) { int pulled = 0, pinned = 0, skip_for_load; struct task_struct *p; @@ -2191,8 +2192,12 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, */ skip_for_load = (p->se.load.weight >> 1) > rem_load_move + SCHED_LOAD_SCALE_FUZZ; - if ((skip_for_load && p->prio >= *this_best_prio) || + if (skip_for_load && p->prio < this_best_prio) + skip_for_load = !best_prio_seen && p->prio == best_prio; + if (skip_for_load || !can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { + + best_prio_seen |= p->prio == best_prio; p = iterator->next(iterator->arg); goto next; } @@ -2206,8 +2211,8 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, * and the prescribed amount of weighted load. */ if (pulled < max_nr_move && rem_load_move > 0) { - if (p->prio < *this_best_prio) - *this_best_prio = p->prio; + if (p->prio < this_best_prio) + this_best_prio = p->prio; p = iterator->next(iterator->arg); goto next; } @@ -2226,52 +2231,32 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, } /* - * move_tasks tries to move up to max_load_move weighted load from busiest to - * this_rq, as part of a balancing operation within domain "sd". - * Returns 1 if successful and 0 otherwise. 
+ * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted + * load from busiest to this_rq, as part of a balancing operation within + * "domain". Returns the number of tasks moved. * * Called with both runqueues locked. */ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, - unsigned long max_load_move, + unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned) { struct sched_class *class = sched_class_highest; - unsigned long total_load_moved = 0; - int this_best_prio = this_rq->curr->prio; + unsigned long load_moved, total_nr_moved = 0, nr_moved; + long rem_load_move = max_load_move; do { - total_load_moved += - class->load_balance(this_rq, this_cpu, busiest, - ULONG_MAX, max_load_move - total_load_moved, - sd, idle, all_pinned, &this_best_prio); + nr_moved = class->load_balance(this_rq, this_cpu, busiest, + max_nr_move, (unsigned long)rem_load_move, + sd, idle, all_pinned, &load_moved); + total_nr_moved += nr_moved; + max_nr_move -= nr_moved; + rem_load_move -= load_moved; class = class->next; - } while (class && max_load_move > total_load_moved); + } while (class && max_nr_move && rem_load_move > 0); - return total_load_moved > 0; -} - -/* - * move_one_task tries to move exactly one task from busiest to this_rq, as - * part of active balancing operations within "domain". - * Returns 1 if successful and 0 otherwise. - * - * Called with both runqueues locked. - */ -static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, - struct sched_domain *sd, enum cpu_idle_type idle) -{ - struct sched_class *class; - int this_best_prio = MAX_PRIO; - - for (class = sched_class_highest; class; class = class->next) - if (class->load_balance(this_rq, this_cpu, busiest, - 1, ULONG_MAX, sd, idle, NULL, - &this_best_prio)) - return 1; - - return 0; + return total_nr_moved; } /* @@ -2603,6 +2588,11 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle, */ #define MAX_PINNED_INTERVAL 512 +static inline unsigned long minus_1_or_zero(unsigned long n) +{ + return n > 0 ? n - 1 : 0; +} + /* * Check this_cpu to ensure it is balanced within domain. Attempt to move * tasks if there is an imbalance. @@ -2611,7 +2601,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_domain *sd, enum cpu_idle_type idle, int *balance) { - int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; + int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; struct sched_group *group; unsigned long imbalance; struct rq *busiest; @@ -2652,17 +2642,18 @@ static int load_balance(int this_cpu, struct rq *this_rq, schedstat_add(sd, lb_imbalance[idle], imbalance); - ld_moved = 0; + nr_moved = 0; if (busiest->nr_running > 1) { /* * Attempt to move tasks. If find_busiest_group has found * an imbalance but busiest->nr_running <= 1, the group is - * still unbalanced. ld_moved simply stays zero, so it is + * still unbalanced. nr_moved simply stays zero, so it is * correctly treated as an imbalance. */ local_irq_save(flags); double_rq_lock(this_rq, busiest); - ld_moved = move_tasks(this_rq, this_cpu, busiest, + nr_moved = move_tasks(this_rq, this_cpu, busiest, + minus_1_or_zero(busiest->nr_running), imbalance, sd, idle, &all_pinned); double_rq_unlock(this_rq, busiest); local_irq_restore(flags); @@ -2670,7 +2661,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, /* * some other cpu did the load balance for us. 
*/ - if (ld_moved && this_cpu != smp_processor_id()) + if (nr_moved && this_cpu != smp_processor_id()) resched_cpu(this_cpu); /* All tasks on this runqueue were pinned by CPU affinity */ @@ -2682,7 +2673,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, } } - if (!ld_moved) { + if (!nr_moved) { schedstat_inc(sd, lb_failed[idle]); sd->nr_balance_failed++; @@ -2731,10 +2722,10 @@ static int load_balance(int this_cpu, struct rq *this_rq, sd->balance_interval *= 2; } - if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && + if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) return -1; - return ld_moved; + return nr_moved; out_balanced: schedstat_inc(sd, lb_balanced[idle]); @@ -2766,7 +2757,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) struct sched_group *group; struct rq *busiest = NULL; unsigned long imbalance; - int ld_moved = 0; + int nr_moved = 0; int sd_idle = 0; int all_pinned = 0; cpumask_t cpus = CPU_MASK_ALL; @@ -2801,13 +2792,12 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance); - ld_moved = 0; + nr_moved = 0; if (busiest->nr_running > 1) { /* Attempt to move tasks */ double_lock_balance(this_rq, busiest); - /* this_rq->clock is already updated */ - update_rq_clock(busiest); - ld_moved = move_tasks(this_rq, this_cpu, busiest, + nr_moved = move_tasks(this_rq, this_cpu, busiest, + minus_1_or_zero(busiest->nr_running), imbalance, sd, CPU_NEWLY_IDLE, &all_pinned); spin_unlock(&busiest->lock); @@ -2819,7 +2809,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) } } - if (!ld_moved) { + if (!nr_moved) { schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]); if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) @@ -2827,7 +2817,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) } else sd->nr_balance_failed = 0; - return ld_moved; + return nr_moved; out_balanced: schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]); @@ -2904,8 +2894,6 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) /* move a task from busiest_rq to target_rq */ double_lock_balance(busiest_rq, target_rq); - update_rq_clock(busiest_rq); - update_rq_clock(target_rq); /* Search for an sd spanning us and the target CPU. 
*/ for_each_domain(target_cpu, sd) { @@ -2917,8 +2905,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) if (likely(sd)) { schedstat_inc(sd, alb_cnt); - if (move_one_task(target_rq, target_cpu, busiest_rq, - sd, CPU_IDLE)) + if (move_tasks(target_rq, target_cpu, busiest_rq, 1, + ULONG_MAX, sd, CPU_IDLE, NULL)) schedstat_inc(sd, alb_pushed); else schedstat_inc(sd, alb_failed); @@ -3187,7 +3175,8 @@ static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, unsigned long max_nr_move, unsigned long max_load_move, struct sched_domain *sd, enum cpu_idle_type idle, int *all_pinned, unsigned long *load_moved, - int *this_best_prio, struct rq_iterator *iterator) + int this_best_prio, int best_prio, int best_prio_seen, + struct rq_iterator *iterator) { *load_moved = 0; @@ -3213,8 +3202,7 @@ unsigned long long task_sched_runtime(struct task_struct *p) rq = task_rq_lock(p, &flags); ns = p->se.sum_exec_runtime; if (rq->curr == p) { - update_rq_clock(rq); - delta_exec = rq->clock - p->se.exec_start; + delta_exec = rq_clock(rq) - p->se.exec_start; if ((s64)delta_exec > 0) ns += delta_exec; } @@ -3310,10 +3298,9 @@ void scheduler_tick(void) struct task_struct *curr = rq->curr; spin_lock(&rq->lock); - __update_rq_clock(rq); - update_cpu_load(rq); if (curr != rq->idle) /* FIXME: needed? */ curr->sched_class->task_tick(rq, curr); + update_cpu_load(rq); spin_unlock(&rq->lock); #ifdef CONFIG_SMP @@ -3395,7 +3382,7 @@ static inline void schedule_debug(struct task_struct *prev) * Pick up the highest-prio task: */ static inline struct task_struct * -pick_next_task(struct rq *rq, struct task_struct *prev) +pick_next_task(struct rq *rq, struct task_struct *prev, u64 now) { struct sched_class *class; struct task_struct *p; @@ -3405,14 +3392,14 @@ pick_next_task(struct rq *rq, struct task_struct *prev) * the fair class we can call that function directly: */ if (likely(rq->nr_running == rq->cfs.nr_running)) { - p = fair_sched_class.pick_next_task(rq); + p = fair_sched_class.pick_next_task(rq, now); if (likely(p)) return p; } class = sched_class_highest; for ( ; ; ) { - p = class->pick_next_task(rq); + p = class->pick_next_task(rq, now); if (p) return p; /* @@ -3431,6 +3418,7 @@ asmlinkage void __sched schedule(void) struct task_struct *prev, *next; long *switch_count; struct rq *rq; + u64 now; int cpu; need_resched: @@ -3448,7 +3436,6 @@ asmlinkage void __sched schedule(void) spin_lock_irq(&rq->lock); clear_tsk_need_resched(prev); - __update_rq_clock(rq); if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { if (unlikely((prev->state & TASK_INTERRUPTIBLE) && @@ -3463,8 +3450,9 @@ asmlinkage void __sched schedule(void) if (unlikely(!rq->nr_running)) idle_balance(cpu, rq); - prev->sched_class->put_prev_task(rq, prev); - next = pick_next_task(rq, prev); + now = __rq_clock(rq); + prev->sched_class->put_prev_task(rq, prev, now); + next = pick_next_task(rq, prev, now); sched_info_switch(prev, next); @@ -3907,16 +3895,17 @@ void rt_mutex_setprio(struct task_struct *p, int prio) unsigned long flags; int oldprio, on_rq; struct rq *rq; + u64 now; BUG_ON(prio < 0 || prio > MAX_PRIO); rq = task_rq_lock(p, &flags); - update_rq_clock(rq); + now = rq_clock(rq); oldprio = p->prio; on_rq = p->se.on_rq; if (on_rq) - dequeue_task(rq, p, 0); + dequeue_task(rq, p, 0, now); if (rt_prio(prio)) p->sched_class = &rt_sched_class; @@ -3926,7 +3915,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio) p->prio = prio; if (on_rq) { - enqueue_task(rq, p, 0); + enqueue_task(rq, p, 0, now); /* 
* Reschedule if we are currently running on this runqueue and * our priority decreased, or if we are not currently running on @@ -3949,6 +3938,7 @@ void set_user_nice(struct task_struct *p, long nice) int old_prio, delta, on_rq; unsigned long flags; struct rq *rq; + u64 now; if (TASK_NICE(p) == nice || nice < -20 || nice > 19) return; @@ -3957,7 +3947,7 @@ void set_user_nice(struct task_struct *p, long nice) * the task might be in the middle of scheduling on another CPU. */ rq = task_rq_lock(p, &flags); - update_rq_clock(rq); + now = rq_clock(rq); /* * The RT priorities are set via sched_setscheduler(), but we still * allow the 'normal' nice value to be set - but as expected @@ -3970,8 +3960,8 @@ void set_user_nice(struct task_struct *p, long nice) } on_rq = p->se.on_rq; if (on_rq) { - dequeue_task(rq, p, 0); - dec_load(rq, p); + dequeue_task(rq, p, 0, now); + dec_load(rq, p, now); } p->static_prio = NICE_TO_PRIO(nice); @@ -3981,8 +3971,8 @@ void set_user_nice(struct task_struct *p, long nice) delta = p->prio - old_prio; if (on_rq) { - enqueue_task(rq, p, 0); - inc_load(rq, p); + enqueue_task(rq, p, 0, now); + inc_load(rq, p, now); /* * If the task increased its priority or is running and * lowered its priority, then reschedule its CPU: @@ -4218,7 +4208,6 @@ int sched_setscheduler(struct task_struct *p, int policy, spin_unlock_irqrestore(&p->pi_lock, flags); goto recheck; } - update_rq_clock(rq); on_rq = p->se.on_rq; if (on_rq) deactivate_task(rq, p, 0); @@ -4474,8 +4463,10 @@ long sched_getaffinity(pid_t pid, cpumask_t *mask) out_unlock: read_unlock(&tasklist_lock); mutex_unlock(&sched_hotcpu_mutex); + if (retval) + return retval; - return retval; + return 0; } /** @@ -4975,7 +4966,6 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) on_rq = p->se.on_rq; if (on_rq) deactivate_task(rq_src, p, 0); - set_task_cpu(p, dest_cpu); if (on_rq) { activate_task(rq_dest, p, 0); @@ -5208,8 +5198,7 @@ static void migrate_dead_tasks(unsigned int dead_cpu) for ( ; ; ) { if (!rq->nr_running) break; - update_rq_clock(rq); - next = pick_next_task(rq, rq->curr); + next = pick_next_task(rq, rq->curr, rq_clock(rq)); if (!next) break; migrate_dead(dead_cpu, next); @@ -5221,19 +5210,12 @@ static void migrate_dead_tasks(unsigned int dead_cpu) #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) static struct ctl_table sd_ctl_dir[] = { - { - .procname = "sched_domain", - .mode = 0755, - }, + {CTL_UNNUMBERED, "sched_domain", NULL, 0, 0755, NULL, }, {0,}, }; static struct ctl_table sd_ctl_root[] = { - { - .procname = "kernel", - .mode = 0755, - .child = sd_ctl_dir, - }, + {CTL_UNNUMBERED, "kernel", NULL, 0, 0755, sd_ctl_dir, }, {0,}, }; @@ -5249,10 +5231,11 @@ static struct ctl_table *sd_alloc_ctl_entry(int n) } static void -set_table_entry(struct ctl_table *entry, +set_table_entry(struct ctl_table *entry, int ctl_name, const char *procname, void *data, int maxlen, mode_t mode, proc_handler *proc_handler) { + entry->ctl_name = ctl_name; entry->procname = procname; entry->data = data; entry->maxlen = maxlen; @@ -5265,28 +5248,28 @@ sd_alloc_ctl_domain_table(struct sched_domain *sd) { struct ctl_table *table = sd_alloc_ctl_entry(14); - set_table_entry(&table[0], "min_interval", &sd->min_interval, + set_table_entry(&table[0], 1, "min_interval", &sd->min_interval, sizeof(long), 0644, proc_doulongvec_minmax); - set_table_entry(&table[1], "max_interval", &sd->max_interval, + set_table_entry(&table[1], 2, "max_interval", &sd->max_interval, sizeof(long), 0644, proc_doulongvec_minmax); - 
set_table_entry(&table[2], "busy_idx", &sd->busy_idx, + set_table_entry(&table[2], 3, "busy_idx", &sd->busy_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[3], "idle_idx", &sd->idle_idx, + set_table_entry(&table[3], 4, "idle_idx", &sd->idle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx, + set_table_entry(&table[4], 5, "newidle_idx", &sd->newidle_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[5], "wake_idx", &sd->wake_idx, + set_table_entry(&table[5], 6, "wake_idx", &sd->wake_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx, + set_table_entry(&table[6], 7, "forkexec_idx", &sd->forkexec_idx, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[7], "busy_factor", &sd->busy_factor, + set_table_entry(&table[7], 8, "busy_factor", &sd->busy_factor, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct, + set_table_entry(&table[8], 9, "imbalance_pct", &sd->imbalance_pct, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[10], "cache_nice_tries", + set_table_entry(&table[10], 11, "cache_nice_tries", &sd->cache_nice_tries, sizeof(int), 0644, proc_dointvec_minmax); - set_table_entry(&table[12], "flags", &sd->flags, + set_table_entry(&table[12], 13, "flags", &sd->flags, sizeof(int), 0644, proc_dointvec_minmax); return table; @@ -5306,6 +5289,7 @@ static ctl_table *sd_alloc_ctl_cpu_table(int cpu) i = 0; for_each_domain(cpu, sd) { snprintf(buf, 32, "domain%d", i); + entry->ctl_name = i + 1; entry->procname = kstrdup(buf, GFP_KERNEL); entry->mode = 0755; entry->child = sd_alloc_ctl_domain_table(sd); @@ -5326,6 +5310,7 @@ static void init_sched_domain_sysctl(void) for (i = 0; i < cpu_num; i++, entry++) { snprintf(buf, 32, "cpu%d", i); + entry->ctl_name = i + 1; entry->procname = kstrdup(buf, GFP_KERNEL); entry->mode = 0755; entry->child = sd_alloc_ctl_cpu_table(i); @@ -5394,7 +5379,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) rq->migration_thread = NULL; /* Idle task back to normal (off runqueue, low prio) */ rq = task_rq_lock(rq->idle, &flags); - update_rq_clock(rq); deactivate_task(rq, rq->idle, 0); rq->idle->static_prio = MAX_PRIO; __setscheduler(rq, rq->idle, SCHED_NORMAL, 0); @@ -6632,13 +6616,12 @@ void normalize_rt_tasks(void) goto out_unlock; #endif - update_rq_clock(rq); on_rq = p->se.on_rq; if (on_rq) - deactivate_task(rq, p, 0); + deactivate_task(task_rq(p), p, 0); __setscheduler(rq, p, SCHED_NORMAL, 0); if (on_rq) { - activate_task(rq, p, 0); + activate_task(task_rq(p), p, 0); resched_task(rq->curr); } #ifdef CONFIG_SMP diff --git a/trunk/kernel/sched_debug.c b/trunk/kernel/sched_debug.c index 3da32156394e..8421b9399e10 100644 --- a/trunk/kernel/sched_debug.c +++ b/trunk/kernel/sched_debug.c @@ -29,7 +29,7 @@ } while (0) static void -print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) +print_task(struct seq_file *m, struct rq *rq, struct task_struct *p, u64 now) { if (rq->curr == p) SEQ_printf(m, "R"); @@ -56,7 +56,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) #endif } -static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) +static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu, u64 now) { struct task_struct *g, *p; @@ -77,7 +77,7 @@ static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu) if (!p->se.on_rq || task_cpu(p) != 
rq_cpu) continue; - print_task(m, rq, p); + print_task(m, rq, p, now); } while_each_thread(g, p); read_unlock_irq(&tasklist_lock); @@ -106,7 +106,7 @@ print_cfs_rq_runtime_sum(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) (long long)wait_runtime_rq_sum); } -void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) +void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq, u64 now) { SEQ_printf(m, "\ncfs_rq %p\n", cfs_rq); @@ -124,7 +124,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) print_cfs_rq_runtime_sum(m, cpu, cfs_rq); } -static void print_cpu(struct seq_file *m, int cpu) +static void print_cpu(struct seq_file *m, int cpu, u64 now) { struct rq *rq = &per_cpu(runqueues, cpu); @@ -166,9 +166,9 @@ static void print_cpu(struct seq_file *m, int cpu) P(cpu_load[4]); #undef P - print_cfs_stats(m, cpu); + print_cfs_stats(m, cpu, now); - print_rq(m, rq, cpu); + print_rq(m, rq, cpu, now); } static int sched_debug_show(struct seq_file *m, void *v) @@ -184,7 +184,7 @@ static int sched_debug_show(struct seq_file *m, void *v) SEQ_printf(m, "now at %Lu nsecs\n", (unsigned long long)now); for_each_online_cpu(cpu) - print_cpu(m, cpu); + print_cpu(m, cpu, now); SEQ_printf(m, "\n"); diff --git a/trunk/kernel/sched_fair.c b/trunk/kernel/sched_fair.c index e91db32cadfd..6f579ff5a9bc 100644 --- a/trunk/kernel/sched_fair.c +++ b/trunk/kernel/sched_fair.c @@ -222,25 +222,21 @@ niced_granularity(struct sched_entity *curr, unsigned long granularity) { u64 tmp; - if (likely(curr->load.weight == NICE_0_LOAD)) - return granularity; /* - * Positive nice levels get the same granularity as nice-0: + * Negative nice levels get the same granularity as nice-0: */ - if (likely(curr->load.weight < NICE_0_LOAD)) { - tmp = curr->load.weight * (u64)granularity; - return (long) (tmp >> NICE_0_SHIFT); - } + if (likely(curr->load.weight >= NICE_0_LOAD)) + return granularity; /* - * Negative nice level tasks get linearly finer + * Positive nice level tasks get linearly finer * granularity: */ - tmp = curr->load.inv_weight * (u64)granularity; + tmp = curr->load.weight * (u64)granularity; /* * It will always fit into 'long': */ - return (long) (tmp >> WMULT_SHIFT); + return (long) (tmp >> NICE_0_SHIFT); } static inline void @@ -285,25 +281,26 @@ add_wait_runtime(struct cfs_rq *cfs_rq, struct sched_entity *se, long delta) * are not in our scheduling class. 
*/ static inline void -__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) +__update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, u64 now) { - unsigned long delta, delta_exec, delta_fair, delta_mine; + unsigned long delta, delta_exec, delta_fair; + long delta_mine; struct load_weight *lw = &cfs_rq->load; unsigned long load = lw->weight; + if (unlikely(!load)) + return; + delta_exec = curr->delta_exec; schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max)); curr->sum_exec_runtime += delta_exec; cfs_rq->exec_clock += delta_exec; - if (unlikely(!load)) - return; - delta_fair = calc_delta_fair(delta_exec, lw); delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); - if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) { + if (cfs_rq->sleeper_bonus > sysctl_sched_stat_granularity) { delta = calc_delta_mine(cfs_rq->sleeper_bonus, curr->load.weight, lw); if (unlikely(delta > cfs_rq->sleeper_bonus)) @@ -324,7 +321,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) add_wait_runtime(cfs_rq, curr, delta_mine - delta_exec); } -static void update_curr(struct cfs_rq *cfs_rq) +static void update_curr(struct cfs_rq *cfs_rq, u64 now) { struct sched_entity *curr = cfs_rq_curr(cfs_rq); unsigned long delta_exec; @@ -337,22 +334,22 @@ static void update_curr(struct cfs_rq *cfs_rq) * since the last time we changed load (this cannot * overflow on 32 bits): */ - delta_exec = (unsigned long)(rq_of(cfs_rq)->clock - curr->exec_start); + delta_exec = (unsigned long)(now - curr->exec_start); curr->delta_exec += delta_exec; if (unlikely(curr->delta_exec > sysctl_sched_stat_granularity)) { - __update_curr(cfs_rq, curr); + __update_curr(cfs_rq, curr, now); curr->delta_exec = 0; } - curr->exec_start = rq_of(cfs_rq)->clock; + curr->exec_start = now; } static inline void -update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { se->wait_start_fair = cfs_rq->fair_clock; - schedstat_set(se->wait_start, rq_of(cfs_rq)->clock); + schedstat_set(se->wait_start, now); } /* @@ -380,7 +377,8 @@ calc_weighted(unsigned long delta, unsigned long weight, int shift) /* * Task is being enqueued - update stats: */ -static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) +static void +update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { s64 key; @@ -389,7 +387,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) * a dequeue/enqueue event is a NOP) */ if (se != cfs_rq_curr(cfs_rq)) - update_stats_wait_start(cfs_rq, se); + update_stats_wait_start(cfs_rq, se, now); /* * Update the key: */ @@ -409,8 +407,7 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) (WMULT_SHIFT - NICE_0_SHIFT); } else { tmp = se->wait_runtime; - key -= (tmp * se->load.inv_weight) >> - (WMULT_SHIFT - NICE_0_SHIFT); + key -= (tmp * se->load.weight) >> NICE_0_SHIFT; } } @@ -421,12 +418,11 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se) * Note: must be called with a freshly updated rq->fair_clock. 
*/ static inline void -__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) +__update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { unsigned long delta_fair = se->delta_fair_run; - schedstat_set(se->wait_max, max(se->wait_max, - rq_of(cfs_rq)->clock - se->wait_start)); + schedstat_set(se->wait_max, max(se->wait_max, now - se->wait_start)); if (unlikely(se->load.weight != NICE_0_LOAD)) delta_fair = calc_weighted(delta_fair, se->load.weight, @@ -436,7 +432,7 @@ __update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) } static void -update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { unsigned long delta_fair; @@ -446,7 +442,7 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) se->delta_fair_run += delta_fair; if (unlikely(abs(se->delta_fair_run) >= sysctl_sched_stat_granularity)) { - __update_stats_wait_end(cfs_rq, se); + __update_stats_wait_end(cfs_rq, se, now); se->delta_fair_run = 0; } @@ -455,34 +451,34 @@ update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se) } static inline void -update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { - update_curr(cfs_rq); + update_curr(cfs_rq, now); /* * Mark the end of the wait period if dequeueing a * waiting task: */ if (se != cfs_rq_curr(cfs_rq)) - update_stats_wait_end(cfs_rq, se); + update_stats_wait_end(cfs_rq, se, now); } /* * We are picking a new current task - update its stats: */ static inline void -update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { /* * We are starting a new run period: */ - se->exec_start = rq_of(cfs_rq)->clock; + se->exec_start = now; } /* * We are descheduling a task - update its stats: */ static inline void -update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se) +update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { se->exec_start = 0; } @@ -491,7 +487,8 @@ update_stats_curr_end(struct cfs_rq *cfs_rq, struct sched_entity *se) * Scheduling class queueing methods: */ -static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) +static void +__enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { unsigned long load = cfs_rq->load.weight, delta_fair; long prev_runtime; @@ -525,7 +522,8 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); } -static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) +static void +enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { struct task_struct *tsk = task_of(se); unsigned long delta_fair; @@ -540,7 +538,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) se->delta_fair_sleep += delta_fair; if (unlikely(abs(se->delta_fair_sleep) >= sysctl_sched_stat_granularity)) { - __enqueue_sleeper(cfs_rq, se); + __enqueue_sleeper(cfs_rq, se, now); se->delta_fair_sleep = 0; } @@ -548,7 +546,7 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) #ifdef CONFIG_SCHEDSTATS if (se->sleep_start) { - u64 delta = rq_of(cfs_rq)->clock - se->sleep_start; + u64 delta = now - se->sleep_start; if ((s64)delta < 0) delta = 0; @@ -560,7 +558,7 @@ static void 
enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) se->sum_sleep_runtime += delta; } if (se->block_start) { - u64 delta = rq_of(cfs_rq)->clock - se->block_start; + u64 delta = now - se->block_start; if ((s64)delta < 0) delta = 0; @@ -575,24 +573,26 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) } static void -enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) +enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, + int wakeup, u64 now) { /* * Update the fair clock. */ - update_curr(cfs_rq); + update_curr(cfs_rq, now); if (wakeup) - enqueue_sleeper(cfs_rq, se); + enqueue_sleeper(cfs_rq, se, now); - update_stats_enqueue(cfs_rq, se); + update_stats_enqueue(cfs_rq, se, now); __enqueue_entity(cfs_rq, se); } static void -dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) +dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, + int sleep, u64 now) { - update_stats_dequeue(cfs_rq, se); + update_stats_dequeue(cfs_rq, se, now); if (sleep) { se->sleep_start_fair = cfs_rq->fair_clock; #ifdef CONFIG_SCHEDSTATS @@ -600,9 +600,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) struct task_struct *tsk = task_of(se); if (tsk->state & TASK_INTERRUPTIBLE) - se->sleep_start = rq_of(cfs_rq)->clock; + se->sleep_start = now; if (tsk->state & TASK_UNINTERRUPTIBLE) - se->block_start = rq_of(cfs_rq)->clock; + se->block_start = now; } cfs_rq->wait_runtime -= se->wait_runtime; #endif @@ -629,7 +629,7 @@ __check_preempt_curr_fair(struct cfs_rq *cfs_rq, struct sched_entity *se, } static inline void -set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) +set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 now) { /* * Any task has to be enqueued before it get to execute on @@ -638,46 +638,49 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) * done a put_prev_task_fair() shortly before this, which * updated rq->fair_clock - used by update_stats_wait_end()) */ - update_stats_wait_end(cfs_rq, se); - update_stats_curr_start(cfs_rq, se); + update_stats_wait_end(cfs_rq, se, now); + update_stats_curr_start(cfs_rq, se, now); set_cfs_rq_curr(cfs_rq, se); } -static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) +static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq, u64 now) { struct sched_entity *se = __pick_next_entity(cfs_rq); - set_next_entity(cfs_rq, se); + set_next_entity(cfs_rq, se, now); return se; } -static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev) +static void +put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev, u64 now) { /* * If still on the runqueue then deactivate_task() * was not called and update_curr() has to be done: */ if (prev->on_rq) - update_curr(cfs_rq); + update_curr(cfs_rq, now); - update_stats_curr_end(cfs_rq, prev); + update_stats_curr_end(cfs_rq, prev, now); if (prev->on_rq) - update_stats_wait_start(cfs_rq, prev); + update_stats_wait_start(cfs_rq, prev, now); set_cfs_rq_curr(cfs_rq, NULL); } static void entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) { + struct rq *rq = rq_of(cfs_rq); struct sched_entity *next; + u64 now = __rq_clock(rq); /* * Dequeue and enqueue the task to update its * position within the tree: */ - dequeue_entity(cfs_rq, curr, 0); - enqueue_entity(cfs_rq, curr, 0); + dequeue_entity(cfs_rq, curr, 0, now); + enqueue_entity(cfs_rq, curr, 0, now); /* * Reschedule if another task tops the current one. 
@@ -782,7 +785,8 @@ static inline int is_same_group(struct task_struct *curr, struct task_struct *p) * increased. Here we update the fair scheduling stats and * then put the task into the rbtree: */ -static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) +static void +enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, u64 now) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; @@ -791,7 +795,7 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) if (se->on_rq) break; cfs_rq = cfs_rq_of(se); - enqueue_entity(cfs_rq, se, wakeup); + enqueue_entity(cfs_rq, se, wakeup, now); } } @@ -800,14 +804,15 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup) * decreased. We remove the task from the rbtree and * update the fair scheduling stats: */ -static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) +static void +dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep, u64 now) { struct cfs_rq *cfs_rq; struct sched_entity *se = &p->se; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - dequeue_entity(cfs_rq, se, sleep); + dequeue_entity(cfs_rq, se, sleep, now); /* Don't dequeue parent if it has other entities besides us */ if (cfs_rq->load.weight) break; @@ -820,14 +825,14 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep) static void yield_task_fair(struct rq *rq, struct task_struct *p) { struct cfs_rq *cfs_rq = task_cfs_rq(p); + u64 now = __rq_clock(rq); - __update_rq_clock(rq); /* * Dequeue and enqueue the task to update its * position within the tree: */ - dequeue_entity(cfs_rq, &p->se, 0); - enqueue_entity(cfs_rq, &p->se, 0); + dequeue_entity(cfs_rq, &p->se, 0, now); + enqueue_entity(cfs_rq, &p->se, 0, now); } /* @@ -840,8 +845,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) unsigned long gran; if (unlikely(rt_prio(p->prio))) { - update_rq_clock(rq); - update_curr(cfs_rq); + update_curr(cfs_rq, rq_clock(rq)); resched_task(curr); return; } @@ -857,7 +861,7 @@ static void check_preempt_curr_fair(struct rq *rq, struct task_struct *p) __check_preempt_curr_fair(cfs_rq, &p->se, &curr->se, gran); } -static struct task_struct *pick_next_task_fair(struct rq *rq) +static struct task_struct *pick_next_task_fair(struct rq *rq, u64 now) { struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; @@ -866,7 +870,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) return NULL; do { - se = pick_next_entity(cfs_rq); + se = pick_next_entity(cfs_rq, now); cfs_rq = group_cfs_rq(se); } while (cfs_rq); @@ -876,14 +880,14 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) /* * Account for a descheduled task: */ -static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) +static void put_prev_task_fair(struct rq *rq, struct task_struct *prev, u64 now) { struct sched_entity *se = &prev->se; struct cfs_rq *cfs_rq; for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); - put_prev_entity(cfs_rq, se); + put_prev_entity(cfs_rq, se, now); } } @@ -926,7 +930,6 @@ static struct task_struct *load_balance_next_fair(void *arg) return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr); } -#ifdef CONFIG_FAIR_GROUP_SCHED static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) { struct sched_entity *curr; @@ -940,13 +943,12 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) return p->prio; } -#endif -static unsigned long +static int load_balance_fair(struct rq *this_rq, int 
this_cpu, struct rq *busiest, - unsigned long max_nr_move, unsigned long max_load_move, - struct sched_domain *sd, enum cpu_idle_type idle, - int *all_pinned, int *this_best_prio) + unsigned long max_nr_move, unsigned long max_load_move, + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned, unsigned long *total_load_moved) { struct cfs_rq *busy_cfs_rq; unsigned long load_moved, total_nr_moved = 0, nr_moved; @@ -957,10 +959,10 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, cfs_rq_iterator.next = load_balance_next_fair; for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { -#ifdef CONFIG_FAIR_GROUP_SCHED struct cfs_rq *this_cfs_rq; - long imbalances; + long imbalance; unsigned long maxload; + int this_best_prio, best_prio, best_prio_seen = 0; this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu); @@ -974,17 +976,27 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, imbalance /= 2; maxload = min(rem_load_move, imbalance); - *this_best_prio = cfs_rq_best_prio(this_cfs_rq); -#else -#define maxload rem_load_move -#endif + this_best_prio = cfs_rq_best_prio(this_cfs_rq); + best_prio = cfs_rq_best_prio(busy_cfs_rq); + + /* + * Enable handling of the case where there is more than one task + * with the best priority. If the current running task is one + * of those with prio==best_prio we know it won't be moved + * and therefore it's safe to override the skip (based on load) + * of any task we find with that prio. + */ + if (cfs_rq_curr(busy_cfs_rq) == &busiest->curr->se) + best_prio_seen = 1; + /* pass busy_cfs_rq argument into * load_balance_[start|next]_fair iterators */ cfs_rq_iterator.arg = busy_cfs_rq; nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, maxload, sd, idle, all_pinned, - &load_moved, this_best_prio, &cfs_rq_iterator); + &load_moved, this_best_prio, best_prio, + best_prio_seen, &cfs_rq_iterator); total_nr_moved += nr_moved; max_nr_move -= nr_moved; @@ -994,7 +1006,9 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, break; } - return max_load_move - rem_load_move; + *total_load_moved = max_load_move - rem_load_move; + + return total_nr_moved; } /* @@ -1018,14 +1032,14 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr) * monopolize the CPU. Note: the parent runqueue is locked, * the child is not running yet. */ -static void task_new_fair(struct rq *rq, struct task_struct *p) +static void task_new_fair(struct rq *rq, struct task_struct *p, u64 now) { struct cfs_rq *cfs_rq = task_cfs_rq(p); struct sched_entity *se = &p->se; sched_info_queued(p); - update_stats_enqueue(cfs_rq, se); + update_stats_enqueue(cfs_rq, se, now); /* * Child runs first: we let it run before the parent * until it reschedules once. 
@@ -1058,10 +1072,15 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
  */
 static void set_curr_task_fair(struct rq *rq)
 {
-	struct sched_entity *se = &rq->curr.se;
+	struct task_struct *curr = rq->curr;
+	struct sched_entity *se = &curr->se;
+	u64 now = rq_clock(rq);
+	struct cfs_rq *cfs_rq;
 
-	for_each_sched_entity(se)
-		set_next_entity(cfs_rq_of(se), se);
+	for_each_sched_entity(se) {
+		cfs_rq = cfs_rq_of(se);
+		set_next_entity(cfs_rq, se, now);
+	}
 }
 #else
 static void set_curr_task_fair(struct rq *rq)
@@ -1090,11 +1109,12 @@ struct sched_class fair_sched_class __read_mostly = {
 };
 
 #ifdef CONFIG_SCHED_DEBUG
-static void print_cfs_stats(struct seq_file *m, int cpu)
+void print_cfs_stats(struct seq_file *m, int cpu, u64 now)
 {
+	struct rq *rq = cpu_rq(cpu);
 	struct cfs_rq *cfs_rq;
 
-	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
-		print_cfs_rq(m, cpu, cfs_rq);
+	for_each_leaf_cfs_rq(rq, cfs_rq)
+		print_cfs_rq(m, cpu, cfs_rq, now);
 }
 #endif
diff --git a/trunk/kernel/sched_idletask.c b/trunk/kernel/sched_idletask.c
index 3503fb2d9f96..41841e741c4a 100644
--- a/trunk/kernel/sched_idletask.c
+++ b/trunk/kernel/sched_idletask.c
@@ -13,7 +13,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p)
 	resched_task(rq->idle);
 }
 
-static struct task_struct *pick_next_task_idle(struct rq *rq)
+static struct task_struct *pick_next_task_idle(struct rq *rq, u64 now)
 {
 	schedstat_inc(rq, sched_goidle);
 
@@ -25,7 +25,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq)
  * message if some code attempts to do it:
  */
 static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep, u64 now)
 {
 	spin_unlock_irq(&rq->lock);
 	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
@@ -33,15 +33,15 @@ dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
 	spin_lock_irq(&rq->lock);
 }
 
-static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
+static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
 {
 }
 
-static unsigned long
+static int
 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, int *this_best_prio)
+			int *all_pinned, unsigned long *total_load_moved)
 {
 	return 0;
 }
diff --git a/trunk/kernel/sched_rt.c b/trunk/kernel/sched_rt.c
index dcdcad632fd9..002fcf8d3f64 100644
--- a/trunk/kernel/sched_rt.c
+++ b/trunk/kernel/sched_rt.c
@@ -7,7 +7,7 @@
  * Update the current task's runtime statistics. Skip current tasks that
  * are not in our scheduling class.
  */
-static inline void update_curr_rt(struct rq *rq)
+static inline void update_curr_rt(struct rq *rq, u64 now)
 {
 	struct task_struct *curr = rq->curr;
 	u64 delta_exec;
@@ -15,17 +15,18 @@ static inline void update_curr_rt(struct rq *rq)
 	if (!task_has_rt_policy(curr))
 		return;
 
-	delta_exec = rq->clock - curr->se.exec_start;
+	delta_exec = now - curr->se.exec_start;
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
 	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
-	curr->se.exec_start = rq->clock;
+	curr->se.exec_start = now;
 }
 
-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+static void
+enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, u64 now)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 
@@ -36,11 +37,12 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 /*
  * Adding/removing a task to/from a priority array:
  */
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
+static void
+dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep, u64 now)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 
-	update_curr_rt(rq);
+	update_curr_rt(rq, now);
 
 	list_del(&p->run_list);
 	if (list_empty(array->queue + p->prio))
@@ -73,7 +75,7 @@ static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 	resched_task(rq->curr);
 }
 
-static struct task_struct *pick_next_task_rt(struct rq *rq)
+static struct task_struct *pick_next_task_rt(struct rq *rq, u64 now)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -87,14 +89,14 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
 	queue = array->queue + idx;
 	next = list_entry(queue->next, struct task_struct, run_list);
 
-	next->se.exec_start = rq->clock;
+	next->se.exec_start = now;
 
 	return next;
 }
 
-static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
+static void put_prev_task_rt(struct rq *rq, struct task_struct *p, u64 now)
 {
-	update_curr_rt(rq);
+	update_curr_rt(rq, now);
 	p->se.exec_start = 0;
 }
 
@@ -170,15 +172,28 @@ static struct task_struct *load_balance_next_rt(void *arg)
 	return p;
 }
 
-static unsigned long
+static int
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, int *this_best_prio)
+			int *all_pinned, unsigned long *load_moved)
 {
+	int this_best_prio, best_prio, best_prio_seen = 0;
 	int nr_moved;
 	struct rq_iterator rt_rq_iterator;
-	unsigned long load_moved;
+
+	best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
+	this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
+
+	/*
+	 * Enable handling of the case where there is more than one task
+	 * with the best priority. If the current running task is one
+	 * of those with prio==best_prio we know it won't be moved
+	 * and therefore it's safe to override the skip (based on load)
+	 * of any task we find with that prio.
+	 */
+	if (busiest->curr->prio == best_prio)
+		best_prio_seen = 1;
 
 	rt_rq_iterator.start = load_balance_start_rt;
 	rt_rq_iterator.next = load_balance_next_rt;
@@ -188,10 +203,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	rt_rq_iterator.arg = busiest;
 
 	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, &load_moved,
-			this_best_prio, &rt_rq_iterator);
+			max_load_move, sd, idle, all_pinned, load_moved,
+			this_best_prio, best_prio, best_prio_seen,
+			&rt_rq_iterator);
 
-	return load_moved;
+	return nr_moved;
 }
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
diff --git a/trunk/lib/hexdump.c b/trunk/lib/hexdump.c
index 16f2e2935e87..473f5aed6cae 100644
--- a/trunk/lib/hexdump.c
+++ b/trunk/lib/hexdump.c
@@ -145,9 +145,9 @@ EXPORT_SYMBOL(hex_dump_to_buffer);
  */
 void print_hex_dump(const char *level, const char *prefix_str, int prefix_type,
 		int rowsize, int groupsize,
-		const void *buf, size_t len, bool ascii)
+		void *buf, size_t len, bool ascii)
 {
-	const u8 *ptr = buf;
+	u8 *ptr = buf;
 	int i, linelen, remaining = len;
 	unsigned char linebuf[200];
 
diff --git a/trunk/net/core/utils.c b/trunk/net/core/utils.c
index 0bf17da40d52..2030bb8c2d30 100644
--- a/trunk/net/core/utils.c
+++ b/trunk/net/core/utils.c
@@ -25,7 +25,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 
diff --git a/trunk/net/ieee80211/softmac/ieee80211softmac_wx.c b/trunk/net/ieee80211/softmac/ieee80211softmac_wx.c
index d054e9224b3e..f13937bf9e8c 100644
--- a/trunk/net/ieee80211/softmac/ieee80211softmac_wx.c
+++ b/trunk/net/ieee80211/softmac/ieee80211softmac_wx.c
@@ -74,8 +74,8 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
 	struct ieee80211softmac_auth_queue_item *authptr;
 	int length = 0;
 
-check_assoc_again:
 	mutex_lock(&sm->associnfo.mutex);
+
 	/* Check if we're already associating to this or another network
 	 * If it's another network, cancel and start over with our new network
 	 * If it's our network, ignore the change, we're already doing it!
 	 */
@@ -98,18 +98,13 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev,
 			cancel_delayed_work(&authptr->work);
 		sm->associnfo.bssvalid = 0;
 		sm->associnfo.bssfixed = 0;
+		flush_scheduled_work();
 		sm->associnfo.associating = 0;
 		sm->associnfo.associated = 0;
-		/* We must unlock to avoid deadlocks with the assoc workqueue
-		 * on the associnfo.mutex */
-		mutex_unlock(&sm->associnfo.mutex);
-		flush_scheduled_work();
-		/* Avoid race! Check assoc status again. Maybe someone started an
-		 * association while we flushed. */
-		goto check_assoc_again;
 		}
 	}
+
 	sm->associnfo.static_essid = 0;
 	sm->associnfo.assoc_wait = 0;
diff --git a/trunk/net/ipv4/netfilter/ipt_recent.c b/trunk/net/ipv4/netfilter/ipt_recent.c
index 6d0c0f7364ad..321804315659 100644
--- a/trunk/net/ipv4/netfilter/ipt_recent.c
+++ b/trunk/net/ipv4/netfilter/ipt_recent.c
@@ -387,17 +387,12 @@ static int recent_seq_open(struct inode *inode, struct file *file)
 	st = kzalloc(sizeof(*st), GFP_KERNEL);
 	if (st == NULL)
 		return -ENOMEM;
-
 	ret = seq_open(file, &recent_seq_ops);
-	if (ret) {
+	if (ret)
 		kfree(st);
-		goto out;
-	}
-
 	st->table = pde->data;
 	seq = file->private_data;
 	seq->private = st;
-out:
 	return ret;
 }
diff --git a/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d9b5177989c6..64552afd01cb 100644
--- a/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -509,9 +509,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
 
 module_init(nf_conntrack_l3proto_ipv4_init);
 module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-void need_ipv4_conntrack(void)
-{
-	return;
-}
-EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
diff --git a/trunk/net/ipv4/netfilter/nf_nat_standalone.c b/trunk/net/ipv4/netfilter/nf_nat_standalone.c
index 46cc99def165..332814dac503 100644
--- a/trunk/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/trunk/net/ipv4/netfilter/nf_nat_standalone.c
@@ -328,7 +328,7 @@ static int __init nf_nat_standalone_init(void)
 {
 	int ret = 0;
 
-	need_ipv4_conntrack();
+	need_conntrack();
 
 #ifdef CONFIG_XFRM
 	BUG_ON(ip_nat_decode_session != NULL);
diff --git a/trunk/net/ipv4/tcp_htcp.c b/trunk/net/ipv4/tcp_htcp.c
index 5215691f2760..b66556c0a5bd 100644
--- a/trunk/net/ipv4/tcp_htcp.c
+++ b/trunk/net/ipv4/tcp_htcp.c
@@ -79,6 +79,7 @@ static u32 htcp_cwnd_undo(struct sock *sk)
 static inline void measure_rtt(struct sock *sk, u32 srtt)
 {
 	const struct inet_connection_sock *icsk = inet_csk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk);
 	struct htcp *ca = inet_csk_ca(sk);
 
 	/* keep track of minimum RTT seen so far, minRTT is zero at first */
@@ -86,7 +87,8 @@ static inline void measure_rtt(struct sock *sk, u32 srtt)
 		ca->minRTT = srtt;
 
 	/* max RTT */
-	if (icsk->icsk_ca_state == TCP_CA_Open) {
+	if (icsk->icsk_ca_state == TCP_CA_Open
+	    && tp->snd_ssthresh < 0xFFFF && htcp_ccount(ca) > 3) {
 		if (ca->maxRTT < ca->minRTT)
 			ca->maxRTT = ca->minRTT;
 		if (ca->maxRTT < srtt
diff --git a/trunk/net/mac80211/debugfs_netdev.c b/trunk/net/mac80211/debugfs_netdev.c
index 095be91829ca..799a9208c4b4 100644
--- a/trunk/net/mac80211/debugfs_netdev.c
+++ b/trunk/net/mac80211/debugfs_netdev.c
@@ -271,11 +271,9 @@ static void add_files(struct ieee80211_sub_if_data *sdata)
 	}
 }
 
-#define DEBUGFS_DEL(name, type) \
-	do { \
-		debugfs_remove(sdata->debugfs.type.name); \
-		sdata->debugfs.type.name = NULL; \
-	} while (0)
+#define DEBUGFS_DEL(name, type)\
+	debugfs_remove(sdata->debugfs.type.name);\
+	sdata->debugfs.type.name = NULL;
 
 static void del_sta_files(struct ieee80211_sub_if_data *sdata)
 {
diff --git a/trunk/net/mac80211/debugfs_sta.c b/trunk/net/mac80211/debugfs_sta.c
index da34ea70276f..d41e696f3980 100644
--- a/trunk/net/mac80211/debugfs_sta.c
+++ b/trunk/net/mac80211/debugfs_sta.c
@@ -157,7 +157,7 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf,
 	struct sta_info *sta = file->private_data;
 	for (i = 0; i < NUM_RX_DATA_QUEUES; i++)
 		p += scnprintf(p, sizeof(buf)+buf-p, "%x ",
-			       le16_to_cpu(sta->last_seq_ctrl[i]));
+			       sta->last_seq_ctrl[i]);
 	p += scnprintf(p, sizeof(buf)+buf-p, "\n");
 	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 }
diff --git a/trunk/net/mac80211/ieee80211.c b/trunk/net/mac80211/ieee80211.c
index 8ec5ed192b5d..c944b17d0fc0 100644
--- a/trunk/net/mac80211/ieee80211.c
+++ b/trunk/net/mac80211/ieee80211.c
@@ -1650,7 +1650,6 @@ static int ieee80211_master_start_xmit(struct sk_buff *skb,
 	if (skb_headroom(skb) < headroom) {
 		if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
 			dev_kfree_skb(skb);
-			dev_put(odev);
 			return 0;
 		}
 	}
diff --git a/trunk/net/mac80211/ieee80211_ioctl.c b/trunk/net/mac80211/ieee80211_ioctl.c
index e7904db55325..d0e1ab5589db 100644
--- a/trunk/net/mac80211/ieee80211_ioctl.c
+++ b/trunk/net/mac80211/ieee80211_ioctl.c
@@ -697,24 +697,17 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev,
 	if (!netif_running(dev))
 		return -ENETDOWN;
 
-	switch (sdata->type) {
-	case IEEE80211_IF_TYPE_STA:
-	case IEEE80211_IF_TYPE_IBSS:
-		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+	if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+		if (sdata->type == IEEE80211_IF_TYPE_STA ||
+		    sdata->type == IEEE80211_IF_TYPE_IBSS) {
 			ssid = sdata->u.sta.ssid;
 			ssid_len = sdata->u.sta.ssid_len;
-		}
-		break;
-	case IEEE80211_IF_TYPE_AP:
-		if (local->scan_flags & IEEE80211_SCAN_MATCH_SSID) {
+		} else if (sdata->type == IEEE80211_IF_TYPE_AP) {
 			ssid = sdata->u.ap.ssid;
 			ssid_len = sdata->u.ap.ssid_len;
-		}
-		break;
-	default:
-		return -EOPNOTSUPP;
+		} else
+			return -EINVAL;
 	}
-
 	return ieee80211_sta_req_scan(dev, ssid, ssid_len);
 }
diff --git a/trunk/net/netfilter/nf_conntrack_netlink.c b/trunk/net/netfilter/nf_conntrack_netlink.c
index 2863e72b4091..6f89b105a205 100644
--- a/trunk/net/netfilter/nf_conntrack_netlink.c
+++ b/trunk/net/netfilter/nf_conntrack_netlink.c
@@ -1052,18 +1052,17 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
 	}
 
 	/* implicit 'else' */
+	/* we only allow nat config for new conntracks */
+	if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
 	/* We manipulate the conntrack inside the global conntrack table lock,
 	 * so there's no need to increase the refcount */
 	err = -EEXIST;
-	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) {
-		/* we only allow nat config for new conntracks */
-		if (cda[CTA_NAT_SRC-1] || cda[CTA_NAT_DST-1]) {
-			err = -EINVAL;
-			goto out_unlock;
-		}
-		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h),
-						 cda);
-	}
+	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda);
 
 out_unlock:
 	write_unlock_bh(&nf_conntrack_lock);
diff --git a/trunk/net/netlabel/netlabel_domainhash.c b/trunk/net/netlabel/netlabel_domainhash.c
index b6c844b7e1c1..f46a0aeec44f 100644
--- a/trunk/net/netlabel/netlabel_domainhash.c
+++ b/trunk/net/netlabel/netlabel_domainhash.c
@@ -126,9 +126,7 @@ static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def)
 	if (domain != NULL) {
 		bkt = netlbl_domhsh_hash(domain);
-		list_for_each_entry_rcu(iter,
-					&rcu_dereference(netlbl_domhsh)->tbl[bkt],
-					list)
+		list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list)
 			if (iter->valid && strcmp(iter->domain, domain) == 0)
 				return iter;
 	}
@@ -229,7 +227,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	spin_lock(&netlbl_domhsh_lock);
 	if (netlbl_domhsh_search(entry->domain, 0) == NULL)
 		list_add_tail_rcu(&entry->list,
-				  &rcu_dereference(netlbl_domhsh)->tbl[bkt]);
+				  &netlbl_domhsh->tbl[bkt]);
 	else
 		ret_val = -EEXIST;
 	spin_unlock(&netlbl_domhsh_lock);
@@ -425,8 +423,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 	     iter_bkt < rcu_dereference(netlbl_domhsh)->size;
 	     iter_bkt++, chain_cnt = 0) {
 		list_for_each_entry_rcu(iter_entry,
-					&rcu_dereference(netlbl_domhsh)->tbl[iter_bkt],
-					list)
+					&netlbl_domhsh->tbl[iter_bkt],
+					list)
 			if (iter_entry->valid) {
 				if (chain_cnt++ < *skip_chain)
 					continue;
diff --git a/trunk/net/sunrpc/sched.c b/trunk/net/sunrpc/sched.c
index b5723c262a3e..954d7ec86c7e 100644
--- a/trunk/net/sunrpc/sched.c
+++ b/trunk/net/sunrpc/sched.c
@@ -50,8 +50,6 @@ static RPC_WAITQ(delay_queue, "delayq");
 /*
  * rpciod-related stuff
  */
-static DEFINE_MUTEX(rpciod_mutex);
-static atomic_t rpciod_users = ATOMIC_INIT(0);
 struct workqueue_struct *rpciod_workqueue;
 
 /*
@@ -961,60 +959,49 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
 	spin_unlock(&clnt->cl_lock);
 }
 
+int rpciod_up(void)
+{
+	return try_module_get(THIS_MODULE) ? 0 : -EINVAL;
+}
+
+void rpciod_down(void)
+{
+	module_put(THIS_MODULE);
+}
+
 /*
- * Start up the rpciod process if it's not already running.
+ * Start up the rpciod workqueue.
 */
-int
-rpciod_up(void)
+static int rpciod_start(void)
 {
 	struct workqueue_struct *wq;
-	int error = 0;
-
-	if (atomic_inc_not_zero(&rpciod_users))
-		return 0;
-
-	mutex_lock(&rpciod_mutex);
-	/* Guard against races with rpciod_down() */
-	if (rpciod_workqueue != NULL)
-		goto out_ok;
 	/*
 	 * Create the rpciod thread and wait for it to start.
 	 */
 	dprintk("RPC: creating workqueue rpciod\n");
-	error = -ENOMEM;
 	wq = create_workqueue("rpciod");
-	if (wq == NULL)
-		goto out;
-
 	rpciod_workqueue = wq;
-	error = 0;
-out_ok:
-	atomic_inc(&rpciod_users);
-out:
-	mutex_unlock(&rpciod_mutex);
-	return error;
+	return rpciod_workqueue != NULL;
 }
 
-void
-rpciod_down(void)
+static void rpciod_stop(void)
 {
-	if (!atomic_dec_and_test(&rpciod_users))
-		return;
+	struct workqueue_struct *wq = NULL;
 
-	mutex_lock(&rpciod_mutex);
+	if (rpciod_workqueue == NULL)
+		return;
 	dprintk("RPC: destroying workqueue rpciod\n");
-	if (atomic_read(&rpciod_users) == 0 && rpciod_workqueue != NULL) {
-		destroy_workqueue(rpciod_workqueue);
-		rpciod_workqueue = NULL;
-	}
-	mutex_unlock(&rpciod_mutex);
+	wq = rpciod_workqueue;
+	rpciod_workqueue = NULL;
+	destroy_workqueue(wq);
 }
 
 void rpc_destroy_mempool(void)
 {
+	rpciod_stop();
 	if (rpc_buffer_mempool)
 		mempool_destroy(rpc_buffer_mempool);
 	if (rpc_task_mempool)
@@ -1048,6 +1035,8 @@ rpc_init_mempool(void)
 						     rpc_buffer_slabp);
 	if (!rpc_buffer_mempool)
 		goto err_nomem;
+	if (!rpciod_start())
+		goto err_nomem;
 	return 0;
 err_nomem:
 	rpc_destroy_mempool();