From 351af0725e5222e35741011d1ea62215c1ed06db Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Fri, 6 Aug 2010 13:39:08 +0800 Subject: [PATCH 01/29] perf, x86: Fix Intel-nhm PMU programming errata workaround Fix the Errata AAK100/AAP53/BD53 workaround, the officialy documented workaround we implemented in: 11164cd: perf, x86: Add Nehelem PMU programming errata workaround doesn't actually work fully and causes a stuck PMU state under load and non-functioning perf profiling. A functional workaround was found by trial & error. Affects all Nehalem-class Intel PMUs. Signed-off-by: Zhang Yanmin Signed-off-by: Peter Zijlstra LKML-Reference: <1281073148.2125.63.camel@ymzhang.sh.intel.com> Cc: Arjan van de Ven Cc: "H. Peter Anvin" Cc: # .35.x Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 81 ++++++++++++++++++++------ 1 file changed, 63 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 214ac860ebe0..d8d86d014008 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -491,33 +491,78 @@ static void intel_pmu_enable_all(int added) * Intel Errata AAP53 (model 30) * Intel Errata BD53 (model 44) * - * These chips need to be 'reset' when adding counters by programming - * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 - * either in sequence on the same PMC or on different PMCs. + * The official story: + * These chips need to be 'reset' when adding counters by programming the + * magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either + * in sequence on the same PMC or on different PMCs. + * + * In practise it appears some of these events do in fact count, and + * we need to programm all 4 events. */ -static void intel_pmu_nhm_enable_all(int added) +static void intel_pmu_nhm_workaround(void) { - if (added) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - int i; + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + static const unsigned long nhm_magic[4] = { + 0x4300B5, + 0x4300D2, + 0x4300B1, + 0x4300B1 + }; + struct perf_event *event; + int i; + + /* + * The Errata requires below steps: + * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL; + * 2) Configure 4 PERFEVTSELx with the magic events and clear + * the corresponding PMCx; + * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL; + * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL; + * 5) Clear 4 pairs of ERFEVTSELx and PMCx; + */ + + /* + * The real steps we choose are a little different from above. + * A) To reduce MSR operations, we don't run step 1) as they + * are already cleared before this function is called; + * B) Call x86_perf_event_update to save PMCx before configuring + * PERFEVTSELx with magic number; + * C) With step 5), we do clear only when the PERFEVTSELx is + * not used currently. + * D) Call x86_perf_event_set_period to restore PMCx; + */ - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); - wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); + /* We always operate 4 pairs of PERF Counters */ + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) + x86_perf_event_update(event); + } - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + for (i = 0; i < 4; i++) { + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]); + wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0); + } - for (i = 0; i < 3; i++) { - struct perf_event *event = cpuc->events[i]; + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); - if (!event) - continue; + for (i = 0; i < 4; i++) { + event = cpuc->events[i]; + if (event) { + x86_perf_event_set_period(event); __x86_pmu_enable_event(&event->hw, - ARCH_PERFMON_EVENTSEL_ENABLE); - } + ARCH_PERFMON_EVENTSEL_ENABLE); + } else + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0); } +} + +static void intel_pmu_nhm_enable_all(int added) +{ + if (added) + intel_pmu_nhm_workaround(); intel_pmu_enable_all(added); } From 07a7795ca2e6e66d00b184efb46bd0e23d90d3fe Mon Sep 17 00:00:00 2001 From: Hans Rosenfeld Date: Wed, 18 Aug 2010 16:19:50 +0200 Subject: [PATCH 02/29] x86, cpu: Fix regression in AMD errata checking code A bug in the family-model-stepping matching code caused the presence of errata to go undetected when OSVW was not used. This causes hangs on some K8 systems because the E400 workaround is not enabled. Signed-off-by: Hans Rosenfeld LKML-Reference: <1282141190-930137-1-git-send-email-hans.rosenfeld@amd.com> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 60a57b13082d..ba5f62f45f01 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -669,7 +669,7 @@ bool cpu_has_amd_erratum(const int *erratum) } /* OSVW unavailable or ID unknown, match family-model-stepping range */ - ms = (cpu->x86_model << 8) | cpu->x86_mask; + ms = (cpu->x86_model << 4) | cpu->x86_mask; while ((range = *erratum++)) if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) && (ms >= AMD_MODEL_RANGE_START(range)) && From fd89a137924e0710078c3ae855e7cec1c43cb845 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 16 Aug 2010 14:38:33 +0200 Subject: [PATCH 03/29] x86-32: Separate 1:1 pagetables from swapper_pg_dir This patch fixes machine crashes which occur when heavily exercising the CPU hotplug codepaths on a 32-bit kernel. These crashes are caused by AMD Erratum 383 and result in a fatal machine check exception. Here's the scenario: 1. On 32-bit, the swapper_pg_dir page table is used as the initial page table for booting a secondary CPU. 2. To make this work, swapper_pg_dir needs a direct mapping of physical memory in it (the low mappings). By adding those low, large page (2M) mappings (PAE kernel), we create the necessary conditions for Erratum 383 to occur. 3. Other CPUs which do not participate in the off- and onlining game may use swapper_pg_dir while the low mappings are present (when leave_mm is called). For all steps below, the CPU referred to is a CPU that is using swapper_pg_dir, and not the CPU which is being onlined. 4. The presence of the low mappings in swapper_pg_dir can result in TLB entries for addresses below __PAGE_OFFSET to be established speculatively. These TLB entries are marked global and large. 5. When the CPU with such TLB entry switches to another page table, this TLB entry remains because it is global. 6. The process then generates an access to an address covered by the above TLB entry but there is a permission mismatch - the TLB entry covers a large global page not accessible to userspace. 7. Due to this permission mismatch a new 4kb, user TLB entry gets established. Further, Erratum 383 provides for a small window of time where both TLB entries are present. This results in an uncorrectable machine check exception signalling a TLB multimatch which panics the machine. There are two ways to fix this issue: 1. Always do a global TLB flush when a new cr3 is loaded and the old page table was swapper_pg_dir. I consider this a hack hard to understand and with performance implications 2. Do not use swapper_pg_dir to boot secondary CPUs like 64-bit does. This patch implements solution 2. It introduces a trampoline_pg_dir which has the same layout as swapper_pg_dir with low_mappings. This page table is used as the initial page table of the booting CPU. Later in the bringup process, it switches to swapper_pg_dir and does a global TLB flush. This fixes the crashes in our test cases. -v2: switch to swapper_pg_dir right after entering start_secondary() so that we are able to access percpu data which might not be mapped in the trampoline page table. Signed-off-by: Joerg Roedel LKML-Reference: <20100816123833.GB28147@aftab> Signed-off-by: Borislav Petkov Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable_32.h | 1 + arch/x86/include/asm/trampoline.h | 3 +++ arch/x86/kernel/head_32.S | 8 +++++++- arch/x86/kernel/setup.c | 2 ++ arch/x86/kernel/smpboot.c | 32 +++++++++++++------------------ arch/x86/kernel/trampoline.c | 18 +++++++++++++++++ 6 files changed, 44 insertions(+), 20 deletions(-) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 2984a25ff383..f686f49e8b7b 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -26,6 +26,7 @@ struct mm_struct; struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; +extern pgd_t trampoline_pg_dir[1024]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index cb507bb05d79..8f78fdf3f178 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -13,14 +13,17 @@ extern unsigned char *trampoline_base; extern unsigned long init_rsp; extern unsigned long initial_code; +extern unsigned long initial_page_table; extern unsigned long initial_gs; #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) extern unsigned long setup_trampoline(void); +extern void __init setup_trampoline_page_table(void); extern void __init reserve_trampoline_memory(void); #else static inline void reserve_trampoline_memory(void) {}; +extern void __init setup_trampoline_page_table(void) {}; #endif /* CONFIG_X86_TRAMPOLINE */ #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index ff4c453e13f3..fa8c1b8e09fb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -334,7 +334,7 @@ ENTRY(startup_32_smp) /* * Enable paging */ - movl $pa(swapper_pg_dir),%eax + movl pa(initial_page_table), %eax movl %eax,%cr3 /* set the page table pointer.. */ movl %cr0,%eax orl $X86_CR0_PG,%eax @@ -614,6 +614,8 @@ ignore_int: .align 4 ENTRY(initial_code) .long i386_start_kernel +ENTRY(initial_page_table) + .long pa(swapper_pg_dir) /* * BSS section @@ -629,6 +631,10 @@ ENTRY(swapper_pg_dir) #endif swapper_pg_fixmap: .fill 1024,4,0 +#ifdef CONFIG_X86_TRAMPOLINE +ENTRY(trampoline_pg_dir) + .fill 1024,4,0 +#endif ENTRY(empty_zero_page) .fill 4096,1,0 diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b008e7883207..c3a4fbb2b996 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1014,6 +1014,8 @@ void __init setup_arch(char **cmdline_p) paging_init(); x86_init.paging.pagetable_setup_done(swapper_pg_dir); + setup_trampoline_page_table(); + tboot_probe(); #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a5e928b0cb5f..abf4a86ffc54 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -73,7 +73,6 @@ #ifdef CONFIG_X86_32 u8 apicid_2_node[MAX_APICID]; -static int low_mappings; #endif /* State of each CPU */ @@ -281,6 +280,18 @@ notrace static void __cpuinit start_secondary(void *unused) * fragile that we want to limit the things done here to the * most necessary things. */ + +#ifdef CONFIG_X86_32 + /* + * Switch away from the trampoline page-table + * + * Do this before cpu_init() because it needs to access per-cpu + * data which may not be mapped in the trampoline page-table. + */ + load_cr3(swapper_pg_dir); + __flush_tlb_all(); +#endif + vmi_bringup(); cpu_init(); preempt_disable(); @@ -299,12 +310,6 @@ notrace static void __cpuinit start_secondary(void *unused) legacy_pic->chip->unmask(0); } -#ifdef CONFIG_X86_32 - while (low_mappings) - cpu_relax(); - __flush_tlb_all(); -#endif - /* This must be done before setting cpu_online_mask */ set_cpu_sibling_map(raw_smp_processor_id()); wmb(); @@ -750,6 +755,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ irq_ctx_init(cpu); + initial_page_table = __pa(&trampoline_pg_dir); #else clear_tsk_thread_flag(c_idle.idle, TIF_FORK); initial_gs = per_cpu_offset(cpu); @@ -897,20 +903,8 @@ int __cpuinit native_cpu_up(unsigned int cpu) per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; -#ifdef CONFIG_X86_32 - /* init low mem mapping */ - clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); - flush_tlb_all(); - low_mappings = 1; - err = do_boot_cpu(apicid, cpu); - zap_low_mappings(false); - low_mappings = 0; -#else - err = do_boot_cpu(apicid, cpu); -#endif if (err) { pr_debug("do_boot_cpu failed %d\n", err); return -EIO; diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index c652ef62742d..a874495b3673 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c @@ -1,6 +1,7 @@ #include #include +#include #include #if defined(CONFIG_X86_64) && defined(CONFIG_ACPI_SLEEP) @@ -37,3 +38,20 @@ unsigned long __trampinit setup_trampoline(void) memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); return virt_to_phys(trampoline_base); } + +void __init setup_trampoline_page_table(void) +{ +#ifdef CONFIG_X86_32 + /* Copy kernel address range */ + clone_pgd_range(trampoline_pg_dir + KERNEL_PGD_BOUNDARY, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, + KERNEL_PGD_BOUNDARY)); + + /* Initialize low mappings */ + clone_pgd_range(trampoline_pg_dir, + swapper_pg_dir + KERNEL_PGD_BOUNDARY, + min_t(unsigned long, KERNEL_PGD_PTRS, + KERNEL_PGD_BOUNDARY)); +#endif +} From ecafda60e88031bcc4271c446f984ee883d69ea8 Mon Sep 17 00:00:00 2001 From: Kusanagi Kouichi Date: Wed, 18 Aug 2010 13:32:37 -0300 Subject: [PATCH 04/29] perf tools: Fix build error on read only source. Parts of the build process were generating files outside the specified O= directory, causing the build to fail on systems where the sources are in a read only file system. Fix it by using $(OUTPUT) on these locations. Also check that $(OUTPUT) actually exists, just like the top level kernel Makefile does. Otherwise the failure message emitted is completely misleading. Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <20100817140841.0859362C03A@msa106.auone-net.jp> Signed-off-by: Kusanagi Kouichi Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile | 14 ++++++++++---- tools/perf/feature-tests.mak | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/Makefile b/tools/perf/Makefile index dcb9700b88d2..4f1fa77c1feb 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -5,6 +5,12 @@ endif # The default target of this Makefile is... all:: +ifneq ($(OUTPUT),) +# check that the output directory actually exists +OUTDIR := $(shell cd $(OUTPUT) && /bin/pwd) +$(if $(OUTDIR),, $(error output directory "$(OUTPUT)" does not exist)) +endif + # Define V=1 to have a more verbose compile. # Define V=2 to have an even more verbose compile. # @@ -931,15 +937,15 @@ $(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt) $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@ $(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh - $(QUIET_GEN)$(RM) $@ $@+ && \ + $(QUIET_GEN)$(RM) $(OUTPUT)$@ $(OUTPUT)$@+ && \ sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ - $@.sh >$@+ && \ - chmod +x $@+ && \ - mv $@+ $(OUTPUT)$@ + $@.sh > $(OUTPUT)$@+ && \ + chmod +x $(OUTPUT)$@+ && \ + mv $(OUTPUT)$@+ $(OUTPUT)$@ configure: configure.ac $(QUIET_GEN)$(RM) $@ $<+ && \ diff --git a/tools/perf/feature-tests.mak b/tools/perf/feature-tests.mak index ddb68e601f0e..7a7b60859053 100644 --- a/tools/perf/feature-tests.mak +++ b/tools/perf/feature-tests.mak @@ -113,7 +113,7 @@ endef # try-cc # Usage: option = $(call try-cc, source-to-build, cc-options) try-cc = $(shell sh -c \ - 'TMP="$(TMPOUT).$$$$"; \ + 'TMP="$(OUTPUT)$(TMPOUT).$$$$"; \ echo "$(1)" | \ $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ rm -f "$$TMP"') From 8848a91068c018bc91f597038a0f41462a0f88a4 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 18 Aug 2010 11:42:23 -0700 Subject: [PATCH 05/29] x86-32: Fix dummy trampoline-related inline stubs Fix dummy inline stubs for trampoline-related functions when no trampolines exist (until we get rid of the no-trampoline case entirely.) Signed-off-by: H. Peter Anvin Cc: Joerg Roedel Cc: Borislav Petkov LKML-Reference: <4C6C294D.3030404@zytor.com> --- arch/x86/include/asm/trampoline.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index 8f78fdf3f178..4dde797c0578 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h @@ -22,8 +22,8 @@ extern unsigned long setup_trampoline(void); extern void __init setup_trampoline_page_table(void); extern void __init reserve_trampoline_memory(void); #else -static inline void reserve_trampoline_memory(void) {}; -extern void __init setup_trampoline_page_table(void) {}; +static inline void setup_trampoline_page_table(void) {} +static inline void reserve_trampoline_memory(void) {} #endif /* CONFIG_X86_TRAMPOLINE */ #endif /* __ASSEMBLY__ */ From 737480a0d525dae13306296da08029dff545bc72 Mon Sep 17 00:00:00 2001 From: KUMANO Syuhei Date: Sun, 15 Aug 2010 15:18:04 +0900 Subject: [PATCH 06/29] kprobes/x86: Fix the return address of multiple kretprobes Fix the return address of subsequent kretprobes when multiple kretprobes are set on the same function. For example: # cd /sys/kernel/debug/tracing # echo "r:event1 sys_symlink" > kprobe_events # echo "r:event2 sys_symlink" >> kprobe_events # echo 1 > events/kprobes/enable # ln -s /tmp/foo /tmp/bar (without this patch) # cat trace ln-897 [000] 20404.133727: event1: (kretprobe_trampoline+0x0/0x4c <- sys_symlink) ln-897 [000] 20404.133747: event2: (system_call_fastpath+0x16/0x1b <- sys_symlink) (with this patch) # cat trace ln-740 [000] 13799.491076: event1: (system_call_fastpath+0x16/0x1b <- sys_symlink) ln-740 [000] 13799.491096: event2: (system_call_fastpath+0x16/0x1b <- sys_symlink) Signed-off-by: KUMANO Syuhei Reviewed-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Peter Zijlstra Cc: YOSHIFUJI Hideaki LKML-Reference: <1281853084.3254.11.camel@camp10-laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 1bfb6cf4dd55..770ebfb349e9 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -709,6 +709,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) struct hlist_node *node, *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; + kprobe_opcode_t *correct_ret_addr = NULL; INIT_HLIST_HEAD(&empty_rp); kretprobe_hash_lock(current, &head, &flags); @@ -740,14 +741,34 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; + orig_ret_address = (unsigned long)ri->ret_addr; + + if (orig_ret_address != trampoline_address) + /* + * This is the real return address. Any other + * instances associated with this task are for + * other calls deeper on the call stack + */ + break; + } + + kretprobe_assert(ri, orig_ret_address, trampoline_address); + + correct_ret_addr = ri->ret_addr; + hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + if (ri->task != current) + /* another task is sharing our hash bucket */ + continue; + + orig_ret_address = (unsigned long)ri->ret_addr; if (ri->rp && ri->rp->handler) { __get_cpu_var(current_kprobe) = &ri->rp->kp; get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; + ri->ret_addr = correct_ret_addr; ri->rp->handler(ri, regs); __get_cpu_var(current_kprobe) = NULL; } - orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri, &empty_rp); if (orig_ret_address != trampoline_address) @@ -759,8 +780,6 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) break; } - kretprobe_assert(ri, orig_ret_address, trampoline_address); - kretprobe_hash_unlock(current, &flags); hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { From d7c53c9e822a4fefa13a0cae76f3190bfd0d5c11 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 19 Aug 2010 20:10:29 +0200 Subject: [PATCH 07/29] x86, hotplug: Serialize CPU hotplug to avoid bringup concurrency issues When testing cpu hotplug code on 32-bit we kept hitting the "CPU%d: Stuck ??" message due to multiple cores concurrently accessing the cpu_callin_mask, among others. Since these codepaths are not protected from concurrent access due to the fact that there's no sane reason for making an already complex code unnecessarily more complex - we hit the issue only when insanely switching cores off- and online - serialize hotplugging cores on the sysfs level and be done with it. [ v2.1: fix !HOTPLUG_CPU build ] Cc: Signed-off-by: Borislav Petkov LKML-Reference: <20100819181029.GC17171@aftab> Signed-off-by: H. Peter Anvin --- arch/x86/Kconfig | 5 +++++ arch/x86/kernel/smpboot.c | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index a84fc34c8f77..ac7827fc0823 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -245,6 +245,11 @@ config ARCH_HWEIGHT_CFLAGS config KTIME_SCALAR def_bool X86_32 + +config ARCH_CPU_PROBE_RELEASE + def_bool y + depends on HOTPLUG_CPU + source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index abf4a86ffc54..8b3bfc4dd708 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -90,6 +90,25 @@ DEFINE_PER_CPU(int, cpu_state) = { 0 }; static DEFINE_PER_CPU(struct task_struct *, idle_thread_array); #define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x)) #define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p)) + +/* + * We need this for trampoline_base protection from concurrent accesses when + * off- and onlining cores wildly. + */ +static DEFINE_MUTEX(x86_cpu_hotplug_driver_mutex); + +void cpu_hotplug_driver_lock() +{ + mutex_lock(&x86_cpu_hotplug_driver_mutex); +} + +void cpu_hotplug_driver_unlock() +{ + mutex_unlock(&x86_cpu_hotplug_driver_mutex); +} + +ssize_t arch_cpu_probe(const char *buf, size_t count) { return -1; } +ssize_t arch_cpu_release(const char *buf, size_t count) { return -1; } #else static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ; #define get_idle_for_cpu(x) (idle_thread_array[(x)]) From 05e407603e527f9d808dd3866d3a17c2ce4dfcc5 Mon Sep 17 00:00:00 2001 From: Daniel Kiper Date: Fri, 20 Aug 2010 00:46:16 +0200 Subject: [PATCH 08/29] x86, apic: Fix apic=debug boot crash Fix a boot crash when apic=debug is used and the APIC is not properly initialized. This issue appears during Xen Dom0 kernel boot but the fix is generic and the crash could occur on real hardware as well. Signed-off-by: Daniel Kiper Cc: xen-devel@lists.xensource.com Cc: konrad.wilk@oracle.com Cc: jeremy@goop.org Cc: # .35.x, .34.x, .33.x, .32.x LKML-Reference: <20100819224616.GB9967@router-fw-old.local.net-space.pl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4dc0084ec1b1..f1efebaf5510 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1728,6 +1728,8 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_pin_list *entry; cfg = desc->chip_data; + if (!cfg) + continue; entry = cfg->irq_2_pin; if (!entry) continue; From c81476df1b4241aefba4ff83a7701b3a926bd7ce Mon Sep 17 00:00:00 2001 From: Ondrej Zary Date: Thu, 19 Aug 2010 14:13:25 -0700 Subject: [PATCH 09/29] matroxfb: fix incorrect use of memcpy_toio() Screen is completely corrupted since 2.6.34. Bisection revealed that it's caused by commit 6175ddf06b61720 ("x86: Clean up mem*io functions."). H. Peter Anvin explained that memcpy_toio() does not copy data in 32bit chunks anymore on x86. Signed-off-by: Ondrej Zary Cc: Brian Gerst Cc: H. Peter Anvin Cc: Petr Vandrovec Cc: Jean Delvare Cc: [2.6.34.x, 2.6.35.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/matrox/matroxfb_base.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/video/matrox/matroxfb_base.h b/drivers/video/matrox/matroxfb_base.h index f3a4e15672d9..f96a471cb1a8 100644 --- a/drivers/video/matrox/matroxfb_base.h +++ b/drivers/video/matrox/matroxfb_base.h @@ -151,13 +151,13 @@ static inline void mga_writel(vaddr_t va, unsigned int offs, u_int32_t value) { static inline void mga_memcpy_toio(vaddr_t va, const void* src, int len) { #if defined(__alpha__) || defined(__i386__) || defined(__x86_64__) /* - * memcpy_toio works for us if: + * iowrite32_rep works for us if: * (1) Copies data as 32bit quantities, not byte after byte, * (2) Performs LE ordered stores, and * (3) It copes with unaligned source (destination is guaranteed to be page * aligned and length is guaranteed to be multiple of 4). */ - memcpy_toio(va.vaddr, src, len); + iowrite32_rep(va.vaddr, src, len >> 2); #else u_int32_t __iomem* addr = va.vaddr; From b35de43b31040828f83046f40fd34ba33146409d Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:27 -0700 Subject: [PATCH 10/29] kfifo: implement missing __kfifo_skip_r() kfifo_skip() is currently broken, due to the missing of the internal helper function. Add it. Signed-off-by: Andrea Righi Cc: Greg KH Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 ++ kernel/kfifo.c | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 311f8753d713..4aa95f203f3e 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -836,6 +836,8 @@ extern void __kfifo_dma_out_finish_r(struct __kfifo *fifo, size_t recsize); extern unsigned int __kfifo_len_r(struct __kfifo *fifo, size_t recsize); +extern void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize); + extern unsigned int __kfifo_out_peek_r(struct __kfifo *fifo, void *buf, unsigned int len, size_t recsize); diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 4502604ecadf..6b5580c57644 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -503,6 +503,15 @@ unsigned int __kfifo_out_r(struct __kfifo *fifo, void *buf, } EXPORT_SYMBOL(__kfifo_out_r); +void __kfifo_skip_r(struct __kfifo *fifo, size_t recsize) +{ + unsigned int n; + + n = __kfifo_peek_n(fifo, recsize); + fifo->out += n + recsize; +} +EXPORT_SYMBOL(__kfifo_skip_r); + int __kfifo_from_user_r(struct __kfifo *fifo, const void __user *from, unsigned long len, unsigned int *copied, size_t recsize) { From 5ddf83912c8b49a24ab0841f6d77f33781dcf10f Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:28 -0700 Subject: [PATCH 11/29] kfifo: add kfifo_skip() testcase Add a testcase for kfifo_skip() to the byte stream fifo example. Signed-off-by: Andrea Righi Cc: Greg KH Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/kfifo/bytestream-example.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index 642eef3f6336..2e3a7a8128a2 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -73,6 +73,10 @@ static int __init testfunc(void) ret = kfifo_in(&test, buf, ret); printk(KERN_INFO "ret: %d\n", ret); + /* skip first element of the fifo */ + printk(KERN_INFO "skip 1st element\n"); + kfifo_skip(&test); + /* put values into the fifo until is full */ for (i = 20; kfifo_put(&test, &i); i++) ; From 2aaf2092c168fc02df0645415f524b357ee7ec2e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:29 -0700 Subject: [PATCH 12/29] kfifo: add explicit error checking in byte stream example Provide a static array of expected items that kfifo should contain at the end of the test to validate it. Signed-off-by: Andrea Righi Cc: Stefani Seibold Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/kfifo/bytestream-example.c | 33 ++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index 2e3a7a8128a2..a94e6948b30d 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -44,10 +44,17 @@ static struct kfifo test; static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE); #endif +static unsigned char expected_result[FIFO_SIZE] = { + 3, 4, 5, 6, 7, 8, 9, 0, + 1, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, +}; + static int __init testfunc(void) { unsigned char buf[6]; - unsigned char i; + unsigned char i, j; unsigned int ret; printk(KERN_INFO "byte stream fifo test start\n"); @@ -83,10 +90,19 @@ static int __init testfunc(void) printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); - /* print out all values in the fifo */ - while (kfifo_get(&test, &i)) - printk("%d ", i); - printk("\n"); + /* check the correctness of all values in the fifo */ + j = 0; + while (kfifo_get(&test, &i)) { + if (i != expected_result[j++]) { + printk(KERN_WARNING "value mismatch: test failed\n"); + return -EIO; + } + } + if (j != ARRAY_SIZE(expected_result)) { + printk(KERN_WARNING "size mismatch: test failed\n"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); return 0; } @@ -142,7 +158,12 @@ static int __init example_init(void) #else INIT_KFIFO(test); #endif - testfunc(); + if (testfunc() < 0) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -EIO; + } if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) { #ifdef DYNAMIC From 7b34d5257a90c419d67c1c3b52f87a679845ef1e Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:29 -0700 Subject: [PATCH 13/29] kfifo: fix kernel BUG in dma example The scatterlist is used uninitialized in kfifo_dma_in_prepare(). This triggers the following bug if CONFIG_DEBUG_SG=y: ------------[ cut here ]------------ kernel BUG at include/linux/scatterlist.h:65! invalid opcode: 0000 [#1] PREEMPT SMP ... Call Trace: [] setup_sgl+0x6b/0xe0 [] ? example_init+0x0/0x265 [dma_example] [] __kfifo_dma_in_prepare+0x21/0x30 [] example_init+0x124/0x265 [dma_example] [] ? trace_module_notify+0x25/0x370 [] ? free_pages_prepare+0x11e/0x1e0 [] ? get_parent_ip+0x11/0x50 [] ? trace_module_notify+0x25/0x370 [] ? trace_hardirqs_on+0xd/0x10 [] ? mutex_unlock+0xe/0x10 [] ? trace_module_notify+0x41/0x370 [] ? __blocking_notifier_call_chain+0x45/0x80 [] ? vfree+0x2a/0x30 [] ? up_read+0x23/0x40 [] ? __blocking_notifier_call_chain+0x65/0x80 [] do_one_initcall+0x43/0x180 [] sys_init_module+0xba/0x200 [] system_call_fastpath+0x16/0x1b RIP [] setup_sgl_buf+0x1a1/0x1b0 RSP ---[ end trace a72b979fd3c1d3a5 ]--- Add the proper initialization to avoid the bug. Signed-off-by: Andrea Righi Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/kfifo/dma-example.c | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index b9482c28b41a..03433ca5bdad 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -45,6 +45,7 @@ static int __init example_init(void) printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo)); + sg_init_table(sg, ARRAY_SIZE(sg)); ret = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE); printk(KERN_INFO "DMA sgl entries: %d\n", ret); From d83a71c4219191a9a881318ae5ca9b39aa1d0540 Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:30 -0700 Subject: [PATCH 14/29] kfifo: fix a memory leak in dma example We use a dynamically allocated kfifo in the dma example, so we need to free it when unloading the module. Signed-off-by: Andrea Righi Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/kfifo/dma-example.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 03433ca5bdad..3682278785f7 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -105,9 +105,7 @@ static int __init example_init(void) static void __exit example_exit(void) { -#ifdef DYNAMIC - kfifo_free(&test); -#endif + kfifo_free(&fifo); } module_init(example_init); From a25effa4d265eb5028c7d4a92a0ddd9267c3c43d Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 19 Aug 2010 14:13:30 -0700 Subject: [PATCH 15/29] kfifo: add explicit error checking in all the examples Provide a check in all the kfifo examples to validate the correct execution of each testcase. Signed-off-by: Andrea Righi Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- samples/kfifo/bytestream-example.c | 7 +- samples/kfifo/dma-example.c | 106 ++++++++++++++++++----------- samples/kfifo/inttype-example.c | 43 +++++++++--- samples/kfifo/record-example.c | 39 ++++++++++- 4 files changed, 144 insertions(+), 51 deletions(-) diff --git a/samples/kfifo/bytestream-example.c b/samples/kfifo/bytestream-example.c index a94e6948b30d..178061e87ffe 100644 --- a/samples/kfifo/bytestream-example.c +++ b/samples/kfifo/bytestream-example.c @@ -44,7 +44,7 @@ static struct kfifo test; static DECLARE_KFIFO(test, unsigned char, FIFO_SIZE); #endif -static unsigned char expected_result[FIFO_SIZE] = { +static const unsigned char expected_result[FIFO_SIZE] = { 3, 4, 5, 6, 7, 8, 9, 0, 1, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, @@ -90,9 +90,14 @@ static int __init testfunc(void) printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); + /* show the first value without removing from the fifo */ + if (kfifo_peek(&test, &i)) + printk(KERN_INFO "%d\n", i); + /* check the correctness of all values in the fifo */ j = 0; while (kfifo_get(&test, &i)) { + printk(KERN_INFO "item = %d\n", i); if (i != expected_result[j++]) { printk(KERN_WARNING "value mismatch: test failed\n"); return -EIO; diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 3682278785f7..ee03a4f0b64f 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -29,8 +29,8 @@ static int __init example_init(void) printk(KERN_INFO "DMA fifo test start\n"); if (kfifo_alloc(&fifo, FIFO_SIZE, GFP_KERNEL)) { - printk(KERN_ERR "error kfifo_alloc\n"); - return 1; + printk(KERN_WARNING "error kfifo_alloc\n"); + return -ENOMEM; } printk(KERN_INFO "queue size: %u\n", kfifo_size(&fifo)); @@ -41,65 +41,93 @@ static int __init example_init(void) kfifo_put(&fifo, &i); /* kick away first byte */ - ret = kfifo_get(&fifo, &i); + kfifo_skip(&fifo); printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo)); + /* + * Configure the kfifo buffer to receive data from DMA input. + * + * .--------------------------------------. + * | 0 | 1 | 2 | ... | 12 | 13 | ... | 31 | + * |---|------------------|---------------| + * \_/ \________________/ \_____________/ + * \ \ \ + * \ \_allocated data \ + * \_*free space* \_*free space* + * + * We need two different SG entries: one for the free space area at the + * end of the kfifo buffer (19 bytes) and another for the first free + * byte at the beginning, after the kfifo_skip(). + */ sg_init_table(sg, ARRAY_SIZE(sg)); ret = kfifo_dma_in_prepare(&fifo, sg, ARRAY_SIZE(sg), FIFO_SIZE); printk(KERN_INFO "DMA sgl entries: %d\n", ret); + if (!ret) { + /* fifo is full and no sgl was created */ + printk(KERN_WARNING "error kfifo_dma_in_prepare\n"); + return -EIO; + } - /* if 0 was returned, fifo is full and no sgl was created */ - if (ret) { - printk(KERN_INFO "scatterlist for receive:\n"); - for (i = 0; i < ARRAY_SIZE(sg); i++) { - printk(KERN_INFO - "sg[%d] -> " - "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", - i, sg[i].page_link, sg[i].offset, sg[i].length); + /* receive data */ + printk(KERN_INFO "scatterlist for receive:\n"); + for (i = 0; i < ARRAY_SIZE(sg); i++) { + printk(KERN_INFO + "sg[%d] -> " + "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", + i, sg[i].page_link, sg[i].offset, sg[i].length); - if (sg_is_last(&sg[i])) - break; - } + if (sg_is_last(&sg[i])) + break; + } - /* but here your code to setup and exectute the dma operation */ - /* ... */ + /* put here your code to setup and exectute the dma operation */ + /* ... */ - /* example: zero bytes received */ - ret = 0; + /* example: zero bytes received */ + ret = 0; - /* finish the dma operation and update the received data */ - kfifo_dma_in_finish(&fifo, ret); - } + /* finish the dma operation and update the received data */ + kfifo_dma_in_finish(&fifo, ret); + /* Prepare to transmit data, example: 8 bytes */ ret = kfifo_dma_out_prepare(&fifo, sg, ARRAY_SIZE(sg), 8); printk(KERN_INFO "DMA sgl entries: %d\n", ret); + if (!ret) { + /* no data was available and no sgl was created */ + printk(KERN_WARNING "error kfifo_dma_out_prepare\n"); + return -EIO; + } - /* if 0 was returned, no data was available and no sgl was created */ - if (ret) { - printk(KERN_INFO "scatterlist for transmit:\n"); - for (i = 0; i < ARRAY_SIZE(sg); i++) { - printk(KERN_INFO - "sg[%d] -> " - "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", - i, sg[i].page_link, sg[i].offset, sg[i].length); + printk(KERN_INFO "scatterlist for transmit:\n"); + for (i = 0; i < ARRAY_SIZE(sg); i++) { + printk(KERN_INFO + "sg[%d] -> " + "page_link 0x%.8lx offset 0x%.8x length 0x%.8x\n", + i, sg[i].page_link, sg[i].offset, sg[i].length); - if (sg_is_last(&sg[i])) - break; - } + if (sg_is_last(&sg[i])) + break; + } - /* but here your code to setup and exectute the dma operation */ - /* ... */ + /* put here your code to setup and exectute the dma operation */ + /* ... */ - /* example: 5 bytes transmitted */ - ret = 5; + /* example: 5 bytes transmitted */ + ret = 5; - /* finish the dma operation and update the transmitted data */ - kfifo_dma_out_finish(&fifo, ret); - } + /* finish the dma operation and update the transmitted data */ + kfifo_dma_out_finish(&fifo, ret); + ret = kfifo_len(&fifo); printk(KERN_INFO "queue len: %u\n", kfifo_len(&fifo)); + if (ret != 7) { + printk(KERN_WARNING "size mismatch: test failed"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); + return 0; } diff --git a/samples/kfifo/inttype-example.c b/samples/kfifo/inttype-example.c index d6c5b7d9df64..71b2aabca96a 100644 --- a/samples/kfifo/inttype-example.c +++ b/samples/kfifo/inttype-example.c @@ -44,10 +44,17 @@ static DECLARE_KFIFO_PTR(test, int); static DEFINE_KFIFO(test, int, FIFO_SIZE); #endif +static const int expected_result[FIFO_SIZE] = { + 3, 4, 5, 6, 7, 8, 9, 0, + 1, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 36, 37, 38, 39, 40, 41, 42, +}; + static int __init testfunc(void) { int buf[6]; - int i; + int i, j; unsigned int ret; printk(KERN_INFO "int fifo test start\n"); @@ -66,8 +73,13 @@ static int __init testfunc(void) ret = kfifo_in(&test, buf, ret); printk(KERN_INFO "ret: %d\n", ret); - for (i = 20; i != 30; i++) - kfifo_put(&test, &i); + /* skip first element of the fifo */ + printk(KERN_INFO "skip 1st element\n"); + kfifo_skip(&test); + + /* put values into the fifo until is full */ + for (i = 20; kfifo_put(&test, &i); i++) + ; printk(KERN_INFO "queue len: %u\n", kfifo_len(&test)); @@ -75,10 +87,20 @@ static int __init testfunc(void) if (kfifo_peek(&test, &i)) printk(KERN_INFO "%d\n", i); - /* print out all values in the fifo */ - while (kfifo_get(&test, &i)) - printk("%d ", i); - printk("\n"); + /* check the correctness of all values in the fifo */ + j = 0; + while (kfifo_get(&test, &i)) { + printk(KERN_INFO "item = %d\n", i); + if (i != expected_result[j++]) { + printk(KERN_WARNING "value mismatch: test failed\n"); + return -EIO; + } + } + if (j != ARRAY_SIZE(expected_result)) { + printk(KERN_WARNING "size mismatch: test failed\n"); + return -EIO; + } + printk(KERN_INFO "test passed\n"); return 0; } @@ -132,7 +154,12 @@ static int __init example_init(void) return ret; } #endif - testfunc(); + if (testfunc() < 0) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -EIO; + } if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) { #ifdef DYNAMIC diff --git a/samples/kfifo/record-example.c b/samples/kfifo/record-example.c index 32c6e0bda744..e68bd16a5da4 100644 --- a/samples/kfifo/record-example.c +++ b/samples/kfifo/record-example.c @@ -55,6 +55,19 @@ typedef STRUCT_KFIFO_REC_1(FIFO_SIZE) mytest; static mytest test; #endif +static const char *expected_result[] = { + "a", + "bb", + "ccc", + "dddd", + "eeeee", + "ffffff", + "ggggggg", + "hhhhhhhh", + "iiiiiiiii", + "jjjjjjjjjj", +}; + static int __init testfunc(void) { char buf[100]; @@ -75,6 +88,10 @@ static int __init testfunc(void) kfifo_in(&test, buf, i + 1); } + /* skip first element of the fifo */ + printk(KERN_INFO "skip 1st element\n"); + kfifo_skip(&test); + printk(KERN_INFO "fifo len: %u\n", kfifo_len(&test)); /* show the first record without removing from the fifo */ @@ -82,11 +99,22 @@ static int __init testfunc(void) if (ret) printk(KERN_INFO "%.*s\n", ret, buf); - /* print out all records in the fifo */ + /* check the correctness of all values in the fifo */ + i = 0; while (!kfifo_is_empty(&test)) { ret = kfifo_out(&test, buf, sizeof(buf)); - printk(KERN_INFO "%.*s\n", ret, buf); + buf[ret] = '\0'; + printk(KERN_INFO "item = %.*s\n", ret, buf); + if (strcmp(buf, expected_result[i++])) { + printk(KERN_WARNING "value mismatch: test failed\n"); + return -EIO; + } + } + if (i != ARRAY_SIZE(expected_result)) { + printk(KERN_WARNING "size mismatch: test failed\n"); + return -EIO; } + printk(KERN_INFO "test passed\n"); return 0; } @@ -142,7 +170,12 @@ static int __init example_init(void) #else INIT_KFIFO(test); #endif - testfunc(); + if (testfunc() < 0) { +#ifdef DYNAMIC + kfifo_free(&test); +#endif + return -EIO; + } if (proc_create(PROC_FIFO, 0, NULL, &fifo_fops) == NULL) { #ifdef DYNAMIC From f2e41e910320197d55b52e28d99a07130f2ae738 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 Aug 2010 14:13:31 -0700 Subject: [PATCH 16/29] revert "hwmon: f71882fg: add support for the Fintek F71808E" Revert commit 7721fea3d0fd93fb4d000eb737b444369358d6d3 ("hwmon: f71882fg: add support for the Fintek F71808E"). Hans said: : A second review after I've received a data sheet for this device from : Fintek has turned up a few bugs. : : Unfortunately Giel (nor I) have time to fix this in time for the 2.6.36 : cycle. Therefor I would like to see this patch reverted as not having any : support for the hwmon function of this superio chip is better then having : unreliable support. Cc: Giel van Schijndel Cc: Jean Delvare Cc: Hans de Goede Cc: Jonathan Cameron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/hwmon/f71882fg | 4 -- drivers/hwmon/Kconfig | 6 +-- drivers/hwmon/f71882fg.c | 83 ++++-------------------------------- 3 files changed, 11 insertions(+), 82 deletions(-) diff --git a/Documentation/hwmon/f71882fg b/Documentation/hwmon/f71882fg index 1a07fd674cd0..a7952c2bd959 100644 --- a/Documentation/hwmon/f71882fg +++ b/Documentation/hwmon/f71882fg @@ -2,10 +2,6 @@ Kernel driver f71882fg ====================== Supported chips: - * Fintek F71808E - Prefix: 'f71808fg' - Addresses scanned: none, address read from Super I/O config space - Datasheet: Not public * Fintek F71858FG Prefix: 'f71858fg' Addresses scanned: none, address read from Super I/O config space diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index 0fba82943125..4d4d09bdec0a 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -332,11 +332,11 @@ config SENSORS_F71805F will be called f71805f. config SENSORS_F71882FG - tristate "Fintek F71808E, F71858FG, F71862FG, F71882FG, F71889FG and F8000" + tristate "Fintek F71858FG, F71862FG, F71882FG, F71889FG and F8000" depends on EXPERIMENTAL help - If you say yes here you get support for hardware monitoring features - of the Fintek F71808E, F71858FG, F71862FG/71863FG, F71882FG/F71883FG, + If you say yes here you get support for hardware monitoring + features of the Fintek F71858FG, F71862FG/71863FG, F71882FG/F71883FG, F71889FG and F8000 Super-I/O chips. This driver can also be built as a module. If so, the module diff --git a/drivers/hwmon/f71882fg.c b/drivers/hwmon/f71882fg.c index 6207120dcd4d..537841ef44b9 100644 --- a/drivers/hwmon/f71882fg.c +++ b/drivers/hwmon/f71882fg.c @@ -45,7 +45,6 @@ #define SIO_REG_ADDR 0x60 /* Logical device address (2 bytes) */ #define SIO_FINTEK_ID 0x1934 /* Manufacturers ID */ -#define SIO_F71808_ID 0x0901 /* Chipset ID */ #define SIO_F71858_ID 0x0507 /* Chipset ID */ #define SIO_F71862_ID 0x0601 /* Chipset ID */ #define SIO_F71882_ID 0x0541 /* Chipset ID */ @@ -97,10 +96,9 @@ static unsigned short force_id; module_param(force_id, ushort, 0); MODULE_PARM_DESC(force_id, "Override the detected device ID"); -enum chips { f71808fg, f71858fg, f71862fg, f71882fg, f71889fg, f8000 }; +enum chips { f71858fg, f71862fg, f71882fg, f71889fg, f8000 }; static const char *f71882fg_names[] = { - "f71808fg", "f71858fg", "f71862fg", "f71882fg", @@ -308,8 +306,8 @@ static struct sensor_device_attribute_2 f71858fg_in_temp_attr[] = { SENSOR_ATTR_2(temp3_fault, S_IRUGO, show_temp_fault, NULL, 0, 2), }; -/* In attr common to the f71862fg, f71882fg and f71889fg */ -static struct sensor_device_attribute_2 fxxxx_in_attr[] = { +/* Temp and in attr common to the f71862fg, f71882fg and f71889fg */ +static struct sensor_device_attribute_2 fxxxx_in_temp_attr[] = { SENSOR_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0), SENSOR_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 0, 1), SENSOR_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 0, 2), @@ -319,22 +317,6 @@ static struct sensor_device_attribute_2 fxxxx_in_attr[] = { SENSOR_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 0, 6), SENSOR_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 0, 7), SENSOR_ATTR_2(in8_input, S_IRUGO, show_in, NULL, 0, 8), -}; - -/* In attr for the f71808fg */ -static struct sensor_device_attribute_2 f71808_in_attr[] = { - SENSOR_ATTR_2(in0_input, S_IRUGO, show_in, NULL, 0, 0), - SENSOR_ATTR_2(in1_input, S_IRUGO, show_in, NULL, 0, 1), - SENSOR_ATTR_2(in2_input, S_IRUGO, show_in, NULL, 0, 2), - SENSOR_ATTR_2(in3_input, S_IRUGO, show_in, NULL, 0, 3), - SENSOR_ATTR_2(in4_input, S_IRUGO, show_in, NULL, 0, 4), - SENSOR_ATTR_2(in5_input, S_IRUGO, show_in, NULL, 0, 5), - SENSOR_ATTR_2(in6_input, S_IRUGO, show_in, NULL, 0, 7), - SENSOR_ATTR_2(in7_input, S_IRUGO, show_in, NULL, 0, 8), -}; - -/* Temp attr common to the f71808fg, f71862fg, f71882fg and f71889fg */ -static struct sensor_device_attribute_2 fxxxx_temp_attr[] = { SENSOR_ATTR_2(temp1_input, S_IRUGO, show_temp, NULL, 0, 1), SENSOR_ATTR_2(temp1_max, S_IRUGO|S_IWUSR, show_temp_max, store_temp_max, 0, 1), @@ -373,10 +355,6 @@ static struct sensor_device_attribute_2 fxxxx_temp_attr[] = { store_temp_beep, 0, 6), SENSOR_ATTR_2(temp2_type, S_IRUGO, show_temp_type, NULL, 0, 2), SENSOR_ATTR_2(temp2_fault, S_IRUGO, show_temp_fault, NULL, 0, 2), -}; - -/* Temp and in attr common to the f71862fg, f71882fg and f71889fg */ -static struct sensor_device_attribute_2 f71862_temp_attr[] = { SENSOR_ATTR_2(temp3_input, S_IRUGO, show_temp, NULL, 0, 3), SENSOR_ATTR_2(temp3_max, S_IRUGO|S_IWUSR, show_temp_max, store_temp_max, 0, 3), @@ -1011,11 +989,6 @@ static struct f71882fg_data *f71882fg_update_device(struct device *dev) data->temp_type[1] = 6; break; } - } else if (data->type == f71808fg) { - reg = f71882fg_read8(data, F71882FG_REG_TEMP_TYPE); - data->temp_type[1] = (reg & 0x02) ? 2 : 4; - data->temp_type[2] = (reg & 0x04) ? 2 : 4; - } else { reg2 = f71882fg_read8(data, F71882FG_REG_PECI); if ((reg2 & 0x03) == 0x01) @@ -1898,8 +1871,7 @@ static ssize_t store_pwm_auto_point_temp(struct device *dev, val /= 1000; - if (data->type == f71889fg - || data->type == f71808fg) + if (data->type == f71889fg) val = SENSORS_LIMIT(val, -128, 127); else val = SENSORS_LIMIT(val, 0, 127); @@ -2002,28 +1974,8 @@ static int __devinit f71882fg_probe(struct platform_device *pdev) /* fall through! */ case f71862fg: err = f71882fg_create_sysfs_files(pdev, - f71862_temp_attr, - ARRAY_SIZE(f71862_temp_attr)); - if (err) - goto exit_unregister_sysfs; - err = f71882fg_create_sysfs_files(pdev, - fxxxx_in_attr, - ARRAY_SIZE(fxxxx_in_attr)); - if (err) - goto exit_unregister_sysfs; - err = f71882fg_create_sysfs_files(pdev, - fxxxx_temp_attr, - ARRAY_SIZE(fxxxx_temp_attr)); - break; - case f71808fg: - err = f71882fg_create_sysfs_files(pdev, - f71808_in_attr, - ARRAY_SIZE(f71808_in_attr)); - if (err) - goto exit_unregister_sysfs; - err = f71882fg_create_sysfs_files(pdev, - fxxxx_temp_attr, - ARRAY_SIZE(fxxxx_temp_attr)); + fxxxx_in_temp_attr, + ARRAY_SIZE(fxxxx_in_temp_attr)); break; case f8000: err = f71882fg_create_sysfs_files(pdev, @@ -2050,7 +2002,6 @@ static int __devinit f71882fg_probe(struct platform_device *pdev) case f71862fg: err = (data->pwm_enable & 0x15) != 0x15; break; - case f71808fg: case f71882fg: case f71889fg: err = 0; @@ -2096,7 +2047,6 @@ static int __devinit f71882fg_probe(struct platform_device *pdev) f8000_auto_pwm_attr, ARRAY_SIZE(f8000_auto_pwm_attr)); break; - case f71808fg: case f71889fg: for (i = 0; i < nr_fans; i++) { data->pwm_auto_point_mapping[i] = @@ -2176,22 +2126,8 @@ static int f71882fg_remove(struct platform_device *pdev) /* fall through! */ case f71862fg: f71882fg_remove_sysfs_files(pdev, - f71862_temp_attr, - ARRAY_SIZE(f71862_temp_attr)); - f71882fg_remove_sysfs_files(pdev, - fxxxx_in_attr, - ARRAY_SIZE(fxxxx_in_attr)); - f71882fg_remove_sysfs_files(pdev, - fxxxx_temp_attr, - ARRAY_SIZE(fxxxx_temp_attr)); - break; - case f71808fg: - f71882fg_remove_sysfs_files(pdev, - f71808_in_attr, - ARRAY_SIZE(f71808_in_attr)); - f71882fg_remove_sysfs_files(pdev, - fxxxx_temp_attr, - ARRAY_SIZE(fxxxx_temp_attr)); + fxxxx_in_temp_attr, + ARRAY_SIZE(fxxxx_in_temp_attr)); break; case f8000: f71882fg_remove_sysfs_files(pdev, @@ -2259,9 +2195,6 @@ static int __init f71882fg_find(int sioaddr, unsigned short *address, devid = force_id ? force_id : superio_inw(sioaddr, SIO_REG_DEVID); switch (devid) { - case SIO_F71808_ID: - sio_data->type = f71808fg; - break; case SIO_F71858_ID: sio_data->type = f71858fg; break; From d5ed3a4af77b851b6271ad3d9abc4c57fa3ce0f5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 19 Aug 2010 14:13:33 -0700 Subject: [PATCH 17/29] lib/radix-tree.c: fix overflow in radix_tree_range_tag_if_tagged() When radix_tree_maxindex() is ~0UL, it can happen that scanning overflows index and tree traversal code goes astray reading memory until it hits unreadable memory. Check for overflow and exit in that case. Signed-off-by: Jan Kara Cc: Christoph Hellwig Cc: Nick Piggin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 5 ++++- mm/page-writeback.c | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e907858498a6..5b7d4623f0b7 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -625,6 +625,8 @@ EXPORT_SYMBOL(radix_tree_tag_get); * * The function returns number of leaves where the tag was set and sets * *first_indexp to the first unscanned index. + * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must + * be prepared to handle that. */ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, unsigned long *first_indexp, unsigned long last_index, @@ -675,7 +677,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, next: /* Go to next item at level determined by 'shift' */ index = ((index >> shift) + 1) << shift; - if (index > last_index) + /* Overflow can happen when last_index is ~0UL... */ + if (index > last_index || !index) break; if (tagged >= nr_to_tag) break; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7262aacea8a2..c09ef5219cbe 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -836,7 +836,8 @@ void tag_pages_for_writeback(struct address_space *mapping, spin_unlock_irq(&mapping->tree_lock); WARN_ON_ONCE(tagged > WRITEBACK_TAG_BATCH); cond_resched(); - } while (tagged >= WRITEBACK_TAG_BATCH); + /* We check 'start' to handle wrapping when end == ~0UL */ + } while (tagged >= WRITEBACK_TAG_BATCH && start); } EXPORT_SYMBOL(tag_pages_for_writeback); From 944645c33e4ada914f4c045525a9293610085ccd Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Thu, 19 Aug 2010 14:13:34 -0700 Subject: [PATCH 18/29] s5pc110: SDHCI-s3c can override host capabilities Each board can override the default sdhci host capabilities. Some board has broken features by hardwares and support 8-bit bandwidth. Signed-off-by: Kyungmin Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/plat-samsung/dev-hsmmc.c | 2 ++ arch/arm/plat-samsung/dev-hsmmc1.c | 2 ++ arch/arm/plat-samsung/dev-hsmmc2.c | 2 ++ 3 files changed, 6 insertions(+) diff --git a/arch/arm/plat-samsung/dev-hsmmc.c b/arch/arm/plat-samsung/dev-hsmmc.c index b0f93f11e281..9d2be0941410 100644 --- a/arch/arm/plat-samsung/dev-hsmmc.c +++ b/arch/arm/plat-samsung/dev-hsmmc.c @@ -70,4 +70,6 @@ void s3c_sdhci0_set_platdata(struct s3c_sdhci_platdata *pd) set->cfg_gpio = pd->cfg_gpio; if (pd->cfg_card) set->cfg_card = pd->cfg_card; + if (pd->host_caps) + set->host_caps = pd->host_caps; } diff --git a/arch/arm/plat-samsung/dev-hsmmc1.c b/arch/arm/plat-samsung/dev-hsmmc1.c index 1504fd802865..a6c8295840af 100644 --- a/arch/arm/plat-samsung/dev-hsmmc1.c +++ b/arch/arm/plat-samsung/dev-hsmmc1.c @@ -70,4 +70,6 @@ void s3c_sdhci1_set_platdata(struct s3c_sdhci_platdata *pd) set->cfg_gpio = pd->cfg_gpio; if (pd->cfg_card) set->cfg_card = pd->cfg_card; + if (pd->host_caps) + set->host_caps = pd->host_caps; } diff --git a/arch/arm/plat-samsung/dev-hsmmc2.c b/arch/arm/plat-samsung/dev-hsmmc2.c index b28ef173444d..cb0d7143381a 100644 --- a/arch/arm/plat-samsung/dev-hsmmc2.c +++ b/arch/arm/plat-samsung/dev-hsmmc2.c @@ -71,4 +71,6 @@ void s3c_sdhci2_set_platdata(struct s3c_sdhci_platdata *pd) set->cfg_gpio = pd->cfg_gpio; if (pd->cfg_card) set->cfg_card = pd->cfg_card; + if (pd->host_caps) + set->host_caps = pd->host_caps; } From 930a6f70fa3b9c79a57dd6850ef9cb1efa470575 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Thu, 19 Aug 2010 14:13:35 -0700 Subject: [PATCH 19/29] s5pc110: SDHCI-s3c support on s5pc110 s5pc110 (aka s5pv210) uses the same SDHCI IP. Signed-off-by: Kyungmin Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig index 283190bc2a40..68d12794cfd9 100644 --- a/drivers/mmc/host/Kconfig +++ b/drivers/mmc/host/Kconfig @@ -132,7 +132,7 @@ config MMC_SDHCI_CNS3XXX config MMC_SDHCI_S3C tristate "SDHCI support on Samsung S3C SoC" - depends on MMC_SDHCI && (PLAT_S3C24XX || PLAT_S3C64XX) + depends on MMC_SDHCI && PLAT_SAMSUNG help This selects the Secure Digital Host Controller Interface (SDHCI) often referrered to as the HSMMC block in some of the Samsung S3C From 5193250168ccdf87364e35a11965336dc088578c Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Thu, 19 Aug 2010 14:13:35 -0700 Subject: [PATCH 20/29] sdhci: add no hi-speed bit quirk support Some SDHCI controllers like s5pc110 don't have an HISPD bit in the HOSTCTL register. Signed-off-by: Kyungmin Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/sdhci.c | 3 ++- drivers/mmc/host/sdhci.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index 785512133b50..401527d273b5 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1180,7 +1180,8 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) else ctrl &= ~SDHCI_CTRL_4BITBUS; - if (ios->timing == MMC_TIMING_SD_HS) + if (ios->timing == MMC_TIMING_SD_HS && + !(host->quirks & SDHCI_QUIRK_NO_HISPD_BIT)) ctrl |= SDHCI_CTRL_HISPD; else ctrl &= ~SDHCI_CTRL_HISPD; diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h index 036cfae76368..d316bc79b636 100644 --- a/drivers/mmc/host/sdhci.h +++ b/drivers/mmc/host/sdhci.h @@ -245,6 +245,8 @@ struct sdhci_host { #define SDHCI_QUIRK_MISSING_CAPS (1<<27) /* Controller uses Auto CMD12 command to stop the transfer */ #define SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12 (1<<28) +/* Controller doesn't have HISPD bit field in HI-SPEED SD card */ +#define SDHCI_QUIRK_NO_HISPD_BIT (1<<29) int irq; /* Device IRQ */ void __iomem * ioaddr; /* Mapped address */ From f522886e202a34a2191dd5d471b3c4d46410a9a0 Mon Sep 17 00:00:00 2001 From: Kyungmin Park Date: Thu, 19 Aug 2010 14:13:37 -0700 Subject: [PATCH 21/29] drivers/mmc/host/sdhci-s3c.c: use the correct mutex and card detect function There's some merge problem between sdhic core and sdhci-s3c host. After mutex is changed to spinlock. It needs to use use spin lock functions and use the correct card detection function. Signed-off-by: Kyungmin Park Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/host/sdhci-s3c.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c index 0a7f2614c6f0..71ad4163b95e 100644 --- a/drivers/mmc/host/sdhci-s3c.c +++ b/drivers/mmc/host/sdhci-s3c.c @@ -242,7 +242,7 @@ static void sdhci_s3c_notify_change(struct platform_device *dev, int state) { struct sdhci_host *host = platform_get_drvdata(dev); if (host) { - mutex_lock(&host->lock); + spin_lock(&host->lock); if (state) { dev_dbg(&dev->dev, "card inserted.\n"); host->flags &= ~SDHCI_DEVICE_DEAD; @@ -252,8 +252,8 @@ static void sdhci_s3c_notify_change(struct platform_device *dev, int state) host->flags |= SDHCI_DEVICE_DEAD; host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION; } - sdhci_card_detect(host); - mutex_unlock(&host->lock); + tasklet_schedule(&host->card_tasklet); + spin_unlock(&host->lock); } } From be71cf2202971e50ce4953d473649c724799eb8a Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 19 Aug 2010 14:13:38 -0700 Subject: [PATCH 22/29] oom: fix NULL pointer dereference Commit b940fd7035 ("oom: remove unnecessary code and cleanup") added an unnecessary NULL pointer dereference. remove it. Signed-off-by: KOSAKI Motohiro Reviewed-by: Minchan Kim Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 5014e50644d1..17d48a67e7b7 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -401,10 +401,9 @@ static void dump_header(struct task_struct *p, gfp_t gfp_mask, int order, static int oom_kill_task(struct task_struct *p, struct mem_cgroup *mem) { p = find_lock_task_mm(p); - if (!p) { - task_unlock(p); + if (!p) return 1; - } + pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", task_pid_nr(p), p->comm, K(p->mm->total_vm), K(get_mm_counter(p->mm, MM_ANONPAGES)), From b52723c5607f7684c2c0c075f86f86da0d7fb6d0 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 19 Aug 2010 14:13:39 -0700 Subject: [PATCH 23/29] oom: fix tasklist_lock leak Commit 0aad4b3124 ("oom: fold __out_of_memory into out_of_memory") introduced a tasklist_lock leak. Then it caused following obvious danger warnings and panic. ================================================ [ BUG: lock held when returning to user space! ] ------------------------------------------------ rsyslogd/1422 is leaving the kernel with locks still held! 1 lock held by rsyslogd/1422: #0: (tasklist_lock){.+.+.+}, at: [] out_of_memory+0x164/0x3f0 BUG: scheduling while atomic: rsyslogd/1422/0x00000002 INFO: lockdep is turned off. This patch fixes it. Signed-off-by: KOSAKI Motohiro Reviewed-by: Minchan Kim Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 17d48a67e7b7..c48c5ef3ccfd 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -646,6 +646,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, unsigned long freed = 0; unsigned int points; enum oom_constraint constraint = CONSTRAINT_NONE; + int killed = 0; blocking_notifier_call_chain(&oom_notify_list, 0, &freed); if (freed > 0) @@ -683,7 +684,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, if (!oom_kill_process(current, gfp_mask, order, 0, totalpages, NULL, nodemask, "Out of memory (oom_kill_allocating_task)")) - return; + goto out; } retry: @@ -691,7 +692,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, constraint == CONSTRAINT_MEMORY_POLICY ? nodemask : NULL); if (PTR_ERR(p) == -1UL) - return; + goto out; /* Found nothing?!?! Either we hang forever, or we panic. */ if (!p) { @@ -703,13 +704,15 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, if (oom_kill_process(p, gfp_mask, order, points, totalpages, NULL, nodemask, "Out of memory")) goto retry; + killed = 1; +out: read_unlock(&tasklist_lock); /* * Give "p" a good chance of killing itself before we * retry to allocate memory unless "p" is current */ - if (!test_thread_flag(TIF_MEMDIE)) + if (killed && !test_thread_flag(TIF_MEMDIE)) schedule_timeout_uninterruptible(1); } From 8d6c83f0ba5e1bd1e8bb2e3c7de4c276dc247f99 Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Thu, 19 Aug 2010 14:13:39 -0700 Subject: [PATCH 24/29] oom: __task_cred() need rcu_read_lock() dump_tasks() needs to hold the RCU read lock around its access of the target task's UID. To this end it should use task_uid() as it only needs that one thing from the creds. The fact that dump_tasks() holds tasklist_lock is insufficient to prevent the target process replacing its credentials on another CPU. Then, this patch change to call rcu_read_lock() explicitly. =================================================== [ INFO: suspicious rcu_dereference_check() usage. ] --------------------------------------------------- mm/oom_kill.c:410 invoked rcu_dereference_check() without protection! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 4 locks held by kworker/1:2/651: #0: (events){+.+.+.}, at: [] process_one_work+0x137/0x4a0 #1: (moom_work){+.+...}, at: [] process_one_work+0x137/0x4a0 #2: (tasklist_lock){.+.+..}, at: [] out_of_memory+0x164/0x3f0 #3: (&(&p->alloc_lock)->rlock){+.+...}, at: [] find_lock_task_mm+0x2e/0x70 Signed-off-by: KOSAKI Motohiro Signed-off-by: David Howells Acked-by: Paul E. McKenney Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c48c5ef3ccfd..fc81cb22869e 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -372,7 +372,7 @@ static void dump_tasks(const struct mem_cgroup *mem) } pr_info("[%5d] %5d %5d %8lu %8lu %3u %3d %5d %s\n", - task->pid, __task_cred(task)->uid, task->tgid, + task->pid, task_uid(task), task->tgid, task->mm->total_vm, get_mm_rss(task->mm), task_cpu(task), task->signal->oom_adj, task->signal->oom_score_adj, task->comm); From f3c072ad727617f00bffae17dba8cd895f5b7bdb Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 19 Aug 2010 14:13:40 -0700 Subject: [PATCH 25/29] uml: fix compile error in dma_get_cache_alignment() Fix uml compile error: include/linux/dma-mapping.h:145: error: redefinition of 'dma_get_cache_alignment' arch/um/include/asm/dma-mapping.h:99: note: previous definition of 'dma_get_cache_alignment' was here Introduced by commit 4565f0170dfc ("dma-mapping: unify dma_get_cache_alignment implementations") Signed-off-by: Miklos Szeredi Cc: Jeff Dike Cc: FUJITA Tomonori Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/include/asm/dma-mapping.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/um/include/asm/dma-mapping.h b/arch/um/include/asm/dma-mapping.h index 17a2cb5a4178..1f469e80fdd3 100644 --- a/arch/um/include/asm/dma-mapping.h +++ b/arch/um/include/asm/dma-mapping.h @@ -95,13 +95,6 @@ dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, #define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) #define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) -static inline int -dma_get_cache_alignment(void) -{ - BUG(); - return(0); -} - static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) From 626115cda9a31d7618cfd5ca8928811e5947d360 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 19 Aug 2010 14:13:42 -0700 Subject: [PATCH 26/29] drivers/scsi/qla4xxx: fix build gcc-4.0.2: drivers/scsi/qla4xxx/ql4_os.c: In function 'qla4_8xxx_error_recovery': drivers/scsi/qla4xxx/ql4_glbl.h:135: sorry, unimplemented: inlining failed in call to 'qla4_8xxx_set_drv_active': function body not available drivers/scsi/qla4xxx/ql4_os.c:2377: sorry, unimplemented: called from here drivers/scsi/qla4xxx/ql4_glbl.h:135: sorry, unimplemented: inlining failed in call to 'qla4_8xxx_set_drv_active': function body not available drivers/scsi/qla4xxx/ql4_os.c:2393: sorry, unimplemented: called from here Cc: Ravi Anand Cc: Vikas Chaudhary Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/scsi/qla4xxx/ql4_glbl.h | 2 +- drivers/scsi/qla4xxx/ql4_nx.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/qla4xxx/ql4_glbl.h b/drivers/scsi/qla4xxx/ql4_glbl.h index f065204e401b..95a26fb1626c 100644 --- a/drivers/scsi/qla4xxx/ql4_glbl.h +++ b/drivers/scsi/qla4xxx/ql4_glbl.h @@ -132,7 +132,7 @@ void qla4_8xxx_idc_unlock(struct scsi_qla_host *ha); int qla4_8xxx_device_state_handler(struct scsi_qla_host *ha); void qla4_8xxx_need_qsnt_handler(struct scsi_qla_host *ha); void qla4_8xxx_clear_drv_active(struct scsi_qla_host *ha); -inline void qla4_8xxx_set_drv_active(struct scsi_qla_host *ha); +void qla4_8xxx_set_drv_active(struct scsi_qla_host *ha); extern int ql4xextended_error_logging; extern int ql4xdiscoverywait; diff --git a/drivers/scsi/qla4xxx/ql4_nx.c b/drivers/scsi/qla4xxx/ql4_nx.c index e031a734836e..5d4a3822382d 100644 --- a/drivers/scsi/qla4xxx/ql4_nx.c +++ b/drivers/scsi/qla4xxx/ql4_nx.c @@ -1418,7 +1418,7 @@ static int qla4_8xxx_rcvpeg_ready(struct scsi_qla_host *ha) return QLA_SUCCESS; } -inline void +void qla4_8xxx_set_drv_active(struct scsi_qla_host *ha) { uint32_t drv_active; From 1ee41680572971e34d90d5f584daf33195ec6dcb Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Thu, 19 Aug 2010 14:13:43 -0700 Subject: [PATCH 27/29] Documentation: kernel-locking: mutex_trylock cannot be used in interrupt context Chapter 6 is right about mutex_trylock, but chapter 10 wasn't. This error was introduced during semaphore-to-mutex conversion of the Unreliable guide. :-) If user context which performs mutex_lock() or mutex_trylock() is preempted by interrupt context which performs mutex_trylock() on the same mutex instance, a deadlock occurs. This is because these functions do not disable local IRQs when they operate on mutex->wait_lock. Signed-off-by: Stefan Richter Acked-by: Rusty Russell Cc: Matthew Wilcox Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DocBook/kernel-locking.tmpl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 084f6ad7b7a0..0b1a3f97f285 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1922,9 +1922,12 @@ machines due to caching. mutex_lock() - There is a mutex_trylock() which can be - used inside interrupt context, as it will not sleep. + There is a mutex_trylock() which does not + sleep. Still, it must not be used inside interrupt context since + its implementation is not safe for that. mutex_unlock() will also never sleep. + It cannot be used in interrupt context either since a mutex + must be released by the same task that acquired it. From a4724ed6f084fd294b001103a68c3196eb60364c Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 Aug 2010 19:52:45 +1000 Subject: [PATCH 28/29] MAINTAINERS: Fix ozlabs.org mailing list addresses All these lists moved to lists.ozlabs.org quite a while ago. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- MAINTAINERS | 56 ++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index b5b8baa1d70e..433f35385756 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -456,7 +456,7 @@ F: drivers/infiniband/hw/amso1100/ AOA (Apple Onboard Audio) ALSA DRIVER M: Johannes Berg -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org L: alsa-devel@alsa-project.org (moderated for non-subscribers) S: Maintained F: sound/aoa/ @@ -1472,8 +1472,8 @@ F: include/linux/can/platform/ CELL BROADBAND ENGINE ARCHITECTURE M: Arnd Bergmann -L: linuxppc-dev@ozlabs.org -L: cbe-oss-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org +L: cbe-oss-dev@lists.ozlabs.org W: http://www.ibm.com/developerworks/power/cell/ S: Supported F: arch/powerpc/include/asm/cell*.h @@ -2371,13 +2371,13 @@ F: include/linux/fb.h FREESCALE DMA DRIVER M: Li Yang M: Zhang Wei -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/dma/fsldma.* FREESCALE I2C CPM DRIVER M: Jochen Friedrich -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/busses/i2c-cpm.c @@ -2393,7 +2393,7 @@ F: drivers/video/imxfb.c FREESCALE SOC FS_ENET DRIVER M: Pantelis Antoniou M: Vitaly Bordug -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org L: netdev@vger.kernel.org S: Maintained F: drivers/net/fs_enet/ @@ -2401,7 +2401,7 @@ F: include/linux/fs_enet_pd.h FREESCALE QUICC ENGINE LIBRARY M: Timur Tabi -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Supported F: arch/powerpc/sysdev/qe_lib/ F: arch/powerpc/include/asm/*qe.h @@ -2409,27 +2409,27 @@ F: arch/powerpc/include/asm/*qe.h FREESCALE USB PERIPHERAL DRIVERS M: Li Yang L: linux-usb@vger.kernel.org -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/usb/gadget/fsl* FREESCALE QUICC ENGINE UCC ETHERNET DRIVER M: Li Yang L: netdev@vger.kernel.org -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: drivers/net/ucc_geth* FREESCALE QUICC ENGINE UCC UART DRIVER M: Timur Tabi -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Supported F: drivers/serial/ucc_uart.c FREESCALE SOC SOUND DRIVERS M: Timur Tabi L: alsa-devel@alsa-project.org (moderated for non-subscribers) -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Supported F: sound/soc/fsl/fsl* F: sound/soc/fsl/mpc8610_hpcd.c @@ -2564,7 +2564,7 @@ F: mm/memory-failure.c F: mm/hwpoison-inject.c HYPERVISOR VIRTUAL CONSOLE DRIVER -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Odd Fixes F: drivers/char/hvc_* @@ -3476,7 +3476,7 @@ F: drivers/usb/misc/legousbtower.c LGUEST M: Rusty Russell -L: lguest@ozlabs.org +L: lguest@lists.ozlabs.org W: http://lguest.ozlabs.org/ S: Maintained F: Documentation/lguest/ @@ -3495,7 +3495,7 @@ LINUX FOR POWERPC (32-BIT AND 64-BIT) M: Benjamin Herrenschmidt M: Paul Mackerras W: http://www.penguinppc.org/ -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git S: Supported @@ -3505,14 +3505,14 @@ F: arch/powerpc/ LINUX FOR POWER MACINTOSH M: Benjamin Herrenschmidt W: http://www.penguinppc.org/ -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/powermac/ F: drivers/macintosh/ LINUX FOR POWERPC EMBEDDED MPC5XXX M: Grant Likely -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org T: git git://git.secretlab.ca/git/linux-2.6.git S: Maintained F: arch/powerpc/platforms/512x/ @@ -3522,7 +3522,7 @@ LINUX FOR POWERPC EMBEDDED PPC4XX M: Josh Boyer M: Matt Porter W: http://www.penguinppc.org/ -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/jwboyer/powerpc-4xx.git S: Maintained F: arch/powerpc/platforms/40x/ @@ -3531,7 +3531,7 @@ F: arch/powerpc/platforms/44x/ LINUX FOR POWERPC EMBEDDED XILINX VIRTEX M: Grant Likely W: http://wiki.secretlab.ca/index.php/Linux_on_Xilinx_Virtex -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org T: git git://git.secretlab.ca/git/linux-2.6.git S: Maintained F: arch/powerpc/*/*virtex* @@ -3541,20 +3541,20 @@ LINUX FOR POWERPC EMBEDDED PPC8XX M: Vitaly Bordug M: Marcelo Tosatti W: http://www.penguinppc.org/ -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/8xx/ LINUX FOR POWERPC EMBEDDED PPC83XX AND PPC85XX M: Kumar Gala W: http://www.penguinppc.org/ -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/83xx/ LINUX FOR POWERPC PA SEMI PWRFICIENT M: Olof Johansson -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/platforms/pasemi/ F: drivers/*/*pasemi* @@ -4601,14 +4601,14 @@ F: drivers/ata/sata_promise.* PS3 NETWORK SUPPORT M: Geoff Levand L: netdev@vger.kernel.org -L: cbe-oss-dev@ozlabs.org +L: cbe-oss-dev@lists.ozlabs.org S: Maintained F: drivers/net/ps3_gelic_net.* PS3 PLATFORM SUPPORT M: Geoff Levand -L: linuxppc-dev@ozlabs.org -L: cbe-oss-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org +L: cbe-oss-dev@lists.ozlabs.org S: Maintained F: arch/powerpc/boot/ps3* F: arch/powerpc/include/asm/lv1call.h @@ -4622,7 +4622,7 @@ F: sound/ppc/snd_ps3* PS3VRAM DRIVER M: Jim Paris -L: cbe-oss-dev@ozlabs.org +L: cbe-oss-dev@lists.ozlabs.org S: Maintained F: drivers/block/ps3vram.c @@ -5068,7 +5068,7 @@ F: drivers/mmc/host/sdhci.* SECURE DIGITAL HOST CONTROLLER INTERFACE, OPEN FIRMWARE BINDINGS (SDHCI-OF) M: Anton Vorontsov -L: linuxppc-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org L: linux-mmc@vger.kernel.org S: Maintained F: drivers/mmc/host/sdhci-of.* @@ -5485,8 +5485,8 @@ F: drivers/net/spider_net* SPU FILE SYSTEM M: Jeremy Kerr -L: linuxppc-dev@ozlabs.org -L: cbe-oss-dev@ozlabs.org +L: linuxppc-dev@lists.ozlabs.org +L: cbe-oss-dev@lists.ozlabs.org W: http://www.ibm.com/developerworks/power/cell/ S: Supported F: Documentation/filesystems/spufs.txt From f6143a9b732977859cb6e04c604d04976aa5ccbd Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Fri, 20 Aug 2010 19:56:31 +1000 Subject: [PATCH 29/29] Documentation: fix ozlabs.org mailing list address This list moved to lists.ozlabs.org quite some time ago. Signed-off-by: Stephen Rothwell Signed-off-by: Linus Torvalds --- Documentation/powerpc/hvcs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/powerpc/hvcs.txt b/Documentation/powerpc/hvcs.txt index f93462c5db25..6d8be3468d7d 100644 --- a/Documentation/powerpc/hvcs.txt +++ b/Documentation/powerpc/hvcs.txt @@ -560,7 +560,7 @@ The proper channel for reporting bugs is either through the Linux OS distribution company that provided your OS or by posting issues to the PowerPC development mailing list at: -linuxppc-dev@ozlabs.org +linuxppc-dev@lists.ozlabs.org This request is to provide a documented and searchable public exchange of the problems and solutions surrounding this driver for the benefit of