From d9b21cff7475d3a7e1aa7289c0ae4950551e8cef Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 9 Mar 2009 12:40:40 -0400 Subject: [PATCH] --- yaml --- r: 140906 b: refs/heads/master c: 7cf49427042400d40bdc80b5c3399b6b5945afa8 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Documentation/kernel-parameters.txt | 2 - trunk/Documentation/tracepoints.txt | 8 +- trunk/arch/blackfin/include/asm/percpu.h | 10 + trunk/arch/x86/include/asm/linkage.h | 16 +- trunk/arch/x86/kernel/alternative.c | 4 +- trunk/arch/x86/kernel/cpu/amd.c | 52 -- trunk/arch/x86/kernel/cpu/intel.c | 25 - trunk/arch/x86/kernel/setup_percpu.c | 73 +-- trunk/arch/x86/kernel/smpboot.c | 78 +++ trunk/arch/x86/kernel/tlb_uv.c | 2 + trunk/arch/x86/mm/init.c | 2 +- trunk/arch/x86/mm/init_32.c | 23 +- trunk/arch/x86/mm/init_64.c | 8 +- trunk/arch/x86/mm/ioremap.c | 21 +- trunk/arch/x86/mm/kmmio.c | 15 +- trunk/arch/x86/mm/memtest.c | 3 - trunk/include/linux/percpu.h | 60 +- trunk/include/linux/tracepoint.h | 10 +- trunk/include/trace/block.h | 70 +-- trunk/include/trace/irq_event_types.h | 46 +- trunk/include/trace/lockdep_event_types.h | 26 +- trunk/include/trace/power.h | 12 +- trunk/include/trace/sched_event_types.h | 442 ++++---------- trunk/include/trace/workqueue.h | 16 +- trunk/kernel/module.c | 2 +- trunk/kernel/trace/events.c | 4 +- trunk/kernel/trace/trace.h | 5 + trunk/kernel/trace/trace_clock.c | 9 +- trunk/kernel/trace/trace_event_types.h | 29 +- trunk/kernel/trace/trace_events.c | 174 +++++- trunk/kernel/trace/trace_events_stage_1.h | 27 +- trunk/kernel/trace/trace_events_stage_2.h | 91 +-- trunk/kernel/trace/trace_events_stage_3.h | 42 +- trunk/kernel/trace/trace_export.c | 41 +- trunk/kernel/trace/trace_format.h | 55 ++ trunk/kernel/trace/trace_functions_graph.c | 2 +- trunk/kernel/trace/trace_selftest.c | 6 +- trunk/kernel/trace/trace_workqueue.c | 14 +- trunk/lib/vsprintf.c | 13 +- trunk/mm/percpu.c | 593 ++++++------------- trunk/samples/tracepoints/tp-samples-trace.h | 8 +- 42 files changed, 909 insertions(+), 1232 deletions(-) create mode 100644 trunk/kernel/trace/trace_format.h diff --git a/[refs] b/[refs] index 7923bfbf5af0..1fc69250694e 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 6cc3c6e12bb039047974ad2e7e2d46d15a1b762f +refs/heads/master: 7cf49427042400d40bdc80b5c3399b6b5945afa8 diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 7643483bdd6a..454f42b21f16 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -2344,8 +2344,6 @@ and is between 256 and 4096 characters. It is defined in the file tp720= [HW,PS2] - trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size. - trix= [HW,OSS] MediaTrix AudioTrix Pro Format: ,,,,,,,, diff --git a/trunk/Documentation/tracepoints.txt b/trunk/Documentation/tracepoints.txt index 4ff43c6de299..6f0a044f5b5e 100644 --- a/trunk/Documentation/tracepoints.txt +++ b/trunk/Documentation/tracepoints.txt @@ -45,8 +45,8 @@ In include/trace/subsys.h : #include DECLARE_TRACE(subsys_eventname, - TP_PROTO(int firstarg, struct task_struct *p), - TP_ARGS(firstarg, p)); + TPPROTO(int firstarg, struct task_struct *p), + TPARGS(firstarg, p)); In subsys/file.c (where the tracing statement must be added) : @@ -66,10 +66,10 @@ Where : - subsys is the name of your subsystem. - eventname is the name of the event to trace. 
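Putting those pieces together (subsys, eventname and firstarg are this document's placeholders, not real kernel symbols), a complete declaration, definition, use and probe in the TPPROTO/TPARGS spelling restored by this patch looks like:

/* include/trace/subsys.h */
#include <linux/tracepoint.h>

DECLARE_TRACE(subsys_eventname,
	TPPROTO(int firstarg, struct task_struct *p),
	TPARGS(firstarg, p));

/* subsys/file.c */
#include <trace/subsys.h>

DEFINE_TRACE(subsys_eventname);

void somefct(void)
{
	/* compiles to a near-noop until a probe is registered */
	trace_subsys_eventname(42, current);
}

/* a probe uses the same prototype and runs with preemption disabled */
static void probe_eventname(int firstarg, struct task_struct *p)
{
}
/* attach/detach at runtime:
 *	register_trace_subsys_eventname(probe_eventname);
 *	unregister_trace_subsys_eventname(probe_eventname);
 */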
-- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the +- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the function called by this tracepoint. -- TP_ARGS(firstarg, p) are the parameters names, same as found in the +- TPARGS(firstarg, p) are the parameters names, same as found in the prototype. Connecting a function (probe) to a tracepoint is done by providing a diff --git a/trunk/arch/blackfin/include/asm/percpu.h b/trunk/arch/blackfin/include/asm/percpu.h index c94c7bc88c71..797c0c165069 100644 --- a/trunk/arch/blackfin/include/asm/percpu.h +++ b/trunk/arch/blackfin/include/asm/percpu.h @@ -3,4 +3,14 @@ #include +#ifdef CONFIG_MODULES +#define PERCPU_MODULE_RESERVE 8192 +#else +#define PERCPU_MODULE_RESERVE 0 +#endif + +#define PERCPU_ENOUGH_ROOM \ + (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ + PERCPU_MODULE_RESERVE) + #endif /* __ARCH_BLACKFIN_PERCPU__ */ diff --git a/trunk/arch/x86/include/asm/linkage.h b/trunk/arch/x86/include/asm/linkage.h index a0d70b46c27c..9320e2a8a26a 100644 --- a/trunk/arch/x86/include/asm/linkage.h +++ b/trunk/arch/x86/include/asm/linkage.h @@ -4,6 +4,11 @@ #undef notrace #define notrace __attribute__((no_instrument_function)) +#ifdef CONFIG_X86_64 +#define __ALIGN .p2align 4,,15 +#define __ALIGN_STR ".p2align 4,,15" +#endif + #ifdef CONFIG_X86_32 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) /* @@ -45,25 +50,16 @@ __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ "g" (arg4), "g" (arg5), "g" (arg6)) -#endif /* CONFIG_X86_32 */ - -#ifdef __ASSEMBLY__ +#endif #define GLOBAL(name) \ .globl name; \ name: -#ifdef CONFIG_X86_64 -#define __ALIGN .p2align 4,,15 -#define __ALIGN_STR ".p2align 4,,15" -#endif - #ifdef CONFIG_X86_ALIGNMENT_16 #define __ALIGN .align 16,0x90 #define __ALIGN_STR ".align 16,0x90" #endif -#endif /* __ASSEMBLY__ */ - #endif /* _ASM_X86_LINKAGE_H */ diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c index 2d903b760ddb..f57658702571 100644 --- a/trunk/arch/x86/kernel/alternative.c +++ b/trunk/arch/x86/kernel/alternative.c @@ -526,13 +526,12 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) pages[1] = virt_to_page(addr + PAGE_SIZE); } BUG_ON(!pages[0]); + local_irq_save(flags); set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); if (pages[1]) set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); - local_irq_save(flags); memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); clear_fixmap(FIX_TEXT_POKE0); if (pages[1]) clear_fixmap(FIX_TEXT_POKE1); @@ -542,5 +541,6 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) that causes hangs on some VIA CPUs. */ for (i = 0; i < len; i++) BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); + local_irq_restore(flags); return addr; } diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c index f47df59016c5..25423a5b80ed 100644 --- a/trunk/arch/x86/kernel/cpu/amd.c +++ b/trunk/arch/x86/kernel/cpu/amd.c @@ -5,7 +5,6 @@ #include #include #include -#include #ifdef CONFIG_X86_64 # include @@ -142,55 +141,6 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) } } -static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - /* calling is from identify_secondary_cpu() ? 
*/ - if (c->cpu_index == boot_cpu_id) - return; - - /* - * Certain Athlons might work (for various values of 'work') in SMP - * but they are not certified as MP capable. - */ - /* Athlon 660/661 is valid. */ - if ((c->x86_model == 6) && ((c->x86_mask == 0) || - (c->x86_mask == 1))) - goto valid_k7; - - /* Duron 670 is valid */ - if ((c->x86_model == 7) && (c->x86_mask == 0)) - goto valid_k7; - - /* - * Athlon 662, Duron 671, and Athlon >model 7 have capability - * bit. It's worth noting that the A5 stepping (662) of some - * Athlon XP's have the MP bit set. - * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for - * more. - */ - if (((c->x86_model == 6) && (c->x86_mask >= 2)) || - ((c->x86_model == 7) && (c->x86_mask >= 1)) || - (c->x86_model > 7)) - if (cpu_has_mp) - goto valid_k7; - - /* If we get here, not a certified SMP capable AMD system. */ - - /* - * Don't taint if we are running SMP kernel on a single non-MP - * approved Athlon - */ - WARN_ONCE(1, "WARNING: This combination of AMD" - "processors is not suitable for SMP.\n"); - if (!test_taint(TAINT_UNSAFE_SMP)) - add_taint(TAINT_UNSAFE_SMP); - -valid_k7: - ; -#endif -} - static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) { u32 l, h; @@ -225,8 +175,6 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) } set_cpu_cap(c, X86_FEATURE_K7); - - amd_k7_smp_check(c); } #endif diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c index c1c04bf0df77..1a89a2b68d15 100644 --- a/trunk/arch/x86/kernel/cpu/intel.c +++ b/trunk/arch/x86/kernel/cpu/intel.c @@ -14,7 +14,6 @@ #include #include #include -#include #ifdef CONFIG_X86_64 #include @@ -117,28 +116,6 @@ static void __cpuinit trap_init_f00f_bug(void) } #endif -static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) -{ -#ifdef CONFIG_SMP - /* calling is from identify_secondary_cpu() ? */ - if (c->cpu_index == boot_cpu_id) - return; - - /* - * Mask B, Pentium, but not Pentium MMX - */ - if (c->x86 == 5 && - c->x86_mask >= 1 && c->x86_mask <= 4 && - c->x86_model <= 3) { - /* - * Remember we have B step Pentia with bugs - */ - WARN_ONCE(1, "WARNING: SMP operation may be unreliable" - "with B stepping processors.\n"); - } -#endif -} - static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -215,8 +192,6 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_NUMAQ numaq_tsc_disable(); #endif - - intel_smp_check(c); } #else static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) diff --git a/trunk/arch/x86/kernel/setup_percpu.c b/trunk/arch/x86/kernel/setup_percpu.c index efa615f2bf43..c29f301d3885 100644 --- a/trunk/arch/x86/kernel/setup_percpu.c +++ b/trunk/arch/x86/kernel/setup_percpu.c @@ -42,19 +42,6 @@ unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { }; EXPORT_SYMBOL(__per_cpu_offset); -/* - * On x86_64 symbols referenced from code should be reachable using - * 32bit relocations. Reserve space for static percpu variables in - * modules so that they are always served from the first chunk which - * is located at the percpu segment base. On x86_32, anything can - * address anywhere. No need to reserve space in the first chunk. 
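The reserve being dropped here existed so that a module's static per-CPU variables land in the first chunk, close to the per-CPU segment base. A minimal sketch of the kind of module data this concerns (variable and function names are made up):

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, my_hits);

static void count_hit(void)
{
	/* &my_hits must be reachable with a 32-bit relocation on
	 * x86_64, hence the (removed) first-chunk module reserve */
	get_cpu_var(my_hits)++;
	put_cpu_var(my_hits);
}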
- */ -#ifdef CONFIG_X86_64 -#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE -#else -#define PERCPU_FIRST_CHUNK_RESERVE 0 -#endif - /** * pcpu_need_numa - determine percpu allocation needs to consider NUMA * @@ -154,7 +141,7 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) { static struct vm_struct vm; pg_data_t *last; - size_t ptrs_size, dyn_size; + size_t ptrs_size; unsigned int cpu; ssize_t ret; @@ -182,14 +169,12 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Currently supports only single page. Supporting multiple * pages won't be too difficult if it ever becomes necessary. */ - pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); + pcpur_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); if (pcpur_size > PMD_SIZE) { pr_warning("PERCPU: static data is larger than large page, " "can't use large page\n"); return -EINVAL; } - dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; /* allocate pointer array and alloc large pages */ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); @@ -232,9 +217,8 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) pr_info("PERCPU: Remapped at %p with large pages, static data " "%zu bytes\n", vm.addr, static_size); - ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, - PMD_SIZE, dyn_size, vm.addr, NULL); + ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, PMD_SIZE, + pcpur_size - static_size, vm.addr, NULL); goto out_free_ar; enomem: @@ -257,31 +241,24 @@ static ssize_t __init setup_pcpu_remap(size_t static_size) * Embedding allocator * * The first chunk is sized to just contain the static area plus - * module and dynamic reserves, and allocated as a contiguous area - * using bootmem allocator and used as-is without being mapped into - * vmalloc area. This enables the first chunk to piggy back on the - * linear physical PMD mapping and doesn't add any additional pressure - * to TLB. Note that if the needed size is smaller than the minimum - * unit size, the leftover is returned to the bootmem allocator. + * PERCPU_DYNAMIC_RESERVE and allocated as a contiguous area using + * bootmem allocator and used as-is without being mapped into vmalloc + * area. This enables the first chunk to piggy back on the linear + * physical PMD mapping and doesn't add any additional pressure to + * TLB. 
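A worked example of the sizing the embed path now performs, assuming 4k pages, 44k of static per-CPU data and the 64-bit modular PERCPU_DYNAMIC_RESERVE of 6 pages: PFN_ALIGN(44k + 24k) = 68k, already above the 64k PCPU_MIN_UNIT_SIZE floor, so no padding is added. The helper name below is hypothetical:

static size_t pcpue_calc_unit_size(size_t static_size)
{
	size_t unit = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE);

	/* e.g. PFN_ALIGN(44k + 24k) = 68k, above the 64k floor */
	return max_t(size_t, unit, PCPU_MIN_UNIT_SIZE);
}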
*/ static void *pcpue_ptr __initdata; -static size_t pcpue_size __initdata; static size_t pcpue_unit_size __initdata; static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) { - size_t off = (size_t)pageno << PAGE_SHIFT; - - if (off >= pcpue_size) - return NULL; - - return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); + return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + + ((size_t)pageno << PAGE_SHIFT)); } static ssize_t __init setup_pcpu_embed(size_t static_size) { unsigned int cpu; - size_t dyn_size; /* * If large page isn't supported, there's no benefit in doing @@ -292,32 +269,25 @@ static ssize_t __init setup_pcpu_embed(size_t static_size) return -EINVAL; /* allocate and copy */ - pcpue_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + - PERCPU_DYNAMIC_RESERVE); - pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); - dyn_size = pcpue_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; - + pcpue_unit_size = PFN_ALIGN(static_size + PERCPU_DYNAMIC_RESERVE); + pcpue_unit_size = max_t(size_t, pcpue_unit_size, PCPU_MIN_UNIT_SIZE); pcpue_ptr = pcpu_alloc_bootmem(0, num_possible_cpus() * pcpue_unit_size, PAGE_SIZE); if (!pcpue_ptr) return -ENOMEM; - for_each_possible_cpu(cpu) { - void *ptr = pcpue_ptr + cpu * pcpue_unit_size; - - free_bootmem(__pa(ptr + pcpue_size), - pcpue_unit_size - pcpue_size); - memcpy(ptr, __per_cpu_load, static_size); - } + for_each_possible_cpu(cpu) + memcpy(pcpue_ptr + cpu * pcpue_unit_size, __per_cpu_load, + static_size); /* we're ready, commit */ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", - pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); + pcpue_unit_size >> PAGE_SHIFT, pcpue_ptr, static_size); return pcpu_setup_first_chunk(pcpue_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, - pcpue_unit_size, dyn_size, - pcpue_ptr, NULL); + pcpue_unit_size, + pcpue_unit_size - static_size, pcpue_ptr, + NULL); } /* @@ -374,8 +344,7 @@ static ssize_t __init setup_pcpu_4k(size_t static_size) pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", pcpu4k_nr_static_pages, static_size); - ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, - PERCPU_FIRST_CHUNK_RESERVE, -1, -1, NULL, + ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, 0, 0, NULL, pcpu4k_populate_pte); goto out_free_ar; diff --git a/trunk/arch/x86/kernel/smpboot.c b/trunk/arch/x86/kernel/smpboot.c index ef7d10170c30..249334f5080a 100644 --- a/trunk/arch/x86/kernel/smpboot.c +++ b/trunk/arch/x86/kernel/smpboot.c @@ -114,6 +114,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); atomic_t init_deasserted; + +/* Set if we find a B stepping CPU */ +static int __cpuinitdata smp_b_stepping; + #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) /* which logical CPUs are on which nodes */ @@ -267,6 +271,8 @@ static void __cpuinit smp_callin(void) cpumask_set_cpu(cpuid, cpu_callin_mask); } +static int __cpuinitdata unsafe_smp; + /* * Activate a secondary processor. */ @@ -334,6 +340,76 @@ notrace static void __cpuinit start_secondary(void *unused) cpu_idle(); } +static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) +{ + /* + * Mask B, Pentium, but not Pentium MMX + */ + if (c->x86_vendor == X86_VENDOR_INTEL && + c->x86 == 5 && + c->x86_mask >= 1 && c->x86_mask <= 4 && + c->x86_model <= 3) + /* + * Remember we have B step Pentia with bugs + */ + smp_b_stepping = 1; + + /* + * Certain Athlons might work (for various values of 'work') in SMP + * but they are not certified as MP capable. 
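Condensed, the K7 whitelist this quirk encodes is small; the helper below is a hypothetical restatement of the checks that follow (model/stepping pairs as given in the comments):

#include <linux/types.h>

static bool k7_mp_certified(unsigned int model, unsigned int stepping,
			    bool has_mp_bit)
{
	if (model == 6 && stepping <= 1)
		return true;			/* Athlon 660/661 */
	if (model == 7 && stepping == 0)
		return true;			/* Duron 670 */
	if ((model == 6 && stepping >= 2) ||
	    (model == 7 && stepping >= 1) || model > 7)
		return has_mp_bit;		/* trust the MP bit */
	return false;				/* not certified for SMP */
}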
+ */ + if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { + + if (num_possible_cpus() == 1) + goto valid_k7; + + /* Athlon 660/661 is valid. */ + if ((c->x86_model == 6) && ((c->x86_mask == 0) || + (c->x86_mask == 1))) + goto valid_k7; + + /* Duron 670 is valid */ + if ((c->x86_model == 7) && (c->x86_mask == 0)) + goto valid_k7; + + /* + * Athlon 662, Duron 671, and Athlon >model 7 have capability + * bit. It's worth noting that the A5 stepping (662) of some + * Athlon XP's have the MP bit set. + * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for + * more. + */ + if (((c->x86_model == 6) && (c->x86_mask >= 2)) || + ((c->x86_model == 7) && (c->x86_mask >= 1)) || + (c->x86_model > 7)) + if (cpu_has_mp) + goto valid_k7; + + /* If we get here, not a certified SMP capable AMD system. */ + unsafe_smp = 1; + } + +valid_k7: + ; +} + +static void __cpuinit smp_checks(void) +{ + if (smp_b_stepping) + printk(KERN_WARNING "WARNING: SMP operation may be unreliable" + "with B stepping processors.\n"); + + /* + * Don't taint if we are running SMP kernel on a single non-MP + * approved Athlon + */ + if (unsafe_smp && num_online_cpus() > 1) { + printk(KERN_INFO "WARNING: This combination of AMD" + "processors is not suitable for SMP.\n"); + add_taint(TAINT_UNSAFE_SMP); + } +} + /* * The bootstrap kernel entry code has set these up. Save them for * a given CPU @@ -347,6 +423,7 @@ void __cpuinit smp_store_cpu_info(int id) c->cpu_index = id; if (id != 0) identify_secondary_cpu(c); + smp_apply_quirks(c); } @@ -1116,6 +1193,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) pr_debug("Boot done.\n"); impress_friends(); + smp_checks(); #ifdef CONFIG_X86_IO_APIC setup_ioapic_dest(); #endif diff --git a/trunk/arch/x86/kernel/tlb_uv.c b/trunk/arch/x86/kernel/tlb_uv.c index d038b9c45cf8..f04549afcfe9 100644 --- a/trunk/arch/x86/kernel/tlb_uv.c +++ b/trunk/arch/x86/kernel/tlb_uv.c @@ -314,6 +314,8 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, int locals = 0; struct bau_desc *bau_desc; + WARN_ON(!in_atomic()); + cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); uv_cpu = uv_blade_processor_id(); diff --git a/trunk/arch/x86/mm/init.c b/trunk/arch/x86/mm/init.c index 15219e0d1243..6d63e3d1253d 100644 --- a/trunk/arch/x86/mm/init.c +++ b/trunk/arch/x86/mm/init.c @@ -134,8 +134,8 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, { unsigned long page_size_mask = 0; unsigned long start_pfn, end_pfn; - unsigned long ret = 0; unsigned long pos; + unsigned long ret; struct map_range mr[NR_RANGE_MR]; int nr_range, i; diff --git a/trunk/arch/x86/mm/init_32.c b/trunk/arch/x86/mm/init_32.c index 749559ed80f5..d7f5060ab21c 100644 --- a/trunk/arch/x86/mm/init_32.c +++ b/trunk/arch/x86/mm/init_32.c @@ -806,6 +806,11 @@ static unsigned long __init setup_node_bootmem(int nodeid, { unsigned long bootmap_size; + if (start_pfn > max_low_pfn) + return bootmap; + if (end_pfn > max_low_pfn) + end_pfn = max_low_pfn; + /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), bootmap >> PAGE_SHIFT, @@ -838,23 +843,13 @@ void __init setup_bootmem_allocator(void) max_pfn_mapped< max_low_pfn) - continue; - if (end_pfn > max_low_pfn) - end_pfn = max_low_pfn; + for_each_online_node(nodeid) + bootmap = setup_node_bootmem(nodeid, node_start_pfn[nodeid], + node_end_pfn[nodeid], bootmap); #else - start_pfn = 0; - end_pfn = max_low_pfn; + bootmap = setup_node_bootmem(0, 0, max_low_pfn, bootmap); #endif - bootmap = setup_node_bootmem(nodeid, 
start_pfn, end_pfn, - bootmap); - } after_bootmem = 1; } diff --git a/trunk/arch/x86/mm/init_64.c b/trunk/arch/x86/mm/init_64.c index 1753e8020df6..66d6be85df82 100644 --- a/trunk/arch/x86/mm/init_64.c +++ b/trunk/arch/x86/mm/init_64.c @@ -85,7 +85,7 @@ early_param("gbpages", parse_direct_gbpages_on); pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; EXPORT_SYMBOL_GPL(__supported_pte_mask); -static int disable_nx __cpuinitdata; +static int do_not_nx __cpuinitdata; /* * noexec=on|off @@ -100,9 +100,9 @@ static int __init nonx_setup(char *str) return -EINVAL; if (!strncmp(str, "on", 2)) { __supported_pte_mask |= _PAGE_NX; - disable_nx = 0; + do_not_nx = 0; } else if (!strncmp(str, "off", 3)) { - disable_nx = 1; + do_not_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } return 0; @@ -114,7 +114,7 @@ void __cpuinit check_efer(void) unsigned long efer; rdmsrl(MSR_EFER, efer); - if (!(efer & EFER_NX) || disable_nx) + if (!(efer & EFER_NX) || do_not_nx) __supported_pte_mask &= ~_PAGE_NX; } diff --git a/trunk/arch/x86/mm/ioremap.c b/trunk/arch/x86/mm/ioremap.c index aca924a30ee6..62773abdf088 100644 --- a/trunk/arch/x86/mm/ioremap.c +++ b/trunk/arch/x86/mm/ioremap.c @@ -87,8 +87,6 @@ bool __virt_addr_valid(unsigned long x) return false; if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) return false; - if (x >= FIXADDR_START) - return false; return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); } EXPORT_SYMBOL(__virt_addr_valid); @@ -506,19 +504,13 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) return &bm_pte[pte_index(addr)]; } -static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; - void __init early_ioremap_init(void) { pmd_t *pmd; - int i; if (early_ioremap_debug) printk(KERN_INFO "early_ioremap_init()\n"); - for (i = 0; i < FIX_BTMAPS_SLOTS; i++) - slot_virt[i] = fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); memset(bm_pte, 0, sizeof(bm_pte)); pmd_populate_kernel(&init_mm, pmd, bm_pte); @@ -585,7 +577,6 @@ static inline void __init early_clear_fixmap(enum fixed_addresses idx) static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; - static int __init check_early_ioremap_leak(void) { int count = 0; @@ -607,8 +598,7 @@ static int __init check_early_ioremap_leak(void) } late_initcall(check_early_ioremap_leak); -static void __init __iomem * -__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) +static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) { unsigned long offset, last_addr; unsigned int nrpages; @@ -674,9 +664,9 @@ __early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) --nrpages; } if (early_ioremap_debug) - printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); + printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); - prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); return prev_map[slot]; } @@ -744,3 +734,8 @@ void __init early_iounmap(void __iomem *addr, unsigned long size) } prev_map[slot] = NULL; } + +void __this_fixmap_does_not_exist(void) +{ + WARN_ON(1); +} diff --git a/trunk/arch/x86/mm/kmmio.c b/trunk/arch/x86/mm/kmmio.c index 6a518dd08a36..9f205030d9aa 100644 --- a/trunk/arch/x86/mm/kmmio.c +++ b/trunk/arch/x86/mm/kmmio.c @@ -451,24 +451,23 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) static void 
remove_kmmio_fault_pages(struct rcu_head *head) { - struct kmmio_delayed_release *dr = - container_of(head, struct kmmio_delayed_release, rcu); + struct kmmio_delayed_release *dr = container_of( + head, + struct kmmio_delayed_release, + rcu); struct kmmio_fault_page *p = dr->release_list; struct kmmio_fault_page **prevp = &dr->release_list; unsigned long flags; - spin_lock_irqsave(&kmmio_lock, flags); while (p) { - if (!p->count) { + if (!p->count) list_del_rcu(&p->list); - prevp = &p->release_next; - } else { + else *prevp = p->release_next; - } + prevp = &p->release_next; p = p->release_next; } spin_unlock_irqrestore(&kmmio_lock, flags); - /* This is the real RCU destroy call. */ call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); } diff --git a/trunk/arch/x86/mm/memtest.c b/trunk/arch/x86/mm/memtest.c index 605c8be06217..0bcd7883d036 100644 --- a/trunk/arch/x86/mm/memtest.c +++ b/trunk/arch/x86/mm/memtest.c @@ -100,9 +100,6 @@ static int __init parse_memtest(char *arg) { if (arg) memtest_pattern = simple_strtoul(arg, NULL, 0); - else - memtest_pattern = ARRAY_SIZE(patterns); - return 0; } diff --git a/trunk/include/linux/percpu.h b/trunk/include/linux/percpu.h index 54a968b4b924..545b068bcb70 100644 --- a/trunk/include/linux/percpu.h +++ b/trunk/include/linux/percpu.h @@ -5,7 +5,6 @@ #include /* For kmalloc() */ #include #include -#include #include @@ -53,18 +52,17 @@ #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) -/* enough to cover all DEFINE_PER_CPUs in modules */ +/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ +#ifndef PERCPU_ENOUGH_ROOM #ifdef CONFIG_MODULES -#define PERCPU_MODULE_RESERVE (8 << 10) +#define PERCPU_MODULE_RESERVE 8192 #else -#define PERCPU_MODULE_RESERVE 0 +#define PERCPU_MODULE_RESERVE 0 #endif -#ifndef PERCPU_ENOUGH_ROOM #define PERCPU_ENOUGH_ROOM \ - (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ - PERCPU_MODULE_RESERVE) -#endif + (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +#endif /* PERCPU_ENOUGH_ROOM */ /* * Must be an lvalue. Since @var must be a simple identifier, @@ -81,24 +79,35 @@ #ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA /* minimum unit size, also is the maximum supported allocation size */ -#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) +#define PCPU_MIN_UNIT_SIZE (16UL << PAGE_SHIFT) /* * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy - * back on the first chunk for dynamic percpu allocation if arch is - * manually allocating and mapping it for faster access (as a part of - * large page mapping for example). + * back on the first chunk if arch is manually allocating and mapping + * it for faster access (as a part of large page mapping for example). + * Note that dynamic percpu allocator covers both static and dynamic + * areas, so these values are bigger than PERCPU_MODULE_RESERVE. * - * The following values give between one and two pages of free space - * after typical minimal boot (2-way SMP, single disk and NIC) with - * both defconfig and a distro config on x86_64 and 32. More - * intelligent way to determine this would be nice. + * On typical configuration with modules, the following values leave + * about 8k of free space on the first chunk after boot on both x86_32 + * and 64 when module support is enabled. When module support is + * disabled, it's much tighter. 
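With 4k pages the ladder below resolves to 24k (64-bit with modules), 16k (64-bit without modules, and 32-bit with modules) and 8k (32-bit without modules). Because it is wrapped in #ifndef, an architecture can pin its own value ahead of time; a hedged sketch (the 8-page figure is invented):

/* in an arch's asm/percpu.h, before <linux/percpu.h> is included */
#define PERCPU_DYNAMIC_RESERVE	(8 << PAGE_SHIFT)	/* 32k with 4k pages */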
*/ -#if BITS_PER_LONG > 32 -#define PERCPU_DYNAMIC_RESERVE (20 << 10) -#else -#define PERCPU_DYNAMIC_RESERVE (12 << 10) -#endif +#ifndef PERCPU_DYNAMIC_RESERVE +# if BITS_PER_LONG > 32 +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (6 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# endif +# else +# ifdef CONFIG_MODULES +# define PERCPU_DYNAMIC_RESERVE (4 << PAGE_SHIFT) +# else +# define PERCPU_DYNAMIC_RESERVE (2 << PAGE_SHIFT) +# endif +# endif +#endif /* PERCPU_DYNAMIC_RESERVE */ extern void *pcpu_base_addr; @@ -106,10 +115,9 @@ typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, - pcpu_populate_pte_fn_t populate_pte_fn); + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, + pcpu_populate_pte_fn_t populate_pte_fn); /* * Use this to get to a cpu's version of the per-cpu object @@ -118,8 +126,6 @@ extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, */ #define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) -extern void *__alloc_reserved_percpu(size_t size, size_t align); - #else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ struct percpu_data { diff --git a/trunk/include/linux/tracepoint.h b/trunk/include/linux/tracepoint.h index 69b56988813d..152b2f03fb86 100644 --- a/trunk/include/linux/tracepoint.h +++ b/trunk/include/linux/tracepoint.h @@ -31,8 +31,8 @@ struct tracepoint { * Keep in sync with vmlinux.lds.h. */ -#define TP_PROTO(args...) args -#define TP_ARGS(args...) args +#define TPPROTO(args...) args +#define TPARGS(args...) 
args #ifdef CONFIG_TRACEPOINTS @@ -65,7 +65,7 @@ struct tracepoint { { \ if (unlikely(__tracepoint_##name.state)) \ __DO_TRACE(&__tracepoint_##name, \ - TP_PROTO(proto), TP_ARGS(args)); \ + TPPROTO(proto), TPARGS(args)); \ } \ static inline int register_trace_##name(void (*probe)(proto)) \ { \ @@ -157,7 +157,7 @@ static inline void tracepoint_synchronize_unregister(void) #define TRACE_FORMAT(name, proto, args, fmt) \ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) -#define TRACE_EVENT(name, proto, args, struct, print, assign) \ - DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, struct, tpfmt) \ + TRACE_FORMAT(name, PARAMS(proto), PARAMS(args), PARAMS(fmt)) #endif diff --git a/trunk/include/trace/block.h b/trunk/include/trace/block.h index 25b7068b819e..25c6a1fd5b77 100644 --- a/trunk/include/trace/block.h +++ b/trunk/include/trace/block.h @@ -5,72 +5,72 @@ #include DECLARE_TRACE(block_rq_abort, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); DECLARE_TRACE(block_rq_insert, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); DECLARE_TRACE(block_rq_issue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); DECLARE_TRACE(block_rq_requeue, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); DECLARE_TRACE(block_rq_complete, - TP_PROTO(struct request_queue *q, struct request *rq), - TP_ARGS(q, rq)); + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); DECLARE_TRACE(block_bio_bounce, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); DECLARE_TRACE(block_bio_complete, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); DECLARE_TRACE(block_bio_backmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); DECLARE_TRACE(block_bio_frontmerge, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); DECLARE_TRACE(block_bio_queue, - TP_PROTO(struct request_queue *q, struct bio *bio), - TP_ARGS(q, bio)); + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); DECLARE_TRACE(block_getrq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); + TPPROTO(struct request_queue *q, struct bio *bio, int rw), + TPARGS(q, bio, rw)); DECLARE_TRACE(block_sleeprq, - TP_PROTO(struct request_queue *q, struct bio *bio, int rw), - TP_ARGS(q, bio, rw)); + TPPROTO(struct request_queue *q, struct bio *bio, int rw), + TPARGS(q, bio, rw)); DECLARE_TRACE(block_plug, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); + TPPROTO(struct request_queue *q), + TPARGS(q)); DECLARE_TRACE(block_unplug_timer, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); + TPPROTO(struct request_queue *q), + TPARGS(q)); DECLARE_TRACE(block_unplug_io, - TP_PROTO(struct request_queue *q), - TP_ARGS(q)); + TPPROTO(struct request_queue *q), + TPARGS(q)); DECLARE_TRACE(block_split, 
- TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TP_ARGS(q, bio, pdu)); + TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), + TPARGS(q, bio, pdu)); DECLARE_TRACE(block_remap, - TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, - sector_t from, sector_t to), - TP_ARGS(q, bio, dev, from, to)); + TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from, sector_t to), + TPARGS(q, bio, dev, from, to)); #endif diff --git a/trunk/include/trace/irq_event_types.h b/trunk/include/trace/irq_event_types.h index 43bcb74dd49f..65850bc5ea06 100644 --- a/trunk/include/trace/irq_event_types.h +++ b/trunk/include/trace/irq_event_types.h @@ -8,36 +8,26 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM irq -/* - * Tracepoint for entry of interrupt handler: - */ -TRACE_FORMAT(irq_handler_entry, - TP_PROTO(int irq, struct irqaction *action), - TP_ARGS(irq, action), - TP_FMT("irq=%d handler=%s", irq, action->name) +TRACE_EVENT_FORMAT(irq_handler_entry, + TPPROTO(int irq, struct irqaction *action), + TPARGS(irq, action), + TPFMT("irq=%d handler=%s", irq, action->name), + TRACE_STRUCT( + TRACE_FIELD(int, irq, irq) + ), + TPRAWFMT("irq %d") ); -/* - * Tracepoint for return of an interrupt handler: - */ -TRACE_EVENT(irq_handler_exit, - - TP_PROTO(int irq, struct irqaction *action, int ret), - - TP_ARGS(irq, action, ret), - - TP_STRUCT__entry( - __field( int, irq ) - __field( int, ret ) +TRACE_EVENT_FORMAT(irq_handler_exit, + TPPROTO(int irq, struct irqaction *action, int ret), + TPARGS(irq, action, ret), + TPFMT("irq=%d handler=%s return=%s", + irq, action->name, ret ? "handled" : "unhandled"), + TRACE_STRUCT( + TRACE_FIELD(int, irq, irq) + TRACE_FIELD(int, ret, ret) ), - - TP_printk("irq=%d return=%s", - __entry->irq, __entry->ret ? "handled" : "unhandled"), - - TP_fast_assign( - __entry->irq = irq; - __entry->ret = ret; - ) -); + TPRAWFMT("irq %d ret %d") + ); #undef TRACE_SYSTEM diff --git a/trunk/include/trace/lockdep_event_types.h b/trunk/include/trace/lockdep_event_types.h index adccfcd2ec8f..f713d74a82b4 100644 --- a/trunk/include/trace/lockdep_event_types.h +++ b/trunk/include/trace/lockdep_event_types.h @@ -1,5 +1,5 @@ -#ifndef TRACE_FORMAT +#ifndef TRACE_EVENT_FORMAT # error Do not include this file directly. # error Unless you know what you are doing. #endif @@ -10,32 +10,32 @@ #ifdef CONFIG_LOCKDEP TRACE_FORMAT(lock_acquire, - TP_PROTO(struct lockdep_map *lock, unsigned int subclass, + TPPROTO(struct lockdep_map *lock, unsigned int subclass, int trylock, int read, int check, struct lockdep_map *next_lock, unsigned long ip), - TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), - TP_FMT("%s%s%s", trylock ? "try " : "", + TPARGS(lock, subclass, trylock, read, check, next_lock, ip), + TPFMT("%s%s%s", trylock ? "try " : "", read ? 
"read " : "", lock->name) ); TRACE_FORMAT(lock_release, - TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), - TP_ARGS(lock, nested, ip), - TP_FMT("%s", lock->name) + TPPROTO(struct lockdep_map *lock, int nested, unsigned long ip), + TPARGS(lock, nested, ip), + TPFMT("%s", lock->name) ); #ifdef CONFIG_LOCK_STAT TRACE_FORMAT(lock_contended, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) + TPPROTO(struct lockdep_map *lock, unsigned long ip), + TPARGS(lock, ip), + TPFMT("%s", lock->name) ); TRACE_FORMAT(lock_acquired, - TP_PROTO(struct lockdep_map *lock, unsigned long ip), - TP_ARGS(lock, ip), - TP_FMT("%s", lock->name) + TPPROTO(struct lockdep_map *lock, unsigned long ip), + TPARGS(lock, ip), + TPFMT("%s", lock->name) ); #endif diff --git a/trunk/include/trace/power.h b/trunk/include/trace/power.h index ef204666e983..38aca537e497 100644 --- a/trunk/include/trace/power.h +++ b/trunk/include/trace/power.h @@ -18,15 +18,15 @@ struct power_trace { }; DECLARE_TRACE(power_start, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); + TPPROTO(struct power_trace *it, unsigned int type, unsigned int state), + TPARGS(it, type, state)); DECLARE_TRACE(power_mark, - TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), - TP_ARGS(it, type, state)); + TPPROTO(struct power_trace *it, unsigned int type, unsigned int state), + TPARGS(it, type, state)); DECLARE_TRACE(power_end, - TP_PROTO(struct power_trace *it), - TP_ARGS(it)); + TPPROTO(struct power_trace *it), + TPARGS(it)); #endif /* _TRACE_POWER_H */ diff --git a/trunk/include/trace/sched_event_types.h b/trunk/include/trace/sched_event_types.h index fb37af672c88..a6de5c1601a0 100644 --- a/trunk/include/trace/sched_event_types.h +++ b/trunk/include/trace/sched_event_types.h @@ -1,6 +1,6 @@ /* use instead */ -#ifndef TRACE_EVENT +#ifndef TRACE_EVENT_FORMAT # error Do not include this file directly. # error Unless you know what you are doing. #endif @@ -8,330 +8,144 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM sched -/* - * Tracepoint for calling kthread_stop, performed to end a kthread: - */ -TRACE_EVENT(sched_kthread_stop, - - TP_PROTO(struct task_struct *t), - - TP_ARGS(t), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) +TRACE_EVENT_FORMAT(sched_kthread_stop, + TPPROTO(struct task_struct *t), + TPARGS(t), + TPFMT("task %s:%d", t->comm, t->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, t->pid) ), - - TP_printk("task %s:%d", __entry->comm, __entry->pid), - - TP_fast_assign( - memcpy(__entry->comm, t->comm, TASK_COMM_LEN); - __entry->pid = t->pid; - ) -); - -/* - * Tracepoint for the return value of the kthread stopping: - */ -TRACE_EVENT(sched_kthread_stop_ret, - - TP_PROTO(int ret), - - TP_ARGS(ret), - - TP_STRUCT__entry( - __field( int, ret ) + TPRAWFMT("task %d") + ); + +TRACE_EVENT_FORMAT(sched_kthread_stop_ret, + TPPROTO(int ret), + TPARGS(ret), + TPFMT("ret=%d", ret), + TRACE_STRUCT( + TRACE_FIELD(int, ret, ret) ), - - TP_printk("ret %d", __entry->ret), - - TP_fast_assign( - __entry->ret = ret; - ) -); - -/* - * Tracepoint for waiting on task to unschedule: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. 
) - */ -TRACE_EVENT(sched_wait_task, - - TP_PROTO(struct rq *rq, struct task_struct *p), - - TP_ARGS(rq, p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) + TPRAWFMT("ret=%d") + ); + +TRACE_EVENT_FORMAT(sched_wait_task, + TPPROTO(struct rq *rq, struct task_struct *p), + TPARGS(rq, p), + TPFMT("task %s:%d", p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ) -); - -/* - * Tracepoint for waking up a task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) + TPRAWFMT("task %d") + ); + +TRACE_EVENT_FORMAT(sched_wakeup, + TPPROTO(struct rq *rq, struct task_struct *p, int success), + TPARGS(rq, p, success), + TPFMT("task %s:%d %s", + p->comm, p->pid, success ? "succeeded" : "failed"), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + TRACE_FIELD(int, success, success) ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ) -); - -/* - * Tracepoint for waking up a new task: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. ) - */ -TRACE_EVENT(sched_wakeup_new, - - TP_PROTO(struct rq *rq, struct task_struct *p, int success), - - TP_ARGS(rq, p, success), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, success ) + TPRAWFMT("task %d success=%d") + ); + +TRACE_EVENT_FORMAT(sched_wakeup_new, + TPPROTO(struct rq *rq, struct task_struct *p, int success), + TPARGS(rq, p, success), + TPFMT("task %s:%d", + p->comm, p->pid, success ? "succeeded" : "failed"), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + TRACE_FIELD(int, success, success) ), - - TP_printk("task %s:%d [%d] success=%d", - __entry->comm, __entry->pid, __entry->prio, - __entry->success), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->success = success; - ) -); - -/* - * Tracepoint for task switches, performed by the scheduler: - * - * (NOTE: the 'rq' argument is not used by generic trace events, - * but used by the latency tracer plugin. 
) - */ -TRACE_EVENT(sched_switch, - - TP_PROTO(struct rq *rq, struct task_struct *prev, - struct task_struct *next), - - TP_ARGS(rq, prev, next), - - TP_STRUCT__entry( - __array( char, prev_comm, TASK_COMM_LEN ) - __field( pid_t, prev_pid ) - __field( int, prev_prio ) - __array( char, next_comm, TASK_COMM_LEN ) - __field( pid_t, next_pid ) - __field( int, next_prio ) + TPRAWFMT("task %d success=%d") + ); + +TRACE_EVENT_FORMAT(sched_switch, + TPPROTO(struct rq *rq, struct task_struct *prev, + struct task_struct *next), + TPARGS(rq, prev, next), + TPFMT("task %s:%d ==> %s:%d", + prev->comm, prev->pid, next->comm, next->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, prev_pid, prev->pid) + TRACE_FIELD(int, prev_prio, prev->prio) + TRACE_FIELD_SPECIAL(char next_comm[TASK_COMM_LEN], + next_comm, + TPCMD(memcpy(TRACE_ENTRY->next_comm, + next->comm, + TASK_COMM_LEN))) + TRACE_FIELD(pid_t, next_pid, next->pid) + TRACE_FIELD(int, next_prio, next->prio) ), - - TP_printk("task %s:%d [%d] ==> %s:%d [%d]", - __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, - __entry->next_comm, __entry->next_pid, __entry->next_prio), - - TP_fast_assign( - memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); - __entry->prev_pid = prev->pid; - __entry->prev_prio = prev->prio; - memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); - __entry->next_pid = next->pid; - __entry->next_prio = next->prio; - ) -); - -/* - * Tracepoint for a task being migrated: - */ -TRACE_EVENT(sched_migrate_task, - - TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), - - TP_ARGS(p, orig_cpu, dest_cpu), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) - __field( int, orig_cpu ) - __field( int, dest_cpu ) + TPRAWFMT("prev %d:%d ==> next %s:%d:%d") + ); + +TRACE_EVENT_FORMAT(sched_migrate_task, + TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), + TPARGS(p, orig_cpu, dest_cpu), + TPFMT("task %s:%d from: %d to: %d", + p->comm, p->pid, orig_cpu, dest_cpu), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) + TRACE_FIELD(int, orig_cpu, orig_cpu) + TRACE_FIELD(int, dest_cpu, dest_cpu) ), - - TP_printk("task %s:%d [%d] from: %d to: %d", - __entry->comm, __entry->pid, __entry->prio, - __entry->orig_cpu, __entry->dest_cpu), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - __entry->orig_cpu = orig_cpu; - __entry->dest_cpu = dest_cpu; - ) -); - -/* - * Tracepoint for freeing a task: - */ -TRACE_EVENT(sched_process_free, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) + TPRAWFMT("task %d from: %d to: %d") + ); + +TRACE_EVENT_FORMAT(sched_process_free, + TPPROTO(struct task_struct *p), + TPARGS(p), + TPFMT("task %s:%d", p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ) -); - -/* - * Tracepoint for a task exiting: - */ -TRACE_EVENT(sched_process_exit, - - TP_PROTO(struct task_struct *p), - - TP_ARGS(p), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) + TPRAWFMT("task %d") + ); + +TRACE_EVENT_FORMAT(sched_process_exit, + TPPROTO(struct task_struct *p), + TPARGS(p), + TPFMT("task %s:%d", p->comm, p->pid), + 
TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, p->pid) ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->prio = p->prio; - ) -); - -/* - * Tracepoint for a waiting task: - */ -TRACE_EVENT(sched_process_wait, - - TP_PROTO(struct pid *pid), - - TP_ARGS(pid), - - TP_STRUCT__entry( - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) - __field( int, prio ) + TPRAWFMT("task %d") + ); + +TRACE_EVENT_FORMAT(sched_process_wait, + TPPROTO(struct pid *pid), + TPARGS(pid), + TPFMT("pid %d", pid_nr(pid)), + TRACE_STRUCT( + TRACE_FIELD(pid_t, pid, pid_nr(pid)) ), - - TP_printk("task %s:%d [%d]", - __entry->comm, __entry->pid, __entry->prio), - - TP_fast_assign( - memcpy(__entry->comm, current->comm, TASK_COMM_LEN); - __entry->pid = pid_nr(pid); - __entry->prio = current->prio; - ) -); - -/* - * Tracepoint for do_fork: - */ -TRACE_EVENT(sched_process_fork, - - TP_PROTO(struct task_struct *parent, struct task_struct *child), - - TP_ARGS(parent, child), - - TP_STRUCT__entry( - __array( char, parent_comm, TASK_COMM_LEN ) - __field( pid_t, parent_pid ) - __array( char, child_comm, TASK_COMM_LEN ) - __field( pid_t, child_pid ) + TPRAWFMT("task %d") + ); + +TRACE_EVENT_FORMAT(sched_process_fork, + TPPROTO(struct task_struct *parent, struct task_struct *child), + TPARGS(parent, child), + TPFMT("parent %s:%d child %s:%d", + parent->comm, parent->pid, child->comm, child->pid), + TRACE_STRUCT( + TRACE_FIELD(pid_t, parent, parent->pid) + TRACE_FIELD(pid_t, child, child->pid) ), - - TP_printk("parent %s:%d child %s:%d", - __entry->parent_comm, __entry->parent_pid, - __entry->child_comm, __entry->child_pid), - - TP_fast_assign( - memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); - __entry->parent_pid = parent->pid; - memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); - __entry->child_pid = child->pid; - ) -); - -/* - * Tracepoint for sending a signal: - */ -TRACE_EVENT(sched_signal_send, - - TP_PROTO(int sig, struct task_struct *p), - - TP_ARGS(sig, p), - - TP_STRUCT__entry( - __field( int, sig ) - __array( char, comm, TASK_COMM_LEN ) - __field( pid_t, pid ) + TPRAWFMT("parent %d child %d") + ); + +TRACE_EVENT_FORMAT(sched_signal_send, + TPPROTO(int sig, struct task_struct *p), + TPARGS(sig, p), + TPFMT("sig: %d task %s:%d", sig, p->comm, p->pid), + TRACE_STRUCT( + TRACE_FIELD(int, sig, sig) + TRACE_FIELD(pid_t, pid, p->pid) ), - - TP_printk("sig: %d task %s:%d", - __entry->sig, __entry->comm, __entry->pid), - - TP_fast_assign( - memcpy(__entry->comm, p->comm, TASK_COMM_LEN); - __entry->pid = p->pid; - __entry->sig = sig; - ) -); + TPRAWFMT("sig: %d task %d") + ); #undef TRACE_SYSTEM diff --git a/trunk/include/trace/workqueue.h b/trunk/include/trace/workqueue.h index 7626523deeba..867829df4571 100644 --- a/trunk/include/trace/workqueue.h +++ b/trunk/include/trace/workqueue.h @@ -6,20 +6,20 @@ #include DECLARE_TRACE(workqueue_insertion, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); + TPPROTO(struct task_struct *wq_thread, struct work_struct *work), + TPARGS(wq_thread, work)); DECLARE_TRACE(workqueue_execution, - TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), - TP_ARGS(wq_thread, work)); + TPPROTO(struct task_struct *wq_thread, struct work_struct *work), + TPARGS(wq_thread, work)); /* Trace the creation of one workqueue thread on a cpu */ DECLARE_TRACE(workqueue_creation, - TP_PROTO(struct 
task_struct *wq_thread, int cpu), - TP_ARGS(wq_thread, cpu)); + TPPROTO(struct task_struct *wq_thread, int cpu), + TPARGS(wq_thread, cpu)); DECLARE_TRACE(workqueue_destruction, - TP_PROTO(struct task_struct *wq_thread), - TP_ARGS(wq_thread)); + TPPROTO(struct task_struct *wq_thread), + TPARGS(wq_thread)); #endif /* __TRACE_WORKQUEUE_H */ diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c index 8b742f2b3845..90a6d63d9211 100644 --- a/trunk/kernel/module.c +++ b/trunk/kernel/module.c @@ -381,7 +381,7 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, align = PAGE_SIZE; } - ptr = __alloc_reserved_percpu(size, align); + ptr = __alloc_percpu(size, align); if (!ptr) printk(KERN_WARNING "Could not allocate %lu bytes percpu data\n", size); diff --git a/trunk/kernel/trace/events.c b/trunk/kernel/trace/events.c index 9fc918da404f..f2509cbaacea 100644 --- a/trunk/kernel/trace/events.c +++ b/trunk/kernel/trace/events.c @@ -2,7 +2,9 @@ * This is the place to register all trace points as events. */ -#include +/* someday this needs to go in a generic header */ +#define __STR(x) #x +#define STR(x) __STR(x) #include diff --git a/trunk/kernel/trace/trace.h b/trunk/kernel/trace/trace.h index c5e1d8865fe4..2bfb7d11fc17 100644 --- a/trunk/kernel/trace/trace.h +++ b/trunk/kernel/trace/trace.h @@ -751,7 +751,12 @@ struct ftrace_event_call { int (*regfunc)(void); void (*unregfunc)(void); int id; + struct dentry *raw_dir; + int raw_enabled; + int type; int (*raw_init)(void); + int (*raw_reg)(void); + void (*raw_unreg)(void); int (*show_format)(struct trace_seq *s); }; diff --git a/trunk/kernel/trace/trace_clock.c b/trunk/kernel/trace/trace_clock.c index 05b176abfd30..2d4953f93560 100644 --- a/trunk/kernel/trace/trace_clock.c +++ b/trunk/kernel/trace/trace_clock.c @@ -27,19 +27,12 @@ */ u64 notrace trace_clock_local(void) { - unsigned long flags; - u64 clock; - /* * sched_clock() is an architecture implemented, fast, scalable, * lockless clock. It is not guaranteed to be coherent across * CPUs, nor across CPU idle events. 
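After this change trace_clock_local() is a bare sched_clock() call; deltas are cheap but only meaningful on one CPU. A hedged usage sketch (do_work() stands in for the traced section):

static void time_one_section(void)
{
	u64 t0 = trace_clock_local();

	do_work();

	/* valid only if both reads happen on the same CPU: the clock
	 * is fast and lockless but not coherent across CPUs */
	pr_info("section took %llu ns\n",
		(unsigned long long)(trace_clock_local() - t0));
}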
*/ - raw_local_irq_save(flags); - clock = sched_clock(); - raw_local_irq_restore(flags); - - return clock; + return sched_clock(); } /* diff --git a/trunk/kernel/trace/trace_event_types.h b/trunk/kernel/trace/trace_event_types.h index 5cca4c978bde..fb4eba166433 100644 --- a/trunk/kernel/trace/trace_event_types.h +++ b/trunk/kernel/trace/trace_event_types.h @@ -10,7 +10,7 @@ TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned long, parent_ip, parent_ip) ), - TP_RAW_FMT(" %lx <-- %lx") + TPRAWFMT(" %lx <-- %lx") ); TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, @@ -19,7 +19,7 @@ TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, TRACE_FIELD(unsigned long, graph_ent.func, func) TRACE_FIELD(int, graph_ent.depth, depth) ), - TP_RAW_FMT("--> %lx (%d)") + TPRAWFMT("--> %lx (%d)") ); TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, @@ -28,7 +28,7 @@ TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, TRACE_FIELD(unsigned long, ret.func, func) TRACE_FIELD(int, ret.depth, depth) ), - TP_RAW_FMT("<-- %lx (%d)") + TPRAWFMT("<-- %lx (%d)") ); TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, @@ -41,7 +41,7 @@ TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, TRACE_FIELD(unsigned char, next_state, next_state) TRACE_FIELD(unsigned int, next_cpu, next_cpu) ), - TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") + TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, @@ -54,7 +54,7 @@ TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, TRACE_FIELD(unsigned char, next_state, next_state) TRACE_FIELD(unsigned int, next_cpu, next_cpu) ), - TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") + TPRAWFMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ); TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, @@ -63,7 +63,7 @@ TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, TRACE_FIELD(unsigned long, arg2, arg2) TRACE_FIELD(unsigned long, arg3, arg3) ), - TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") + TPRAWFMT("(%08lx) (%08lx) (%08lx)") ); /* @@ -83,7 +83,7 @@ TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, TRACE_FIELD(unsigned long, caller[6], stack6) TRACE_FIELD(unsigned long, caller[7], stack7) ), - TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); @@ -98,7 +98,7 @@ TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, TRACE_FIELD(unsigned long, caller[6], stack6) TRACE_FIELD(unsigned long, caller[7], stack7) ), - TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" + TPRAWFMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ); @@ -106,10 +106,9 @@ TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, TRACE_STRUCT( TRACE_FIELD(unsigned long, ip, ip) TRACE_FIELD(unsigned int, depth, depth) - TRACE_FIELD(char *, fmt, fmt) TRACE_FIELD_ZERO_CHAR(buf) ), - TP_RAW_FMT("%08lx (%d) fmt:%p %s") + TPRAWFMT("%08lx (%d) %s") ); TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, @@ -119,7 +118,7 @@ TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) TRACE_FIELD(char, correct, correct) ), - TP_RAW_FMT("%u:%s:%s (%u)") + TPRAWFMT("%u:%s:%s (%u)") ); TRACE_EVENT_FORMAT(hw_branch, 
TRACE_HW_BRANCHES, hw_branch_entry, ignore, @@ -127,7 +126,7 @@ TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, TRACE_FIELD(u64, from, from) TRACE_FIELD(u64, to, to) ), - TP_RAW_FMT("from: %llx to: %llx") + TPRAWFMT("from: %llx to: %llx") ); TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, @@ -137,7 +136,7 @@ TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, TRACE_FIELD(int, state_data.type, type) TRACE_FIELD(int, state_data.state, state) ), - TP_RAW_FMT("%llx->%llx type:%u state:%u") + TPRAWFMT("%llx->%llx type:%u state:%u") ); TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, @@ -150,7 +149,7 @@ TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) TRACE_FIELD(int, node, node) ), - TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" + TPRAWFMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" " flags:%x node:%d") ); @@ -160,7 +159,7 @@ TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, TRACE_FIELD(unsigned long, call_site, call_site) TRACE_FIELD(const void *, ptr, ptr) ), - TP_RAW_FMT("type:%u call_site:%lx ptr:%p") + TPRAWFMT("type:%u call_site:%lx ptr:%p") ); #undef TRACE_SYSTEM diff --git a/trunk/kernel/trace/trace_events.c b/trunk/kernel/trace/trace_events.c index 1880a6438097..4488d90e75ef 100644 --- a/trunk/kernel/trace/trace_events.c +++ b/trunk/kernel/trace/trace_events.c @@ -59,12 +59,22 @@ static void ftrace_event_enable_disable(struct ftrace_event_call *call, call->enabled = 0; call->unregfunc(); } + if (call->raw_enabled) { + call->raw_enabled = 0; + call->raw_unreg(); + } break; case 1: - if (!call->enabled) { + if (!call->enabled && + (call->type & TRACE_EVENT_TYPE_PRINTF)) { call->enabled = 1; call->regfunc(); } + if (!call->raw_enabled && + (call->type & TRACE_EVENT_TYPE_RAW)) { + call->raw_enabled = 1; + call->raw_reg(); + } break; } } @@ -290,7 +300,7 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, struct ftrace_event_call *call = filp->private_data; char *buf; - if (call->enabled) + if (call->enabled || call->raw_enabled) buf = "1\n"; else buf = "0\n"; @@ -336,10 +346,110 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, return cnt; } +static ssize_t +event_type_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[16]; + int r = 0; + + if (call->type & TRACE_EVENT_TYPE_PRINTF) + r += sprintf(buf, "printf\n"); + + if (call->type & TRACE_EVENT_TYPE_RAW) + r += sprintf(buf+r, "raw\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t +event_type_write(struct file *filp, const char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[64]; + + /* + * If there's only one type, we can't change it. + * And currently we always have printf type, and we + * may or may not have raw type. + * + * This is a redundant check, the file should be read + * only if this is the case anyway. + */ + + if (!call->raw_init) + return -EPERM; + + if (cnt >= sizeof(buf)) + return -EINVAL; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + + if (!strncmp(buf, "printf", 6) && + (!buf[6] || isspace(buf[6]))) { + + call->type = TRACE_EVENT_TYPE_PRINTF; + + /* + * If raw enabled, the disable it and enable + * printf type. 
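From userspace, the new per-event "type" file is driven with plain writes of "printf" or "raw". A hedged sketch, assuming debugfs is mounted at /sys/kernel/debug:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int set_event_type(const char *type_file, const char *type)
{
	int fd = open(type_file, O_WRONLY);
	ssize_t n;

	if (fd < 0)
		return -1;
	n = write(fd, type, strlen(type));
	close(fd);
	return n < 0 ? -1 : 0;
}
/* e.g. set_event_type("/sys/kernel/debug/tracing/events/sched/"
 *		       "sched_switch/type", "raw");
 */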
+ */ + if (call->raw_enabled) { + call->raw_enabled = 0; + call->raw_unreg(); + + call->enabled = 1; + call->regfunc(); + } + + } else if (!strncmp(buf, "raw", 3) && + (!buf[3] || isspace(buf[3]))) { + + call->type = TRACE_EVENT_TYPE_RAW; + + /* + * If printf was enabled, disable it and enable the + * raw type. + */ + if (call->enabled) { + call->enabled = 0; + call->unregfunc(); + + call->raw_enabled = 1; + call->raw_reg(); + } + } else + return -EINVAL; + + *ppos += cnt; + + return cnt; +} + +static ssize_t +event_available_types_read(struct file *filp, char __user *ubuf, size_t cnt, + loff_t *ppos) +{ + struct ftrace_event_call *call = filp->private_data; + char buf[16]; + int r = 0; + + r += sprintf(buf, "printf\n"); + + if (call->raw_init) + r += sprintf(buf+r, "raw\n"); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + #undef FIELD -#define FIELD(type, name) \ - #type, #name, (unsigned int)offsetof(typeof(field), name), \ - (unsigned int)sizeof(field.name) +#define FIELD(type, name) \ + #type, #name, offsetof(typeof(field), name), sizeof(field.name) static int trace_write_header(struct trace_seq *s) { @@ -347,11 +457,11 @@ static int trace_write_header(struct trace_seq *s) /* struct trace_entry */ return trace_seq_printf(s, - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" - "\tfield:%s %s;\toffset:%u;\tsize:%u;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" + "\tfield:%s %s;\toffset:%lu;\tsize:%lu;\n" "\n", FIELD(unsigned char, type), FIELD(unsigned char, flags), @@ -359,7 +469,6 @@ static int trace_write_header(struct trace_seq *s) FIELD(int, pid), FIELD(int, tgid)); } - static ssize_t event_format_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) @@ -417,6 +526,13 @@ static const struct seq_operations show_set_event_seq_ops = { .stop = t_stop, }; +static const struct file_operations ftrace_avail_fops = { + .open = ftrace_event_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + static const struct file_operations ftrace_set_event_fops = { .open = ftrace_event_seq_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, .write = ftrace_event_write, }; @@ -431,6 +547,17 @@ static const struct file_operations ftrace_enable_fops = { .open = tracing_open_generic, .read = event_enable_read, .write = event_enable_write, }; +static const struct file_operations ftrace_type_fops = { + .open = tracing_open_generic, + .read = event_type_read, + .write = event_type_write, +}; + +static const struct file_operations ftrace_available_types_fops = { + .open = tracing_open_generic, + .read = event_available_types_read, +}; + static const struct file_operations ftrace_event_format_fops = { .open = tracing_open_generic, .read = event_format_read, @@ -519,6 +646,9 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) } } + /* default the output to printf */ + call->type = TRACE_EVENT_TYPE_PRINTF; + call->dir = debugfs_create_dir(call->name, d_events); if (!call->dir) { pr_warning("Could not create debugfs " @@ -534,6 +664,21 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) "'%s/enable' entry\n", call->name); } + /* Only let type be writable if we can change it */ + entry = debugfs_create_file("type", + call->raw_init ?
0644 : 0444, + call->dir, call, + &ftrace_type_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/type' entry\n", call->name); + + entry = debugfs_create_file("available_types", 0444, call->dir, call, + &ftrace_available_types_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'%s/available_types' entry\n", call->name); + /* A trace may not want to export its format */ if (!call->show_format) return 0; @@ -558,6 +703,13 @@ static __init int event_trace_init(void) if (!d_tracer) return 0; + entry = debugfs_create_file("available_events", 0444, d_tracer, + (void *)&show_event_seq_ops, + &ftrace_avail_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'available_events' entry\n"); + entry = debugfs_create_file("set_event", 0644, d_tracer, (void *)&show_set_event_seq_ops, &ftrace_set_event_fops); diff --git a/trunk/kernel/trace/trace_events_stage_1.h b/trunk/kernel/trace/trace_events_stage_1.h index 15e9bf965a18..3830a731424c 100644 --- a/trunk/kernel/trace/trace_events_stage_1.h +++ b/trunk/kernel/trace/trace_events_stage_1.h @@ -17,21 +17,20 @@ #undef TRACE_FORMAT #define TRACE_FORMAT(call, proto, args, fmt) -#undef __array -#define __array(type, item, len) type item[len]; - -#undef __field -#define __field(type, item) type item; +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(name, proto, args, fmt, tstruct, tpfmt) \ + struct ftrace_raw_##name { \ + struct trace_entry ent; \ + tstruct \ + }; \ + static struct ftrace_event_call event_##name -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args -#undef TRACE_EVENT -#define TRACE_EVENT(name, proto, args, tstruct, print, assign) \ - struct ftrace_raw_##name { \ - struct trace_entry ent; \ - tstruct \ - }; \ - static struct ftrace_event_call event_##name +#define TRACE_FIELD(type, item, assign) \ + type item; +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + type_item; #include diff --git a/trunk/kernel/trace/trace_events_stage_2.h b/trunk/kernel/trace/trace_events_stage_2.h index d91bf4c56661..d24a97e74aea 100644 --- a/trunk/kernel/trace/trace_events_stage_2.h +++ b/trunk/kernel/trace/trace_events_stage_2.h @@ -20,7 +20,7 @@ * * field = (typeof(field))entry; * - * ret = trace_seq_printf(s, "%s", "\n"); + * ret = trace_seq_printf(s, "%s", "\n"); * if (!ret) * return TRACE_TYPE_PARTIAL_LINE; * @@ -32,14 +32,23 @@ * in binary. */ -#undef __entry -#define __entry field +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) args -#undef TP_printk -#define TP_printk(fmt, args...) fmt "\n", args +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + field->item, -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + field->item, + + +#undef TPRAWFMT +#define TPRAWFMT(args...) args + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ @@ -57,76 +66,14 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ \ field = (typeof(field))entry; \ \ - ret = trace_seq_printf(s, print); \ + ret = trace_seq_printf(s, tpfmt "%s", tstruct "\n"); \ if (!ret) \ return TRACE_TYPE_PARTIAL_LINE; \ \ return TRACE_TYPE_HANDLED; \ } - -#include - -/* - * Setup the showing format of trace point. 
- * - * int - * ftrace_format_##call(struct trace_seq *s) - * { - * struct ftrace_raw_##call field; - * int ret; - * - * ret = trace_seq_printf(s, #type " " #item ";" - * " size:%d; offset:%d;\n", - * sizeof(field.type), - * offsetof(struct ftrace_raw_##call, - * item)); - * - * } - */ - -#undef TP_STRUCT__entry -#define TP_STRUCT__entry(args...) args -#undef __field -#define __field(type, item) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __array -#define __array(type, item, len) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - -#undef __entry -#define __entry "REC" - -#undef TP_printk -#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args - -#undef TP_fast_assign -#define TP_fast_assign(args...) args - -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, print, func) \ -static int \ -ftrace_format_##call(struct trace_seq *s) \ -{ \ - struct ftrace_raw_##call field; \ - int ret; \ - \ - tstruct; \ - \ - trace_seq_printf(s, "\nprint fmt: " print); \ - \ - return ret; \ -} +#include +#include "trace_format.h" #include diff --git a/trunk/kernel/trace/trace_events_stage_3.h b/trunk/kernel/trace/trace_events_stage_3.h index 3ba55d4ab073..2c8d76c7dbed 100644 --- a/trunk/kernel/trace/trace_events_stage_3.h +++ b/trunk/kernel/trace/trace_events_stage_3.h @@ -35,7 +35,7 @@ * } * * - * For those macros defined with TRACE_EVENT: + * For those macros defined with TRACE_EVENT_FORMAT: * * static struct ftrace_event_call event_; * @@ -106,8 +106,8 @@ * */ -#undef TP_FMT -#define TP_FMT(fmt, args...) fmt "\n", ##args +#undef TPFMT +#define TPFMT(fmt, args...) fmt "\n", ##args #define _TRACE_FORMAT(call, proto, args, fmt) \ static void ftrace_event_##call(proto) \ @@ -139,16 +139,32 @@ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ + .system = STR(TRACE_SYSTEM), \ .regfunc = ftrace_reg_event_##call, \ .unregfunc = ftrace_unreg_event_##call, \ } -#undef __entry -#define __entry entry +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; -#undef TRACE_EVENT -#define TRACE_EVENT(call, proto, args, tstruct, print, assign) \ +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign)\ + entry->item = assign; + +#undef TPCMD +#define TPCMD(cmd...) 
cmd + +#undef TRACE_ENTRY +#define TRACE_ENTRY entry + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + cmd; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ \ static struct ftrace_event_call event_##call; \ \ @@ -169,7 +185,7 @@ static void ftrace_raw_event_##call(proto) \ return; \ entry = ring_buffer_event_data(event); \ \ - assign; \ + tstruct; \ \ trace_current_buffer_unlock_commit(event, irq_flags, pc); \ } \ @@ -209,9 +225,11 @@ static struct ftrace_event_call __used \ __attribute__((__aligned__(4))) \ __attribute__((section("_ftrace_events"))) event_##call = { \ .name = #call, \ - .system = __stringify(TRACE_SYSTEM), \ + .system = STR(TRACE_SYSTEM), \ + .regfunc = ftrace_reg_event_##call, \ + .unregfunc = ftrace_unreg_event_##call, \ .raw_init = ftrace_raw_init_event_##call, \ - .regfunc = ftrace_raw_reg_event_##call, \ - .unregfunc = ftrace_raw_unreg_event_##call, \ + .raw_reg = ftrace_raw_reg_event_##call, \ + .raw_unreg = ftrace_raw_unreg_event_##call, \ .show_format = ftrace_format_##call, \ } diff --git a/trunk/kernel/trace/trace_export.c b/trunk/kernel/trace/trace_export.c index 23ae78430d58..0fb7be73e31c 100644 --- a/trunk/kernel/trace/trace_export.c +++ b/trunk/kernel/trace/trace_export.c @@ -15,40 +15,19 @@ #include "trace_output.h" - -#undef TRACE_STRUCT -#define TRACE_STRUCT(args...) args - -#undef TRACE_FIELD -#define TRACE_FIELD(type, item, assign) \ - ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; - - -#undef TRACE_FIELD_SPECIAL -#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ - ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ - "offset:%u;\tsize:%u;\n", \ - (unsigned int)offsetof(typeof(field), item), \ - (unsigned int)sizeof(field.item)); \ - if (!ret) \ - return 0; +#include "trace_format.h" #undef TRACE_FIELD_ZERO_CHAR -#define TRACE_FIELD_ZERO_CHAR(item) \ - ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ - "offset:%u;\tsize:0;\n", \ - (unsigned int)offsetof(typeof(field), item)); \ - if (!ret) \ +#define TRACE_FIELD_ZERO_CHAR(item) \ + ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ + "offset:%lu;\tsize:0;\n", \ + offsetof(typeof(field), item)); \ + if (!ret) \ return 0; -#undef TP_RAW_FMT -#define TP_RAW_FMT(args...) args +#undef TPRAWFMT +#define TPRAWFMT(args...) args #undef TRACE_EVENT_FORMAT #define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ @@ -78,8 +57,8 @@ ftrace_format_##call(struct trace_seq *s) \ #define TRACE_FIELD(type, item, assign)\ entry->item = assign; -#undef TP_CMD -#define TP_CMD(cmd...) cmd +#undef TPCMD +#define TPCMD(cmd...) cmd #undef TRACE_ENTRY #define TRACE_ENTRY entry diff --git a/trunk/kernel/trace/trace_format.h b/trunk/kernel/trace/trace_format.h new file mode 100644 index 000000000000..03f9a4c165ca --- /dev/null +++ b/trunk/kernel/trace/trace_format.h @@ -0,0 +1,55 @@ +/* + * Setup the showing format of trace point. + * + * int + * ftrace_format_##call(struct trace_seq *s) + * { + * struct ftrace_raw_##call field; + * int ret; + * + * ret = trace_seq_printf(s, #type " " #item ";" + * " size:%d; offset:%d;\n", + * sizeof(field.type), + * offsetof(struct ftrace_raw_##call, + * item)); + * + * } + */ + +#undef TRACE_STRUCT +#define TRACE_STRUCT(args...) 
args + +#undef TRACE_FIELD +#define TRACE_FIELD(type, item, assign) \ + ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + + +#undef TRACE_FIELD_SPECIAL +#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ + ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ + "offset:%lu;\tsize:%lu;\n", \ + offsetof(typeof(field), item), \ + sizeof(field.item)); \ + if (!ret) \ + return 0; + +#undef TRACE_EVENT_FORMAT +#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ +static int \ +ftrace_format_##call(struct trace_seq *s) \ +{ \ + struct ftrace_raw_##call field; \ + int ret; \ + \ + tstruct; \ + \ + trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ + \ + return ret; \ +} + diff --git a/trunk/kernel/trace/trace_functions_graph.c b/trunk/kernel/trace/trace_functions_graph.c index 35257be6a9d6..453ebd3b636e 100644 --- a/trunk/kernel/trace/trace_functions_graph.c +++ b/trunk/kernel/trace/trace_functions_graph.c @@ -837,7 +837,7 @@ static void graph_trace_open(struct trace_iterator *iter) static void graph_trace_close(struct trace_iterator *iter) { - free_percpu(iter->private); + percpu_free(iter->private); } static struct tracer graph_trace __read_mostly = { diff --git a/trunk/kernel/trace/trace_selftest.c b/trunk/kernel/trace/trace_selftest.c index f907a2b29028..7238646b8723 100644 --- a/trunk/kernel/trace/trace_selftest.c +++ b/trunk/kernel/trace/trace_selftest.c @@ -1,6 +1,5 @@ /* Include in trace.c */ -#include #include #include @@ -101,6 +100,9 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) #ifdef CONFIG_DYNAMIC_FTRACE +#define __STR(x) #x +#define STR(x) __STR(x) + /* Test dynamic code modification and ftrace filters */ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, struct trace_array *tr, @@ -128,7 +130,7 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, * start of the function names. We simply put a '*' to * accommodate them. 
*/ - func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + func_name = "*" STR(DYN_FTRACE_TEST_NAME); /* filter only on our function */ ftrace_set_filter(func_name, strlen(func_name), 1); diff --git a/trunk/kernel/trace/trace_workqueue.c b/trunk/kernel/trace/trace_workqueue.c index 739fdacf873b..46c8dc896bd3 100644 --- a/trunk/kernel/trace/trace_workqueue.c +++ b/trunk/kernel/trace/trace_workqueue.c @@ -91,7 +91,7 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) struct cpu_workqueue_stats *cws; unsigned long flags; - WARN_ON(cpu < 0); + WARN_ON(cpu < 0 || cpu >= num_possible_cpus()); /* Workqueues are sometimes created in atomic context */ cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC); @@ -175,12 +175,12 @@ static void *workqueue_stat_next(void *prev, int idx) spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); - do { - cpu = cpumask_next(cpu, cpu_possible_mask); - if (cpu >= nr_cpu_ids) - return NULL; - } while (!(ret = workqueue_stat_start_cpu(cpu))); - return ret; + for (++cpu ; cpu < num_possible_cpus(); cpu++) { + ret = workqueue_stat_start_cpu(cpu); + if (ret) + return ret; + } + return NULL; } spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); diff --git a/trunk/lib/vsprintf.c b/trunk/lib/vsprintf.c index dc1674377009..25f01578c856 100644 --- a/trunk/lib/vsprintf.c +++ b/trunk/lib/vsprintf.c @@ -768,6 +768,7 @@ static char *pointer(const char *fmt, char *buf, char *end, void *ptr, static int format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; + bool sign = false; /* we finished early by reading the field width */ if (spec->type == FORMAT_TYPE_WITDH) { @@ -899,7 +900,7 @@ static int format_decode(const char *fmt, struct printf_spec *spec) case 'd': case 'i': - spec->flags |= SIGN; + sign = true; case 'u': break; @@ -911,7 +912,7 @@ static int format_decode(const char *fmt, struct printf_spec *spec) if (spec->qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; else if (spec->qualifier == 'l') { - if (spec->flags & SIGN) + if (sign) spec->type = FORMAT_TYPE_LONG; else spec->type = FORMAT_TYPE_ULONG; @@ -920,12 +921,12 @@ static int format_decode(const char *fmt, struct printf_spec *spec) } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; } else if (spec->qualifier == 'h') { - if (spec->flags & SIGN) + if (sign) spec->type = FORMAT_TYPE_SHORT; else spec->type = FORMAT_TYPE_USHORT; } else { - if (spec->flags & SIGN) + if (sign) spec->type = FORMAT_TYPE_INT; else spec->type = FORMAT_TYPE_UINT; @@ -1100,8 +1101,8 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) case FORMAT_TYPE_SHORT: num = (short) va_arg(args, int); break; - case FORMAT_TYPE_INT: - num = (int) va_arg(args, int); + case FORMAT_TYPE_UINT: + num = va_arg(args, unsigned int); break; default: num = va_arg(args, unsigned int); diff --git a/trunk/mm/percpu.c b/trunk/mm/percpu.c index bfe6a3afaf45..3d0f5456827c 100644 --- a/trunk/mm/percpu.c +++ b/trunk/mm/percpu.c @@ -62,9 +62,7 @@ #include #include #include -#include #include -#include #include #include @@ -82,8 +80,7 @@ struct pcpu_chunk { int map_alloc; /* # of map entries allocated */ int *map; /* allocation map */ bool immutable; /* no [de]population allowed */ - struct page **page; /* points to page array */ - struct page *page_ar[]; /* #cpus * UNIT_PAGES */ + struct page *page[]; /* #cpus * UNIT_PAGES */ 
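+	/*
+	 * Note: page[] is a flexible array; pcpu_chunk_struct_size (set up
+	 * in pcpu_setup_first_chunk() below) reserves room for
+	 * num_possible_cpus() * pcpu_unit_pages pointers, and entries are
+	 * reached through pcpu_chunk_pagep(chunk, cpu, page_idx).
+	 */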
}; static int pcpu_unit_pages __read_mostly; @@ -96,42 +93,28 @@ static size_t pcpu_chunk_struct_size __read_mostly; void *pcpu_base_addr __read_mostly; EXPORT_SYMBOL_GPL(pcpu_base_addr); -/* optional reserved chunk, only accessible for reserved allocations */ -static struct pcpu_chunk *pcpu_reserved_chunk; -/* offset limit of the reserved chunk */ -static int pcpu_reserved_chunk_limit; +/* the size of kernel static area */ +static int pcpu_static_size __read_mostly; /* - * Synchronization rules. - * - * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former - * protects allocation/reclaim paths, chunks and chunk->page arrays. - * The latter is a spinlock and protects the index data structures - - * chunk slots, rbtree, chunks and area maps in chunks. - * - * During allocation, pcpu_alloc_mutex is kept locked all the time and - * pcpu_lock is grabbed and released as necessary. All actual memory - * allocations are done using GFP_KERNEL with pcpu_lock released. - * - * Free path accesses and alters only the index data structures, so it - * can be safely called from atomic context. When memory needs to be - * returned to the system, free path schedules reclaim_work which - * grabs both pcpu_alloc_mutex and pcpu_lock, unlinks chunks to be - * reclaimed, release both locks and frees the chunks. Note that it's - * necessary to grab both locks to remove a chunk from circulation as - * allocation path might be referencing the chunk with only - * pcpu_alloc_mutex locked. + * One mutex to rule them all. + * + * The following mutex is grabbed in the outermost public alloc/free + * interface functions and released only when the operation is + * complete. As such, every function in this file other than the + * outermost functions is called under pcpu_mutex. + * + * It can easily be switched to use a spinlock such that only the area + * allocation and page population commit are protected by it, with the + * actual [de]allocation done without holding any lock. However, given + * what this allocator does, I think it's better to let them run + * sequentially. */ -static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ -static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ +static DEFINE_MUTEX(pcpu_mutex); static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ -/* reclaim work to release fully free chunks, scheduled from free path */ -static void pcpu_reclaim(struct work_struct *work); -static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); - static int __pcpu_size_to_slot(int size) { int highbit = fls(size); /* size is in bytes */ @@ -178,44 +161,39 @@ static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, } /** - * pcpu_mem_alloc - allocate memory - * @size: bytes to allocate - * - * Allocate @size bytes. If @size is smaller than PAGE_SIZE, - * kzalloc() is used; otherwise, vmalloc() is used. The returned - * memory is always zeroed. + * pcpu_realloc - versatile realloc + * @p: the current pointer (can be NULL for new allocations) + * @size: the current size in bytes (can be 0 for new allocations) + * @new_size: the wanted new size in bytes (can be 0 for free) * - * CONTEXT: - * Does GFP_KERNEL allocation. + * More robust realloc which can be used to allocate, resize or free a + * memory area of arbitrary size. If the needed size goes over + * PAGE_SIZE, kernel VM is used. * * RETURNS: - * Pointer to the allocated area on success, NULL on failure.
+ * The new pointer on success, NULL on failure. */ -static void *pcpu_mem_alloc(size_t size) +static void *pcpu_realloc(void *p, size_t size, size_t new_size) { - if (size <= PAGE_SIZE) - return kzalloc(size, GFP_KERNEL); - else { - void *ptr = vmalloc(size); - if (ptr) - memset(ptr, 0, size); - return ptr; - } -} + void *new; + + if (new_size <= PAGE_SIZE) + new = kmalloc(new_size, GFP_KERNEL); + else + new = vmalloc(new_size); + if (new_size && !new) + return NULL; + + memcpy(new, p, min(size, new_size)); + if (new_size > size) + memset(new + size, 0, new_size - size); -/** - * pcpu_mem_free - free memory - * @ptr: memory to free - * @size: size of the area - * - * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). - */ -static void pcpu_mem_free(void *ptr, size_t size) -{ if (size <= PAGE_SIZE) - kfree(ptr); + kfree(p); else - vfree(ptr); + vfree(p); + + return new; } /** @@ -225,17 +203,13 @@ static void pcpu_mem_free(void *ptr, size_t size) * * This function is called after an allocation or free changed @chunk. * New slot according to the changed state is determined and @chunk is - * moved to the slot. Note that the reserved chunk is never put on - * chunk slots. - * - * CONTEXT: - * pcpu_lock. + * moved to the slot. */ static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) { int nslot = pcpu_chunk_slot(chunk); - if (chunk != pcpu_reserved_chunk && oslot != nslot) { + if (oslot != nslot) { if (oslot < nslot) list_move(&chunk->list, &pcpu_slot[nslot]); else @@ -275,9 +249,6 @@ static struct rb_node **pcpu_chunk_rb_search(void *addr, * searchs for the chunk with the highest start address which isn't * beyond @addr. * - * CONTEXT: - * pcpu_lock. - * * RETURNS: * The address of the found chunk. */ @@ -286,15 +257,6 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) struct rb_node *n, *parent; struct pcpu_chunk *chunk; - /* is it in the reserved chunk? */ - if (pcpu_reserved_chunk) { - void *start = pcpu_reserved_chunk->vm->addr; - - if (addr >= start && addr < start + pcpu_reserved_chunk_limit) - return pcpu_reserved_chunk; - } - - /* nah... search the regular ones */ n = *pcpu_chunk_rb_search(addr, &parent); if (!n) { /* no exactly matching chunk, the parent is the closest */ @@ -318,9 +280,6 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @new: chunk to insert * * Insert @new into address rb tree. - * - * CONTEXT: - * pcpu_lock. */ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) { @@ -332,66 +291,6 @@ static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) rb_insert_color(&new->rb_node, &pcpu_addr_root); } -/** - * pcpu_extend_area_map - extend area map for allocation - * @chunk: target chunk - * - * Extend area map of @chunk so that it can accomodate an allocation. - * A single allocation can split an area into three areas, so this - * function makes sure that @chunk->map has at least two extra slots. - * - * CONTEXT: - * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired - * if area map is extended. - * - * RETURNS: - * 0 if noop, 1 if successfully extended, -errno on failure. - */ -static int pcpu_extend_area_map(struct pcpu_chunk *chunk) -{ - int new_alloc; - int *new; - size_t size; - - /* has enough? 
*/ - if (chunk->map_alloc >= chunk->map_used + 2) - return 0; - - spin_unlock_irq(&pcpu_lock); - - new_alloc = PCPU_DFL_MAP_ALLOC; - while (new_alloc < chunk->map_used + 2) - new_alloc *= 2; - - new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); - if (!new) { - spin_lock_irq(&pcpu_lock); - return -ENOMEM; - } - - /* - * Acquire pcpu_lock and switch to new area map. Only free - * could have happened inbetween, so map_used couldn't have - * grown. - */ - spin_lock_irq(&pcpu_lock); - BUG_ON(new_alloc < chunk->map_used + 2); - - size = chunk->map_alloc * sizeof(chunk->map[0]); - memcpy(new, chunk->map, size); - - /* - * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is - * one of the first chunks and still using static map. - */ - if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) - pcpu_mem_free(chunk->map, size); - - chunk->map_alloc = new_alloc; - chunk->map = new; - return 0; -} - /** * pcpu_split_block - split a map block * @chunk: chunk of interest @@ -407,19 +306,33 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk) * depending on @head, is reduced by @tail bytes and @tail byte block * is inserted after the target block. * - * @chunk->map must have enough free slots to accomodate the split. - * - * CONTEXT: - * pcpu_lock. + * RETURNS: + * 0 on success, -errno on failure. */ -static void pcpu_split_block(struct pcpu_chunk *chunk, int i, - int head, int tail) +static int pcpu_split_block(struct pcpu_chunk *chunk, int i, int head, int tail) { int nr_extra = !!head + !!tail; + int target = chunk->map_used + nr_extra; + + /* reallocation required? */ + if (chunk->map_alloc < target) { + int new_alloc = chunk->map_alloc; + int *new; - BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra); + while (new_alloc < target) + new_alloc *= 2; - /* insert new subblocks */ + new = pcpu_realloc(chunk->map, + chunk->map_alloc * sizeof(new[0]), + new_alloc * sizeof(new[0])); + if (!new) + return -ENOMEM; + + chunk->map_alloc = new_alloc; + chunk->map = new; + } + + /* insert a new subblock */ memmove(&chunk->map[i + nr_extra], &chunk->map[i], sizeof(chunk->map[0]) * (chunk->map_used - i)); chunk->map_used += nr_extra; @@ -432,6 +345,7 @@ static void pcpu_split_block(struct pcpu_chunk *chunk, int i, chunk->map[i++] -= tail; chunk->map[i] = tail; } + return 0; } /** @@ -444,14 +358,8 @@ static void pcpu_split_block(struct pcpu_chunk *chunk, int i, * Note that this function only allocates the offset. It doesn't * populate or map the area. * - * @chunk->map must have at least two free slots. - * - * CONTEXT: - * pcpu_lock. - * * RETURNS: - * Allocated offset in @chunk on success, -1 if no matching area is - * found. + * Allocated offset in @chunk on success, -errno on failure. */ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) { @@ -459,6 +367,22 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) int max_contig = 0; int i, off; + /* + * The static chunk initially doesn't have map attached + * because kmalloc wasn't available during init. Give it one. 
+ */ + if (unlikely(!chunk->map)) { + chunk->map = pcpu_realloc(NULL, 0, + PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + if (!chunk->map) + return -ENOMEM; + + chunk->map_alloc = PCPU_DFL_MAP_ALLOC; + chunk->map[chunk->map_used++] = -pcpu_static_size; + if (chunk->free_size) + chunk->map[chunk->map_used++] = chunk->free_size; + } + for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { bool is_last = i + 1 == chunk->map_used; int head, tail; @@ -499,7 +423,8 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) /* split if warranted */ if (head || tail) { - pcpu_split_block(chunk, i, head, tail); + if (pcpu_split_block(chunk, i, head, tail)) + return -ENOMEM; if (head) { i++; off += head; @@ -526,8 +451,14 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) chunk->contig_hint = max_contig; /* fully scanned */ pcpu_chunk_relocate(chunk, oslot); - /* tell the upper layer that this chunk has no matching area */ - return -1; + /* + * Tell the upper layer that this chunk has no area left. + * Note that this is not an error condition but a notification + * to upper layer that it needs to look at other chunks. + * -ENOSPC is chosen as it isn't used in memory subsystem and + * matches the meaning in a way. + */ + return -ENOSPC; } /** @@ -538,9 +469,6 @@ static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) * Free area starting from @freeme to @chunk. Note that this function * only modifies the allocation map. It doesn't depopulate or unmap * the area. - * - * CONTEXT: - * pcpu_lock. */ static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) { @@ -626,9 +554,6 @@ static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, * For each cpu, depopulate and unmap pages [@page_start,@page_end) * from @chunk. If @flush is true, vcache is flushed before unmapping * and tlb after. - * - * CONTEXT: - * pcpu_alloc_mutex. */ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, bool flush) @@ -707,9 +632,6 @@ static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. The area is cleared on return. - * - * CONTEXT: - * pcpu_alloc_mutex, does GFP_KERNEL allocation. */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) { @@ -764,7 +686,7 @@ static void free_pcpu_chunk(struct pcpu_chunk *chunk) return; if (chunk->vm) free_vm_area(chunk->vm); - pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); + pcpu_realloc(chunk->map, chunk->map_alloc * sizeof(chunk->map[0]), 0); kfree(chunk); } @@ -776,10 +698,10 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) if (!chunk) return NULL; - chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); + chunk->map = pcpu_realloc(NULL, 0, + PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); chunk->map_alloc = PCPU_DFL_MAP_ALLOC; chunk->map[chunk->map_used++] = pcpu_unit_size; - chunk->page = chunk->page_ar; chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); if (!chunk->vm) { @@ -795,21 +717,19 @@ static struct pcpu_chunk *alloc_pcpu_chunk(void) } /** - * pcpu_alloc - the percpu allocator + * __alloc_percpu - allocate percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) - * @reserved: allocate from the reserved chunk if available * - * Allocate percpu area of @size bytes aligned at @align. - * - * CONTEXT: - * Does GFP_KERNEL allocation. 
+ * Allocate percpu area of @size bytes aligned at @align. Might + * sleep. Might trigger writeouts. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ -static void *pcpu_alloc(size_t size, size_t align, bool reserved) +void *__alloc_percpu(size_t size, size_t align) { + void *ptr = NULL; struct pcpu_chunk *chunk; int slot, off; @@ -819,192 +739,90 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved) return NULL; } - mutex_lock(&pcpu_alloc_mutex); - spin_lock_irq(&pcpu_lock); - - /* serve reserved allocations from the reserved chunk if available */ - if (reserved && pcpu_reserved_chunk) { - chunk = pcpu_reserved_chunk; - if (size > chunk->contig_hint || - pcpu_extend_area_map(chunk) < 0) - goto fail_unlock; - off = pcpu_alloc_area(chunk, size, align); - if (off >= 0) - goto area_found; - goto fail_unlock; - } + mutex_lock(&pcpu_mutex); -restart: - /* search through normal chunks */ + /* allocate area */ for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { list_for_each_entry(chunk, &pcpu_slot[slot], list) { if (size > chunk->contig_hint) continue; - - switch (pcpu_extend_area_map(chunk)) { - case 0: - break; - case 1: - goto restart; /* pcpu_lock dropped, restart */ - default: - goto fail_unlock; - } - off = pcpu_alloc_area(chunk, size, align); if (off >= 0) goto area_found; + if (off != -ENOSPC) + goto out_unlock; } } /* hmmm... no space left, create a new chunk */ - spin_unlock_irq(&pcpu_lock); - chunk = alloc_pcpu_chunk(); if (!chunk) - goto fail_unlock_mutex; - - spin_lock_irq(&pcpu_lock); + goto out_unlock; pcpu_chunk_relocate(chunk, -1); pcpu_chunk_addr_insert(chunk); - goto restart; -area_found: - spin_unlock_irq(&pcpu_lock); + off = pcpu_alloc_area(chunk, size, align); + if (off < 0) + goto out_unlock; +area_found: /* populate, map and clear the area */ if (pcpu_populate_chunk(chunk, off, size)) { - spin_lock_irq(&pcpu_lock); pcpu_free_area(chunk, off); - goto fail_unlock; + goto out_unlock; } - mutex_unlock(&pcpu_alloc_mutex); - - return __addr_to_pcpu_ptr(chunk->vm->addr + off); - -fail_unlock: - spin_unlock_irq(&pcpu_lock); -fail_unlock_mutex: - mutex_unlock(&pcpu_alloc_mutex); - return NULL; -} - -/** - * __alloc_percpu - allocate dynamic percpu area - * @size: size of area to allocate in bytes - * @align: alignment of area (max PAGE_SIZE) - * - * Allocate percpu area of @size bytes aligned at @align. Might - * sleep. Might trigger writeouts. - * - * CONTEXT: - * Does GFP_KERNEL allocation. - * - * RETURNS: - * Percpu pointer to the allocated area on success, NULL on failure. - */ -void *__alloc_percpu(size_t size, size_t align) -{ - return pcpu_alloc(size, align, false); + ptr = __addr_to_pcpu_ptr(chunk->vm->addr + off); +out_unlock: + mutex_unlock(&pcpu_mutex); + return ptr; } EXPORT_SYMBOL_GPL(__alloc_percpu); -/** - * __alloc_reserved_percpu - allocate reserved percpu area - * @size: size of area to allocate in bytes - * @align: alignment of area (max PAGE_SIZE) - * - * Allocate percpu area of @size bytes aligned at @align from reserved - * percpu area if arch has set it up; otherwise, allocation is served - * from the same dynamic area. Might sleep. Might trigger writeouts. - * - * CONTEXT: - * Does GFP_KERNEL allocation. - * - * RETURNS: - * Percpu pointer to the allocated area on success, NULL on failure. 
- */ -void *__alloc_reserved_percpu(size_t size, size_t align) -{ - return pcpu_alloc(size, align, true); -} - -/** - * pcpu_reclaim - reclaim fully free chunks, workqueue function - * @work: unused - * - * Reclaim all fully free chunks except for the first one. - * - * CONTEXT: - * workqueue context. - */ -static void pcpu_reclaim(struct work_struct *work) +static void pcpu_kill_chunk(struct pcpu_chunk *chunk) { - LIST_HEAD(todo); - struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1]; - struct pcpu_chunk *chunk, *next; - - mutex_lock(&pcpu_alloc_mutex); - spin_lock_irq(&pcpu_lock); - - list_for_each_entry_safe(chunk, next, head, list) { - WARN_ON(chunk->immutable); - - /* spare the first one */ - if (chunk == list_first_entry(head, struct pcpu_chunk, list)) - continue; - - rb_erase(&chunk->rb_node, &pcpu_addr_root); - list_move(&chunk->list, &todo); - } - - spin_unlock_irq(&pcpu_lock); - mutex_unlock(&pcpu_alloc_mutex); - - list_for_each_entry_safe(chunk, next, &todo, list) { - pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); - free_pcpu_chunk(chunk); - } + WARN_ON(chunk->immutable); + pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); + list_del(&chunk->list); + rb_erase(&chunk->rb_node, &pcpu_addr_root); + free_pcpu_chunk(chunk); } /** * free_percpu - free percpu area * @ptr: pointer to area to free * - * Free percpu area @ptr. - * - * CONTEXT: - * Can be called from atomic context. + * Free percpu area @ptr. Might sleep. */ void free_percpu(void *ptr) { void *addr = __pcpu_ptr_to_addr(ptr); struct pcpu_chunk *chunk; - unsigned long flags; int off; if (!ptr) return; - spin_lock_irqsave(&pcpu_lock, flags); + mutex_lock(&pcpu_mutex); chunk = pcpu_chunk_addr_search(addr); off = addr - chunk->vm->addr; pcpu_free_area(chunk, off); - /* if there are more than one fully free chunks, wake up grim reaper */ + /* the chunk became fully free, kill one if there are other free ones */ if (chunk->free_size == pcpu_unit_size) { struct pcpu_chunk *pos; - list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) + list_for_each_entry(pos, + &pcpu_slot[pcpu_chunk_slot(chunk)], list) if (pos != chunk) { - schedule_work(&pcpu_reclaim_work); + pcpu_kill_chunk(pos); break; } } - spin_unlock_irqrestore(&pcpu_lock, flags); + mutex_unlock(&pcpu_mutex); } EXPORT_SYMBOL_GPL(free_percpu); @@ -1012,9 +830,8 @@ EXPORT_SYMBOL_GPL(free_percpu); * pcpu_setup_first_chunk - initialize the first percpu chunk * @get_page_fn: callback to fetch page pointer * @static_size: the size of static percpu area in bytes - * @reserved_size: the size of reserved percpu area in bytes - * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto - * @dyn_size: free size for dynamic allocation in bytes, -1 for auto + * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, 0 for auto + * @free_size: free size in bytes, 0 for auto * @base_addr: mapped address, NULL for auto * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary * @@ -1031,22 +848,13 @@ EXPORT_SYMBOL_GPL(free_percpu); * indicates end of pages for the cpu. Note that @get_page_fn() must * return the same number of pages for all cpus. * - * @reserved_size, if non-zero, specifies the amount of bytes to - * reserve after the static area in the first chunk. This reserves - * the first chunk such that it's available only through reserved - * percpu allocation. 
This is primarily used to serve module percpu - * static areas on architectures where the addressing model has - * limited offset range for symbol relocations to guarantee module - * percpu symbols fall inside the relocatable range. + * @unit_size, if non-zero, determines unit size and must be aligned + * to PAGE_SIZE and equal to or larger than @static_size + @free_size. * - * @unit_size, if non-negative, specifies unit size and must be - * aligned to PAGE_SIZE and equal to or larger than @static_size + - * @reserved_size + @dyn_size. - * - * @dyn_size, if non-negative, limits the number of bytes available - * for dynamic allocation in the first chunk. Specifying non-negative - * value make percpu leave alone the area beyond @static_size + - * @reserved_size + @dyn_size. + * @free_size determines the number of free bytes after the static + * area in the first chunk. If zero, whatever left is available. + * Specifying non-zero value make percpu leave the area after + * @static_size + @free_size alone. * * Non-null @base_addr means that the caller already allocated virtual * region for the first chunk and mapped it. percpu must not mess @@ -1056,58 +864,41 @@ EXPORT_SYMBOL_GPL(free_percpu); * @populate_pte_fn is used to populate the pagetable. NULL means the * caller already populated the pagetable. * - * If the first chunk ends up with both reserved and dynamic areas, it - * is served by two chunks - one to serve the core static and reserved - * areas and the other for the dynamic area. They share the same vm - * and page map but uses different area allocation map to stay away - * from each other. The latter chunk is circulated in the chunk slots - * and available for dynamic allocation like any other chunks. - * * RETURNS: * The determined pcpu_unit_size which can be used to initialize * percpu access. */ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, - size_t static_size, size_t reserved_size, - ssize_t unit_size, ssize_t dyn_size, - void *base_addr, + size_t static_size, size_t unit_size, + size_t free_size, void *base_addr, pcpu_populate_pte_fn_t populate_pte_fn) { - static struct vm_struct first_vm; - static int smap[2], dmap[2]; - struct pcpu_chunk *schunk, *dchunk = NULL; + static struct vm_struct static_vm; + struct pcpu_chunk *static_chunk; unsigned int cpu; int nr_pages; int err, i; /* santiy checks */ - BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || - ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); BUG_ON(!static_size); - if (unit_size >= 0) { - BUG_ON(unit_size < static_size + reserved_size + - (dyn_size >= 0 ? dyn_size : 0)); - BUG_ON(unit_size & ~PAGE_MASK); - } else { - BUG_ON(dyn_size >= 0); - BUG_ON(base_addr); - } + BUG_ON(!unit_size && free_size); + BUG_ON(unit_size && unit_size < static_size + free_size); + BUG_ON(unit_size & ~PAGE_MASK); + BUG_ON(base_addr && !unit_size); BUG_ON(base_addr && populate_pte_fn); - if (unit_size >= 0) + if (unit_size) pcpu_unit_pages = unit_size >> PAGE_SHIFT; else pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, - PFN_UP(static_size + reserved_size)); + PFN_UP(static_size)); + pcpu_static_size = static_size; pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); - if (dyn_size < 0) - dyn_size = pcpu_unit_size - static_size - reserved_size; - /* * Allocate chunk slots. The additional last slot is for * empty chunks. 
@@ -1117,66 +908,33 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, for (i = 0; i < pcpu_nr_slots; i++) INIT_LIST_HEAD(&pcpu_slot[i]); - /* - * Initialize static chunk. If reserved_size is zero, the - * static chunk covers static area + dynamic allocation area - * in the first chunk. If reserved_size is not zero, it - * covers static area + reserved area (mostly used for module - * static percpu allocation). - */ - schunk = alloc_bootmem(pcpu_chunk_struct_size); - INIT_LIST_HEAD(&schunk->list); - schunk->vm = &first_vm; - schunk->map = smap; - schunk->map_alloc = ARRAY_SIZE(smap); - schunk->page = schunk->page_ar; - - if (reserved_size) { - schunk->free_size = reserved_size; - pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ - } else { - schunk->free_size = dyn_size; - dyn_size = 0; /* dynamic area covered */ - } - schunk->contig_hint = schunk->free_size; - - schunk->map[schunk->map_used++] = -static_size; - if (schunk->free_size) - schunk->map[schunk->map_used++] = schunk->free_size; - - pcpu_reserved_chunk_limit = static_size + schunk->free_size; - - /* init dynamic chunk if necessary */ - if (dyn_size) { - dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); - INIT_LIST_HEAD(&dchunk->list); - dchunk->vm = &first_vm; - dchunk->map = dmap; - dchunk->map_alloc = ARRAY_SIZE(dmap); - dchunk->page = schunk->page_ar; /* share page map with schunk */ - - dchunk->contig_hint = dchunk->free_size = dyn_size; - dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; - dchunk->map[dchunk->map_used++] = dchunk->free_size; - } + /* init static_chunk */ + static_chunk = alloc_bootmem(pcpu_chunk_struct_size); + INIT_LIST_HEAD(&static_chunk->list); + static_chunk->vm = &static_vm; + + if (free_size) + static_chunk->free_size = free_size; + else + static_chunk->free_size = pcpu_unit_size - pcpu_static_size; + + static_chunk->contig_hint = static_chunk->free_size; /* allocate vm address */ - first_vm.flags = VM_ALLOC; - first_vm.size = pcpu_chunk_size; + static_vm.flags = VM_ALLOC; + static_vm.size = pcpu_chunk_size; if (!base_addr) - vm_area_register_early(&first_vm, PAGE_SIZE); + vm_area_register_early(&static_vm, PAGE_SIZE); else { /* * Pages already mapped. No need to remap into - * vmalloc area. In this case the first chunks can't - * be mapped or unmapped by percpu and are marked + * vmalloc area. In this case the static chunk can't + * be mapped or unmapped by percpu and is marked * immutable. 
*/ - first_vm.addr = base_addr; - schunk->immutable = true; - if (dchunk) - dchunk->immutable = true; + static_vm.addr = base_addr; + static_chunk->immutable = true; } /* assign pages */ @@ -1187,10 +945,10 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (!page) break; - *pcpu_chunk_pagep(schunk, cpu, i) = page; + *pcpu_chunk_pagep(static_chunk, cpu, i) = page; } - BUG_ON(i < PFN_UP(static_size)); + BUG_ON(i < PFN_UP(pcpu_static_size)); if (nr_pages < 0) nr_pages = i; @@ -1202,25 +960,20 @@ size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, if (populate_pte_fn) { for_each_possible_cpu(cpu) for (i = 0; i < nr_pages; i++) - populate_pte_fn(pcpu_chunk_addr(schunk, + populate_pte_fn(pcpu_chunk_addr(static_chunk, cpu, i)); - err = pcpu_map(schunk, 0, nr_pages); + err = pcpu_map(static_chunk, 0, nr_pages); if (err) panic("failed to setup static percpu area, err=%d\n", err); } - /* link the first chunk in */ - if (!dchunk) { - pcpu_chunk_relocate(schunk, -1); - pcpu_chunk_addr_insert(schunk); - } else { - pcpu_chunk_relocate(dchunk, -1); - pcpu_chunk_addr_insert(dchunk); - } + /* link static_chunk in */ + pcpu_chunk_relocate(static_chunk, -1); + pcpu_chunk_addr_insert(static_chunk); /* we're done */ - pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); + pcpu_base_addr = (void *)pcpu_chunk_addr(static_chunk, 0, 0); return pcpu_unit_size; } diff --git a/trunk/samples/tracepoints/tp-samples-trace.h b/trunk/samples/tracepoints/tp-samples-trace.h index dffdc49878af..01724e04c556 100644 --- a/trunk/samples/tracepoints/tp-samples-trace.h +++ b/trunk/samples/tracepoints/tp-samples-trace.h @@ -5,9 +5,9 @@ #include DECLARE_TRACE(subsys_event, - TP_PROTO(struct inode *inode, struct file *file), - TP_ARGS(inode, file)); + TPPROTO(struct inode *inode, struct file *file), + TPARGS(inode, file)); DECLARE_TRACE(subsys_eventb, - TP_PROTO(void), - TP_ARGS()); + TPPROTO(void), + TPARGS()); #endif
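A closing note on the trace_events_stage_*.h / trace_format.h machinery touched above: the reason those headers consist almost entirely of #undef/#define pairs is that a single TRACE_FIELD() list is expanded once per generated artifact (the ftrace_raw_* struct in stage 1, the output callback in stage 2, the probe in stage 3, the format callback in trace_format.h). The stand-alone sketch below shows the same multi-pass idea in plain userspace C; it uses an X-macro callback rather than the kernel's redefine-and-reinclude trick, and every name in it is invented for the example:

#include <stdio.h>
#include <stddef.h>

/* one field list, reused for every expansion pass */
#define SAMPLE_FIELDS(F)					\
	F(unsigned long, from)					\
	F(unsigned long, to)

/* pass 1: emit struct members (cf. TRACE_FIELD in stage 1) */
#define EMIT_MEMBER(type, item)	type item;
struct sample_raw_event {
	SAMPLE_FIELDS(EMIT_MEMBER)
};

/* pass 2: describe each field (cf. TRACE_FIELD in trace_format.h) */
#define EMIT_DESC(type, item)					\
	printf("\tfield:" #type " " #item ";\toffset:%zu;\tsize:%zu;\n", \
	       offsetof(struct sample_raw_event, item),		\
	       sizeof(((struct sample_raw_event *)0)->item));

int main(void)
{
	SAMPLE_FIELDS(EMIT_DESC)
	return 0;
}

Run, this prints one "field:..." description per entry in SAMPLE_FIELDS(), the same shape of line that trace_write_header() and the TRACE_FIELD() expansion in trace_format.h emit. The kernel gets the reuse by #undef-ing and redefining TRACE_FIELD() before each re-inclusion of the event descriptions instead of passing a callback.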