diff --git a/[refs] b/[refs] index cb4578d26eed..5e03b2003461 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 512626a04e72aca60effe111fa0333ed0b195d21 +refs/heads/master: 90586523eb4b349806887c62ee70685a49415124 diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 7bcdebffdab3..72d3bf08d79b 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1083,10 +1083,6 @@ and is between 256 and 4096 characters. It is defined in the file Configure the RouterBoard 532 series on-chip Ethernet adapter MAC address. - kmemleak= [KNL] Boot-time kmemleak enable/disable - Valid arguments: on, off - Default: on - kstack=N [X86] Print N words from the kernel stack in oops dumps. diff --git a/trunk/Documentation/kmemleak.txt b/trunk/Documentation/kmemleak.txt deleted file mode 100644 index 0112da3b9ab8..000000000000 --- a/trunk/Documentation/kmemleak.txt +++ /dev/null @@ -1,142 +0,0 @@ -Kernel Memory Leak Detector -=========================== - -Introduction ------------- - -Kmemleak provides a way of detecting possible kernel memory leaks in a -way similar to a tracing garbage collector -(http://en.wikipedia.org/wiki/Garbage_collection_%28computer_science%29#Tracing_garbage_collectors), -with the difference that the orphan objects are not freed but only -reported via /sys/kernel/debug/kmemleak. A similar method is used by the -Valgrind tool (memcheck --leak-check) to detect the memory leaks in -user-space applications. - -Usage ------ - -CONFIG_DEBUG_KMEMLEAK in "Kernel hacking" has to be enabled. A kernel -thread scans the memory every 10 minutes (by default) and prints any new -unreferenced objects found. 
To trigger an intermediate scan and display -all the possible memory leaks: - - # mount -t debugfs nodev /sys/kernel/debug/ - # cat /sys/kernel/debug/kmemleak - -Note that the orphan objects are listed in the order they were allocated -and one object at the beginning of the list may cause other subsequent -objects to be reported as orphan. - -Memory scanning parameters can be modified at run-time by writing to the -/sys/kernel/debug/kmemleak file. The following parameters are supported: - - off - disable kmemleak (irreversible) - stack=on - enable the task stacks scanning - stack=off - disable the tasks stacks scanning - scan=on - start the automatic memory scanning thread - scan=off - stop the automatic memory scanning thread - scan= - set the automatic memory scanning period in seconds (0 - to disable it) - -Kmemleak can also be disabled at boot-time by passing "kmemleak=off" on -the kernel command line. - -Basic Algorithm ---------------- - -The memory allocations via kmalloc, vmalloc, kmem_cache_alloc and -friends are traced and the pointers, together with additional -information like size and stack trace, are stored in a prio search tree. -The corresponding freeing function calls are tracked and the pointers -removed from the kmemleak data structures. - -An allocated block of memory is considered orphan if no pointer to its -start address or to any location inside the block can be found by -scanning the memory (including saved registers). This means that there -might be no way for the kernel to pass the address of the allocated -block to a freeing function and therefore the block is considered a -memory leak. - -The scanning algorithm steps: - - 1. mark all objects as white (remaining white objects will later be - considered orphan) - 2. scan the memory starting with the data section and stacks, checking - the values against the addresses stored in the prio search tree. If - a pointer to a white object is found, the object is added to the - gray list - 3. 
scan the gray objects for matching addresses (some white objects - can become gray and added at the end of the gray list) until the - gray set is finished - 4. the remaining white objects are considered orphan and reported via - /sys/kernel/debug/kmemleak - -Some allocated memory blocks have pointers stored in the kernel's -internal data structures and they cannot be detected as orphans. To -avoid this, kmemleak can also store the number of values pointing to an -address inside the block address range that need to be found so that the -block is not considered a leak. One example is __vmalloc(). - -Kmemleak API ------------- - -See the include/linux/kmemleak.h header for the functions prototype. - -kmemleak_init - initialize kmemleak -kmemleak_alloc - notify of a memory block allocation -kmemleak_free - notify of a memory block freeing -kmemleak_not_leak - mark an object as not a leak -kmemleak_ignore - do not scan or report an object as leak -kmemleak_scan_area - add scan areas inside a memory block -kmemleak_no_scan - do not scan a memory block -kmemleak_erase - erase an old value in a pointer variable -kmemleak_alloc_recursive - as kmemleak_alloc but checks the recursiveness -kmemleak_free_recursive - as kmemleak_free but checks the recursiveness - -Dealing with false positives/negatives --------------------------------------- - -The false negatives are real memory leaks (orphan objects) but not -reported by kmemleak because values found during the memory scanning -point to such objects. To reduce the number of false negatives, kmemleak -provides the kmemleak_ignore, kmemleak_scan_area, kmemleak_no_scan and -kmemleak_erase functions (see above). The task stacks also increase the -amount of false negatives and their scanning is not enabled by default. - -The false positives are objects wrongly reported as being memory leaks -(orphan). For objects known not to be leaks, kmemleak provides the -kmemleak_not_leak function. 
The kmemleak_ignore could also be used if -the memory block is known not to contain other pointers and it will no -longer be scanned. - -Some of the reported leaks are only transient, especially on SMP -systems, because of pointers temporarily stored in CPU registers or -stacks. Kmemleak defines MSECS_MIN_AGE (defaulting to 1000) representing -the minimum age of an object to be reported as a memory leak. - -Limitations and Drawbacks -------------------------- - -The main drawback is the reduced performance of memory allocation and -freeing. To avoid other penalties, the memory scanning is only performed -when the /sys/kernel/debug/kmemleak file is read. Anyway, this tool is -intended for debugging purposes where the performance might not be the -most important requirement. - -To keep the algorithm simple, kmemleak scans for values pointing to any -address inside a block's address range. This may lead to an increased -number of false negatives. However, it is likely that a real memory leak -will eventually become visible. - -Another source of false negatives is the data stored in non-pointer -values. In a future version, kmemleak could only scan the pointer -members in the allocated structures. This feature would solve many of -the false negative cases described above. - -The tool can report false positives. These are cases where an allocated -block doesn't need to be freed (some cases in the init_call functions), -the pointer is calculated by other methods than the usual container_of -macro or the pointer is stored in a location not scanned by kmemleak. - -Page allocations and ioremap are not tracked. Only the ARM and x86 -architectures are currently supported. 
diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index 1a0084e22cf3..ccdb57524e3c 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -3370,12 +3370,6 @@ F: Documentation/trace/kmemtrace.txt F: include/trace/kmemtrace.h F: kernel/trace/kmemtrace.c -KMEMLEAK -P: Catalin Marinas -M: catalin.marinas@arm.com -L: linux-kernel@vger.kernel.org -S: Maintained - KPROBES P: Ananth N Mavinakayanahalli M: ananth@in.ibm.com @@ -4409,16 +4403,6 @@ S: Maintained F: include/linux/delayacct.h F: kernel/delayacct.c -PERFORMANCE COUNTER SUBSYSTEM -P: Peter Zijlstra -M: a.p.zijlstra@chello.nl -P: Paul Mackerras -M: paulus@samba.org -P: Ingo Molnar -M: mingo@elte.hu -L: linux-kernel@vger.kernel.org -S: Supported - PERSONALITY HANDLING P: Christoph Hellwig M: hch@infradead.org diff --git a/trunk/arch/powerpc/include/asm/hw_irq.h b/trunk/arch/powerpc/include/asm/hw_irq.h index 20a44d0c9fdd..b7e034b0a6dd 100644 --- a/trunk/arch/powerpc/include/asm/hw_irq.h +++ b/trunk/arch/powerpc/include/asm/hw_irq.h @@ -131,44 +131,5 @@ static inline int irqs_disabled_flags(unsigned long flags) */ struct irq_chip; -#ifdef CONFIG_PERF_COUNTERS -static inline unsigned long test_perf_counter_pending(void) -{ - unsigned long x; - - asm volatile("lbz %0,%1(13)" - : "=r" (x) - : "i" (offsetof(struct paca_struct, perf_counter_pending))); - return x; -} - -static inline void set_perf_counter_pending(void) -{ - asm volatile("stb %0,%1(13)" : : - "r" (1), - "i" (offsetof(struct paca_struct, perf_counter_pending))); -} - -static inline void clear_perf_counter_pending(void) -{ - asm volatile("stb %0,%1(13)" : : - "r" (0), - "i" (offsetof(struct paca_struct, perf_counter_pending))); -} - -extern void perf_counter_do_pending(void); - -#else - -static inline unsigned long test_perf_counter_pending(void) -{ - return 0; -} - -static inline void set_perf_counter_pending(void) {} -static inline void clear_perf_counter_pending(void) {} -static inline void perf_counter_do_pending(void) {} -#endif /* 
CONFIG_PERF_COUNTERS */ - #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_HW_IRQ_H */ diff --git a/trunk/arch/powerpc/include/asm/paca.h b/trunk/arch/powerpc/include/asm/paca.h index 6ef055723019..082b3aedf145 100644 --- a/trunk/arch/powerpc/include/asm/paca.h +++ b/trunk/arch/powerpc/include/asm/paca.h @@ -99,7 +99,6 @@ struct paca_struct { u8 soft_enabled; /* irq soft-enable flag */ u8 hard_enabled; /* set if irqs are enabled in MSR */ u8 io_sync; /* writel() needs spin_unlock sync */ - u8 perf_counter_pending; /* PM interrupt while soft-disabled */ /* Stuff for accurate time accounting */ u64 user_time; /* accumulated usermode TB ticks */ diff --git a/trunk/arch/powerpc/include/asm/perf_counter.h b/trunk/arch/powerpc/include/asm/perf_counter.h deleted file mode 100644 index cc7c887705b8..000000000000 --- a/trunk/arch/powerpc/include/asm/perf_counter.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Performance counter support - PowerPC-specific definitions. - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include - -#define MAX_HWCOUNTERS 8 -#define MAX_EVENT_ALTERNATIVES 8 -#define MAX_LIMITED_HWCOUNTERS 2 - -/* - * This struct provides the constants and functions needed to - * describe the PMU on a particular POWER-family CPU. 
- */ -struct power_pmu { - int n_counter; - int max_alternatives; - u64 add_fields; - u64 test_adder; - int (*compute_mmcr)(u64 events[], int n_ev, - unsigned int hwc[], u64 mmcr[]); - int (*get_constraint)(u64 event, u64 *mskp, u64 *valp); - int (*get_alternatives)(u64 event, unsigned int flags, - u64 alt[]); - void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); - int (*limited_pmc_event)(u64 event); - u32 flags; - int n_generic; - int *generic_events; - int (*cache_events)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; -}; - -extern struct power_pmu *ppmu; - -/* - * Values for power_pmu.flags - */ -#define PPMU_LIMITED_PMC5_6 1 /* PMC5/6 have limited function */ -#define PPMU_ALT_SIPR 2 /* uses alternate posn for SIPR/HV */ - -/* - * Values for flags to get_alternatives() - */ -#define PPMU_LIMITED_PMC_OK 1 /* can put this on a limited PMC */ -#define PPMU_LIMITED_PMC_REQD 2 /* have to put this on a limited PMC */ -#define PPMU_ONLY_COUNT_RUN 4 /* only counting in run state */ - -struct pt_regs; -extern unsigned long perf_misc_flags(struct pt_regs *regs); -#define perf_misc_flags(regs) perf_misc_flags(regs) - -extern unsigned long perf_instruction_pointer(struct pt_regs *regs); - -/* - * The power_pmu.get_constraint function returns a 64-bit value and - * a 64-bit mask that express the constraints between this event and - * other events. - * - * The value and mask are divided up into (non-overlapping) bitfields - * of three different types: - * - * Select field: this expresses the constraint that some set of bits - * in MMCR* needs to be set to a specific value for this event. For a - * select field, the mask contains 1s in every bit of the field, and - * the value contains a unique value for each possible setting of the - * MMCR* bits. The constraint checking code will ensure that two events - * that set the same field in their masks have the same value in their - * value dwords. 
- * - * Add field: this expresses the constraint that there can be at most - * N events in a particular class. A field of k bits can be used for - * N <= 2^(k-1) - 1. The mask has the most significant bit of the field - * set (and the other bits 0), and the value has only the least significant - * bit of the field set. In addition, the 'add_fields' and 'test_adder' - * in the struct power_pmu for this processor come into play. The - * add_fields value contains 1 in the LSB of the field, and the - * test_adder contains 2^(k-1) - 1 - N in the field. - * - * NAND field: this expresses the constraint that you may not have events - * in all of a set of classes. (For example, on PPC970, you can't select - * events from the FPU, ISU and IDU simultaneously, although any two are - * possible.) For N classes, the field is N+1 bits wide, and each class - * is assigned one bit from the least-significant N bits. The mask has - * only the most-significant bit set, and the value has only the bit - * for the event's class set. The test_adder has the least significant - * bit set in the field. - * - * If an event is not subject to the constraint expressed by a particular - * field, then it will have 0 in both the mask and value for that field. 
- */ diff --git a/trunk/arch/powerpc/include/asm/reg.h b/trunk/arch/powerpc/include/asm/reg.h index fb359b0a6937..e8018d540e87 100644 --- a/trunk/arch/powerpc/include/asm/reg.h +++ b/trunk/arch/powerpc/include/asm/reg.h @@ -492,13 +492,11 @@ #define MMCR0_FCHV 0x00000001UL /* freeze conditions in hypervisor mode */ #define SPRN_MMCR1 798 #define SPRN_MMCRA 0x312 -#define MMCRA_SDSYNC 0x80000000UL /* SDAR synced with SIAR */ #define MMCRA_SIHV 0x10000000UL /* state of MSR HV when SIAR set */ #define MMCRA_SIPR 0x08000000UL /* state of MSR PR when SIAR set */ #define MMCRA_SLOT 0x07000000UL /* SLOT bits (37-39) */ #define MMCRA_SLOT_SHIFT 24 #define MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */ -#define POWER6_MMCRA_SDSYNC 0x0000080000000000ULL /* SDAR/SIAR synced */ #define POWER6_MMCRA_SIHV 0x0000040000000000ULL #define POWER6_MMCRA_SIPR 0x0000020000000000ULL #define POWER6_MMCRA_THRM 0x00000020UL diff --git a/trunk/arch/powerpc/include/asm/systbl.h b/trunk/arch/powerpc/include/asm/systbl.h index a0b92de51c7e..d98a30dfd41c 100644 --- a/trunk/arch/powerpc/include/asm/systbl.h +++ b/trunk/arch/powerpc/include/asm/systbl.h @@ -322,6 +322,6 @@ SYSCALL_SPU(epoll_create1) SYSCALL_SPU(dup3) SYSCALL_SPU(pipe2) SYSCALL(inotify_init1) -SYSCALL_SPU(perf_counter_open) +SYSCALL(ni_syscall) COMPAT_SYS_SPU(preadv) COMPAT_SYS_SPU(pwritev) diff --git a/trunk/arch/powerpc/include/asm/unistd.h b/trunk/arch/powerpc/include/asm/unistd.h index 4badac2d11d1..3f06f8ec81c5 100644 --- a/trunk/arch/powerpc/include/asm/unistd.h +++ b/trunk/arch/powerpc/include/asm/unistd.h @@ -341,7 +341,6 @@ #define __NR_dup3 316 #define __NR_pipe2 317 #define __NR_inotify_init1 318 -#define __NR_perf_counter_open 319 #define __NR_preadv 320 #define __NR_pwritev 321 diff --git a/trunk/arch/powerpc/kernel/Makefile b/trunk/arch/powerpc/kernel/Makefile index a2c683403c2b..71901fbda4a5 100644 --- a/trunk/arch/powerpc/kernel/Makefile +++ b/trunk/arch/powerpc/kernel/Makefile @@ -94,9 +94,6 @@ 
obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ - power5-pmu.o power5+-pmu.o power6-pmu.o \ - power7-pmu.o obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o diff --git a/trunk/arch/powerpc/kernel/asm-offsets.c b/trunk/arch/powerpc/kernel/asm-offsets.c index e981d1ce1914..1e40bc053946 100644 --- a/trunk/arch/powerpc/kernel/asm-offsets.c +++ b/trunk/arch/powerpc/kernel/asm-offsets.c @@ -131,7 +131,6 @@ int main(void) DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); - DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); diff --git a/trunk/arch/powerpc/kernel/entry_64.S b/trunk/arch/powerpc/kernel/entry_64.S index 43e073477c34..abfc32330479 100644 --- a/trunk/arch/powerpc/kernel/entry_64.S +++ b/trunk/arch/powerpc/kernel/entry_64.S @@ -526,15 +526,6 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ISERIES) 2: TRACE_AND_RESTORE_IRQ(r5); -#ifdef CONFIG_PERF_COUNTERS - /* check paca->perf_counter_pending if we're enabling ints */ - lbz r3,PACAPERFPEND(r13) - and. 
r3,r3,r5 - beq 27f - bl .perf_counter_do_pending -27: -#endif /* CONFIG_PERF_COUNTERS */ - /* extract EE bit and use it to restore paca->hard_enabled */ ld r3,_MSR(r1) rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ diff --git a/trunk/arch/powerpc/kernel/irq.c b/trunk/arch/powerpc/kernel/irq.c index feff792ed0f9..8c1a4966867e 100644 --- a/trunk/arch/powerpc/kernel/irq.c +++ b/trunk/arch/powerpc/kernel/irq.c @@ -135,11 +135,6 @@ notrace void raw_local_irq_restore(unsigned long en) iseries_handle_interrupts(); } - if (test_perf_counter_pending()) { - clear_perf_counter_pending(); - perf_counter_do_pending(); - } - /* * if (get_paca()->hard_enabled) return; * But again we need to take care that gcc gets hard_enabled directly diff --git a/trunk/arch/powerpc/kernel/perf_counter.c b/trunk/arch/powerpc/kernel/perf_counter.c deleted file mode 100644 index bb202388170e..000000000000 --- a/trunk/arch/powerpc/kernel/perf_counter.c +++ /dev/null @@ -1,1263 +0,0 @@ -/* - * Performance counter support - powerpc architecture code - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct cpu_hw_counters { - int n_counters; - int n_percpu; - int disabled; - int n_added; - int n_limited; - u8 pmcs_enabled; - struct perf_counter *counter[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int flags[MAX_HWCOUNTERS]; - u64 mmcr[3]; - struct perf_counter *limited_counter[MAX_LIMITED_HWCOUNTERS]; - u8 limited_hwidx[MAX_LIMITED_HWCOUNTERS]; -}; -DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); - -struct power_pmu *ppmu; - -/* - * Normally, to ignore kernel events we set the FCS (freeze counters - * in supervisor mode) bit in MMCR0, but if the kernel runs with the - * hypervisor bit set in the MSR, or if we are running on a processor - * where the hypervisor bit is forced to 1 (as on Apple G5 processors), - * then we need to use the FCHV bit to ignore kernel events. - */ -static unsigned int freeze_counters_kernel = MMCR0_FCS; - -static void perf_counter_interrupt(struct pt_regs *regs); - -void perf_counter_print_debug(void) -{ -} - -/* - * Read one performance monitor counter (PMC). - */ -static unsigned long read_pmc(int idx) -{ - unsigned long val; - - switch (idx) { - case 1: - val = mfspr(SPRN_PMC1); - break; - case 2: - val = mfspr(SPRN_PMC2); - break; - case 3: - val = mfspr(SPRN_PMC3); - break; - case 4: - val = mfspr(SPRN_PMC4); - break; - case 5: - val = mfspr(SPRN_PMC5); - break; - case 6: - val = mfspr(SPRN_PMC6); - break; - case 7: - val = mfspr(SPRN_PMC7); - break; - case 8: - val = mfspr(SPRN_PMC8); - break; - default: - printk(KERN_ERR "oops trying to read PMC%d\n", idx); - val = 0; - } - return val; -} - -/* - * Write one PMC. 
- */ -static void write_pmc(int idx, unsigned long val) -{ - switch (idx) { - case 1: - mtspr(SPRN_PMC1, val); - break; - case 2: - mtspr(SPRN_PMC2, val); - break; - case 3: - mtspr(SPRN_PMC3, val); - break; - case 4: - mtspr(SPRN_PMC4, val); - break; - case 5: - mtspr(SPRN_PMC5, val); - break; - case 6: - mtspr(SPRN_PMC6, val); - break; - case 7: - mtspr(SPRN_PMC7, val); - break; - case 8: - mtspr(SPRN_PMC8, val); - break; - default: - printk(KERN_ERR "oops trying to write PMC%d\n", idx); - } -} - -/* - * Check if a set of events can all go on the PMU at once. - * If they can't, this will look at alternative codes for the events - * and see if any combination of alternative codes is feasible. - * The feasible set is returned in event[]. - */ -static int power_check_constraints(u64 event[], unsigned int cflags[], - int n_ev) -{ - u64 mask, value, nv; - u64 alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; - u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; - int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; - int i, j; - u64 addf = ppmu->add_fields; - u64 tadd = ppmu->test_adder; - - if (n_ev > ppmu->n_counter) - return -1; - - /* First see if the events will go on as-is */ - for (i = 0; i < n_ev; ++i) { - if ((cflags[i] & PPMU_LIMITED_PMC_REQD) - && !ppmu->limited_pmc_event(event[i])) { - ppmu->get_alternatives(event[i], cflags[i], - alternatives[i]); - event[i] = alternatives[i][0]; - } - if (ppmu->get_constraint(event[i], &amasks[i][0], - &avalues[i][0])) - return -1; - } - value = mask = 0; - for (i = 0; i < n_ev; ++i) { - nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); - if ((((nv + tadd) ^ value) & mask) != 0 || - (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) - break; - value = nv; - mask |= amasks[i][0]; - } - if (i == n_ev) - return 0; /* all OK */ - - /* doesn't work, gather alternatives... 
*/ - if (!ppmu->get_alternatives) - return -1; - for (i = 0; i < n_ev; ++i) { - choice[i] = 0; - n_alt[i] = ppmu->get_alternatives(event[i], cflags[i], - alternatives[i]); - for (j = 1; j < n_alt[i]; ++j) - ppmu->get_constraint(alternatives[i][j], - &amasks[i][j], &avalues[i][j]); - } - - /* enumerate all possibilities and see if any will work */ - i = 0; - j = -1; - value = mask = nv = 0; - while (i < n_ev) { - if (j >= 0) { - /* we're backtracking, restore context */ - value = svalues[i]; - mask = smasks[i]; - j = choice[i]; - } - /* - * See if any alternative k for event i, - * where k > j, will satisfy the constraints. - */ - while (++j < n_alt[i]) { - nv = (value | avalues[i][j]) + - (value & avalues[i][j] & addf); - if ((((nv + tadd) ^ value) & mask) == 0 && - (((nv + tadd) ^ avalues[i][j]) - & amasks[i][j]) == 0) - break; - } - if (j >= n_alt[i]) { - /* - * No feasible alternative, backtrack - * to event i-1 and continue enumerating its - * alternatives from where we got up to. - */ - if (--i < 0) - return -1; - } else { - /* - * Found a feasible alternative for event i, - * remember where we got up to with this event, - * go on to the next event, and start with - * the first alternative for it. - */ - choice[i] = j; - svalues[i] = value; - smasks[i] = mask; - value = nv; - mask |= amasks[i][j]; - ++i; - j = -1; - } - } - - /* OK, we have a feasible combination, tell the caller the solution */ - for (i = 0; i < n_ev; ++i) - event[i] = alternatives[i][choice[i]]; - return 0; -} - -/* - * Check if newly-added counters have consistent settings for - * exclude_{user,kernel,hv} with each other and any previously - * added counters. 
- */ -static int check_excludes(struct perf_counter **ctrs, unsigned int cflags[], - int n_prev, int n_new) -{ - int eu = 0, ek = 0, eh = 0; - int i, n, first; - struct perf_counter *counter; - - n = n_prev + n_new; - if (n <= 1) - return 0; - - first = 1; - for (i = 0; i < n; ++i) { - if (cflags[i] & PPMU_LIMITED_PMC_OK) { - cflags[i] &= ~PPMU_LIMITED_PMC_REQD; - continue; - } - counter = ctrs[i]; - if (first) { - eu = counter->attr.exclude_user; - ek = counter->attr.exclude_kernel; - eh = counter->attr.exclude_hv; - first = 0; - } else if (counter->attr.exclude_user != eu || - counter->attr.exclude_kernel != ek || - counter->attr.exclude_hv != eh) { - return -EAGAIN; - } - } - - if (eu || ek || eh) - for (i = 0; i < n; ++i) - if (cflags[i] & PPMU_LIMITED_PMC_OK) - cflags[i] |= PPMU_LIMITED_PMC_REQD; - - return 0; -} - -static void power_pmu_read(struct perf_counter *counter) -{ - long val, delta, prev; - - if (!counter->hw.idx) - return; - /* - * Performance monitor interrupts come even when interrupts - * are soft-disabled, as long as interrupts are hard-enabled. - * Therefore we treat them like NMIs. - */ - do { - prev = atomic64_read(&counter->hw.prev_count); - barrier(); - val = read_pmc(counter->hw.idx); - } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); - - /* The counters are only 32 bits wide */ - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &counter->hw.period_left); -} - -/* - * On some machines, PMC5 and PMC6 can't be written, don't respect - * the freeze conditions, and don't generate interrupts. This tells - * us if `counter' is using such a PMC. 
- */ -static int is_limited_pmc(int pmcnum) -{ - return (ppmu->flags & PPMU_LIMITED_PMC5_6) - && (pmcnum == 5 || pmcnum == 6); -} - -static void freeze_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val, prev, delta; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - if (!counter->hw.idx) - continue; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - prev = atomic64_read(&counter->hw.prev_count); - counter->hw.idx = 0; - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - } -} - -static void thaw_limited_counters(struct cpu_hw_counters *cpuhw, - unsigned long pmc5, unsigned long pmc6) -{ - struct perf_counter *counter; - u64 val; - int i; - - for (i = 0; i < cpuhw->n_limited; ++i) { - counter = cpuhw->limited_counter[i]; - counter->hw.idx = cpuhw->limited_hwidx[i]; - val = (counter->hw.idx == 5) ? pmc5 : pmc6; - atomic64_set(&counter->hw.prev_count, val); - perf_counter_update_userpage(counter); - } -} - -/* - * Since limited counters don't respect the freeze conditions, we - * have to read them immediately after freezing or unfreezing the - * other counters. We try to keep the values from the limited - * counters as consistent as possible by keeping the delay (in - * cycles and instructions) between freezing/unfreezing and reading - * the limited counters as small and consistent as possible. - * Therefore, if any limited counters are in use, we read them - * both, and always in the same order, to minimize variability, - * and do it inside the same asm that writes MMCR0. - */ -static void write_mmcr0(struct cpu_hw_counters *cpuhw, unsigned long mmcr0) -{ - unsigned long pmc5, pmc6; - - if (!cpuhw->n_limited) { - mtspr(SPRN_MMCR0, mmcr0); - return; - } - - /* - * Write MMCR0, then read PMC5 and PMC6 immediately. 
- * To ensure we don't get a performance monitor interrupt - * between writing MMCR0 and freezing/thawing the limited - * counters, we first write MMCR0 with the counter overflow - * interrupt enable bits turned off. - */ - asm volatile("mtspr %3,%2; mfspr %0,%4; mfspr %1,%5" - : "=&r" (pmc5), "=&r" (pmc6) - : "r" (mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)), - "i" (SPRN_MMCR0), - "i" (SPRN_PMC5), "i" (SPRN_PMC6)); - - if (mmcr0 & MMCR0_FC) - freeze_limited_counters(cpuhw, pmc5, pmc6); - else - thaw_limited_counters(cpuhw, pmc5, pmc6); - - /* - * Write the full MMCR0 including the counter overflow interrupt - * enable bits, if necessary. - */ - if (mmcr0 & (MMCR0_PMC1CE | MMCR0_PMCjCE)) - mtspr(SPRN_MMCR0, mmcr0); -} - -/* - * Disable all counters to prevent PMU interrupts and to allow - * counters to be added or removed. - */ -void hw_perf_disable(void) -{ - struct cpu_hw_counters *cpuhw; - unsigned long ret; - unsigned long flags; - - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - - ret = cpuhw->disabled; - if (!ret) { - cpuhw->disabled = 1; - cpuhw->n_added = 0; - - /* - * Check if we ever enabled the PMU on this cpu. - */ - if (!cpuhw->pmcs_enabled) { - if (ppc_md.enable_pmcs) - ppc_md.enable_pmcs(); - cpuhw->pmcs_enabled = 1; - } - - /* - * Disable instruction sampling if it was enabled - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mtspr(SPRN_MMCRA, - cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mb(); - } - - /* - * Set the 'freeze counters' bit. - * The barrier is to make sure the mtspr has been - * executed and the PMU has frozen the counters - * before we return. - */ - write_mmcr0(cpuhw, mfspr(SPRN_MMCR0) | MMCR0_FC); - mb(); - } - local_irq_restore(flags); -} - -/* - * Re-enable all counters if disable == 0. - * If we were previously disabled and counters were added, then - * put the new config on the PMU. 
- */ -void hw_perf_enable(void) -{ - struct perf_counter *counter; - struct cpu_hw_counters *cpuhw; - unsigned long flags; - long i; - unsigned long val; - s64 left; - unsigned int hwc_index[MAX_HWCOUNTERS]; - int n_lim; - int idx; - - local_irq_save(flags); - cpuhw = &__get_cpu_var(cpu_hw_counters); - if (!cpuhw->disabled) { - local_irq_restore(flags); - return; - } - cpuhw->disabled = 0; - - /* - * If we didn't change anything, or only removed counters, - * no need to recalculate MMCR* settings and reset the PMCs. - * Just reenable the PMU with the current MMCR* settings - * (possibly updated for removal of counters). - */ - if (!cpuhw->n_added) { - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - if (cpuhw->n_counters == 0) - get_lppaca()->pmcregs_in_use = 0; - goto out_enable; - } - - /* - * Compute MMCR* values for the new set of counters - */ - if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, - cpuhw->mmcr)) { - /* shouldn't ever get here */ - printk(KERN_ERR "oops compute_mmcr failed\n"); - goto out; - } - - /* - * Add in MMCR0 freeze bits corresponding to the - * attr.exclude_* bits for the first counter. - * We have already checked that all counters have the - * same values for these bits as the first counter. - */ - counter = cpuhw->counter[0]; - if (counter->attr.exclude_user) - cpuhw->mmcr[0] |= MMCR0_FCP; - if (counter->attr.exclude_kernel) - cpuhw->mmcr[0] |= freeze_counters_kernel; - if (counter->attr.exclude_hv) - cpuhw->mmcr[0] |= MMCR0_FCHV; - - /* - * Write the new configuration to MMCR* with the freeze - * bit set and set the hardware counters to their initial values. - * Then unfreeze the counters. 
- */ - get_lppaca()->pmcregs_in_use = 1; - mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); - mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); - mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) - | MMCR0_FC); - - /* - * Read off any pre-existing counters that need to move - * to another PMC. - */ - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { - power_pmu_read(counter); - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - } - - /* - * Initialize the PMCs for all the new and moved counters. - */ - cpuhw->n_limited = n_lim = 0; - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (counter->hw.idx) - continue; - idx = hwc_index[i] + 1; - if (is_limited_pmc(idx)) { - cpuhw->limited_counter[n_lim] = counter; - cpuhw->limited_hwidx[n_lim] = idx; - ++n_lim; - continue; - } - val = 0; - if (counter->hw.sample_period) { - left = atomic64_read(&counter->hw.period_left); - if (left < 0x80000000L) - val = 0x80000000L - left; - } - atomic64_set(&counter->hw.prev_count, val); - counter->hw.idx = idx; - write_pmc(idx, val); - perf_counter_update_userpage(counter); - } - cpuhw->n_limited = n_lim; - cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; - - out_enable: - mb(); - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - /* - * Enable instruction sampling if necessary - */ - if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) { - mb(); - mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); - } - - out: - local_irq_restore(flags); -} - -static int collect_events(struct perf_counter *group, int max_count, - struct perf_counter *ctrs[], u64 *events, - unsigned int *flags) -{ - int n = 0; - struct perf_counter *counter; - - if (!is_software_counter(group)) { - if (n >= max_count) - return -1; - ctrs[n] = group; - flags[n] = group->hw.counter_base; - events[n++] = group->hw.config; - } - list_for_each_entry(counter, &group->sibling_list, list_entry) { - if (!is_software_counter(counter) && 
- counter->state != PERF_COUNTER_STATE_OFF) { - if (n >= max_count) - return -1; - ctrs[n] = counter; - flags[n] = counter->hw.counter_base; - events[n++] = counter->hw.config; - } - } - return n; -} - -static void counter_sched_in(struct perf_counter *counter, int cpu) -{ - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; - counter->tstamp_running += counter->ctx->time - counter->tstamp_stopped; - if (is_software_counter(counter)) - counter->pmu->enable(counter); -} - -/* - * Called to enable a whole group of counters. - * Returns 1 if the group was enabled, or -EAGAIN if it could not be. - * Assumes the caller has disabled interrupts and has - * frozen the PMU with hw_perf_save_disable. - */ -int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - struct cpu_hw_counters *cpuhw; - long i, n, n0; - struct perf_counter *sub; - - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - n = collect_events(group_leader, ppmu->n_counter - n0, - &cpuhw->counter[n0], &cpuhw->events[n0], - &cpuhw->flags[n0]); - if (n < 0) - return -EAGAIN; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, n)) - return -EAGAIN; - i = power_check_constraints(cpuhw->events, cpuhw->flags, n + n0); - if (i < 0) - return -EAGAIN; - cpuhw->n_counters = n0 + n; - cpuhw->n_added += n; - - /* - * OK, this group can go on; update counter states etc., - * and enable any software counters - */ - for (i = n0; i < n0 + n; ++i) - cpuhw->counter[i]->hw.config = cpuhw->events[i]; - cpuctx->active_oncpu += n; - n = 1; - counter_sched_in(group_leader, cpu); - list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { - if (sub->state != PERF_COUNTER_STATE_OFF) { - counter_sched_in(sub, cpu); - ++n; - } - } - ctx->nr_active += n; - - return 1; -} - -/* - * Add a counter to the PMU. 
- * If all counters are not already frozen, then we disable and - * re-enable the PMU in order to get hw_perf_enable to do the - * actual work of reconfiguring the PMU. - */ -static int power_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - unsigned long flags; - int n0; - int ret = -EAGAIN; - - local_irq_save(flags); - perf_disable(); - - /* - * Add the counter to the list (if there is room) - * and check whether the total set is still feasible. - */ - cpuhw = &__get_cpu_var(cpu_hw_counters); - n0 = cpuhw->n_counters; - if (n0 >= ppmu->n_counter) - goto out; - cpuhw->counter[n0] = counter; - cpuhw->events[n0] = counter->hw.config; - cpuhw->flags[n0] = counter->hw.counter_base; - if (check_excludes(cpuhw->counter, cpuhw->flags, n0, 1)) - goto out; - if (power_check_constraints(cpuhw->events, cpuhw->flags, n0 + 1)) - goto out; - - counter->hw.config = cpuhw->events[n0]; - ++cpuhw->n_counters; - ++cpuhw->n_added; - - ret = 0; - out: - perf_enable(); - local_irq_restore(flags); - return ret; -} - -/* - * Remove a counter from the PMU. 
- */ -static void power_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuhw; - long i; - unsigned long flags; - - local_irq_save(flags); - perf_disable(); - - power_pmu_read(counter); - - cpuhw = &__get_cpu_var(cpu_hw_counters); - for (i = 0; i < cpuhw->n_counters; ++i) { - if (counter == cpuhw->counter[i]) { - while (++i < cpuhw->n_counters) - cpuhw->counter[i-1] = cpuhw->counter[i]; - --cpuhw->n_counters; - ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); - if (counter->hw.idx) { - write_pmc(counter->hw.idx, 0); - counter->hw.idx = 0; - } - perf_counter_update_userpage(counter); - break; - } - } - for (i = 0; i < cpuhw->n_limited; ++i) - if (counter == cpuhw->limited_counter[i]) - break; - if (i < cpuhw->n_limited) { - while (++i < cpuhw->n_limited) { - cpuhw->limited_counter[i-1] = cpuhw->limited_counter[i]; - cpuhw->limited_hwidx[i-1] = cpuhw->limited_hwidx[i]; - } - --cpuhw->n_limited; - } - if (cpuhw->n_counters == 0) { - /* disable exceptions if no counters are running */ - cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); - } - - perf_enable(); - local_irq_restore(flags); -} - -/* - * Re-enable interrupts on a counter after they were throttled - * because they were coming too fast. 
- */ -static void power_pmu_unthrottle(struct perf_counter *counter) -{ - s64 val, left; - unsigned long flags; - - if (!counter->hw.idx || !counter->hw.sample_period) - return; - local_irq_save(flags); - perf_disable(); - power_pmu_read(counter); - left = counter->hw.sample_period; - counter->hw.last_period = left; - val = 0; - if (left < 0x80000000L) - val = 0x80000000L - left; - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); - perf_enable(); - local_irq_restore(flags); -} - -struct pmu power_pmu = { - .enable = power_pmu_enable, - .disable = power_pmu_disable, - .read = power_pmu_read, - .unthrottle = power_pmu_unthrottle, -}; - -/* - * Return 1 if we might be able to put counter on a limited PMC, - * or 0 if not. - * A counter can only go on a limited PMC if it counts something - * that a limited PMC can count, doesn't require interrupts, and - * doesn't exclude any processor mode. - */ -static int can_go_on_limited_pmc(struct perf_counter *counter, u64 ev, - unsigned int flags) -{ - int n; - u64 alt[MAX_EVENT_ALTERNATIVES]; - - if (counter->attr.exclude_user - || counter->attr.exclude_kernel - || counter->attr.exclude_hv - || counter->attr.sample_period) - return 0; - - if (ppmu->limited_pmc_event(ev)) - return 1; - - /* - * The requested event isn't on a limited PMC already; - * see if any alternative code goes on a limited PMC. - */ - if (!ppmu->get_alternatives) - return 0; - - flags |= PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD; - n = ppmu->get_alternatives(ev, flags, alt); - - return n > 0; -} - -/* - * Find an alternative event that goes on a normal PMC, if possible, - * and return the event code, or 0 if there is no such alternative. - * (Note: event code 0 is "don't count" on all machines.) 
- */ -static u64 normal_pmc_alternative(u64 ev, unsigned long flags) -{ - u64 alt[MAX_EVENT_ALTERNATIVES]; - int n; - - flags &= ~(PPMU_LIMITED_PMC_OK | PPMU_LIMITED_PMC_REQD); - n = ppmu->get_alternatives(ev, flags, alt); - if (!n) - return 0; - return alt[0]; -} - -/* Number of perf_counters counting hardware events */ -static atomic_t num_counters; -/* Used to avoid races in calling reserve/release_pmc_hardware */ -static DEFINE_MUTEX(pmc_reserve_mutex); - -/* - * Release the PMU if this is the last perf_counter. - */ -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if (!atomic_add_unless(&num_counters, -1, 1)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_dec_return(&num_counters) == 0) - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -/* - * Translate a generic cache event config to a raw event code. - */ -static int hw_perf_cache_event(u64 config, u64 *eventp) -{ - unsigned long type, op, result; - int ev; - - if (!ppmu->cache_events) - return -EINVAL; - - /* unpack config */ - type = config & 0xff; - op = (config >> 8) & 0xff; - result = (config >> 16) & 0xff; - - if (type >= PERF_COUNT_HW_CACHE_MAX || - op >= PERF_COUNT_HW_CACHE_OP_MAX || - result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - ev = (*ppmu->cache_events)[type][op][result]; - if (ev == 0) - return -EOPNOTSUPP; - if (ev == -1) - return -EINVAL; - *eventp = ev; - return 0; -} - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - u64 ev; - unsigned long flags; - struct perf_counter *ctrs[MAX_HWCOUNTERS]; - u64 events[MAX_HWCOUNTERS]; - unsigned int cflags[MAX_HWCOUNTERS]; - int n; - int err; - - if (!ppmu) - return ERR_PTR(-ENXIO); - switch (counter->attr.type) { - case PERF_TYPE_HARDWARE: - ev = counter->attr.config; - if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) - return ERR_PTR(-EOPNOTSUPP); - ev = ppmu->generic_events[ev]; - break; - case PERF_TYPE_HW_CACHE: - err = 
hw_perf_cache_event(counter->attr.config, &ev); - if (err) - return ERR_PTR(err); - break; - case PERF_TYPE_RAW: - ev = counter->attr.config; - break; - } - counter->hw.config_base = ev; - counter->hw.idx = 0; - - /* - * If we are not running on a hypervisor, force the - * exclude_hv bit to 0 so that we don't care what - * the user set it to. - */ - if (!firmware_has_feature(FW_FEATURE_LPAR)) - counter->attr.exclude_hv = 0; - - /* - * If this is a per-task counter, then we can use - * PM_RUN_* events interchangeably with their non RUN_* - * equivalents, e.g. PM_RUN_CYC instead of PM_CYC. - * XXX we should check if the task is an idle task. - */ - flags = 0; - if (counter->ctx->task) - flags |= PPMU_ONLY_COUNT_RUN; - - /* - * If this machine has limited counters, check whether this - * event could go on a limited counter. - */ - if (ppmu->flags & PPMU_LIMITED_PMC5_6) { - if (can_go_on_limited_pmc(counter, ev, flags)) { - flags |= PPMU_LIMITED_PMC_OK; - } else if (ppmu->limited_pmc_event(ev)) { - /* - * The requested event is on a limited PMC, - * but we can't use a limited PMC; see if any - * alternative goes on a normal PMC. - */ - ev = normal_pmc_alternative(ev, flags); - if (!ev) - return ERR_PTR(-EINVAL); - } - } - - /* - * If this is in a group, check if it can go on with all the - * other hardware counters in the group. We assume the counter - * hasn't been linked into its leader's sibling list at this point. 
- */ - n = 0; - if (counter->group_leader != counter) { - n = collect_events(counter->group_leader, ppmu->n_counter - 1, - ctrs, events, cflags); - if (n < 0) - return ERR_PTR(-EINVAL); - } - events[n] = ev; - ctrs[n] = counter; - cflags[n] = flags; - if (check_excludes(ctrs, cflags, n, 1)) - return ERR_PTR(-EINVAL); - if (power_check_constraints(events, cflags, n + 1)) - return ERR_PTR(-EINVAL); - - counter->hw.config = events[n]; - counter->hw.counter_base = cflags[n]; - counter->hw.last_period = counter->hw.sample_period; - atomic64_set(&counter->hw.period_left, counter->hw.last_period); - - /* - * See if we need to reserve the PMU. - * If no counters are currently in use, then we have to take a - * mutex to ensure that we don't race with another task doing - * reserve_pmc_hardware or release_pmc_hardware. - */ - err = 0; - if (!atomic_inc_not_zero(&num_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&num_counters) == 0 && - reserve_pmc_hardware(perf_counter_interrupt)) - err = -EBUSY; - else - atomic_inc(&num_counters); - mutex_unlock(&pmc_reserve_mutex); - } - counter->destroy = hw_perf_counter_destroy; - - if (err) - return ERR_PTR(err); - return &power_pmu; -} - -/* - * A counter has overflowed; update its count and record - * things if requested. Note that interrupts are hard-disabled - * here so there is no possibility of being interrupted. - */ -static void record_and_restart(struct perf_counter *counter, long val, - struct pt_regs *regs, int nmi) -{ - u64 period = counter->hw.sample_period; - s64 prev, delta, left; - int record = 0; - u64 addr, mmcra, sdsync; - - /* we don't have to worry about interrupts here */ - prev = atomic64_read(&counter->hw.prev_count); - delta = (val - prev) & 0xfffffffful; - atomic64_add(delta, &counter->count); - - /* - * See if the total period for this counter has expired, - * and update for the next period. 
- */ - val = 0; - left = atomic64_read(&counter->hw.period_left) - delta; - if (period) { - if (left <= 0) { - left += period; - if (left <= 0) - left = period; - record = 1; - } - if (left < 0x80000000L) - val = 0x80000000L - left; - } - - /* - * Finally record data if requested. - */ - if (record) { - struct perf_sample_data data = { - .regs = regs, - .addr = 0, - .period = counter->hw.last_period, - }; - - if (counter->attr.sample_type & PERF_SAMPLE_ADDR) { - /* - * The user wants a data address recorded. - * If we're not doing instruction sampling, - * give them the SDAR (sampled data address). - * If we are doing instruction sampling, then only - * give them the SDAR if it corresponds to the - * instruction pointed to by SIAR; this is indicated - * by the [POWER6_]MMCRA_SDSYNC bit in MMCRA. - */ - mmcra = regs->dsisr; - sdsync = (ppmu->flags & PPMU_ALT_SIPR) ? - POWER6_MMCRA_SDSYNC : MMCRA_SDSYNC; - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) - data.addr = mfspr(SPRN_SDAR); - } - if (perf_counter_overflow(counter, nmi, &data)) { - /* - * Interrupts are coming too fast - throttle them - * by setting the counter to 0, so it will be - * at least 2^30 cycles until the next interrupt - * (assuming each counter counts at most 2 counts - * per cycle). - */ - val = 0; - left = ~0ULL >> 1; - } - } - - write_pmc(counter->hw.idx, val); - atomic64_set(&counter->hw.prev_count, val); - atomic64_set(&counter->hw.period_left, left); - perf_counter_update_userpage(counter); -} - -/* - * Called from generic code to get the misc flags (i.e. processor mode) - * for an event. - */ -unsigned long perf_misc_flags(struct pt_regs *regs) -{ - unsigned long mmcra; - - if (TRAP(regs) != 0xf00) { - /* not a PMU interrupt */ - return user_mode(regs) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; - } - - mmcra = regs->dsisr; - if (ppmu->flags & PPMU_ALT_SIPR) { - if (mmcra & POWER6_MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & POWER6_MMCRA_SIPR) ? 
PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; - } - if (mmcra & MMCRA_SIHV) - return PERF_EVENT_MISC_HYPERVISOR; - return (mmcra & MMCRA_SIPR) ? PERF_EVENT_MISC_USER : - PERF_EVENT_MISC_KERNEL; -} - -/* - * Called from generic code to get the instruction pointer - * for an event. - */ -unsigned long perf_instruction_pointer(struct pt_regs *regs) -{ - unsigned long mmcra; - unsigned long ip; - unsigned long slot; - - if (TRAP(regs) != 0xf00) - return regs->nip; /* not a PMU interrupt */ - - ip = mfspr(SPRN_SIAR); - mmcra = regs->dsisr; - if ((mmcra & MMCRA_SAMPLE_ENABLE) && !(ppmu->flags & PPMU_ALT_SIPR)) { - slot = (mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT; - if (slot > 1) - ip += 4 * (slot - 1); - } - return ip; -} - -/* - * Performance monitor interrupt stuff - */ -static void perf_counter_interrupt(struct pt_regs *regs) -{ - int i; - struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); - struct perf_counter *counter; - long val; - int found = 0; - int nmi; - - if (cpuhw->n_limited) - freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5), - mfspr(SPRN_PMC6)); - - /* - * Overload regs->dsisr to store MMCRA so we only need to read it once. - */ - regs->dsisr = mfspr(SPRN_MMCRA); - - /* - * If interrupts were soft-disabled when this PMU interrupt - * occurred, treat it as an NMI. - */ - nmi = !regs->softe; - if (nmi) - nmi_enter(); - else - irq_enter(); - - for (i = 0; i < cpuhw->n_counters; ++i) { - counter = cpuhw->counter[i]; - if (!counter->hw.idx || is_limited_pmc(counter->hw.idx)) - continue; - val = read_pmc(counter->hw.idx); - if ((int)val < 0) { - /* counter has overflowed */ - found = 1; - record_and_restart(counter, val, regs, nmi); - } - } - - /* - * In case we didn't find and reset the counter that caused - * the interrupt, scan all counters and reset any that are - * negative, to avoid getting continual interrupts. - * Any that we processed in the previous loop will not be negative. 
- */ - if (!found) { - for (i = 0; i < ppmu->n_counter; ++i) { - if (is_limited_pmc(i + 1)) - continue; - val = read_pmc(i + 1); - if ((int)val < 0) - write_pmc(i + 1, 0); - } - } - - /* - * Reset MMCR0 to its normal value. This will set PMXE and - * clear FC (freeze counters) and PMAO (perf mon alert occurred) - * and thus allow interrupts to occur again. - * XXX might want to use MSR.PM to keep the counters frozen until - * we get back out of this interrupt. - */ - write_mmcr0(cpuhw, cpuhw->mmcr[0]); - - if (nmi) - nmi_exit(); - else - irq_exit(); -} - -void hw_perf_counter_setup(int cpu) -{ - struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); - - memset(cpuhw, 0, sizeof(*cpuhw)); - cpuhw->mmcr[0] = MMCR0_FC; -} - -extern struct power_pmu power4_pmu; -extern struct power_pmu ppc970_pmu; -extern struct power_pmu power5_pmu; -extern struct power_pmu power5p_pmu; -extern struct power_pmu power6_pmu; -extern struct power_pmu power7_pmu; - -static int init_perf_counters(void) -{ - unsigned long pvr; - - /* XXX should get this from cputable */ - pvr = mfspr(SPRN_PVR); - switch (PVR_VER(pvr)) { - case PV_POWER4: - case PV_POWER4p: - ppmu = &power4_pmu; - break; - case PV_970: - case PV_970FX: - case PV_970MP: - ppmu = &ppc970_pmu; - break; - case PV_POWER5: - ppmu = &power5_pmu; - break; - case PV_POWER5p: - ppmu = &power5p_pmu; - break; - case 0x3e: - ppmu = &power6_pmu; - break; - case 0x3f: - ppmu = &power7_pmu; - break; - } - - /* - * Use FCHV to ignore kernel events if MSR.HV is set. - */ - if (mfmsr() & MSR_HV) - freeze_counters_kernel = MMCR0_FCHV; - - return 0; -} - -arch_initcall(init_perf_counters); diff --git a/trunk/arch/powerpc/kernel/power4-pmu.c b/trunk/arch/powerpc/kernel/power4-pmu.c deleted file mode 100644 index 07bd308a5fa7..000000000000 --- a/trunk/arch/powerpc/kernel/power4-pmu.c +++ /dev/null @@ -1,598 +0,0 @@ -/* - * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. 
- * - * Copyright 2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -/* - * Bits in event code for POWER4 - */ -#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ -#define PM_PMC_MSK 0xf -#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ -#define PM_UNIT_MSK 0xf -#define PM_LOWER_SH 6 -#define PM_LOWER_MSK 1 -#define PM_LOWER_MSKS 0x40 -#define PM_BYTE_SH 4 /* Byte number of event bus to use */ -#define PM_BYTE_MSK 3 -#define PM_PMCSEL_MSK 7 - -/* - * Unit code values - */ -#define PM_FPU 1 -#define PM_ISU1 2 -#define PM_IFU 3 -#define PM_IDU0 4 -#define PM_ISU1_ALT 6 -#define PM_ISU2 7 -#define PM_IFU_ALT 8 -#define PM_LSU0 9 -#define PM_LSU1 0xc -#define PM_GPS 0xf - -/* - * Bits in MMCR0 for POWER4 - */ -#define MMCR0_PMC1SEL_SH 8 -#define MMCR0_PMC2SEL_SH 1 -#define MMCR_PMCSEL_MSK 0x1f - -/* - * Bits in MMCR1 for POWER4 - */ -#define MMCR1_TTM0SEL_SH 62 -#define MMCR1_TTC0SEL_SH 61 -#define MMCR1_TTM1SEL_SH 59 -#define MMCR1_TTC1SEL_SH 58 -#define MMCR1_TTM2SEL_SH 56 -#define MMCR1_TTC2SEL_SH 55 -#define MMCR1_TTM3SEL_SH 53 -#define MMCR1_TTC3SEL_SH 52 -#define MMCR1_TTMSEL_MSK 3 -#define MMCR1_TD_CP_DBG0SEL_SH 50 -#define MMCR1_TD_CP_DBG1SEL_SH 48 -#define MMCR1_TD_CP_DBG2SEL_SH 46 -#define MMCR1_TD_CP_DBG3SEL_SH 44 -#define MMCR1_DEBUG0SEL_SH 43 -#define MMCR1_DEBUG1SEL_SH 42 -#define MMCR1_DEBUG2SEL_SH 41 -#define MMCR1_DEBUG3SEL_SH 40 -#define MMCR1_PMC1_ADDER_SEL_SH 39 -#define MMCR1_PMC2_ADDER_SEL_SH 38 -#define MMCR1_PMC6_ADDER_SEL_SH 37 -#define MMCR1_PMC5_ADDER_SEL_SH 36 -#define MMCR1_PMC8_ADDER_SEL_SH 35 -#define MMCR1_PMC7_ADDER_SEL_SH 34 -#define MMCR1_PMC3_ADDER_SEL_SH 33 -#define MMCR1_PMC4_ADDER_SEL_SH 32 -#define MMCR1_PMC3SEL_SH 27 
-#define MMCR1_PMC4SEL_SH 22 -#define MMCR1_PMC5SEL_SH 17 -#define MMCR1_PMC6SEL_SH 12 -#define MMCR1_PMC7SEL_SH 7 -#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ - -static short mmcr1_adder_bits[8] = { - MMCR1_PMC1_ADDER_SEL_SH, - MMCR1_PMC2_ADDER_SEL_SH, - MMCR1_PMC3_ADDER_SEL_SH, - MMCR1_PMC4_ADDER_SEL_SH, - MMCR1_PMC5_ADDER_SEL_SH, - MMCR1_PMC6_ADDER_SEL_SH, - MMCR1_PMC7_ADDER_SEL_SH, - MMCR1_PMC8_ADDER_SEL_SH -}; - -/* - * Bits in MMCRA - */ -#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ - -/* - * Layout of constraint bits: - * 6666555555555544444444443333333333222222222211111111110000000000 - * 3210987654321098765432109876543210987654321098765432109876543210 - * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> - * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 - * \SMPL ||\TTC3SEL - * |\TTC_IFU_SEL - * \TTM2SEL0 - * - * SMPL - SAMPLE_ENABLE constraint - * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 - * - * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 - * 55: UC1 error 0x0080_0000_0000_0000 - * 54: FPU events needed 0x0040_0000_0000_0000 - * 53: ISU1 events needed 0x0020_0000_0000_0000 - * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 - * - * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 - * 51: UC2 error 0x0008_0000_0000_0000 - * 50: FPU events needed 0x0004_0000_0000_0000 - * 49: IFU events needed 0x0002_0000_0000_0000 - * 48: LSU0 events needed 0x0001_0000_0000_0000 - * - * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 - * 47: UC3 error 0x8000_0000_0000 - * 46: LSU0 events needed 0x4000_0000_0000 - * 45: IFU events needed 0x2000_0000_0000 - * 44: IDU0|ISU2 events needed 0x1000_0000_0000 - * 43: ISU1 events needed 0x0800_0000_0000 - * - * TTM2SEL0 - * 42: 0 = IDU0 events needed - * 1 = ISU2 events needed 0x0400_0000_0000 - * - * TTC_IFU_SEL - * 41: 0 = IFU.U events needed - * 1 = IFU.L events needed 0x0200_0000_0000 - * - * TTC3SEL - * 40: 0 = LSU1.U 
events needed - * 1 = LSU1.L events needed 0x0100_0000_0000 - * - * PS1 - * 39: PS1 error 0x0080_0000_0000 - * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 - * - * PS2 - * 35: PS2 error 0x0008_0000_0000 - * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 - * - * B0 - * 28-31: Byte 0 event source 0xf000_0000 - * 1 = FPU - * 2 = ISU1 - * 3 = IFU - * 4 = IDU0 - * 7 = ISU2 - * 9 = LSU0 - * c = LSU1 - * f = GPS - * - * B1, B2, B3 - * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources - * - * P8 - * 15: P8 error 0x8000 - * 14-15: Count of events needing PMC8 - * - * P1..P7 - * 0-13: Count of events needing PMC1..PMC7 - * - * Note: this doesn't allow events using IFU.U to be combined with events - * using IFU.L, though that is feasible (using TTM0 and TTM2). However - * there are no listed events for IFU.L (they are debug events not - * verified for performance monitoring) so this shouldn't cause a - * problem. - */ - -static struct unitinfo { - u64 value, mask; - int unit; - int lowerbit; -} p4_unitinfo[16] = { - [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, - [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, - [PM_ISU1_ALT] = - { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, - [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, - [PM_IFU_ALT] = - { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, - [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, - [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, - [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, - [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, - [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } -}; - -static unsigned char direct_marked_event[8] = { - (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ - (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ - (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ - (1<<4) 
| (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ - (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ - (1<<3) | (1<<4) | (1<<5), - /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ - (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ - (1<<4), /* PMC8: PM_MRK_LSU_FIN */ -}; - -/* - * Returns 1 if event counts things relating to marked instructions - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. - */ -static int p4_marked_instr_event(u64 event) -{ - int pmc, psel, unit, byte, bit; - unsigned int mask; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - psel = event & PM_PMCSEL_MSK; - if (pmc) { - if (direct_marked_event[pmc - 1] & (1 << psel)) - return 1; - if (psel == 0) /* add events */ - bit = (pmc <= 4)? pmc - 1: 8 - pmc; - else if (psel == 6) /* decode events */ - bit = 4; - else - return 0; - } else - bit = psel; - - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - mask = 0; - switch (unit) { - case PM_LSU1: - if (event & PM_LOWER_MSKS) - mask = 1 << 28; /* byte 7 bit 4 */ - else - mask = 6 << 24; /* byte 3 bits 1 and 2 */ - break; - case PM_LSU0: - /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ - mask = 0x083dff00; - } - return (mask >> (byte * 8 + bit)) & 1; -} - -static int p4_get_constraint(u64 event, u64 *maskp, u64 *valp) -{ - int pmc, byte, unit, lower, sh; - u64 mask = 0, value = 0; - int grp = -1; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 8) - return -1; - sh = (pmc - 1) * 2; - mask |= 2 << sh; - value |= 1 << sh; - grp = ((pmc - 1) >> 1) & 1; - } - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - if (unit) { - lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; - - /* - * Bus events on bytes 0 and 2 can be counted - * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. 
- */ - if (!pmc) - grp = byte & 1; - - if (!p4_unitinfo[unit].unit) - return -1; - mask |= p4_unitinfo[unit].mask; - value |= p4_unitinfo[unit].value; - sh = p4_unitinfo[unit].lowerbit; - if (sh > 1) - value |= (u64)lower << sh; - else if (lower != sh) - return -1; - unit = p4_unitinfo[unit].unit; - - /* Set byte lane select field */ - mask |= 0xfULL << (28 - 4 * byte); - value |= (u64)unit << (28 - 4 * byte); - } - if (grp == 0) { - /* increment PMC1/2/5/6 field */ - mask |= 0x8000000000ull; - value |= 0x1000000000ull; - } else { - /* increment PMC3/4/7/8 field */ - mask |= 0x800000000ull; - value |= 0x100000000ull; - } - - /* Marked instruction events need sample_enable set */ - if (p4_marked_instr_event(event)) { - mask |= 1ull << 56; - value |= 1ull << 56; - } - - /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ - if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) - mask |= 1ull << 56; - - *maskp = mask; - *valp = value; - return 0; -} - -static unsigned int ppc_inst_cmpl[] = { - 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 -}; - -static int p4_get_alternatives(u64 event, unsigned int flags, u64 alt[]) -{ - int i, j, na; - - alt[0] = event; - na = 1; - - /* 2 possibilities for PM_GRP_DISP_REJECT */ - if (event == 0x8003 || event == 0x0224) { - alt[1] = event ^ (0x8003 ^ 0x0224); - return 2; - } - - /* 2 possibilities for PM_ST_MISS_L1 */ - if (event == 0x0c13 || event == 0x0c23) { - alt[1] = event ^ (0x0c13 ^ 0x0c23); - return 2; - } - - /* several possibilities for PM_INST_CMPL */ - for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { - if (event == ppc_inst_cmpl[i]) { - for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) - if (j != i) - alt[na++] = ppc_inst_cmpl[j]; - break; - } - } - - return na; -} - -static int p4_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) -{ - u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; - unsigned int pmc, unit, byte, psel, lower; - unsigned int ttm, grp; - unsigned int pmc_inuse = 0; - unsigned int 
pmc_grp_use[2]; - unsigned char busbyte[4]; - unsigned char unituse[16]; - unsigned int unitlower = 0; - int i; - - if (n_ev > 8) - return -1; - - /* First pass to count resource use */ - pmc_grp_use[0] = pmc_grp_use[1] = 0; - memset(busbyte, 0, sizeof(busbyte)); - memset(unituse, 0, sizeof(unituse)); - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc_inuse & (1 << (pmc - 1))) - return -1; - pmc_inuse |= 1 << (pmc - 1); - /* count 1/2/5/6 vs 3/4/7/8 use */ - ++pmc_grp_use[((pmc - 1) >> 1) & 1]; - } - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; - if (unit) { - if (!pmc) - ++pmc_grp_use[byte & 1]; - if (unit == 6 || unit == 8) - /* map alt ISU1/IFU codes: 6->2, 8->3 */ - unit = (unit >> 1) - 1; - if (busbyte[byte] && busbyte[byte] != unit) - return -1; - busbyte[byte] = unit; - lower <<= unit; - if (unituse[unit] && lower != (unitlower & lower)) - return -1; - unituse[unit] = 1; - unitlower |= lower; - } - } - if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) - return -1; - - /* - * Assign resources and set multiplexer selects. - * - * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. - * Each TTMx can only select one unit, but since - * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, - * we have some choices. - */ - if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { - unituse[6] = 1; /* Move 2 to 6 */ - unituse[2] = 0; - } - if (unituse[3] & (unituse[1] | unituse[2])) { - unituse[8] = 1; /* Move 3 to 8 */ - unituse[3] = 0; - unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); - } - /* Check only one unit per TTMx */ - if (unituse[1] + unituse[2] + unituse[3] > 1 || - unituse[4] + unituse[6] + unituse[7] > 1 || - unituse[8] + unituse[9] > 1 || - (unituse[5] | unituse[10] | unituse[11] | - unituse[13] | unituse[14])) - return -1; - - /* Set TTMxSEL fields. 
Note, units 1-3 => TTM0SEL codes 0-2 */ - mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; - mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; - mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; - - /* Set TTCxSEL fields. */ - if (unitlower & 0xe) - mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; - if (unitlower & 0xf0) - mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; - if (unitlower & 0xf00) - mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; - if (unitlower & 0x7000) - mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; - - /* Set byte lane select fields. */ - for (byte = 0; byte < 4; ++byte) { - unit = busbyte[byte]; - if (!unit) - continue; - if (unit == 0xf) { - /* special case for GPS */ - mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); - } else { - if (!unituse[unit]) - ttm = unit - 1; /* 2->1, 3->2 */ - else - ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); - } - } - - /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - psel = event[i] & PM_PMCSEL_MSK; - if (!pmc) { - /* Bus event or 00xxx direct event (off or cycles) */ - if (unit) - psel |= 0x10 | ((byte & 2) << 2); - for (pmc = 0; pmc < 8; ++pmc) { - if (pmc_inuse & (1 << pmc)) - continue; - grp = (pmc >> 1) & 1; - if (unit) { - if (grp == (byte & 1)) - break; - } else if (pmc_grp_use[grp] < 4) { - ++pmc_grp_use[grp]; - break; - } - } - pmc_inuse |= 1 << pmc; - } else { - /* Direct event */ - --pmc; - if (psel == 0 && (byte & 2)) - /* add events on higher-numbered bus */ - mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; - else if (psel == 6 && byte == 3) - /* seem to need to set sample_enable here */ - mmcra |= MMCRA_SAMPLE_ENABLE; - psel |= 8; - } - if (pmc <= 1) - mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); - else - mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); - if (pmc == 7) /* PMC8 */ - mmcra |= (psel & 1) 
<< MMCRA_PMC8SEL0_SH; - hwc[i] = pmc; - if (p4_marked_instr_event(event[i])) - mmcra |= MMCRA_SAMPLE_ENABLE; - } - - if (pmc_inuse & 1) - mmcr0 |= MMCR0_PMC1CE; - if (pmc_inuse & 0xfe) - mmcr0 |= MMCR0_PMCjCE; - - mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ - - /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - return 0; -} - -static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) -{ - /* - * Setting the PMCxSEL field to 0 disables PMC x. - * (Note that pmc is 0-based here, not 1-based.) - */ - if (pmc <= 1) { - mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); - } else { - mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); - if (pmc == 7) - mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); - } -} - -static int p4_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 7, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x1001, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ - [PERF_COUNT_HW_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ - [PERF_COUNT_HW_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x - -/* - * Table of generalized cache-related events. - * 0 means not supported, -1 means nonsensical, other values - * are event codes. 
- */ -static int power4_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x8c10, 0x3c10 }, - [C(OP_WRITE)] = { 0x7c10, 0xc13 }, - [C(OP_PREFETCH)] = { 0xc35, 0 }, - }, - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { 0, 0 }, - }, - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { 0, 0 }, - [C(OP_PREFETCH)] = { 0xc34, 0 }, - }, - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x904 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x900 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x330, 0x331 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, -}; - -struct power_pmu power4_pmu = { - .n_counter = 8, - .max_alternatives = 5, - .add_fields = 0x0000001100005555ull, - .test_adder = 0x0011083300000000ull, - .compute_mmcr = p4_compute_mmcr, - .get_constraint = p4_get_constraint, - .get_alternatives = p4_get_alternatives, - .disable_pmc = p4_disable_pmc, - .n_generic = ARRAY_SIZE(p4_generic_events), - .generic_events = p4_generic_events, - .cache_events = &power4_cache_events, -}; diff --git a/trunk/arch/powerpc/kernel/power5+-pmu.c b/trunk/arch/powerpc/kernel/power5+-pmu.c deleted file mode 100644 index 41e5d2d958d4..000000000000 --- a/trunk/arch/powerpc/kernel/power5+-pmu.c +++ /dev/null @@ -1,671 +0,0 @@ -/* - * Performance counter support for POWER5+/++ (not POWER5) processors. - * - * Copyright 2009 Paul Mackerras, IBM Corporation. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -/* - * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) - */ -#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ -#define PM_PMC_MSK 0xf -#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) -#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ -#define PM_UNIT_MSK 0xf -#define PM_BYTE_SH 12 /* Byte number of event bus to use */ -#define PM_BYTE_MSK 7 -#define PM_GRS_SH 8 /* Storage subsystem mux select */ -#define PM_GRS_MSK 7 -#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ -#define PM_PMCSEL_MSK 0x7f - -/* Values in PM_UNIT field */ -#define PM_FPU 0 -#define PM_ISU0 1 -#define PM_IFU 2 -#define PM_ISU1 3 -#define PM_IDU 4 -#define PM_ISU0_ALT 6 -#define PM_GRS 7 -#define PM_LSU0 8 -#define PM_LSU1 0xc -#define PM_LASTUNIT 0xc - -/* - * Bits in MMCR1 for POWER5+ - */ -#define MMCR1_TTM0SEL_SH 62 -#define MMCR1_TTM1SEL_SH 60 -#define MMCR1_TTM2SEL_SH 58 -#define MMCR1_TTM3SEL_SH 56 -#define MMCR1_TTMSEL_MSK 3 -#define MMCR1_TD_CP_DBG0SEL_SH 54 -#define MMCR1_TD_CP_DBG1SEL_SH 52 -#define MMCR1_TD_CP_DBG2SEL_SH 50 -#define MMCR1_TD_CP_DBG3SEL_SH 48 -#define MMCR1_GRS_L2SEL_SH 46 -#define MMCR1_GRS_L2SEL_MSK 3 -#define MMCR1_GRS_L3SEL_SH 44 -#define MMCR1_GRS_L3SEL_MSK 3 -#define MMCR1_GRS_MCSEL_SH 41 -#define MMCR1_GRS_MCSEL_MSK 7 -#define MMCR1_GRS_FABSEL_SH 39 -#define MMCR1_GRS_FABSEL_MSK 3 -#define MMCR1_PMC1_ADDER_SEL_SH 35 -#define MMCR1_PMC2_ADDER_SEL_SH 34 -#define MMCR1_PMC3_ADDER_SEL_SH 33 -#define MMCR1_PMC4_ADDER_SEL_SH 32 -#define MMCR1_PMC1SEL_SH 25 -#define MMCR1_PMC2SEL_SH 17 -#define MMCR1_PMC3SEL_SH 9 -#define MMCR1_PMC4SEL_SH 1 -#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) -#define 
MMCR1_PMCSEL_MSK 0x7f - -/* - * Bits in MMCRA - */ - -/* - * Layout of constraint bits: - * 6666555555555544444444443333333333222222222211111111110000000000 - * 3210987654321098765432109876543210987654321098765432109876543210 - * [ ><><>< ><> <><>[ > < >< >< >< ><><><><><><> - * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P6P5P4P3P2P1 - * - * NC - number of counters - * 51: NC error 0x0008_0000_0000_0000 - * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 - * - * G0..G3 - GRS mux constraints - * 46-47: GRS_L2SEL value - * 44-45: GRS_L3SEL value - * 41-44: GRS_MCSEL value - * 39-40: GRS_FABSEL value - * Note that these match up with their bit positions in MMCR1 - * - * T0 - TTM0 constraint - * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 - * - * T1 - TTM1 constraint - * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 - * - * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS - * 33: UC3 error 0x02_0000_0000 - * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 - * 31: ISU0 events needed 0x01_8000_0000 - * 30: IDU|GRS events needed 0x00_4000_0000 - * - * B0 - * 24-27: Byte 0 event source 0x0f00_0000 - * Encoding as for the event code - * - * B1, B2, B3 - * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources - * - * P6 - * 11: P6 error 0x800 - * 10-11: Count of events needing PMC6 - * - * P1..P5 - * 0-9: Count of events needing PMC1..PMC5 - */ - -static const int grsel_shift[8] = { - MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, - MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, - MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH -}; - -/* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, - [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, - [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, - [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, - [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, - [PM_GRS] = { 
0x0e00000000ull, 0x0c40000000ull }, -}; - -static int power5p_get_constraint(u64 event, u64 *maskp, u64 *valp) -{ - int pmc, byte, unit, sh; - int bit, fmask; - u64 mask = 0, value = 0; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 6) - return -1; - sh = (pmc - 1) * 2; - mask |= 2 << sh; - value |= 1 << sh; - if (pmc >= 5 && !(event == 0x500009 || event == 0x600005)) - return -1; - } - if (event & PM_BUSEVENT_MSK) { - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - if (unit > PM_LASTUNIT) - return -1; - if (unit == PM_ISU0_ALT) - unit = PM_ISU0; - mask |= unit_cons[unit][0]; - value |= unit_cons[unit][1]; - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - if (byte >= 4) { - if (unit != PM_LSU1) - return -1; - /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ - ++unit; - byte &= 3; - } - if (unit == PM_GRS) { - bit = event & 7; - fmask = (bit == 6)? 7: 3; - sh = grsel_shift[bit]; - mask |= (u64)fmask << sh; - value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; - } - /* Set byte lane select field */ - mask |= 0xfULL << (24 - 4 * byte); - value |= (u64)unit << (24 - 4 * byte); - } - if (pmc < 5) { - /* need a counter from PMC1-4 set */ - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; - } - *maskp = mask; - *valp = value; - return 0; -} - -static int power5p_limited_pmc_event(u64 event) -{ - int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - - return pmc == 5 || pmc == 6; -} - -#define MAX_ALT 3 /* at most 3 alternatives for any event */ - -static const unsigned int event_alternatives[][MAX_ALT] = { - { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ - { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ - { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ - { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ - { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ - { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ - { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ - { 0x100005, 0x600005 }, /* PM_RUN_CYC */ - { 0x100009, 0x200009 }, /* PM_INST_CMPL */ - { 0x200015, 
0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ - { 0x300009, 0x400009 }, /* PM_INST_DISP */ -}; - -/* - * Scan the alternatives table for a match and return the - * index into the alternatives table if found, else -1. - */ -static int find_alternative(unsigned int event) -{ - int i, j; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - break; - for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) - if (event == event_alternatives[i][j]) - return i; - } - return -1; -} - -static const unsigned char bytedecode_alternatives[4][4] = { - /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, - /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, - /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, - /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } -}; - -/* - * Some direct events for decodes of event bus byte 3 have alternative - * PMCSEL values on other counters. This returns the alternative - * event code for those that do, or -1 otherwise. This also handles - * alternative PCMSEL values for add events. 
- */ -static s64 find_alternative_bdecode(u64 event) -{ - int pmc, altpmc, pp, j; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc == 0 || pmc > 4) - return -1; - altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ - pp = event & PM_PMCSEL_MSK; - for (j = 0; j < 4; ++j) { - if (bytedecode_alternatives[pmc - 1][j] == pp) { - return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | - (altpmc << PM_PMC_SH) | - bytedecode_alternatives[altpmc - 1][j]; - } - } - - /* new decode alternatives for power5+ */ - if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) - return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); - if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) - return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); - - /* alternative add event encodings */ - if (pp == 0x10 || pp == 0x28) - return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | - (altpmc << PM_PMC_SH); - - return -1; -} - -static int power5p_get_alternatives(u64 event, unsigned int flags, u64 alt[]) -{ - int i, j, nalt = 1; - int nlim; - s64 ae; - - alt[0] = event; - nalt = 1; - nlim = power5p_limited_pmc_event(event); - i = find_alternative(event); - if (i >= 0) { - for (j = 0; j < MAX_ALT; ++j) { - ae = event_alternatives[i][j]; - if (ae && ae != event) - alt[nalt++] = ae; - nlim += power5p_limited_pmc_event(ae); - } - } else { - ae = find_alternative_bdecode(event); - if (ae > 0) - alt[nalt++] = ae; - } - - if (flags & PPMU_ONLY_COUNT_RUN) { - /* - * We're only counting in RUN state, - * so PM_CYC is equivalent to PM_RUN_CYC - * and PM_INST_CMPL === PM_RUN_INST_CMPL. - * This doesn't include alternatives that don't provide - * any extra flexibility in assigning PMCs (e.g. - * 0x100005 for PM_RUN_CYC vs. 0xf for PM_CYC). - * Note that even with these additional alternatives - * we never end up with more than 3 alternatives for any event. 
- */ - j = nalt; - for (i = 0; i < nalt; ++i) { - switch (alt[i]) { - case 0xf: /* PM_CYC */ - alt[j++] = 0x600005; /* PM_RUN_CYC */ - ++nlim; - break; - case 0x600005: /* PM_RUN_CYC */ - alt[j++] = 0xf; - break; - case 0x100009: /* PM_INST_CMPL */ - alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ - ++nlim; - break; - case 0x500009: /* PM_RUN_INST_CMPL */ - alt[j++] = 0x100009; /* PM_INST_CMPL */ - alt[j++] = 0x200009; - break; - } - } - nalt = j; - } - - if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { - /* remove the limited PMC events */ - j = 0; - for (i = 0; i < nalt; ++i) { - if (!power5p_limited_pmc_event(alt[i])) { - alt[j] = alt[i]; - ++j; - } - } - nalt = j; - } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { - /* remove all but the limited PMC events */ - j = 0; - for (i = 0; i < nalt; ++i) { - if (power5p_limited_pmc_event(alt[i])) { - alt[j] = alt[i]; - ++j; - } - } - nalt = j; - } - - return nalt; -} - -/* - * Map of which direct events on which PMCs are marked instruction events. - * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. - * Bit 0 is set if it is marked for all PMCs. - * The 0x80 bit indicates a byte decode PMCSEL value. 
- */ -static unsigned char direct_event_is_marked[0x28] = { - 0, /* 00 */ - 0x1f, /* 01 PM_IOPS_CMPL */ - 0x2, /* 02 PM_MRK_GRP_DISP */ - 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ - 0, /* 04 */ - 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ - 0x80, /* 06 */ - 0x80, /* 07 */ - 0, 0, 0,/* 08 - 0a */ - 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ - 0, /* 0c */ - 0x80, /* 0d */ - 0x80, /* 0e */ - 0, /* 0f */ - 0, /* 10 */ - 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ - 0, /* 12 */ - 0x10, /* 13 PM_MRK_GRP_CMPL */ - 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ - 0x2, /* 15 PM_MRK_GRP_ISSUED */ - 0x80, /* 16 */ - 0x80, /* 17 */ - 0, 0, 0, 0, 0, - 0x80, /* 1d */ - 0x80, /* 1e */ - 0, /* 1f */ - 0x80, /* 20 */ - 0x80, /* 21 */ - 0x80, /* 22 */ - 0x80, /* 23 */ - 0x80, /* 24 */ - 0x80, /* 25 */ - 0x80, /* 26 */ - 0x80, /* 27 */ -}; - -/* - * Returns 1 if event counts things relating to marked instructions - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. 
- */ -static int power5p_marked_instr_event(u64 event) -{ - int pmc, psel; - int bit, byte, unit; - u32 mask; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - psel = event & PM_PMCSEL_MSK; - if (pmc >= 5) - return 0; - - bit = -1; - if (psel < sizeof(direct_event_is_marked)) { - if (direct_event_is_marked[psel] & (1 << pmc)) - return 1; - if (direct_event_is_marked[psel] & 0x80) - bit = 4; - else if (psel == 0x08) - bit = pmc - 1; - else if (psel == 0x10) - bit = 4 - pmc; - else if (psel == 0x1b && (pmc == 1 || pmc == 3)) - bit = 4; - } else if ((psel & 0x48) == 0x40) { - bit = psel & 7; - } else if (psel == 0x28) { - bit = pmc - 1; - } else if (pmc == 3 && (psel == 0x2e || psel == 0x2f)) { - bit = 4; - } - - if (!(event & PM_BUSEVENT_MSK) || bit == -1) - return 0; - - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - if (unit == PM_LSU0) { - /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ - mask = 0x5dff00; - } else if (unit == PM_LSU1 && byte >= 4) { - byte -= 4; - /* byte 5 bits 6-7, byte 6 bits 0,4, byte 7 bits 0-4,6 */ - mask = 0x5f11c000; - } else - return 0; - - return (mask >> (byte * 8 + bit)) & 1; -} - -static int power5p_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) -{ - u64 mmcr1 = 0; - u64 mmcra = 0; - unsigned int pmc, unit, byte, psel; - unsigned int ttm; - int i, isbus, bit, grsel; - unsigned int pmc_inuse = 0; - unsigned char busbyte[4]; - unsigned char unituse[16]; - int ttmuse; - - if (n_ev > 6) - return -1; - - /* First pass to count resource use */ - memset(busbyte, 0, sizeof(busbyte)); - memset(unituse, 0, sizeof(unituse)); - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 6) - return -1; - if (pmc_inuse & (1 << (pmc - 1))) - return -1; - pmc_inuse |= 1 << (pmc - 1); - } - if (event[i] & PM_BUSEVENT_MSK) { - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - if (unit > PM_LASTUNIT) - 
return -1; - if (unit == PM_ISU0_ALT) - unit = PM_ISU0; - if (byte >= 4) { - if (unit != PM_LSU1) - return -1; - ++unit; - byte &= 3; - } - if (busbyte[byte] && busbyte[byte] != unit) - return -1; - busbyte[byte] = unit; - unituse[unit] = 1; - } - } - - /* - * Assign resources and set multiplexer selects. - * - * PM_ISU0 can go either on TTM0 or TTM1, but that's the only - * choice we have to deal with. - */ - if (unituse[PM_ISU0] & - (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { - unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ - unituse[PM_ISU0] = 0; - } - /* Set TTM[01]SEL fields. */ - ttmuse = 0; - for (i = PM_FPU; i <= PM_ISU1; ++i) { - if (!unituse[i]) - continue; - if (ttmuse++) - return -1; - mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; - } - ttmuse = 0; - for (; i <= PM_GRS; ++i) { - if (!unituse[i]) - continue; - if (ttmuse++) - return -1; - mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; - } - if (ttmuse > 1) - return -1; - - /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. 
*/ - for (byte = 0; byte < 4; ++byte) { - unit = busbyte[byte]; - if (!unit) - continue; - if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { - /* get ISU0 through TTM1 rather than TTM0 */ - unit = PM_ISU0_ALT; - } else if (unit == PM_LSU1 + 1) { - /* select lower word of LSU1 for this byte */ - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); - } - ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); - } - - /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - psel = event[i] & PM_PMCSEL_MSK; - isbus = event[i] & PM_BUSEVENT_MSK; - if (!pmc) { - /* Bus event or any-PMC direct event */ - for (pmc = 0; pmc < 4; ++pmc) { - if (!(pmc_inuse & (1 << pmc))) - break; - } - if (pmc >= 4) - return -1; - pmc_inuse |= 1 << pmc; - } else if (pmc <= 4) { - /* Direct event */ - --pmc; - if (isbus && (byte & 2) && - (psel == 8 || psel == 0x10 || psel == 0x28)) - /* add events on higher-numbered bus */ - mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); - } else { - /* Instructions or run cycles on PMC5/6 */ - --pmc; - } - if (isbus && unit == PM_GRS) { - bit = psel & 7; - grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; - mmcr1 |= (u64)grsel << grsel_shift[bit]; - } - if (power5p_marked_instr_event(event[i])) - mmcra |= MMCRA_SAMPLE_ENABLE; - if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) - /* select alternate byte lane */ - psel |= 0x10; - if (pmc <= 3) - mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); - hwc[i] = pmc; - } - - /* Return MMCRx values */ - mmcr[0] = 0; - if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; - if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - return 0; -} - -static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) -{ - if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); -} - -static int 
power5p_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0xf, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ - [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ - [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x - -/* - * Table of generalized cache-related events. - * 0 means not supported, -1 means nonsensical, other values - * are event codes. - */ -static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x1c10a8, 0x3c1088 }, - [C(OP_WRITE)] = { 0x2c10a8, 0xc10c3 }, - [C(OP_PREFETCH)] = { 0xc70e7, -1 }, - }, - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { 0, 0 }, - }, - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { 0, 0 }, - [C(OP_PREFETCH)] = { 0xc50c3, 0 }, - }, - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0xc20e4, 0x800c4 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x800c0 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x230e4, 0x230e5 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, -}; - -struct power_pmu power5p_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x7000000000055ull, - .test_adder = 0x3000040000000ull, - .compute_mmcr = power5p_compute_mmcr, - .get_constraint = power5p_get_constraint, - .get_alternatives = power5p_get_alternatives, - .disable_pmc = power5p_disable_pmc, - .limited_pmc_event = power5p_limited_pmc_event, - .flags = PPMU_LIMITED_PMC5_6, - .n_generic = ARRAY_SIZE(power5p_generic_events), - 
.generic_events = power5p_generic_events, - .cache_events = &power5p_cache_events, -}; diff --git a/trunk/arch/powerpc/kernel/power5-pmu.c b/trunk/arch/powerpc/kernel/power5-pmu.c deleted file mode 100644 index 05600b66221a..000000000000 --- a/trunk/arch/powerpc/kernel/power5-pmu.c +++ /dev/null @@ -1,611 +0,0 @@ -/* - * Performance counter support for POWER5 (not POWER5++) processors. - * - * Copyright 2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -/* - * Bits in event code for POWER5 (not POWER5++) - */ -#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ -#define PM_PMC_MSK 0xf -#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) -#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ -#define PM_UNIT_MSK 0xf -#define PM_BYTE_SH 12 /* Byte number of event bus to use */ -#define PM_BYTE_MSK 7 -#define PM_GRS_SH 8 /* Storage subsystem mux select */ -#define PM_GRS_MSK 7 -#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ -#define PM_PMCSEL_MSK 0x7f - -/* Values in PM_UNIT field */ -#define PM_FPU 0 -#define PM_ISU0 1 -#define PM_IFU 2 -#define PM_ISU1 3 -#define PM_IDU 4 -#define PM_ISU0_ALT 6 -#define PM_GRS 7 -#define PM_LSU0 8 -#define PM_LSU1 0xc -#define PM_LASTUNIT 0xc - -/* - * Bits in MMCR1 for POWER5 - */ -#define MMCR1_TTM0SEL_SH 62 -#define MMCR1_TTM1SEL_SH 60 -#define MMCR1_TTM2SEL_SH 58 -#define MMCR1_TTM3SEL_SH 56 -#define MMCR1_TTMSEL_MSK 3 -#define MMCR1_TD_CP_DBG0SEL_SH 54 -#define MMCR1_TD_CP_DBG1SEL_SH 52 -#define MMCR1_TD_CP_DBG2SEL_SH 50 -#define MMCR1_TD_CP_DBG3SEL_SH 48 -#define MMCR1_GRS_L2SEL_SH 46 -#define MMCR1_GRS_L2SEL_MSK 3 -#define MMCR1_GRS_L3SEL_SH 44 -#define MMCR1_GRS_L3SEL_MSK 3 -#define MMCR1_GRS_MCSEL_SH 41 
-#define MMCR1_GRS_MCSEL_MSK 7 -#define MMCR1_GRS_FABSEL_SH 39 -#define MMCR1_GRS_FABSEL_MSK 3 -#define MMCR1_PMC1_ADDER_SEL_SH 35 -#define MMCR1_PMC2_ADDER_SEL_SH 34 -#define MMCR1_PMC3_ADDER_SEL_SH 33 -#define MMCR1_PMC4_ADDER_SEL_SH 32 -#define MMCR1_PMC1SEL_SH 25 -#define MMCR1_PMC2SEL_SH 17 -#define MMCR1_PMC3SEL_SH 9 -#define MMCR1_PMC4SEL_SH 1 -#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) -#define MMCR1_PMCSEL_MSK 0x7f - -/* - * Bits in MMCRA - */ - -/* - * Layout of constraint bits: - * 6666555555555544444444443333333333222222222211111111110000000000 - * 3210987654321098765432109876543210987654321098765432109876543210 - * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> - * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 - * - * T0 - TTM0 constraint - * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 - * - * T1 - TTM1 constraint - * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 - * - * NC - number of counters - * 51: NC error 0x0008_0000_0000_0000 - * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 - * - * G0..G3 - GRS mux constraints - * 46-47: GRS_L2SEL value - * 44-45: GRS_L3SEL value - * 41-44: GRS_MCSEL value - * 39-40: GRS_FABSEL value - * Note that these match up with their bit positions in MMCR1 - * - * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS - * 37: UC3 error 0x20_0000_0000 - * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 - * 35: ISU0 events needed 0x08_0000_0000 - * 34: IDU|GRS events needed 0x04_0000_0000 - * - * PS1 - * 33: PS1 error 0x2_0000_0000 - * 31-32: count of events needing PMC1/2 0x1_8000_0000 - * - * PS2 - * 30: PS2 error 0x4000_0000 - * 28-29: count of events needing PMC3/4 0x3000_0000 - * - * B0 - * 24-27: Byte 0 event source 0x0f00_0000 - * Encoding as for the event code - * - * B1, B2, B3 - * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources - * - * P1..P6 - * 0-11: Count of events needing PMC1..PMC6 - */ - -static const int grsel_shift[8] = { 
- MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, - MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, - MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH -}; - -/* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, - [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, - [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, - [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, - [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, - [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, -}; - -static int power5_get_constraint(u64 event, u64 *maskp, u64 *valp) -{ - int pmc, byte, unit, sh; - int bit, fmask; - u64 mask = 0, value = 0; - int grp = -1; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 6) - return -1; - sh = (pmc - 1) * 2; - mask |= 2 << sh; - value |= 1 << sh; - if (pmc <= 4) - grp = (pmc - 1) >> 1; - else if (event != 0x500009 && event != 0x600005) - return -1; - } - if (event & PM_BUSEVENT_MSK) { - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - if (unit > PM_LASTUNIT) - return -1; - if (unit == PM_ISU0_ALT) - unit = PM_ISU0; - mask |= unit_cons[unit][0]; - value |= unit_cons[unit][1]; - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - if (byte >= 4) { - if (unit != PM_LSU1) - return -1; - /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ - ++unit; - byte &= 3; - } - if (unit == PM_GRS) { - bit = event & 7; - fmask = (bit == 6)? 7: 3; - sh = grsel_shift[bit]; - mask |= (u64)fmask << sh; - value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; - } - /* - * Bus events on bytes 0 and 2 can be counted - * on PMC1/2; bytes 1 and 3 on PMC3/4. 
- */ - if (!pmc) - grp = byte & 1; - /* Set byte lane select field */ - mask |= 0xfULL << (24 - 4 * byte); - value |= (u64)unit << (24 - 4 * byte); - } - if (grp == 0) { - /* increment PMC1/2 field */ - mask |= 0x200000000ull; - value |= 0x080000000ull; - } else if (grp == 1) { - /* increment PMC3/4 field */ - mask |= 0x40000000ull; - value |= 0x10000000ull; - } - if (pmc < 5) { - /* need a counter from PMC1-4 set */ - mask |= 0x8000000000000ull; - value |= 0x1000000000000ull; - } - *maskp = mask; - *valp = value; - return 0; -} - -#define MAX_ALT 3 /* at most 3 alternatives for any event */ - -static const unsigned int event_alternatives[][MAX_ALT] = { - { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ - { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ - { 0x100005, 0x600005 }, /* PM_RUN_CYC */ - { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ - { 0x300009, 0x400009 }, /* PM_INST_DISP */ -}; - -/* - * Scan the alternatives table for a match and return the - * index into the alternatives table if found, else -1. - */ -static int find_alternative(u64 event) -{ - int i, j; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - break; - for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) - if (event == event_alternatives[i][j]) - return i; - } - return -1; -} - -static const unsigned char bytedecode_alternatives[4][4] = { - /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, - /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, - /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, - /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } -}; - -/* - * Some direct events for decodes of event bus byte 3 have alternative - * PMCSEL values on other counters. This returns the alternative - * event code for those that do, or -1 otherwise. 
- */ -static s64 find_alternative_bdecode(u64 event) -{ - int pmc, altpmc, pp, j; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc == 0 || pmc > 4) - return -1; - altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ - pp = event & PM_PMCSEL_MSK; - for (j = 0; j < 4; ++j) { - if (bytedecode_alternatives[pmc - 1][j] == pp) { - return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | - (altpmc << PM_PMC_SH) | - bytedecode_alternatives[altpmc - 1][j]; - } - } - return -1; -} - -static int power5_get_alternatives(u64 event, unsigned int flags, u64 alt[]) -{ - int i, j, nalt = 1; - s64 ae; - - alt[0] = event; - nalt = 1; - i = find_alternative(event); - if (i >= 0) { - for (j = 0; j < MAX_ALT; ++j) { - ae = event_alternatives[i][j]; - if (ae && ae != event) - alt[nalt++] = ae; - } - } else { - ae = find_alternative_bdecode(event); - if (ae > 0) - alt[nalt++] = ae; - } - return nalt; -} - -/* - * Map of which direct events on which PMCs are marked instruction events. - * Indexed by PMCSEL value, bit i (LE) set if PMC i is a marked event. - * Bit 0 is set if it is marked for all PMCs. - * The 0x80 bit indicates a byte decode PMCSEL value. 
- */ -static unsigned char direct_event_is_marked[0x28] = { - 0, /* 00 */ - 0x1f, /* 01 PM_IOPS_CMPL */ - 0x2, /* 02 PM_MRK_GRP_DISP */ - 0xe, /* 03 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ - 0, /* 04 */ - 0x1c, /* 05 PM_MRK_BRU_FIN, PM_MRK_INST_FIN, PM_MRK_CRU_FIN */ - 0x80, /* 06 */ - 0x80, /* 07 */ - 0, 0, 0,/* 08 - 0a */ - 0x18, /* 0b PM_THRESH_TIMEO, PM_MRK_GRP_TIMEO */ - 0, /* 0c */ - 0x80, /* 0d */ - 0x80, /* 0e */ - 0, /* 0f */ - 0, /* 10 */ - 0x14, /* 11 PM_MRK_GRP_BR_REDIR, PM_MRK_GRP_IC_MISS */ - 0, /* 12 */ - 0x10, /* 13 PM_MRK_GRP_CMPL */ - 0x1f, /* 14 PM_GRP_MRK, PM_MRK_{FXU,FPU,LSU}_FIN */ - 0x2, /* 15 PM_MRK_GRP_ISSUED */ - 0x80, /* 16 */ - 0x80, /* 17 */ - 0, 0, 0, 0, 0, - 0x80, /* 1d */ - 0x80, /* 1e */ - 0, /* 1f */ - 0x80, /* 20 */ - 0x80, /* 21 */ - 0x80, /* 22 */ - 0x80, /* 23 */ - 0x80, /* 24 */ - 0x80, /* 25 */ - 0x80, /* 26 */ - 0x80, /* 27 */ -}; - -/* - * Returns 1 if event counts things relating to marked instructions - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. 
- */ -static int power5_marked_instr_event(u64 event) -{ - int pmc, psel; - int bit, byte, unit; - u32 mask; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - psel = event & PM_PMCSEL_MSK; - if (pmc >= 5) - return 0; - - bit = -1; - if (psel < sizeof(direct_event_is_marked)) { - if (direct_event_is_marked[psel] & (1 << pmc)) - return 1; - if (direct_event_is_marked[psel] & 0x80) - bit = 4; - else if (psel == 0x08) - bit = pmc - 1; - else if (psel == 0x10) - bit = 4 - pmc; - else if (psel == 0x1b && (pmc == 1 || pmc == 3)) - bit = 4; - } else if ((psel & 0x58) == 0x40) - bit = psel & 7; - - if (!(event & PM_BUSEVENT_MSK)) - return 0; - - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - if (unit == PM_LSU0) { - /* byte 1 bits 0-7, byte 2 bits 0,2-4,6 */ - mask = 0x5dff00; - } else if (unit == PM_LSU1 && byte >= 4) { - byte -= 4; - /* byte 4 bits 1,3,5,7, byte 5 bits 6-7, byte 7 bits 0-4,6 */ - mask = 0x5f00c0aa; - } else - return 0; - - return (mask >> (byte * 8 + bit)) & 1; -} - -static int power5_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) -{ - u64 mmcr1 = 0; - u64 mmcra = 0; - unsigned int pmc, unit, byte, psel; - unsigned int ttm, grp; - int i, isbus, bit, grsel; - unsigned int pmc_inuse = 0; - unsigned int pmc_grp_use[2]; - unsigned char busbyte[4]; - unsigned char unituse[16]; - int ttmuse; - - if (n_ev > 6) - return -1; - - /* First pass to count resource use */ - pmc_grp_use[0] = pmc_grp_use[1] = 0; - memset(busbyte, 0, sizeof(busbyte)); - memset(unituse, 0, sizeof(unituse)); - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 6) - return -1; - if (pmc_inuse & (1 << (pmc - 1))) - return -1; - pmc_inuse |= 1 << (pmc - 1); - /* count 1/2 vs 3/4 use */ - if (pmc <= 4) - ++pmc_grp_use[(pmc - 1) >> 1]; - } - if (event[i] & PM_BUSEVENT_MSK) { - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - if (unit > 
PM_LASTUNIT) - return -1; - if (unit == PM_ISU0_ALT) - unit = PM_ISU0; - if (byte >= 4) { - if (unit != PM_LSU1) - return -1; - ++unit; - byte &= 3; - } - if (!pmc) - ++pmc_grp_use[byte & 1]; - if (busbyte[byte] && busbyte[byte] != unit) - return -1; - busbyte[byte] = unit; - unituse[unit] = 1; - } - } - if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) - return -1; - - /* - * Assign resources and set multiplexer selects. - * - * PM_ISU0 can go either on TTM0 or TTM1, but that's the only - * choice we have to deal with. - */ - if (unituse[PM_ISU0] & - (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { - unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ - unituse[PM_ISU0] = 0; - } - /* Set TTM[01]SEL fields. */ - ttmuse = 0; - for (i = PM_FPU; i <= PM_ISU1; ++i) { - if (!unituse[i]) - continue; - if (ttmuse++) - return -1; - mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; - } - ttmuse = 0; - for (; i <= PM_GRS; ++i) { - if (!unituse[i]) - continue; - if (ttmuse++) - return -1; - mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; - } - if (ttmuse > 1) - return -1; - - /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. 
*/ - for (byte = 0; byte < 4; ++byte) { - unit = busbyte[byte]; - if (!unit) - continue; - if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { - /* get ISU0 through TTM1 rather than TTM0 */ - unit = PM_ISU0_ALT; - } else if (unit == PM_LSU1 + 1) { - /* select lower word of LSU1 for this byte */ - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); - } - ttm = unit >> 2; - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); - } - - /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - psel = event[i] & PM_PMCSEL_MSK; - isbus = event[i] & PM_BUSEVENT_MSK; - if (!pmc) { - /* Bus event or any-PMC direct event */ - for (pmc = 0; pmc < 4; ++pmc) { - if (pmc_inuse & (1 << pmc)) - continue; - grp = (pmc >> 1) & 1; - if (isbus) { - if (grp == (byte & 1)) - break; - } else if (pmc_grp_use[grp] < 2) { - ++pmc_grp_use[grp]; - break; - } - } - pmc_inuse |= 1 << pmc; - } else if (pmc <= 4) { - /* Direct event */ - --pmc; - if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) - /* add events on higher-numbered bus */ - mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); - } else { - /* Instructions or run cycles on PMC5/6 */ - --pmc; - } - if (isbus && unit == PM_GRS) { - bit = psel & 7; - grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; - mmcr1 |= (u64)grsel << grsel_shift[bit]; - } - if (power5_marked_instr_event(event[i])) - mmcra |= MMCRA_SAMPLE_ENABLE; - if (pmc <= 3) - mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); - hwc[i] = pmc; - } - - /* Return MMCRx values */ - mmcr[0] = 0; - if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; - if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - return 0; -} - -static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) -{ - if (pmc <= 3) - mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); -} - -static int power5_generic_events[] = 
{ - [PERF_COUNT_HW_CPU_CYCLES] = 0xf, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x100009, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ - [PERF_COUNT_HW_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ - [PERF_COUNT_HW_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x - -/* - * Table of generalized cache-related events. - * 0 means not supported, -1 means nonsensical, other values - * are event codes. - */ -static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x4c1090, 0x3c1088 }, - [C(OP_WRITE)] = { 0x3c1090, 0xc10c3 }, - [C(OP_PREFETCH)] = { 0xc70e7, 0 }, - }, - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { 0, 0 }, - }, - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x3c309b }, - [C(OP_WRITE)] = { 0, 0 }, - [C(OP_PREFETCH)] = { 0xc50c3, 0 }, - }, - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x2c4090, 0x800c4 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x800c0 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x230e4, 0x230e5 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, -}; - -struct power_pmu power5_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x7000090000555ull, - .test_adder = 0x3000490000000ull, - .compute_mmcr = power5_compute_mmcr, - .get_constraint = power5_get_constraint, - .get_alternatives = power5_get_alternatives, - .disable_pmc = power5_disable_pmc, - .n_generic = ARRAY_SIZE(power5_generic_events), - .generic_events = power5_generic_events, - .cache_events = &power5_cache_events, -}; diff --git 
a/trunk/arch/powerpc/kernel/power6-pmu.c b/trunk/arch/powerpc/kernel/power6-pmu.c deleted file mode 100644 index 46f74bebcfd9..000000000000 --- a/trunk/arch/powerpc/kernel/power6-pmu.c +++ /dev/null @@ -1,532 +0,0 @@ -/* - * Performance counter support for POWER6 processors. - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -/* - * Bits in event code for POWER6 - */ -#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ -#define PM_PMC_MSK 0x7 -#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) -#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */ -#define PM_UNIT_MSK 0xf -#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) -#define PM_LLAV 0x8000 /* Load lookahead match value */ -#define PM_LLA 0x4000 /* Load lookahead match enable */ -#define PM_BYTE_SH 12 /* Byte of event bus to use */ -#define PM_BYTE_MSK 3 -#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) 
*/ -#define PM_SUBUNIT_MSK 7 -#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) -#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ -#define PM_BUSEVENT_MSK 0xf3700 - -/* - * Bits in MMCR1 for POWER6 - */ -#define MMCR1_TTM0SEL_SH 60 -#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) -#define MMCR1_TTMSEL_MSK 0xf -#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) -#define MMCR1_NESTSEL_SH 45 -#define MMCR1_NESTSEL_MSK 0x7 -#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) -#define MMCR1_PMC1_LLA ((u64)1 << 44) -#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) -#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) -#define MMCR1_PMC1SEL_SH 24 -#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) -#define MMCR1_PMCSEL_MSK 0xff - -/* - * Map of which direct events on which PMCs are marked instruction events. - * Indexed by PMCSEL value >> 1. - * Bottom 4 bits are a map of which PMCs are interesting, - * top 4 bits say what sort of event: - * 0 = direct marked event, - * 1 = byte decode event, - * 4 = add/and event (PMC1 -> bits 0 & 4), - * 5 = add/and event (PMC1 -> bits 1 & 5), - * 6 = add/and event (PMC1 -> bits 2 & 6), - * 7 = add/and event (PMC1 -> bits 3 & 7). 
- */ -static unsigned char direct_event_is_marked[0x60 >> 1] = { - 0, /* 00 */ - 0, /* 02 */ - 0, /* 04 */ - 0x07, /* 06 PM_MRK_ST_CMPL, PM_MRK_ST_GPS, PM_MRK_ST_CMPL_INT */ - 0x04, /* 08 PM_MRK_DFU_FIN */ - 0x06, /* 0a PM_MRK_IFU_FIN, PM_MRK_INST_FIN */ - 0, /* 0c */ - 0, /* 0e */ - 0x02, /* 10 PM_MRK_INST_DISP */ - 0x08, /* 12 PM_MRK_LSU_DERAT_MISS */ - 0, /* 14 */ - 0, /* 16 */ - 0x0c, /* 18 PM_THRESH_TIMEO, PM_MRK_INST_FIN */ - 0x0f, /* 1a PM_MRK_INST_DISP, PM_MRK_{FXU,FPU,LSU}_FIN */ - 0x01, /* 1c PM_MRK_INST_ISSUED */ - 0, /* 1e */ - 0, /* 20 */ - 0, /* 22 */ - 0, /* 24 */ - 0, /* 26 */ - 0x15, /* 28 PM_MRK_DATA_FROM_L2MISS, PM_MRK_DATA_FROM_L3MISS */ - 0, /* 2a */ - 0, /* 2c */ - 0, /* 2e */ - 0x4f, /* 30 */ - 0x7f, /* 32 */ - 0x4f, /* 34 */ - 0x5f, /* 36 */ - 0x6f, /* 38 */ - 0x4f, /* 3a */ - 0, /* 3c */ - 0x08, /* 3e PM_MRK_INST_TIMEO */ - 0x1f, /* 40 */ - 0x1f, /* 42 */ - 0x1f, /* 44 */ - 0x1f, /* 46 */ - 0x1f, /* 48 */ - 0x1f, /* 4a */ - 0x1f, /* 4c */ - 0x1f, /* 4e */ - 0, /* 50 */ - 0x05, /* 52 PM_MRK_BR_TAKEN, PM_MRK_BR_MPRED */ - 0x1c, /* 54 PM_MRK_PTEG_FROM_L3MISS, PM_MRK_PTEG_FROM_L2MISS */ - 0x02, /* 56 PM_MRK_LD_MISS_L1 */ - 0, /* 58 */ - 0, /* 5a */ - 0, /* 5c */ - 0, /* 5e */ -}; - -/* - * Masks showing for each unit which bits are marked events. - * These masks are in LE order, i.e. 0x00000001 is byte 0, bit 0. 
- */ -static u32 marked_bus_events[16] = { - 0x01000000, /* direct events set 1: byte 3 bit 0 */ - 0x00010000, /* direct events set 2: byte 2 bit 0 */ - 0, 0, 0, 0, /* IDU, IFU, nest: nothing */ - 0x00000088, /* VMX set 1: byte 0 bits 3, 7 */ - 0x000000c0, /* VMX set 2: byte 0 bits 4-7 */ - 0x04010000, /* LSU set 1: byte 2 bit 0, byte 3 bit 2 */ - 0xff010000u, /* LSU set 2: byte 2 bit 0, all of byte 3 */ - 0, /* LSU set 3 */ - 0x00000010, /* VMX set 3: byte 0 bit 4 */ - 0, /* BFP set 1 */ - 0x00000022, /* BFP set 2: byte 0 bits 1, 5 */ - 0, 0 -}; - -/* - * Returns 1 if event counts things relating to marked instructions - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. - */ -static int power6_marked_instr_event(u64 event) -{ - int pmc, psel, ptype; - int bit, byte, unit; - u32 mask; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - psel = (event & PM_PMCSEL_MSK) >> 1; /* drop edge/level bit */ - if (pmc >= 5) - return 0; - - bit = -1; - if (psel < sizeof(direct_event_is_marked)) { - ptype = direct_event_is_marked[psel]; - if (pmc == 0 || !(ptype & (1 << (pmc - 1)))) - return 0; - ptype >>= 4; - if (ptype == 0) - return 1; - if (ptype == 1) - bit = 0; - else - bit = ptype ^ (pmc - 1); - } else if ((psel & 0x48) == 0x40) - bit = psel & 7; - - if (!(event & PM_BUSEVENT_MSK) || bit == -1) - return 0; - - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - mask = marked_bus_events[unit]; - return (mask >> (byte * 8 + bit)) & 1; -} - -/* - * Assign PMC numbers and compute MMCR1 value for a set of events - */ -static int p6_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) -{ - u64 mmcr1 = 0; - u64 mmcra = 0; - int i; - unsigned int pmc, ev, b, u, s, psel; - unsigned int ttmset = 0; - unsigned int pmc_inuse = 0; - - if (n_ev > 6) - return -1; - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc_inuse & (1 << (pmc - 1))) - return -1; /* collision! 
*/ - pmc_inuse |= 1 << (pmc - 1); - } - } - for (i = 0; i < n_ev; ++i) { - ev = event[i]; - pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - --pmc; - } else { - /* can go on any PMC; find a free one */ - for (pmc = 0; pmc < 4; ++pmc) - if (!(pmc_inuse & (1 << pmc))) - break; - if (pmc >= 4) - return -1; - pmc_inuse |= 1 << pmc; - } - hwc[i] = pmc; - psel = ev & PM_PMCSEL_MSK; - if (ev & PM_BUSEVENT_MSK) { - /* this event uses the event bus */ - b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; - u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; - /* check for conflict on this byte of event bus */ - if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) - return -1; - mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); - ttmset |= 1 << b; - if (u == 5) { - /* Nest events have a further mux */ - s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; - if ((ttmset & 0x10) && - MMCR1_NESTSEL(mmcr1) != s) - return -1; - ttmset |= 0x10; - mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; - } - if (0x30 <= psel && psel <= 0x3d) { - /* these need the PMCx_ADDR_SEL bits */ - if (b >= 2) - mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; - } - /* bus select values are different for PMC3/4 */ - if (pmc >= 2 && (psel & 0x90) == 0x80) - psel ^= 0x20; - } - if (ev & PM_LLA) { - mmcr1 |= MMCR1_PMC1_LLA >> pmc; - if (ev & PM_LLAV) - mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; - } - if (power6_marked_instr_event(event[i])) - mmcra |= MMCRA_SAMPLE_ENABLE; - if (pmc < 4) - mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); - } - mmcr[0] = 0; - if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; - if (pmc_inuse & 0xe) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - return 0; -} - -/* - * Layout of constraint bits: - * - * 0-1 add field: number of uses of PMC1 (max 1) - * 2-3, 4-5, 6-7, 8-9, 10-11: ditto for PMC2, 3, 4, 5, 6 - * 12-15 add field: number of uses of PMC1-4 (max 4) - * 16-19 select field: unit on byte 0 of event bus - * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 - * 32-34 select field: nest (subunit) event selector - */ -static int 
p6_get_constraint(u64 event, u64 *maskp, u64 *valp) -{ - int pmc, byte, sh, subunit; - u64 mask = 0, value = 0; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 4 && !(event == 0x500009 || event == 0x600005)) - return -1; - sh = (pmc - 1) * 2; - mask |= 2 << sh; - value |= 1 << sh; - } - if (event & PM_BUSEVENT_MSK) { - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - sh = byte * 4 + (16 - PM_UNIT_SH); - mask |= PM_UNIT_MSKS << sh; - value |= (u64)(event & PM_UNIT_MSKS) << sh; - if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { - subunit = (event >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; - mask |= (u64)PM_SUBUNIT_MSK << 32; - value |= (u64)subunit << 32; - } - } - if (pmc <= 4) { - mask |= 0x8000; /* add field for count of PMC1-4 uses */ - value |= 0x1000; - } - *maskp = mask; - *valp = value; - return 0; -} - -static int p6_limited_pmc_event(u64 event) -{ - int pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - - return pmc == 5 || pmc == 6; -} - -#define MAX_ALT 4 /* at most 4 alternatives for any event */ - -static const unsigned int event_alternatives[][MAX_ALT] = { - { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ - { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ - { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ - { 0x10000a, 0x2000f4, 0x600005 }, /* PM_RUN_CYC */ - { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ - { 0x10000e, 0x400010 }, /* PM_PURR */ - { 0x100010, 0x4000f8 }, /* PM_FLUSH */ - { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ - { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ - { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ - { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ - { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ - { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ - { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ - { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ - { 0x200012, 0x300012 }, /* PM_INST_DISP */ - { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ - { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ - { 0x2000fe, 
0x300056 }, /* PM_DATA_FROM_L2MISS */ - { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ - { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ - { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ - { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ -}; - -/* - * This could be made more efficient with a binary search on - * a presorted list, if necessary - */ -static int find_alternatives_list(u64 event) -{ - int i, j; - unsigned int alt; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - return -1; - for (j = 0; j < MAX_ALT; ++j) { - alt = event_alternatives[i][j]; - if (!alt || event < alt) - break; - if (event == alt) - return i; - } - } - return -1; -} - -static int p6_get_alternatives(u64 event, unsigned int flags, u64 alt[]) -{ - int i, j, nlim; - unsigned int psel, pmc; - unsigned int nalt = 1; - u64 aevent; - - alt[0] = event; - nlim = p6_limited_pmc_event(event); - - /* check the alternatives table */ - i = find_alternatives_list(event); - if (i >= 0) { - /* copy out alternatives from list */ - for (j = 0; j < MAX_ALT; ++j) { - aevent = event_alternatives[i][j]; - if (!aevent) - break; - if (aevent != event) - alt[nalt++] = aevent; - nlim += p6_limited_pmc_event(aevent); - } - - } else { - /* Check for alternative ways of computing sum events */ - /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ - psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc && (psel == 0x32 || psel == 0x34)) - alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | - ((5 - pmc) << PM_PMC_SH); - - /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ - if (pmc && (psel == 0x38 || psel == 0x3a)) - alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | - ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); - } - - if (flags & PPMU_ONLY_COUNT_RUN) { - /* - * We're only counting in RUN state, - * so PM_CYC is equivalent to PM_RUN_CYC, - * PM_INST_CMPL === PM_RUN_INST_CMPL, PM_PURR === PM_RUN_PURR. 
- * This doesn't include alternatives that don't provide - * any extra flexibility in assigning PMCs (e.g. - * 0x10000a for PM_RUN_CYC vs. 0x1e for PM_CYC). - * Note that even with these additional alternatives - * we never end up with more than 4 alternatives for any event. - */ - j = nalt; - for (i = 0; i < nalt; ++i) { - switch (alt[i]) { - case 0x1e: /* PM_CYC */ - alt[j++] = 0x600005; /* PM_RUN_CYC */ - ++nlim; - break; - case 0x10000a: /* PM_RUN_CYC */ - alt[j++] = 0x1e; /* PM_CYC */ - break; - case 2: /* PM_INST_CMPL */ - alt[j++] = 0x500009; /* PM_RUN_INST_CMPL */ - ++nlim; - break; - case 0x500009: /* PM_RUN_INST_CMPL */ - alt[j++] = 2; /* PM_INST_CMPL */ - break; - case 0x10000e: /* PM_PURR */ - alt[j++] = 0x4000f4; /* PM_RUN_PURR */ - break; - case 0x4000f4: /* PM_RUN_PURR */ - alt[j++] = 0x10000e; /* PM_PURR */ - break; - } - } - nalt = j; - } - - if (!(flags & PPMU_LIMITED_PMC_OK) && nlim) { - /* remove the limited PMC events */ - j = 0; - for (i = 0; i < nalt; ++i) { - if (!p6_limited_pmc_event(alt[i])) { - alt[j] = alt[i]; - ++j; - } - } - nalt = j; - } else if ((flags & PPMU_LIMITED_PMC_REQD) && nlim < nalt) { - /* remove all but the limited PMC events */ - j = 0; - for (i = 0; i < nalt; ++i) { - if (p6_limited_pmc_event(alt[i])) { - alt[j] = alt[i]; - ++j; - } - } - nalt = j; - } - - return nalt; -} - -static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) -{ - /* Set PMCxSEL to 0 to disable PMCx */ - if (pmc <= 3) - mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); -} - -static int power6_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 0x1e, - [PERF_COUNT_HW_INSTRUCTIONS] = 2, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ - [PERF_COUNT_HW_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ - [PERF_COUNT_HW_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x - -/* - * Table of generalized cache-related events. 
- * 0 means not supported, -1 means nonsensical, other values - * are event codes. - * The "DTLB" and "ITLB" events relate to the DERAT and IERAT. - */ -static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x80082, 0x80080 }, - [C(OP_WRITE)] = { 0x80086, 0x80088 }, - [C(OP_PREFETCH)] = { 0x810a4, 0 }, - }, - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x100056 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { 0x4008c, 0 }, - }, - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x150730, 0x250532 }, - [C(OP_WRITE)] = { 0x250432, 0x150432 }, - [C(OP_PREFETCH)] = { 0x810a6, 0 }, - }, - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x20000e }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x420ce }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x430e6, 0x400052 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, -}; - -struct power_pmu power6_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT, - .add_fields = 0x1555, - .test_adder = 0x3000, - .compute_mmcr = p6_compute_mmcr, - .get_constraint = p6_get_constraint, - .get_alternatives = p6_get_alternatives, - .disable_pmc = p6_disable_pmc, - .limited_pmc_event = p6_limited_pmc_event, - .flags = PPMU_LIMITED_PMC5_6 | PPMU_ALT_SIPR, - .n_generic = ARRAY_SIZE(power6_generic_events), - .generic_events = power6_generic_events, - .cache_events = &power6_cache_events, -}; diff --git a/trunk/arch/powerpc/kernel/power7-pmu.c b/trunk/arch/powerpc/kernel/power7-pmu.c deleted file mode 100644 index b3f7d1216bae..000000000000 --- a/trunk/arch/powerpc/kernel/power7-pmu.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Performance counter support for POWER7 processors. 
- * - * Copyright 2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -/* - * Bits in event code for POWER7 - */ -#define PM_PMC_SH 16 /* PMC number (1-based) for direct events */ -#define PM_PMC_MSK 0xf -#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) -#define PM_UNIT_SH 12 /* TTMMUX number and setting - unit select */ -#define PM_UNIT_MSK 0xf -#define PM_COMBINE_SH 11 /* Combined event bit */ -#define PM_COMBINE_MSK 1 -#define PM_COMBINE_MSKS 0x800 -#define PM_L2SEL_SH 8 /* L2 event select */ -#define PM_L2SEL_MSK 7 -#define PM_PMCSEL_MSK 0xff - -/* - * Bits in MMCR1 for POWER7 - */ -#define MMCR1_TTM0SEL_SH 60 -#define MMCR1_TTM1SEL_SH 56 -#define MMCR1_TTM2SEL_SH 52 -#define MMCR1_TTM3SEL_SH 48 -#define MMCR1_TTMSEL_MSK 0xf -#define MMCR1_L2SEL_SH 45 -#define MMCR1_L2SEL_MSK 7 -#define MMCR1_PMC1_COMBINE_SH 35 -#define MMCR1_PMC2_COMBINE_SH 34 -#define MMCR1_PMC3_COMBINE_SH 33 -#define MMCR1_PMC4_COMBINE_SH 32 -#define MMCR1_PMC1SEL_SH 24 -#define MMCR1_PMC2SEL_SH 16 -#define MMCR1_PMC3SEL_SH 8 -#define MMCR1_PMC4SEL_SH 0 -#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) -#define MMCR1_PMCSEL_MSK 0xff - -/* - * Bits in MMCRA - */ - -/* - * Layout of constraint bits: - * 6666555555555544444444443333333333222222222211111111110000000000 - * 3210987654321098765432109876543210987654321098765432109876543210 - * [ ><><><><><><> - * NC P6P5P4P3P2P1 - * - * NC - number of counters - * 15: NC error 0x8000 - * 12-14: number of events needing PMC1-4 0x7000 - * - * P6 - * 11: P6 error 0x800 - * 10-11: Count of events needing PMC6 - * - * P1..P5 - * 0-9: Count of events needing PMC1..PMC5 - */ - -static int power7_get_constraint(u64 event, u64 *maskp, u64 *valp) -{ - int pmc, sh; - u64 mask = 
0, value = 0; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 6) - return -1; - sh = (pmc - 1) * 2; - mask |= 2 << sh; - value |= 1 << sh; - if (pmc >= 5 && !(event == 0x500fa || event == 0x600f4)) - return -1; - } - if (pmc < 5) { - /* need a counter from PMC1-4 set */ - mask |= 0x8000; - value |= 0x1000; - } - *maskp = mask; - *valp = value; - return 0; -} - -#define MAX_ALT 2 /* at most 2 alternatives for any event */ - -static const unsigned int event_alternatives[][MAX_ALT] = { - { 0x200f2, 0x300f2 }, /* PM_INST_DISP */ - { 0x200f4, 0x600f4 }, /* PM_RUN_CYC */ - { 0x400fa, 0x500fa }, /* PM_RUN_INST_CMPL */ -}; - -/* - * Scan the alternatives table for a match and return the - * index into the alternatives table if found, else -1. - */ -static int find_alternative(u64 event) -{ - int i, j; - - for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { - if (event < event_alternatives[i][0]) - break; - for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) - if (event == event_alternatives[i][j]) - return i; - } - return -1; -} - -static s64 find_alternative_decode(u64 event) -{ - int pmc, psel; - - /* this only handles the 4x decode events */ - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - psel = event & PM_PMCSEL_MSK; - if ((pmc == 2 || pmc == 4) && (psel & ~7) == 0x40) - return event - (1 << PM_PMC_SH) + 8; - if ((pmc == 1 || pmc == 3) && (psel & ~7) == 0x48) - return event + (1 << PM_PMC_SH) - 8; - return -1; -} - -static int power7_get_alternatives(u64 event, unsigned int flags, u64 alt[]) -{ - int i, j, nalt = 1; - s64 ae; - - alt[0] = event; - nalt = 1; - i = find_alternative(event); - if (i >= 0) { - for (j = 0; j < MAX_ALT; ++j) { - ae = event_alternatives[i][j]; - if (ae && ae != event) - alt[nalt++] = ae; - } - } else { - ae = find_alternative_decode(event); - if (ae > 0) - alt[nalt++] = ae; - } - - if (flags & PPMU_ONLY_COUNT_RUN) { - /* - * We're only counting in RUN state, - * so PM_CYC is equivalent to PM_RUN_CYC - * and 
PM_INST_CMPL === PM_RUN_INST_CMPL. - * This doesn't include alternatives that don't provide - * any extra flexibility in assigning PMCs. - */ - j = nalt; - for (i = 0; i < nalt; ++i) { - switch (alt[i]) { - case 0x1e: /* PM_CYC */ - alt[j++] = 0x600f4; /* PM_RUN_CYC */ - break; - case 0x600f4: /* PM_RUN_CYC */ - alt[j++] = 0x1e; - break; - case 0x2: /* PM_PPC_CMPL */ - alt[j++] = 0x500fa; /* PM_RUN_INST_CMPL */ - break; - case 0x500fa: /* PM_RUN_INST_CMPL */ - alt[j++] = 0x2; /* PM_PPC_CMPL */ - break; - } - } - nalt = j; - } - - return nalt; -} - -/* - * Returns 1 if event counts things relating to marked instructions - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. - */ -static int power7_marked_instr_event(u64 event) -{ - int pmc, psel; - int unit; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - psel = event & PM_PMCSEL_MSK & ~1; /* trim off edge/level bit */ - if (pmc >= 5) - return 0; - - switch (psel >> 4) { - case 2: - return pmc == 2 || pmc == 4; - case 3: - if (psel == 0x3c) - return pmc == 1; - if (psel == 0x3e) - return pmc != 2; - return 1; - case 4: - case 5: - return unit == 0xd; - case 6: - if (psel == 0x64) - return pmc >= 3; - case 8: - return unit == 0xd; - } - return 0; -} - -static int power7_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) -{ - u64 mmcr1 = 0; - u64 mmcra = 0; - unsigned int pmc, unit, combine, l2sel, psel; - unsigned int pmc_inuse = 0; - int i; - - /* First pass to count resource use */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 6) - return -1; - if (pmc_inuse & (1 << (pmc - 1))) - return -1; - pmc_inuse |= 1 << (pmc - 1); - } - } - - /* Second pass: assign PMCs, set all MMCR1 fields */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - combine = (event[i] >> PM_COMBINE_SH) & PM_COMBINE_MSK; - l2sel = (event[i] >> 
PM_L2SEL_SH) & PM_L2SEL_MSK; - psel = event[i] & PM_PMCSEL_MSK; - if (!pmc) { - /* Bus event or any-PMC direct event */ - for (pmc = 0; pmc < 4; ++pmc) { - if (!(pmc_inuse & (1 << pmc))) - break; - } - if (pmc >= 4) - return -1; - pmc_inuse |= 1 << pmc; - } else { - /* Direct or decoded event */ - --pmc; - } - if (pmc <= 3) { - mmcr1 |= (u64) unit << (MMCR1_TTM0SEL_SH - 4 * pmc); - mmcr1 |= (u64) combine << (MMCR1_PMC1_COMBINE_SH - pmc); - mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); - if (unit == 6) /* L2 events */ - mmcr1 |= (u64) l2sel << MMCR1_L2SEL_SH; - } - if (power7_marked_instr_event(event[i])) - mmcra |= MMCRA_SAMPLE_ENABLE; - hwc[i] = pmc; - } - - /* Return MMCRx values */ - mmcr[0] = 0; - if (pmc_inuse & 1) - mmcr[0] = MMCR0_PMC1CE; - if (pmc_inuse & 0x3e) - mmcr[0] |= MMCR0_PMCjCE; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - return 0; -} - -static void power7_disable_pmc(unsigned int pmc, u64 mmcr[]) -{ - if (pmc <= 3) - mmcr[1] &= ~(0xffULL << MMCR1_PMCSEL_SH(pmc)); -} - -static int power7_generic_events[] = { - [PERF_COUNT_CPU_CYCLES] = 0x1e, - [PERF_COUNT_INSTRUCTIONS] = 2, - [PERF_COUNT_CACHE_REFERENCES] = 0xc880, /* LD_REF_L1_LSU */ - [PERF_COUNT_CACHE_MISSES] = 0x400f0, /* LD_MISS_L1 */ - [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x10068, /* BRU_FIN */ - [PERF_COUNT_BRANCH_MISSES] = 0x400f6, /* BR_MPRED */ -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x - -/* - * Table of generalized cache-related events. - * 0 means not supported, -1 means nonsensical, other values - * are event codes. 
- */ -static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x400f0, 0xc880 }, - [C(OP_WRITE)] = { 0, 0x300f0 }, - [C(OP_PREFETCH)] = { 0xd8b8, 0 }, - }, - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x200fc }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { 0x408a, 0 }, - }, - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x6080, 0x6084 }, - [C(OP_WRITE)] = { 0x6082, 0x6086 }, - [C(OP_PREFETCH)] = { 0, 0 }, - }, - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x300fc }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x400fc }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x10068, 0x400f6 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, -}; - -struct power_pmu power7_pmu = { - .n_counter = 6, - .max_alternatives = MAX_ALT + 1, - .add_fields = 0x1555ull, - .test_adder = 0x3000ull, - .compute_mmcr = power7_compute_mmcr, - .get_constraint = power7_get_constraint, - .get_alternatives = power7_get_alternatives, - .disable_pmc = power7_disable_pmc, - .n_generic = ARRAY_SIZE(power7_generic_events), - .generic_events = power7_generic_events, - .cache_events = &power7_cache_events, -}; diff --git a/trunk/arch/powerpc/kernel/ppc970-pmu.c b/trunk/arch/powerpc/kernel/ppc970-pmu.c deleted file mode 100644 index ba0a357a89f4..000000000000 --- a/trunk/arch/powerpc/kernel/ppc970-pmu.c +++ /dev/null @@ -1,482 +0,0 @@ -/* - * Performance counter support for PPC970-family processors. - * - * Copyright 2008-2009 Paul Mackerras, IBM Corporation. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -/* - * Bits in event code for PPC970 - */ -#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ -#define PM_PMC_MSK 0xf -#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ -#define PM_UNIT_MSK 0xf -#define PM_SPCSEL_SH 6 -#define PM_SPCSEL_MSK 3 -#define PM_BYTE_SH 4 /* Byte number of event bus to use */ -#define PM_BYTE_MSK 3 -#define PM_PMCSEL_MSK 0xf - -/* Values in PM_UNIT field */ -#define PM_NONE 0 -#define PM_FPU 1 -#define PM_VPU 2 -#define PM_ISU 3 -#define PM_IFU 4 -#define PM_IDU 5 -#define PM_STS 6 -#define PM_LSU0 7 -#define PM_LSU1U 8 -#define PM_LSU1L 9 -#define PM_LASTUNIT 9 - -/* - * Bits in MMCR0 for PPC970 - */ -#define MMCR0_PMC1SEL_SH 8 -#define MMCR0_PMC2SEL_SH 1 -#define MMCR_PMCSEL_MSK 0x1f - -/* - * Bits in MMCR1 for PPC970 - */ -#define MMCR1_TTM0SEL_SH 62 -#define MMCR1_TTM1SEL_SH 59 -#define MMCR1_TTM3SEL_SH 53 -#define MMCR1_TTMSEL_MSK 3 -#define MMCR1_TD_CP_DBG0SEL_SH 50 -#define MMCR1_TD_CP_DBG1SEL_SH 48 -#define MMCR1_TD_CP_DBG2SEL_SH 46 -#define MMCR1_TD_CP_DBG3SEL_SH 44 -#define MMCR1_PMC1_ADDER_SEL_SH 39 -#define MMCR1_PMC2_ADDER_SEL_SH 38 -#define MMCR1_PMC6_ADDER_SEL_SH 37 -#define MMCR1_PMC5_ADDER_SEL_SH 36 -#define MMCR1_PMC8_ADDER_SEL_SH 35 -#define MMCR1_PMC7_ADDER_SEL_SH 34 -#define MMCR1_PMC3_ADDER_SEL_SH 33 -#define MMCR1_PMC4_ADDER_SEL_SH 32 -#define MMCR1_PMC3SEL_SH 27 -#define MMCR1_PMC4SEL_SH 22 -#define MMCR1_PMC5SEL_SH 17 -#define MMCR1_PMC6SEL_SH 12 -#define MMCR1_PMC7SEL_SH 7 -#define MMCR1_PMC8SEL_SH 2 - -static short mmcr1_adder_bits[8] = { - MMCR1_PMC1_ADDER_SEL_SH, - MMCR1_PMC2_ADDER_SEL_SH, - MMCR1_PMC3_ADDER_SEL_SH, - MMCR1_PMC4_ADDER_SEL_SH, - MMCR1_PMC5_ADDER_SEL_SH, - 
MMCR1_PMC6_ADDER_SEL_SH, - MMCR1_PMC7_ADDER_SEL_SH, - MMCR1_PMC8_ADDER_SEL_SH -}; - -/* - * Bits in MMCRA - */ - -/* - * Layout of constraint bits: - * 6666555555555544444444443333333333222222222211111111110000000000 - * 3210987654321098765432109876543210987654321098765432109876543210 - * <><><>[ >[ >[ >< >< >< >< ><><><><><><><><> - * SPT0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 - * - * SP - SPCSEL constraint - * 48-49: SPCSEL value 0x3_0000_0000_0000 - * - * T0 - TTM0 constraint - * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 - * - * T1 - TTM1 constraint - * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 - * - * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS - * 43: UC3 error 0x0800_0000_0000 - * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 - * 41: ISU events needed 0x0200_0000_0000 - * 40: IDU|STS events needed 0x0100_0000_0000 - * - * PS1 - * 39: PS1 error 0x0080_0000_0000 - * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 - * - * PS2 - * 35: PS2 error 0x0008_0000_0000 - * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 - * - * B0 - * 28-31: Byte 0 event source 0xf000_0000 - * Encoding as for the event code - * - * B1, B2, B3 - * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources - * - * P1 - * 15: P1 error 0x8000 - * 14-15: Count of events needing PMC1 - * - * P2..P8 - * 0-13: Count of events needing PMC2..PMC8 - */ - -static unsigned char direct_marked_event[8] = { - (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ - (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ - (1<<3) | (1<<5), /* PMC3: PM_MRK_ST_CMPL_INT, PM_MRK_VMX_FIN */ - (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ - (1<<4) | (1<<5), /* PMC5: PM_GRP_MRK, PM_MRK_GRP_TIMEO */ - (1<<3) | (1<<4) | (1<<5), - /* PMC6: PM_MRK_ST_STS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ - (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ - (1<<4) /* PMC8: PM_MRK_LSU_FIN */ -}; - -/* - * Returns 1 if 
event counts things relating to marked instructions - * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. - */ -static int p970_marked_instr_event(u64 event) -{ - int pmc, psel, unit, byte, bit; - unsigned int mask; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - psel = event & PM_PMCSEL_MSK; - if (pmc) { - if (direct_marked_event[pmc - 1] & (1 << psel)) - return 1; - if (psel == 0) /* add events */ - bit = (pmc <= 4)? pmc - 1: 8 - pmc; - else if (psel == 7 || psel == 13) /* decode events */ - bit = 4; - else - return 0; - } else - bit = psel; - - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - mask = 0; - switch (unit) { - case PM_VPU: - mask = 0x4c; /* byte 0 bits 2,3,6 */ - case PM_LSU0: - /* byte 2 bits 0,2,3,4,6; all of byte 1 */ - mask = 0x085dff00; - case PM_LSU1L: - mask = 0x50 << 24; /* byte 3 bits 4,6 */ - break; - } - return (mask >> (byte * 8 + bit)) & 1; -} - -/* Masks and values for using events from the various units */ -static u64 unit_cons[PM_LASTUNIT+1][2] = { - [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, - [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, - [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, - [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, - [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, - [PM_STS] = { 0x380000000000ull, 0x310000000000ull }, -}; - -static int p970_get_constraint(u64 event, u64 *maskp, u64 *valp) -{ - int pmc, byte, unit, sh, spcsel; - u64 mask = 0, value = 0; - int grp = -1; - - pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc > 8) - return -1; - sh = (pmc - 1) * 2; - mask |= 2 << sh; - value |= 1 << sh; - grp = ((pmc - 1) >> 1) & 1; - } - unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; - if (unit) { - if (unit > PM_LASTUNIT) - return -1; - mask |= unit_cons[unit][0]; - value |= unit_cons[unit][1]; - byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; - /* - * Bus events on bytes 0 and 2 can be counted - * on PMC1/2/5/6; bytes 1 and 
3 on PMC3/4/7/8. - */ - if (!pmc) - grp = byte & 1; - /* Set byte lane select field */ - mask |= 0xfULL << (28 - 4 * byte); - value |= (u64)unit << (28 - 4 * byte); - } - if (grp == 0) { - /* increment PMC1/2/5/6 field */ - mask |= 0x8000000000ull; - value |= 0x1000000000ull; - } else if (grp == 1) { - /* increment PMC3/4/7/8 field */ - mask |= 0x800000000ull; - value |= 0x100000000ull; - } - spcsel = (event >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; - if (spcsel) { - mask |= 3ull << 48; - value |= (u64)spcsel << 48; - } - *maskp = mask; - *valp = value; - return 0; -} - -static int p970_get_alternatives(u64 event, unsigned int flags, u64 alt[]) -{ - alt[0] = event; - - /* 2 alternatives for LSU empty */ - if (event == 0x2002 || event == 0x3002) { - alt[1] = event ^ 0x1000; - return 2; - } - - return 1; -} - -static int p970_compute_mmcr(u64 event[], int n_ev, - unsigned int hwc[], u64 mmcr[]) -{ - u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; - unsigned int pmc, unit, byte, psel; - unsigned int ttm, grp; - unsigned int pmc_inuse = 0; - unsigned int pmc_grp_use[2]; - unsigned char busbyte[4]; - unsigned char unituse[16]; - unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; - unsigned char ttmuse[2]; - unsigned char pmcsel[8]; - int i; - int spcsel; - - if (n_ev > 8) - return -1; - - /* First pass to count resource use */ - pmc_grp_use[0] = pmc_grp_use[1] = 0; - memset(busbyte, 0, sizeof(busbyte)); - memset(unituse, 0, sizeof(unituse)); - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - if (pmc) { - if (pmc_inuse & (1 << (pmc - 1))) - return -1; - pmc_inuse |= 1 << (pmc - 1); - /* count 1/2/5/6 vs 3/4/7/8 use */ - ++pmc_grp_use[((pmc - 1) >> 1) & 1]; - } - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - if (unit) { - if (unit > PM_LASTUNIT) - return -1; - if (!pmc) - ++pmc_grp_use[byte & 1]; - if (busbyte[byte] && busbyte[byte] != unit) - return -1; - busbyte[byte] = unit; - unituse[unit] = 
1; - } - } - if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) - return -1; - - /* - * Assign resources and set multiplexer selects. - * - * PM_ISU can go either on TTM0 or TTM1, but that's the only - * choice we have to deal with. - */ - if (unituse[PM_ISU] & - (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) - unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ - /* Set TTM[01]SEL fields. */ - ttmuse[0] = ttmuse[1] = 0; - for (i = PM_FPU; i <= PM_STS; ++i) { - if (!unituse[i]) - continue; - ttm = unitmap[i]; - ++ttmuse[(ttm >> 2) & 1]; - mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; - } - /* Check only one unit per TTMx */ - if (ttmuse[0] > 1 || ttmuse[1] > 1) - return -1; - - /* Set byte lane select fields and TTM3SEL. */ - for (byte = 0; byte < 4; ++byte) { - unit = busbyte[byte]; - if (!unit) - continue; - if (unit <= PM_STS) - ttm = (unitmap[unit] >> 2) & 1; - else if (unit == PM_LSU0) - ttm = 2; - else { - ttm = 3; - if (unit == PM_LSU1L && byte >= 2) - mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); - } - mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); - } - - /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ - memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ - for (i = 0; i < n_ev; ++i) { - pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; - unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; - byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; - psel = event[i] & PM_PMCSEL_MSK; - if (!pmc) { - /* Bus event or any-PMC direct event */ - if (unit) - psel |= 0x10 | ((byte & 2) << 2); - else - psel |= 8; - for (pmc = 0; pmc < 8; ++pmc) { - if (pmc_inuse & (1 << pmc)) - continue; - grp = (pmc >> 1) & 1; - if (unit) { - if (grp == (byte & 1)) - break; - } else if (pmc_grp_use[grp] < 4) { - ++pmc_grp_use[grp]; - break; - } - } - pmc_inuse |= 1 << pmc; - } else { - /* Direct event */ - --pmc; - if (psel == 0 && (byte & 2)) - /* add events on higher-numbered bus */ - mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; - } - pmcsel[pmc] = psel; - 
hwc[i] = pmc; - spcsel = (event[i] >> PM_SPCSEL_SH) & PM_SPCSEL_MSK; - mmcr1 |= spcsel; - if (p970_marked_instr_event(event[i])) - mmcra |= MMCRA_SAMPLE_ENABLE; - } - for (pmc = 0; pmc < 2; ++pmc) - mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); - for (; pmc < 8; ++pmc) - mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); - if (pmc_inuse & 1) - mmcr0 |= MMCR0_PMC1CE; - if (pmc_inuse & 0xfe) - mmcr0 |= MMCR0_PMCjCE; - - mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ - - /* Return MMCRx values */ - mmcr[0] = mmcr0; - mmcr[1] = mmcr1; - mmcr[2] = mmcra; - return 0; -} - -static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) -{ - int shift, i; - - if (pmc <= 1) { - shift = MMCR0_PMC1SEL_SH - 7 * pmc; - i = 0; - } else { - shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); - i = 1; - } - /* - * Setting the PMCxSEL field to 0x08 disables PMC x. - */ - mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); -} - -static int ppc970_generic_events[] = { - [PERF_COUNT_HW_CPU_CYCLES] = 7, - [PERF_COUNT_HW_INSTRUCTIONS] = 1, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ - [PERF_COUNT_HW_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ - [PERF_COUNT_HW_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ -}; - -#define C(x) PERF_COUNT_HW_CACHE_##x - -/* - * Table of generalized cache-related events. - * 0 means not supported, -1 means nonsensical, other values - * are event codes. 
- */ -static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = { - [C(L1D)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x8810, 0x3810 }, - [C(OP_WRITE)] = { 0x7810, 0x813 }, - [C(OP_PREFETCH)] = { 0x731, 0 }, - }, - [C(L1I)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { 0, 0 }, - }, - [C(LL)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0 }, - [C(OP_WRITE)] = { 0, 0 }, - [C(OP_PREFETCH)] = { 0x733, 0 }, - }, - [C(DTLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x704 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(ITLB)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0, 0x700 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, - [C(BPU)] = { /* RESULT_ACCESS RESULT_MISS */ - [C(OP_READ)] = { 0x431, 0x327 }, - [C(OP_WRITE)] = { -1, -1 }, - [C(OP_PREFETCH)] = { -1, -1 }, - }, -}; - -struct power_pmu ppc970_pmu = { - .n_counter = 8, - .max_alternatives = 2, - .add_fields = 0x001100005555ull, - .test_adder = 0x013300000000ull, - .compute_mmcr = p970_compute_mmcr, - .get_constraint = p970_get_constraint, - .get_alternatives = p970_get_alternatives, - .disable_pmc = p970_disable_pmc, - .n_generic = ARRAY_SIZE(ppc970_generic_events), - .generic_events = ppc970_generic_events, - .cache_events = &ppc970_cache_events, -}; diff --git a/trunk/arch/powerpc/mm/fault.c b/trunk/arch/powerpc/mm/fault.c index 5beffc8f481e..76993941cac9 100644 --- a/trunk/arch/powerpc/mm/fault.c +++ b/trunk/arch/powerpc/mm/fault.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -171,8 +170,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, die("Weird page fault", regs, SIGSEGV); } - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); - /* When running in the kernel we expect faults to occur only to * addresses in user space. 
All other faults represent errors in the * kernel and should generate an OOPS. Unfortunately, in the case of an @@ -312,8 +309,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, } if (ret & VM_FAULT_MAJOR) { current->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); #ifdef CONFIG_PPC_SMLPAR if (firmware_has_feature(FW_FEATURE_CMO)) { preempt_disable(); @@ -321,11 +316,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address, preempt_enable(); } #endif - } else { + } else current->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); - } up_read(&mm->mmap_sem); return 0; diff --git a/trunk/arch/powerpc/platforms/Kconfig.cputype b/trunk/arch/powerpc/platforms/Kconfig.cputype index 732ee93a8e98..9da795e49337 100644 --- a/trunk/arch/powerpc/platforms/Kconfig.cputype +++ b/trunk/arch/powerpc/platforms/Kconfig.cputype @@ -1,7 +1,6 @@ config PPC64 bool "64-bit kernel" default n - select HAVE_PERF_COUNTERS help This option selects whether a 32-bit or a 64-bit kernel will be built. 
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 68f5578fe38e..aafae3b140de 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -739,7 +739,6 @@ config X86_UP_IOAPIC config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC - select HAVE_PERF_COUNTERS if (!M386 && !M486) config X86_IO_APIC def_bool y diff --git a/trunk/arch/x86/ia32/ia32entry.S b/trunk/arch/x86/ia32/ia32entry.S index e590261ba059..dcef387ddc36 100644 --- a/trunk/arch/x86/ia32/ia32entry.S +++ b/trunk/arch/x86/ia32/ia32entry.S @@ -825,11 +825,10 @@ ia32_sys_call_table: .quad compat_sys_signalfd4 .quad sys_eventfd2 .quad sys_epoll_create1 - .quad sys_dup3 /* 330 */ + .quad sys_dup3 /* 330 */ .quad sys_pipe2 .quad sys_inotify_init1 .quad compat_sys_preadv .quad compat_sys_pwritev .quad compat_sys_rt_tgsigqueueinfo /* 335 */ - .quad sys_perf_counter_open ia32_syscall_end: diff --git a/trunk/arch/x86/include/asm/atomic_32.h b/trunk/arch/x86/include/asm/atomic_32.h index aff9f1fcdcd7..85b46fba4229 100644 --- a/trunk/arch/x86/include/asm/atomic_32.h +++ b/trunk/arch/x86/include/asm/atomic_32.h @@ -247,241 +247,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) #define smp_mb__before_atomic_inc() barrier() #define smp_mb__after_atomic_inc() barrier() -/* An 64bit atomic type */ - -typedef struct { - unsigned long long counter; -} atomic64_t; - -#define ATOMIC64_INIT(val) { (val) } - -/** - * atomic64_read - read atomic64 variable - * @v: pointer of type atomic64_t - * - * Atomically reads the value of @v. - * Doesn't imply a read memory barrier. 
- */ -#define __atomic64_read(ptr) ((ptr)->counter) - -static inline unsigned long long -cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) -{ - asm volatile( - - LOCK_PREFIX "cmpxchg8b (%[ptr])\n" - - : "=A" (old) - - : [ptr] "D" (ptr), - "A" (old), - "b" (ll_low(new)), - "c" (ll_high(new)) - - : "memory"); - - return old; -} - -static inline unsigned long long -atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, - unsigned long long new_val) -{ - return cmpxchg8b(&ptr->counter, old_val, new_val); -} - -/** - * atomic64_xchg - xchg atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * @old_val: old value that was there - * - * Atomically xchgs the value of @ptr to @new_val and returns - * the old value. - */ - -static inline unsigned long long -atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) -{ - unsigned long long old_val; - - do { - old_val = atomic_read(ptr); - } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); - - return old_val; -} - -/** - * atomic64_set - set atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically sets the value of @ptr to @new_val. - */ -static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) -{ - atomic64_xchg(ptr, new_val); -} - -/** - * atomic64_read - read atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically reads the value of @ptr and returns it. 
- */ -static inline unsigned long long atomic64_read(atomic64_t *ptr) -{ - unsigned long long curr_val; - - do { - curr_val = __atomic64_read(ptr); - } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); - - return curr_val; -} - -/** - * atomic64_add_return - add and return - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns @delta + *@ptr - */ -static inline unsigned long long -atomic64_add_return(unsigned long long delta, atomic64_t *ptr) -{ - unsigned long long old_val, new_val; - - do { - old_val = atomic_read(ptr); - new_val = old_val + delta; - - } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); - - return new_val; -} - -static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) -{ - return atomic64_add_return(-delta, ptr); -} - -static inline long atomic64_inc_return(atomic64_t *ptr) -{ - return atomic64_add_return(1, ptr); -} - -static inline long atomic64_dec_return(atomic64_t *ptr) -{ - return atomic64_sub_return(1, ptr); -} - -/** - * atomic64_add - add integer to atomic64 variable - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr. - */ -static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) -{ - atomic64_add_return(delta, ptr); -} - -/** - * atomic64_sub - subtract the atomic64 variable - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr. - */ -static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) -{ - atomic64_add(-delta, ptr); -} - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr and returns - * true if the result is zero, or false for all - * other cases. 
- */ -static inline int -atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) -{ - unsigned long long old_val = atomic64_sub_return(delta, ptr); - - return old_val == 0; -} - -/** - * atomic64_inc - increment atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1. - */ -static inline void atomic64_inc(atomic64_t *ptr) -{ - atomic64_add(1, ptr); -} - -/** - * atomic64_dec - decrement atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1. - */ -static inline void atomic64_dec(atomic64_t *ptr) -{ - atomic64_sub(1, ptr); -} - -/** - * atomic64_dec_and_test - decrement and test - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -static inline int atomic64_dec_and_test(atomic64_t *ptr) -{ - return atomic64_sub_and_test(1, ptr); -} - -/** - * atomic64_inc_and_test - increment and test - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -static inline int atomic64_inc_and_test(atomic64_t *ptr) -{ - return atomic64_sub_and_test(-1, ptr); -} - -/** - * atomic64_add_negative - add and test if negative - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. 
- */ -static inline int -atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) -{ - long long old_val = atomic64_add_return(delta, ptr); - - return old_val < 0; -} - #include #endif /* _ASM_X86_ATOMIC_32_H */ diff --git a/trunk/arch/x86/include/asm/entry_arch.h b/trunk/arch/x86/include/asm/entry_arch.h index d750a10ccad6..c2e6bedaf258 100644 --- a/trunk/arch/x86/include/asm/entry_arch.h +++ b/trunk/arch/x86/include/asm/entry_arch.h @@ -49,7 +49,7 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) #ifdef CONFIG_PERF_COUNTERS -BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) +BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) #endif #ifdef CONFIG_X86_MCE_P4THERMAL diff --git a/trunk/arch/x86/include/asm/hardirq.h b/trunk/arch/x86/include/asm/hardirq.h index 9ebc5c255032..37555e52f980 100644 --- a/trunk/arch/x86/include/asm/hardirq.h +++ b/trunk/arch/x86/include/asm/hardirq.h @@ -13,8 +13,6 @@ typedef struct { unsigned int irq_spurious_count; #endif unsigned int generic_irqs; /* arch dependent */ - unsigned int apic_perf_irqs; - unsigned int apic_pending_irqs; #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; diff --git a/trunk/arch/x86/include/asm/hw_irq.h b/trunk/arch/x86/include/asm/hw_irq.h index 6df45f639666..3bd1777a4c8b 100644 --- a/trunk/arch/x86/include/asm/hw_irq.h +++ b/trunk/arch/x86/include/asm/hw_irq.h @@ -29,8 +29,6 @@ extern void apic_timer_interrupt(void); extern void generic_interrupt(void); extern void error_interrupt(void); -extern void perf_pending_interrupt(void); - extern void spurious_interrupt(void); extern void thermal_interrupt(void); extern void reschedule_interrupt(void); diff --git a/trunk/arch/x86/include/asm/intel_arch_perfmon.h b/trunk/arch/x86/include/asm/intel_arch_perfmon.h new file mode 100644 index 000000000000..fa0fd068bc2e --- /dev/null +++ b/trunk/arch/x86/include/asm/intel_arch_perfmon.h @@ -0,0 +1,31 @@ 
+#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H +#define _ASM_X86_INTEL_ARCH_PERFMON_H + +#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 + +#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 + +#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) + +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) +#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ + (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) + +union cpuid10_eax { + struct { + unsigned int version_id:8; + unsigned int num_counters:8; + unsigned int bit_width:8; + unsigned int mask_length:8; + } split; + unsigned int full; +}; + +#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ diff --git a/trunk/arch/x86/include/asm/irq_vectors.h b/trunk/arch/x86/include/asm/irq_vectors.h index e997be98c9b9..910b5a3d6751 100644 --- a/trunk/arch/x86/include/asm/irq_vectors.h +++ b/trunk/arch/x86/include/asm/irq_vectors.h @@ -108,14 +108,14 @@ #define LOCAL_TIMER_VECTOR 0xef /* - * Generic system vector for platform specific use + * Performance monitoring interrupt vector: */ -#define GENERIC_INTERRUPT_VECTOR 0xed +#define LOCAL_PERF_VECTOR 0xee /* - * Performance monitoring pending work vector: + * Generic system vector for platform specific use */ -#define LOCAL_PENDING_VECTOR 0xec +#define GENERIC_INTERRUPT_VECTOR 0xed /* * First APIC vector available to drivers: (vectors 0x30-0xee) we diff --git a/trunk/arch/x86/include/asm/perf_counter.h b/trunk/arch/x86/include/asm/perf_counter.h deleted file mode 100644 index 876ed97147b3..000000000000 --- a/trunk/arch/x86/include/asm/perf_counter.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef _ASM_X86_PERF_COUNTER_H -#define _ASM_X86_PERF_COUNTER_H - -/* - * Performance counter hw details: - */ - 
-#define X86_PMC_MAX_GENERIC 8 -#define X86_PMC_MAX_FIXED 3 - -#define X86_PMC_IDX_GENERIC 0 -#define X86_PMC_IDX_FIXED 32 -#define X86_PMC_IDX_MAX 64 - -#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 -#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 - -#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 -#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 - -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -/* - * Includes eventsel and unit mask as well: - */ -#define ARCH_PERFMON_EVENT_MASK 0xffff - -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 -#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ - (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) - -#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 - -/* - * Intel "Architectural Performance Monitoring" CPUID - * detection/enumeration details: - */ -union cpuid10_eax { - struct { - unsigned int version_id:8; - unsigned int num_counters:8; - unsigned int bit_width:8; - unsigned int mask_length:8; - } split; - unsigned int full; -}; - -union cpuid10_edx { - struct { - unsigned int num_counters_fixed:4; - unsigned int reserved:28; - } split; - unsigned int full; -}; - - -/* - * Fixed-purpose performance counters: - */ - -/* - * All 3 fixed-mode PMCs are configured via this single MSR: - */ -#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d - -/* - * The counts are available in three separate MSRs: - */ - -/* Instr_Retired.Any: */ -#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 -#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) - -/* CPU_CLK_Unhalted.Core: */ -#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a -#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) - -/* CPU_CLK_Unhalted.Ref: */ -#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b -#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) - -extern void 
set_perf_counter_pending(void); - -#define clear_perf_counter_pending() do { } while (0) -#define test_perf_counter_pending() (0) - -#ifdef CONFIG_PERF_COUNTERS -extern void init_hw_perf_counters(void); -extern void perf_counters_lapic_init(void); -#else -static inline void init_hw_perf_counters(void) { } -static inline void perf_counters_lapic_init(void) { } -#endif - -#endif /* _ASM_X86_PERF_COUNTER_H */ diff --git a/trunk/arch/x86/include/asm/unistd_32.h b/trunk/arch/x86/include/asm/unistd_32.h index 732a30706153..708dae61262d 100644 --- a/trunk/arch/x86/include/asm/unistd_32.h +++ b/trunk/arch/x86/include/asm/unistd_32.h @@ -341,7 +341,6 @@ #define __NR_preadv 333 #define __NR_pwritev 334 #define __NR_rt_tgsigqueueinfo 335 -#define __NR_perf_counter_open 336 #ifdef __KERNEL__ diff --git a/trunk/arch/x86/include/asm/unistd_64.h b/trunk/arch/x86/include/asm/unistd_64.h index 900e1617e672..4e2b05404400 100644 --- a/trunk/arch/x86/include/asm/unistd_64.h +++ b/trunk/arch/x86/include/asm/unistd_64.h @@ -659,8 +659,7 @@ __SYSCALL(__NR_preadv, sys_preadv) __SYSCALL(__NR_pwritev, sys_pwritev) #define __NR_rt_tgsigqueueinfo 297 __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) -#define __NR_perf_counter_open 298 -__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) + #ifndef __NO_STUBS #define __ARCH_WANT_OLD_READDIR diff --git a/trunk/arch/x86/kernel/apic/apic.c b/trunk/arch/x86/kernel/apic/apic.c index 076d3881f3da..a4c9cf0bf70b 100644 --- a/trunk/arch/x86/kernel/apic/apic.c +++ b/trunk/arch/x86/kernel/apic/apic.c @@ -14,7 +14,6 @@ * Mikael Pettersson : PM converted to driver model. 
*/ -#include #include #include #include @@ -35,7 +34,6 @@ #include #include -#include #include #include #include @@ -1189,7 +1187,6 @@ void __cpuinit setup_local_APIC(void) apic_write(APIC_ESR, 0); } #endif - perf_counters_lapic_init(); preempt_disable(); diff --git a/trunk/arch/x86/kernel/apic/io_apic.c b/trunk/arch/x86/kernel/apic/io_apic.c index 94605e7f6a54..1946fac42ab3 100644 --- a/trunk/arch/x86/kernel/apic/io_apic.c +++ b/trunk/arch/x86/kernel/apic/io_apic.c @@ -177,18 +177,16 @@ int __init arch_early_irq_init(void) struct irq_cfg *cfg; struct irq_desc *desc; int count; - int node; int i; cfg = irq_cfgx; count = ARRAY_SIZE(irq_cfgx); - node= cpu_to_node(boot_cpu_id); for (i = 0; i < count; i++) { desc = irq_to_desc(i); desc->chip_data = &cfg[i]; - alloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); - alloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); + alloc_bootmem_cpumask_var(&cfg[i].domain); + alloc_bootmem_cpumask_var(&cfg[i].old_domain); if (i < NR_IRQS_LEGACY) cpumask_setall(cfg[i].domain); } diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile index 3efcb2b96a15..4e242f9a06e4 100644 --- a/trunk/arch/x86/kernel/cpu/Makefile +++ b/trunk/arch/x86/kernel/cpu/Makefile @@ -1,5 +1,5 @@ # -# Makefile for x86-compatible CPU details, features and quirks +# Makefile for x86-compatible CPU details and quirks # # Don't trace early stages of a secondary CPU boot @@ -23,13 +23,11 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o +obj-$(CONFIG_X86_MCE) += mcheck/ +obj-$(CONFIG_MTRR) += mtrr/ +obj-$(CONFIG_CPU_FREQ) += cpufreq/ -obj-$(CONFIG_X86_MCE) += mcheck/ -obj-$(CONFIG_MTRR) += mtrr/ -obj-$(CONFIG_CPU_FREQ) += cpufreq/ - -obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o +obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) 
$(srctree)/$(src)/mkcapflags.pl $< $@ diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c index 3ffdcfa9abdf..b0517aa2bd3b 100644 --- a/trunk/arch/x86/kernel/cpu/common.c +++ b/trunk/arch/x86/kernel/cpu/common.c @@ -13,7 +13,6 @@ #include #include -#include #include #include #include @@ -875,7 +874,6 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_counters(); } void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/trunk/arch/x86/kernel/cpu/perf_counter.c b/trunk/arch/x86/kernel/cpu/perf_counter.c deleted file mode 100644 index 895c82e78455..000000000000 --- a/trunk/arch/x86/kernel/cpu/perf_counter.c +++ /dev/null @@ -1,1704 +0,0 @@ -/* - * Performance counter x86 architecture code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2009 Jaswinder Singh Rajput - * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * - * For licencing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static u64 perf_counter_mask __read_mostly; - -struct cpu_hw_counters { - struct perf_counter *counters[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; - int enabled; -}; - -/* - * struct x86_pmu - generic x86 pmu - */ -struct x86_pmu { - const char *name; - int version; - int (*handle_irq)(struct pt_regs *); - void (*disable_all)(void); - void (*enable_all)(void); - void (*enable)(struct hw_perf_counter *, int); - void (*disable)(struct hw_perf_counter *, int); - unsigned eventsel; - unsigned perfctr; - u64 (*event_map)(int); - u64 (*raw_event)(u64); - int max_events; - int num_counters; - int num_counters_fixed; - int counter_bits; - 
u64 counter_mask; - u64 max_period; - u64 intel_ctrl; -}; - -static struct x86_pmu x86_pmu __read_mostly; - -static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { - .enabled = 1, -}; - -/* - * Intel PerfMon v3. Used on Core2 and later. - */ -static const u64 intel_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, -}; - -static u64 intel_pmu_event_map(int event) -{ - return intel_perfmon_event_map[event]; -} - -/* - * Generalized hw caching related event table, filled - * in on a per model basis. A value of 0 means - * 'not supported', -1 means 'event makes no sense on - * this CPU', any other value means the raw event - * ID. - */ - -#define C(x) PERF_COUNT_HW_CACHE_##x - -static u64 __read_mostly hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - -static const u64 nehalem_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) 
] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 core2_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { 
- [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static const u64 atom_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { 
- [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 
-1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static u64 intel_pmu_raw_event(u64 event) -{ -#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL -#define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL - -#define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_COUNTER_MASK) - - return event & CORE_EVNTSEL_MASK; -} - -static const u64 amd_0f_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ - [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. 
fetch ITLB misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ - [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * AMD Performance Monitor K7 and later. - */ -static const u64 amd_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, -}; - -static u64 amd_pmu_event_map(int event) -{ - return amd_perfmon_event_map[event]; -} - -static u64 amd_pmu_raw_event(u64 event) -{ -#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL -#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL -#define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL - -#define K7_EVNTSEL_MASK \ - (K7_EVNTSEL_EVENT_MASK | \ - K7_EVNTSEL_UNIT_MASK | \ - K7_EVNTSEL_EDGE_MASK | \ - K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_COUNTER_MASK) - - return event & K7_EVNTSEL_MASK; -} - -/* - * Propagate counter elapsed time into the generic counter. - * Can only be executed on the CPU where the counter is active. - * Returns the delta events processed. - */ -static u64 -x86_perf_counter_update(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - int shift = 64 - x86_pmu.counter_bits; - u64 prev_raw_count, new_raw_count; - s64 delta; - - /* - * Careful: an NMI might modify the previous counter value. 
- * - * Our tactic to handle this is to first atomically read and - * exchange a new raw count - then add that new-prev delta - * count to the generic counter atomically: - */ -again: - prev_raw_count = atomic64_read(&hwc->prev_count); - rdmsrl(hwc->counter_base + idx, new_raw_count); - - if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; - - /* - * Now we have the new raw value and have updated the prev - * timestamp already. We can now calculate the elapsed delta - * (counter-)time and add that to the generic counter. - * - * Careful, not all hw sign-extends above the physical width - * of the count. - */ - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); - - return new_raw_count; -} - -static atomic_t active_counters; -static DEFINE_MUTEX(pmc_reserve_mutex); - -static bool reserve_pmc_hardware(void) -{ - int i; - - if (nmi_watchdog == NMI_LOCAL_APIC) - disable_lapic_nmi_watchdog(); - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) - goto perfctr_fail; - } - - for (i = 0; i < x86_pmu.num_counters; i++) { - if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) - goto eventsel_fail; - } - - return true; - -eventsel_fail: - for (i--; i >= 0; i--) - release_evntsel_nmi(x86_pmu.eventsel + i); - - i = x86_pmu.num_counters; - -perfctr_fail: - for (i--; i >= 0; i--) - release_perfctr_nmi(x86_pmu.perfctr + i); - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); - - return false; -} - -static void release_pmc_hardware(void) -{ - int i; - - for (i = 0; i < x86_pmu.num_counters; i++) { - release_perfctr_nmi(x86_pmu.perfctr + i); - release_evntsel_nmi(x86_pmu.eventsel + i); - } - - if (nmi_watchdog == NMI_LOCAL_APIC) - enable_lapic_nmi_watchdog(); -} - -static void hw_perf_counter_destroy(struct perf_counter *counter) -{ - if 
(atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { - release_pmc_hardware(); - mutex_unlock(&pmc_reserve_mutex); - } -} - -static inline int x86_pmu_initialized(void) -{ - return x86_pmu.handle_irq != NULL; -} - -static inline int -set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) -{ - unsigned int cache_type, cache_op, cache_result; - u64 config, val; - - config = attr->config; - - cache_type = (config >> 0) & 0xff; - if (cache_type >= PERF_COUNT_HW_CACHE_MAX) - return -EINVAL; - - cache_op = (config >> 8) & 0xff; - if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) - return -EINVAL; - - cache_result = (config >> 16) & 0xff; - if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) - return -EINVAL; - - val = hw_cache_event_ids[cache_type][cache_op][cache_result]; - - if (val == 0) - return -ENOENT; - - if (val == -1) - return -EINVAL; - - hwc->config |= val; - - return 0; -} - -/* - * Setup the hardware configuration for a given attr_type - */ -static int __hw_perf_counter_init(struct perf_counter *counter) -{ - struct perf_counter_attr *attr = &counter->attr; - struct hw_perf_counter *hwc = &counter->hw; - int err; - - if (!x86_pmu_initialized()) - return -ENODEV; - - err = 0; - if (!atomic_inc_not_zero(&active_counters)) { - mutex_lock(&pmc_reserve_mutex); - if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) - err = -EBUSY; - else - atomic_inc(&active_counters); - mutex_unlock(&pmc_reserve_mutex); - } - if (err) - return err; - - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - - /* - * Count user and OS events unless requested not to. 
- */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; - - if (!hwc->sample_period) { - hwc->sample_period = x86_pmu.max_period; - hwc->last_period = hwc->sample_period; - atomic64_set(&hwc->period_left, hwc->sample_period); - } - - counter->destroy = hw_perf_counter_destroy; - - /* - * Raw event type provide the config in the event structure - */ - if (attr->type == PERF_TYPE_RAW) { - hwc->config |= x86_pmu.raw_event(attr->config); - return 0; - } - - if (attr->type == PERF_TYPE_HW_CACHE) - return set_ext_hw_attr(hwc, attr); - - if (attr->config >= x86_pmu.max_events) - return -EINVAL; - /* - * The generic map: - */ - hwc->config |= x86_pmu.event_map(attr->config); - - return 0; -} - -static void intel_pmu_disable_all(void) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); -} - -static void amd_pmu_disable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * counters proper, so that amd_pmu_enable_counter() does the - * right thing. 
- */ - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) - continue; - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_disable(void) -{ - if (!x86_pmu_initialized()) - return; - return x86_pmu.disable_all(); -} - -static void intel_pmu_enable_all(void) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); -} - -static void amd_pmu_enable_all(void) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - int idx; - - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - u64 val; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) - continue; - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); - } -} - -void hw_perf_enable(void) -{ - if (!x86_pmu_initialized()) - return; - x86_pmu.enable_all(); -} - -static inline u64 intel_pmu_get_status(void) -{ - u64 status; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static inline void intel_pmu_ack_status(u64 ack) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); -} - -static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - int err; - err = checking_wrmsrl(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); -} - -static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - int err; - err = checking_wrmsrl(hwc->config_base + idx, - hwc->config); -} - -static inline void -intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, mask; - int err; - - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - err = 
checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static inline void -intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc, idx); - return; - } - - x86_pmu_disable_counter(hwc, idx); -} - -static inline void -amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) -{ - x86_pmu_disable_counter(hwc, idx); -} - -static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); - -/* - * Set the next IRQ period, based on the hwc->period_left value. - * To be called with the counter disabled in hw: - */ -static int -x86_perf_counter_set_period(struct perf_counter *counter, - struct hw_perf_counter *hwc, int idx) -{ - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - int err, ret = 0; - - /* - * If we are way outside a reasoable range then just skip forward: - */ - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - ret = 1; - } - /* - * Quirk: certain CPUs dont like it if just 1 event is left: - */ - if (unlikely(left < 2)) - left = 2; - - if (left > x86_pmu.max_period) - left = x86_pmu.max_period; - - per_cpu(prev_left[idx], smp_processor_id()) = left; - - /* - * The hw counter starts counting from this counter offset, - * mark it to be able to extra future deltas: - */ - atomic64_set(&hwc->prev_count, (u64)-left); - - err = checking_wrmsrl(hwc->counter_base + idx, - (u64)(-left) & x86_pmu.counter_mask); - - return ret; -} - -static inline void -intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; - int err; - - /* - * Enable IRQ generation (0x8), - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: - */ - bits = 0x8ULL; 
- if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc, idx); - return; - } - - x86_pmu_enable_counter(hwc, idx); -} - -static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - - if (cpuc->enabled) - x86_pmu_enable_counter(hwc, idx); - else - x86_pmu_disable_counter(hwc, idx); -} - -static int -fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) -{ - unsigned int event; - - if (!x86_pmu.num_counters_fixed) - return -1; - - event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in counter: - */ -static int x86_pmu_enable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx; - - idx = fixed_mode_idx(counter, hwc); - if (idx >= 0) { - /* - * Try to get the fixed counter, if that is already taken - * then try to get a generic counter: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that counter_base + idx in wrmsr/rdmsr maps to - * 
MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->counter_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic counter again */ - if (test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = find_first_zero_bit(cpuc->used_mask, - x86_pmu.num_counters); - if (idx == x86_pmu.num_counters) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->counter_base = x86_pmu.perfctr; - } - - perf_counters_lapic_init(); - - x86_pmu.disable(hwc, idx); - - cpuc->counters[idx] = counter; - set_bit(idx, cpuc->active_mask); - - x86_perf_counter_set_period(counter, hwc, idx); - x86_pmu.enable(hwc, idx); - - return 0; -} - -static void x86_pmu_unthrottle(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - - if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || - cpuc->counters[hwc->idx] != counter)) - return; - - x86_pmu.enable(hwc, hwc->idx); -} - -void perf_counter_print_debug(void) -{ - u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; - struct cpu_hw_counters *cpuc; - unsigned long flags; - int cpu, idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - if (x86_pmu.version >= 2) { - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); - - pr_info("\n"); - pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); - pr_info("CPU#%d: status: %016llx\n", cpu, status); - pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); - pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); - } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - 
rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); - rdmsrl(x86_pmu.perfctr + idx, pmc_count); - - prev_left = per_cpu(prev_left[idx], cpu); - - pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", - cpu, idx, pmc_ctrl); - pr_info("CPU#%d: gen-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - pr_info("CPU#%d: gen-PMC%d left: %016llx\n", - cpu, idx, prev_left); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); - - pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", - cpu, idx, pmc_count); - } - local_irq_restore(flags); -} - -static void x86_pmu_disable(struct perf_counter *counter) -{ - struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - - /* - * Must be done before we disable, otherwise the nmi handler - * could reenable again: - */ - clear_bit(idx, cpuc->active_mask); - x86_pmu.disable(hwc, idx); - - /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the counter: - */ - barrier(); - - /* - * Drain the remaining delta count out of a counter - * that we are disabling: - */ - x86_perf_counter_update(counter, hwc, idx); - cpuc->counters[idx] = NULL; - clear_bit(idx, cpuc->used_mask); -} - -/* - * Save and restart an expired counter. 
Called by NMI contexts, - * so it has to be careful about preempting normal counter ops: - */ -static int intel_pmu_save_and_restart(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int idx = hwc->idx; - int ret; - - x86_perf_counter_update(counter, hwc, idx); - ret = x86_perf_counter_set_period(counter, hwc, idx); - - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - intel_pmu_enable_counter(hwc, idx); - - return ret; -} - -static void intel_pmu_reset(void) -{ - unsigned long flags; - int idx; - - if (!x86_pmu.num_counters) - return; - - local_irq_save(flags); - - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); - } - for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - } - - local_irq_restore(flags); -} - - -/* - * This handler is triggered by the local APIC, so the APIC IRQ handling - * rules apply: - */ -static int intel_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - int bit, cpu, loops; - u64 ack, status; - - data.regs = regs; - data.addr = 0; - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - perf_disable(); - status = intel_pmu_get_status(); - if (!status) { - perf_enable(); - return 0; - } - - loops = 0; -again: - if (++loops > 100) { - WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); - perf_counter_print_debug(); - intel_pmu_reset(); - perf_enable(); - return 1; - } - - inc_irq_stat(apic_perf_irqs); - ack = status; - for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_counter *counter = cpuc->counters[bit]; - - clear_bit(bit, (unsigned long *) &status); - if (!test_bit(bit, cpuc->active_mask)) - continue; - - if (!intel_pmu_save_and_restart(counter)) - continue; - - if 
(perf_counter_overflow(counter, 1, &data)) - intel_pmu_disable_counter(&counter->hw, bit); - } - - intel_pmu_ack_status(ack); - - /* - * Repeat if there is more work to be done: - */ - status = intel_pmu_get_status(); - if (status) - goto again; - - perf_enable(); - - return 1; -} - -static int amd_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_counters *cpuc; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - int cpu, idx, handled = 0; - u64 val; - - data.regs = regs; - data.addr = 0; - - cpu = smp_processor_id(); - cpuc = &per_cpu(cpu_hw_counters, cpu); - - for (idx = 0; idx < x86_pmu.num_counters; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - counter = cpuc->counters[idx]; - hwc = &counter->hw; - - val = x86_perf_counter_update(counter, hwc, idx); - if (val & (1ULL << (x86_pmu.counter_bits - 1))) - continue; - - /* - * counter overflow - */ - handled = 1; - data.period = counter->hw.last_period; - - if (!x86_perf_counter_set_period(counter, hwc, idx)) - continue; - - if (perf_counter_overflow(counter, 1, &data)) - amd_pmu_disable_counter(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - -void smp_perf_pending_interrupt(struct pt_regs *regs) -{ - irq_enter(); - ack_APIC_irq(); - inc_irq_stat(apic_pending_irqs); - perf_counter_do_pending(); - irq_exit(); -} - -void set_perf_counter_pending(void) -{ - apic->send_IPI_self(LOCAL_PENDING_VECTOR); -} - -void perf_counters_lapic_init(void) -{ - if (!x86_pmu_initialized()) - return; - - /* - * Always use NMI for PMU - */ - apic_write(APIC_LVTPC, APIC_DM_NMI); -} - -static int __kprobes -perf_counter_nmi_handler(struct notifier_block *self, - unsigned long cmd, void *__args) -{ - struct die_args *args = __args; - struct pt_regs *regs; - - if (!atomic_read(&active_counters)) - return NOTIFY_DONE; - - switch (cmd) { - case DIE_NMI: - case DIE_NMI_IPI: - break; - - default: - return NOTIFY_DONE; - } - - regs = 
args->regs; - - apic_write(APIC_LVTPC, APIC_DM_NMI); - /* - * Can't rely on the handled return value to say it was our NMI, two - * counters could trigger 'simultaneously' raising two back-to-back NMIs. - * - * If the first NMI handles both, the latter will be empty and daze - * the CPU. - */ - x86_pmu.handle_irq(regs); - - return NOTIFY_STOP; -} - -static __read_mostly struct notifier_block perf_counter_nmi_notifier = { - .notifier_call = perf_counter_nmi_handler, - .next = NULL, - .priority = 1 -}; - -static struct x86_pmu intel_pmu = { - .name = "Intel", - .handle_irq = intel_pmu_handle_irq, - .disable_all = intel_pmu_disable_all, - .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_counter, - .disable = intel_pmu_disable_counter, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic counter period: - */ - .max_period = (1ULL << 31) - 1, -}; - -static struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = amd_pmu_handle_irq, - .disable_all = amd_pmu_disable_all, - .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_counter, - .disable = amd_pmu_disable_counter, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_counters = 4, - .counter_bits = 48, - .counter_mask = (1ULL << 48) - 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, -}; - -static int intel_pmu_init(void) -{ - union cpuid10_edx edx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int ebx; - int version; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) - return -ENODEV; - - /* - * Check whether the 
Architectural PerfMon supports - * Branch Misses Retired Event or not. - */ - cpuid(10, &eax.full, &ebx, &unused, &edx.full); - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return -ENODEV; - - version = eax.split.version_id; - if (version < 2) - return -ENODEV; - - x86_pmu = intel_pmu; - x86_pmu.version = version; - x86_pmu.num_counters = eax.split.num_counters; - x86_pmu.counter_bits = eax.split.bit_width; - x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; - - /* - * Quirk: v2 perfmon does not report fixed-purpose counters, so - * assume at least 3 counters: - */ - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); - - rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); - - /* - * Install the hw-cache-events table: - */ - switch (boot_cpu_data.x86_model) { - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ - memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Core2 events, "); - break; - default: - case 26: - memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Nehalem/Corei7 events, "); - break; - case 28: - memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("Atom events, "); - break; - } - return 0; -} - -static int amd_pmu_init(void) -{ - x86_pmu = amd_pmu; - - switch (boot_cpu_data.x86) { - case 0x0f: - case 0x10: - case 0x11: - memcpy(hw_cache_event_ids, amd_0f_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - pr_cont("AMD Family 0f/10/11 events, "); - break; - } - return 0; -} - -void __init init_hw_perf_counters(void) -{ - int err; - - pr_info("Performance Counters: "); - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - err = 
intel_pmu_init(); - break; - case X86_VENDOR_AMD: - err = amd_pmu_init(); - break; - default: - return; - } - if (err != 0) { - pr_cont("no PMU driver, software counters only.\n"); - return; - } - - pr_cont("%s PMU driver.\n", x86_pmu.name); - - if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { - x86_pmu.num_counters = X86_PMC_MAX_GENERIC; - WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", - x86_pmu.num_counters, X86_PMC_MAX_GENERIC); - } - perf_counter_mask = (1 << x86_pmu.num_counters) - 1; - perf_max_counters = x86_pmu.num_counters; - - if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { - x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; - WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", - x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); - } - - perf_counter_mask |= - ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; - - perf_counters_lapic_init(); - register_die_notifier(&perf_counter_nmi_notifier); - - pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.counter_bits); - pr_info("... generic counters: %d\n", x86_pmu.num_counters); - pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); - pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); - pr_info("... 
counter mask: %016Lx\n", perf_counter_mask); -} - -static inline void x86_pmu_read(struct perf_counter *counter) -{ - x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); -} - -static const struct pmu pmu = { - .enable = x86_pmu_enable, - .disable = x86_pmu_disable, - .read = x86_pmu_read, - .unthrottle = x86_pmu_unthrottle, -}; - -const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - int err; - - err = __hw_perf_counter_init(counter); - if (err) - return ERR_PTR(err); - - return &pmu; -} - -/* - * callchain support - */ - -static inline -void callchain_store(struct perf_callchain_entry *entry, unsigned long ip) -{ - if (entry->nr < MAX_STACK_DEPTH) - entry->ip[entry->nr++] = ip; -} - -static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); -static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); - - -static void -backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) -{ - /* Ignore warnings */ -} - -static void backtrace_warning(void *data, char *msg) -{ - /* Ignore warnings */ -} - -static int backtrace_stack(void *data, char *name) -{ - /* Don't bother with IRQ stacks for now */ - return -1; -} - -static void backtrace_address(void *data, unsigned long addr, int reliable) -{ - struct perf_callchain_entry *entry = data; - - if (reliable) - callchain_store(entry, addr); -} - -static const struct stacktrace_ops backtrace_ops = { - .warning = backtrace_warning, - .warning_symbol = backtrace_warning_symbol, - .stack = backtrace_stack, - .address = backtrace_address, -}; - -static void -perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - unsigned long bp; - char *stack; - int nr = entry->nr; - - callchain_store(entry, instruction_pointer(regs)); - - stack = ((char *)regs + sizeof(struct pt_regs)); -#ifdef CONFIG_FRAME_POINTER - bp = frame_pointer(regs); -#else - bp = 0; -#endif - - dump_trace(NULL, regs, (void *)stack, bp, &backtrace_ops, entry); - - entry->kernel = entry->nr 
- nr; -} - - -struct stack_frame { - const void __user *next_fp; - unsigned long return_address; -}; - -static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) -{ - int ret; - - if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) - return 0; - - ret = 1; - pagefault_disable(); - if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) - ret = 0; - pagefault_enable(); - - return ret; -} - -static void -perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - struct stack_frame frame; - const void __user *fp; - int nr = entry->nr; - - regs = (struct pt_regs *)current->thread.sp0 - 1; - fp = (void __user *)regs->bp; - - callchain_store(entry, regs->ip); - - while (entry->nr < MAX_STACK_DEPTH) { - frame.next_fp = NULL; - frame.return_address = 0; - - if (!copy_stack_frame(fp, &frame)) - break; - - if ((unsigned long)fp < user_stack_pointer(regs)) - break; - - callchain_store(entry, frame.return_address); - fp = frame.next_fp; - } - - entry->user = entry->nr - nr; -} - -static void -perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) -{ - int is_user; - - if (!regs) - return; - - is_user = user_mode(regs); - - if (!current || current->pid == 0) - return; - - if (is_user && current->state != TASK_RUNNING) - return; - - if (!is_user) - perf_callchain_kernel(regs, entry); - - if (current->mm) - perf_callchain_user(regs, entry); -} - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry; - - if (in_nmi()) - entry = &__get_cpu_var(nmi_entry); - else - entry = &__get_cpu_var(irq_entry); - - entry->nr = 0; - entry->hv = 0; - entry->kernel = 0; - entry->user = 0; - - perf_do_callchain(regs, entry); - - return entry; -} diff --git a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c index d6f5b9fbde32..f6c70a164e32 100644 --- a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c +++ 
b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -19,8 +19,8 @@ #include #include -#include -#include +#include +#include struct nmi_watchdog_ctlblk { unsigned int cccr_msr; diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index a4742a340d8d..1c17d7c751a4 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -1012,11 +1012,6 @@ apicinterrupt ERROR_APIC_VECTOR \ apicinterrupt SPURIOUS_APIC_VECTOR \ spurious_interrupt smp_spurious_interrupt -#ifdef CONFIG_PERF_COUNTERS -apicinterrupt LOCAL_PENDING_VECTOR \ - perf_pending_interrupt smp_perf_pending_interrupt -#endif - /* * Exception entry points. */ diff --git a/trunk/arch/x86/kernel/irq.c b/trunk/arch/x86/kernel/irq.c index 38287b5f116e..9a391bbb8ba8 100644 --- a/trunk/arch/x86/kernel/irq.c +++ b/trunk/arch/x86/kernel/irq.c @@ -62,14 +62,6 @@ static int show_other_interrupts(struct seq_file *p, int prec) for_each_online_cpu(j) seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "CNT"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); - seq_printf(p, " Performance counter interrupts\n"); - seq_printf(p, "%*s: ", prec, "PND"); - for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); - seq_printf(p, " Performance pending work\n"); #endif if (generic_interrupt_extension) { seq_printf(p, "%*s: ", prec, "PLT"); @@ -173,8 +165,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) #ifdef CONFIG_X86_LOCAL_APIC sum += irq_stats(cpu)->apic_timer_irqs; sum += irq_stats(cpu)->irq_spurious_count; - sum += irq_stats(cpu)->apic_perf_irqs; - sum += irq_stats(cpu)->apic_pending_irqs; #endif if (generic_interrupt_extension) sum += irq_stats(cpu)->generic_irqs; diff --git a/trunk/arch/x86/kernel/irqinit.c b/trunk/arch/x86/kernel/irqinit.c index 267c6624c77f..2e08b10ad51a 100644 --- a/trunk/arch/x86/kernel/irqinit.c +++ 
b/trunk/arch/x86/kernel/irqinit.c @@ -181,15 +181,10 @@ static void __init apic_intr_init(void) { smp_intr_init(); -#ifdef CONFIG_X86_THERMAL_VECTOR +#ifdef CONFIG_X86_64 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); -#endif -#ifdef CONFIG_X86_THRESHOLD alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); #endif -#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) - alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); -#endif #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) /* self generated IPI for local APIC timer */ @@ -204,9 +199,17 @@ static void __init apic_intr_init(void) /* Performance monitoring interrupts: */ # ifdef CONFIG_PERF_COUNTERS + alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); # endif +#endif + +#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); +#endif #endif } diff --git a/trunk/arch/x86/kernel/signal.c b/trunk/arch/x86/kernel/signal.c index 0a813b17b172..14425166b8e3 100644 --- a/trunk/arch/x86/kernel/signal.c +++ b/trunk/arch/x86/kernel/signal.c @@ -6,6 +6,7 @@ * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes * 2000-2002 x86-64 support by Andi Kleen */ + #include #include #include diff --git a/trunk/arch/x86/kernel/syscall_table_32.S b/trunk/arch/x86/kernel/syscall_table_32.S index d51321ddafda..734f92c02dde 100644 --- a/trunk/arch/x86/kernel/syscall_table_32.S +++ b/trunk/arch/x86/kernel/syscall_table_32.S @@ -335,4 +335,3 @@ ENTRY(sys_call_table) .long sys_preadv .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ - .long sys_perf_counter_open diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index 07d60c870ce2..ede024531f8f 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -942,13 +942,8 @@ void __init 
trap_init(void) #endif set_intr_gate(19, &simd_coprocessor_error); - /* Reserve all the builtin and the syscall vector: */ - for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) - set_bit(i, used_vectors); - #ifdef CONFIG_IA32_EMULATION set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); - set_bit(IA32_SYSCALL_VECTOR, used_vectors); #endif #ifdef CONFIG_X86_32 @@ -965,9 +960,14 @@ void __init trap_init(void) } set_system_trap_gate(SYSCALL_VECTOR, &system_call); - set_bit(SYSCALL_VECTOR, used_vectors); #endif + /* Reserve all the builtin and the syscall vector: */ + for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) + set_bit(i, used_vectors); + + set_bit(IA32_SYSCALL_VECTOR, used_vectors); + /* * Should be a barrier for any external CPU state: */ diff --git a/trunk/arch/x86/mm/fault.c b/trunk/arch/x86/mm/fault.c index c6acc6326374..5ec7ae366615 100644 --- a/trunk/arch/x86/mm/fault.c +++ b/trunk/arch/x86/mm/fault.c @@ -10,7 +10,6 @@ #include /* max_low_pfn */ #include /* __kprobes, ... */ #include /* kmmio_handler, ... */ -#include /* perf_swcounter_event */ #include /* dotraplinkage, ... */ #include /* pgd_*(), ... 
*/ @@ -1014,8 +1013,6 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) if (unlikely(error_code & PF_RSVD)) pgtable_bad(regs, error_code, address); - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); - /* * If we're in an interrupt, have no user context or are running * in an atomic region then we must not take the fault: @@ -1109,15 +1106,10 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) return; } - if (fault & VM_FAULT_MAJOR) { + if (fault & VM_FAULT_MAJOR) tsk->maj_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, - regs, address); - } else { + else tsk->min_flt++; - perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, - regs, address); - } check_v8086_mode(regs, address, tsk); diff --git a/trunk/arch/x86/mm/memtest.c b/trunk/arch/x86/mm/memtest.c index 18d244f70205..c0bedcd10f97 100644 --- a/trunk/arch/x86/mm/memtest.c +++ b/trunk/arch/x86/mm/memtest.c @@ -40,20 +40,21 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) static void __init memtest(u64 pattern, u64 start_phys, u64 size) { - u64 *p, *start, *end; + u64 *p; + void *start, *end; u64 start_bad, last_bad; u64 start_phys_aligned; - const size_t incr = sizeof(pattern); + size_t incr; + incr = sizeof(pattern); start_phys_aligned = ALIGN(start_phys, incr); start = __va(start_phys_aligned); - end = start + (size - (start_phys_aligned - start_phys)) / incr; + end = start + size - (start_phys_aligned - start_phys); start_bad = 0; last_bad = 0; for (p = start; p < end; p++) *p = pattern; - for (p = start; p < end; p++, start_phys_aligned += incr) { if (*p == pattern) continue; diff --git a/trunk/arch/x86/oprofile/nmi_int.c b/trunk/arch/x86/oprofile/nmi_int.c index b07dd8d0b321..3b285e656e27 100644 --- a/trunk/arch/x86/oprofile/nmi_int.c +++ b/trunk/arch/x86/oprofile/nmi_int.c @@ -40,9 +40,8 @@ static int profile_exceptions_notify(struct notifier_block *self, switch (val) { case DIE_NMI: - case DIE_NMI_IPI: - 
model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); - ret = NOTIFY_STOP; + if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) + ret = NOTIFY_STOP; break; default: break; @@ -135,7 +134,7 @@ static void nmi_cpu_setup(void *dummy) static struct notifier_block profile_exceptions_nb = { .notifier_call = profile_exceptions_notify, .next = NULL, - .priority = 2 + .priority = 0 }; static int nmi_setup(void) diff --git a/trunk/arch/x86/oprofile/op_model_ppro.c b/trunk/arch/x86/oprofile/op_model_ppro.c index 4da7230b3d17..10131fbdaada 100644 --- a/trunk/arch/x86/oprofile/op_model_ppro.c +++ b/trunk/arch/x86/oprofile/op_model_ppro.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "op_x86_model.h" #include "op_counter.h" @@ -136,13 +136,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs, u64 val; int i; - /* - * This can happen if perf counters are in use when - * we steal the die notifier NMI. - */ - if (unlikely(!reset_value)) - goto out; - for (i = 0 ; i < num_counters; ++i) { if (!reset_value[i]) continue; @@ -153,7 +146,6 @@ static int ppro_check_ctrs(struct pt_regs * const regs, } } -out: /* Only P6 based Pentium M need to re-unmask the apic vector but it * doesn't hurt other P6 variant */ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); diff --git a/trunk/arch/x86/vdso/vdso32-setup.c b/trunk/arch/x86/vdso/vdso32-setup.c index 58bc00f68b12..1241f118ab56 100644 --- a/trunk/arch/x86/vdso/vdso32-setup.c +++ b/trunk/arch/x86/vdso/vdso32-setup.c @@ -338,8 +338,6 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) } } - current->mm->context.vdso = (void *)addr; - if (compat_uses_vma || !compat) { /* * MAYWRITE to allow gdb to COW and set breakpoints @@ -360,13 +358,11 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto up_fail; } + current->mm->context.vdso = (void *)addr; current_thread_info()->sysenter_return = VDSO32_SYMBOL(addr, SYSENTER_RETURN); 
up_fail: - if (ret) - current->mm->context.vdso = NULL; - up_write(&mm->mmap_sem); return ret; diff --git a/trunk/arch/x86/vdso/vma.c b/trunk/arch/x86/vdso/vma.c index 21e1aeb9f3ea..cac083386e03 100644 --- a/trunk/arch/x86/vdso/vma.c +++ b/trunk/arch/x86/vdso/vma.c @@ -116,18 +116,15 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto up_fail; } - current->mm->context.vdso = (void *)addr; - ret = install_special_mapping(mm, addr, vdso_size, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| VM_ALWAYSDUMP, vdso_pages); - if (ret) { - current->mm->context.vdso = NULL; + if (ret) goto up_fail; - } + current->mm->context.vdso = (void *)addr; up_fail: up_write(&mm->mmap_sem); return ret; diff --git a/trunk/drivers/ata/ahci.c b/trunk/drivers/ata/ahci.c index 15a23031833f..6b91c26a4635 100644 --- a/trunk/drivers/ata/ahci.c +++ b/trunk/drivers/ata/ahci.c @@ -77,6 +77,8 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, size_t size); static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state, ssize_t size); +#define MAX_SLOTS 8 +#define MAX_RETRY 15 enum { AHCI_PCI_BAR = 5, @@ -229,10 +231,6 @@ enum { ICH_MAP = 0x90, /* ICH MAP register */ - /* em constants */ - EM_MAX_SLOTS = 8, - EM_MAX_RETRY = 5, - /* em_ctl bits */ EM_CTL_RST = (1 << 9), /* Reset */ EM_CTL_TM = (1 << 8), /* Transmit Message */ @@ -284,8 +282,8 @@ struct ahci_port_priv { unsigned int ncq_saw_dmas:1; unsigned int ncq_saw_sdb:1; u32 intr_mask; /* interrupts to enable */ - /* enclosure management info per PM slot */ - struct ahci_em_priv em_priv[EM_MAX_SLOTS]; + struct ahci_em_priv em_priv[MAX_SLOTS];/* enclosure management info + * per PM slot */ }; static int ahci_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); @@ -315,6 +313,7 @@ static void ahci_error_handler(struct ata_port *ap); static void ahci_post_internal_cmd(struct ata_queued_cmd *qc); static int ahci_port_resume(struct ata_port *ap); static void 
ahci_dev_config(struct ata_device *dev); +static unsigned int ahci_fill_sg(struct ata_queued_cmd *qc, void *cmd_tbl); static void ahci_fill_cmd_slot(struct ahci_port_priv *pp, unsigned int tag, u32 opts); #ifdef CONFIG_PM @@ -405,14 +404,14 @@ static struct ata_port_operations ahci_sb600_ops = { #define AHCI_HFLAGS(flags) .private_data = (void *)(flags) static const struct ata_port_info ahci_port_info[] = { - [board_ahci] = + /* board_ahci */ { .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, - [board_ahci_vt8251] = + /* board_ahci_vt8251 */ { AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_PMP), .flags = AHCI_FLAG_COMMON, @@ -420,7 +419,7 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_vt8251_ops, }, - [board_ahci_ign_iferr] = + /* board_ahci_ign_iferr */ { AHCI_HFLAGS (AHCI_HFLAG_IGN_IRQ_IF_ERR), .flags = AHCI_FLAG_COMMON, @@ -428,16 +427,17 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, - [board_ahci_sb600] = + /* board_ahci_sb600 */ { AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL | - AHCI_HFLAG_NO_MSI | AHCI_HFLAG_SECT255), + AHCI_HFLAG_32BIT_ONLY | AHCI_HFLAG_NO_MSI | + AHCI_HFLAG_SECT255), .flags = AHCI_FLAG_COMMON, .pio_mask = ATA_PIO4, .udma_mask = ATA_UDMA6, .port_ops = &ahci_sb600_ops, }, - [board_ahci_mv] = + /* board_ahci_mv */ { AHCI_HFLAGS (AHCI_HFLAG_NO_NCQ | AHCI_HFLAG_NO_MSI | AHCI_HFLAG_MV_PATA | AHCI_HFLAG_NO_PMP), @@ -447,7 +447,7 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, - [board_ahci_sb700] = /* for SB700 and SB800 */ + /* board_ahci_sb700, for SB700 and SB800 */ { AHCI_HFLAGS (AHCI_HFLAG_IGN_SERR_INTERNAL), .flags = AHCI_FLAG_COMMON, @@ -455,7 +455,7 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_sb600_ops, }, - [board_ahci_mcp65] = + /* board_ahci_mcp65 */ { AHCI_HFLAGS 
(AHCI_HFLAG_YES_NCQ), .flags = AHCI_FLAG_COMMON, @@ -463,7 +463,7 @@ static const struct ata_port_info ahci_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &ahci_ops, }, - [board_ahci_nopmp] = + /* board_ahci_nopmp */ { AHCI_HFLAGS (AHCI_HFLAG_NO_PMP), .flags = AHCI_FLAG_COMMON, @@ -1141,12 +1141,12 @@ static void ahci_start_port(struct ata_port *ap) emp = &pp->em_priv[link->pmp]; /* EM Transmit bit maybe busy during init */ - for (i = 0; i < EM_MAX_RETRY; i++) { + for (i = 0; i < MAX_RETRY; i++) { rc = ahci_transmit_led_message(ap, emp->led_state, 4); if (rc == -EBUSY) - msleep(1); + udelay(100); else break; } @@ -1340,7 +1340,7 @@ static ssize_t ahci_transmit_led_message(struct ata_port *ap, u32 state, /* get the slot number from the message */ pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; - if (pmp < EM_MAX_SLOTS) + if (pmp < MAX_SLOTS) emp = &pp->em_priv[pmp]; else return -EINVAL; @@ -1408,7 +1408,7 @@ static ssize_t ahci_led_store(struct ata_port *ap, const char *buf, /* get the slot number from the message */ pmp = (state & EM_MSG_LED_PMP_SLOT) >> 8; - if (pmp < EM_MAX_SLOTS) + if (pmp < MAX_SLOTS) emp = &pp->em_priv[pmp]; else return -EINVAL; @@ -2584,51 +2584,6 @@ static void ahci_p5wdh_workaround(struct ata_host *host) } } -/* - * SB600 ahci controller on ASUS M2A-VM can't do 64bit DMA with older - * BIOS. The oldest version known to be broken is 0901 and working is - * 1501 which was released on 2007-10-26. Force 32bit DMA on anything - * older than 1501. Please read bko#9412 for more info. 
- */ -static bool ahci_asus_m2a_vm_32bit_only(struct pci_dev *pdev) -{ - static const struct dmi_system_id sysids[] = { - { - .ident = "ASUS M2A-VM", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, - "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "M2A-VM"), - }, - }, - { } - }; - const char *cutoff_mmdd = "10/26"; - const char *date; - int year; - - if (pdev->bus->number != 0 || pdev->devfn != PCI_DEVFN(0x12, 0) || - !dmi_check_system(sysids)) - return false; - - /* - * Argh.... both version and date are free form strings. - * Let's hope they're using the same date format across - * different versions. - */ - date = dmi_get_system_info(DMI_BIOS_DATE); - year = dmi_get_year(DMI_BIOS_DATE); - if (date && strlen(date) >= 10 && date[2] == '/' && date[5] == '/' && - (year > 2007 || - (year == 2007 && strncmp(date, cutoff_mmdd, 5) >= 0))) - return false; - - dev_printk(KERN_WARNING, &pdev->dev, "ASUS M2A-VM: BIOS too old, " - "forcing 32bit DMA, update BIOS\n"); - - return true; -} - static bool ahci_broken_system_poweroff(struct pci_dev *pdev) { static const struct dmi_system_id broken_systems[] = { @@ -2789,10 +2744,6 @@ static int ahci_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (board_id == board_ahci_sb700 && pdev->revision >= 0x40) hpriv->flags &= ~AHCI_HFLAG_IGN_SERR_INTERNAL; - /* apply ASUS M2A_VM quirk */ - if (ahci_asus_m2a_vm_32bit_only(pdev)) - hpriv->flags |= AHCI_HFLAG_32BIT_ONLY; - if (!(hpriv->flags & AHCI_HFLAG_NO_MSI)) pci_enable_msi(pdev); diff --git a/trunk/drivers/ata/ata_piix.c b/trunk/drivers/ata/ata_piix.c index d0a14cf2bd74..1aeb7082b0c4 100644 --- a/trunk/drivers/ata/ata_piix.c +++ b/trunk/drivers/ata/ata_piix.c @@ -223,8 +223,10 @@ static const struct pci_device_id piix_pci_tbl[] = { /* ICH8 Mobile PATA Controller */ { 0x8086, 0x2850, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich_pata_100 }, - /* SATA ports */ - + /* NOTE: The following PCI ids must be kept in sync with the + * list in drivers/pci/quirks.c. 
+ */ + /* 82801EB (ICH5) */ { 0x8086, 0x24d1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich5_sata }, /* 82801EB (ICH5) */ @@ -1507,8 +1509,8 @@ static int __devinit piix_init_one(struct pci_dev *pdev, dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); - /* no hotplugging support for later devices (FIXME) */ - if (!in_module_init && ent->driver_data >= ich5_sata) + /* no hotplugging support (FIXME) */ + if (!in_module_init) return -ENODEV; if (piix_broken_system_poweroff(pdev)) { @@ -1589,7 +1591,6 @@ static int __devinit piix_init_one(struct pci_dev *pdev, host->ports[1]->mwdma_mask = 0; host->ports[1]->udma_mask = 0; } - host->flags |= ATA_HOST_PARALLEL_SCAN; pci_set_master(pdev); return ata_pci_sff_activate_host(host, ata_sff_interrupt, &piix_sht); diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index ca4d208ddf3b..c9242301cfa1 100644 --- a/trunk/drivers/ata/libata-core.c +++ b/trunk/drivers/ata/libata-core.c @@ -5031,6 +5031,7 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active) { int nr_done = 0; u32 done_mask; + int i; done_mask = ap->qc_active ^ qc_active; @@ -5040,16 +5041,16 @@ int ata_qc_complete_multiple(struct ata_port *ap, u32 qc_active) return -EINVAL; } - while (done_mask) { + for (i = 0; i < ATA_MAX_QUEUE; i++) { struct ata_queued_cmd *qc; - unsigned int tag = __ffs(done_mask); - qc = ata_qc_from_tag(ap, tag); - if (qc) { + if (!(done_mask & (1 << i))) + continue; + + if ((qc = ata_qc_from_tag(ap, i))) { ata_qc_complete(qc); nr_done++; } - done_mask &= ~(1 << tag); } return nr_done; diff --git a/trunk/drivers/ata/libata-sff.c b/trunk/drivers/ata/libata-sff.c index bbbb1fab1755..bb18415d3d63 100644 --- a/trunk/drivers/ata/libata-sff.c +++ b/trunk/drivers/ata/libata-sff.c @@ -727,23 +727,17 @@ unsigned int ata_sff_data_xfer(struct ata_device *dev, unsigned char *buf, else iowrite16_rep(data_addr, buf, words); - /* Transfer trailing byte, if any. */ + /* Transfer trailing 1 byte, if any. 
*/ if (unlikely(buflen & 0x01)) { - unsigned char pad[2]; + __le16 align_buf[1] = { 0 }; + unsigned char *trailing_buf = buf + buflen - 1; - /* Point buf to the tail of buffer */ - buf += buflen - 1; - - /* - * Use io*16_rep() accessors here as well to avoid pointlessly - * swapping bytes to and fro on the big endian machines... - */ if (rw == READ) { - ioread16_rep(data_addr, pad, 1); - *buf = pad[0]; + align_buf[0] = cpu_to_le16(ioread16(data_addr)); + memcpy(trailing_buf, align_buf, 1); } else { - pad[0] = *buf; - iowrite16_rep(data_addr, pad, 1); + memcpy(align_buf, trailing_buf, 1); + iowrite16(le16_to_cpu(align_buf[0]), data_addr); } words++; } diff --git a/trunk/drivers/ata/sata_nv.c b/trunk/drivers/ata/sata_nv.c index b2d11f300c39..6cda12ba8122 100644 --- a/trunk/drivers/ata/sata_nv.c +++ b/trunk/drivers/ata/sata_nv.c @@ -305,8 +305,8 @@ static irqreturn_t nv_ck804_interrupt(int irq, void *dev_instance); static int nv_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val); static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val); -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline); +static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline); static void nv_nf2_freeze(struct ata_port *ap); static void nv_nf2_thaw(struct ata_port *ap); static void nv_ck804_freeze(struct ata_port *ap); @@ -406,82 +406,49 @@ static struct scsi_host_template nv_swncq_sht = { .slave_configure = nv_swncq_slave_config, }; -/* - * NV SATA controllers have various different problems with hardreset - * protocol depending on the specific controller and device. - * - * GENERIC: - * - * bko11195 reports that link doesn't come online after hardreset on - * generic nv's and there have been several other similar reports on - * linux-ide. - * - * bko12351#c23 reports that warmplug on MCP61 doesn't work with - * softreset. 
- * - * NF2/3: - * - * bko3352 reports nf2/3 controllers can't determine device signature - * reliably after hardreset. The following thread reports detection - * failure on cold boot with the standard debouncing timing. - * - * http://thread.gmane.org/gmane.linux.ide/34098 - * - * bko12176 reports that hardreset fails to bring up the link during - * boot on nf2. - * - * CK804: - * - * For initial probing after boot and hot plugging, hardreset mostly - * works fine on CK804 but curiously, reprobing on the initial port - * by rescanning or rmmod/insmod fails to acquire the initial D2H Reg - * FIS in somewhat undeterministic way. - * - * SWNCQ: - * - * bko12351 reports that when SWNCQ is enabled, for hotplug to work, - * hardreset should be used and hardreset can't report proper - * signature, which suggests that mcp5x is closer to nf2 as long as - * reset quirkiness is concerned. - * - * bko12703 reports that boot probing fails for intel SSD with - * hardreset. Link fails to come online. Softreset works fine. - * - * The failures are varied but the following patterns seem true for - * all flavors. - * - * - Softreset during boot always works. - * - * - Hardreset during boot sometimes fails to bring up the link on - * certain comibnations and device signature acquisition is - * unreliable. - * - * - Hardreset is often necessary after hotplug. - * - * So, preferring softreset for boot probing and error handling (as - * hardreset might bring down the link) but using hardreset for - * post-boot probing should work around the above issues in most - * cases. Define nv_hardreset() which only kicks in for post-boot - * probing and use it for all variants. 
- */ -static struct ata_port_operations nv_generic_ops = { +static struct ata_port_operations nv_common_ops = { .inherits = &ata_bmdma_port_ops, .lost_interrupt = ATA_OP_NULL, .scr_read = nv_scr_read, .scr_write = nv_scr_write, - .hardreset = nv_hardreset, }; +/* OSDL bz11195 reports that link doesn't come online after hardreset + * on generic nv's and there have been several other similar reports + * on linux-ide. Disable hardreset for generic nv's. + */ +static struct ata_port_operations nv_generic_ops = { + .inherits = &nv_common_ops, + .hardreset = ATA_OP_NULL, +}; + +/* nf2 is ripe with hardreset related problems. + * + * kernel bz#3352 reports nf2/3 controllers can't determine device + * signature reliably. The following thread reports detection failure + * on cold boot with the standard debouncing timing. + * + * http://thread.gmane.org/gmane.linux.ide/34098 + * + * And bz#12176 reports that hardreset simply doesn't work on nf2. + * Give up on it and just don't do hardreset. + */ static struct ata_port_operations nv_nf2_ops = { .inherits = &nv_generic_ops, .freeze = nv_nf2_freeze, .thaw = nv_nf2_thaw, }; +/* For initial probing after boot and hot plugging, hardreset mostly + * works fine on CK804 but curiously, reprobing on the initial port by + * rescanning or rmmod/insmod fails to acquire the initial D2H Reg FIS + * in somewhat undeterministic way. Use noclassify hardreset. + */ static struct ata_port_operations nv_ck804_ops = { - .inherits = &nv_generic_ops, + .inherits = &nv_common_ops, .freeze = nv_ck804_freeze, .thaw = nv_ck804_thaw, + .hardreset = nv_noclassify_hardreset, .host_stop = nv_ck804_host_stop, }; @@ -509,8 +476,19 @@ static struct ata_port_operations nv_adma_ops = { .host_stop = nv_adma_host_stop, }; +/* Kernel bz#12351 reports that when SWNCQ is enabled, for hotplug to + * work, hardreset should be used and hardreset can't report proper + * signature, which suggests that mcp5x is closer to nf2 as long as + * reset quirkiness is concerned. 
Define separate ops for mcp5x with + * nv_noclassify_hardreset(). + */ +static struct ata_port_operations nv_mcp5x_ops = { + .inherits = &nv_common_ops, + .hardreset = nv_noclassify_hardreset, +}; + static struct ata_port_operations nv_swncq_ops = { - .inherits = &nv_generic_ops, + .inherits = &nv_mcp5x_ops, .qc_defer = ata_std_qc_defer, .qc_prep = nv_swncq_qc_prep, @@ -579,7 +557,7 @@ static const struct ata_port_info nv_port_info[] = { .pio_mask = NV_PIO_MASK, .mwdma_mask = NV_MWDMA_MASK, .udma_mask = NV_UDMA_MASK, - .port_ops = &nv_generic_ops, + .port_ops = &nv_mcp5x_ops, .private_data = NV_PI_PRIV(nv_generic_interrupt, &nv_sht), }, /* SWNCQ */ @@ -1581,24 +1559,15 @@ static int nv_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val) return 0; } -static int nv_hardreset(struct ata_link *link, unsigned int *class, - unsigned long deadline) +static int nv_noclassify_hardreset(struct ata_link *link, unsigned int *class, + unsigned long deadline) { - struct ata_eh_context *ehc = &link->eh_context; + bool online; + int rc; - /* Do hardreset iff it's post-boot probing, please read the - * comment above port ops for details. - */ - if (!(link->ap->pflags & ATA_PFLAG_LOADING) && - !ata_dev_enabled(link->device)) - sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, - NULL, NULL); - else if (!(ehc->i.flags & ATA_EHI_QUIET)) - ata_link_printk(link, KERN_INFO, - "nv: skipping hardreset on occupied port\n"); - - /* device signature acquisition is unreliable */ - return -EAGAIN; + rc = sata_link_hardreset(link, sata_deb_timing_hotplug, deadline, + &online, NULL); + return online ? 
-EAGAIN : rc; } static void nv_nf2_freeze(struct ata_port *ap) diff --git a/trunk/drivers/ata/sata_sil.c b/trunk/drivers/ata/sata_sil.c index 030ec079b184..e67ce8e5caa5 100644 --- a/trunk/drivers/ata/sata_sil.c +++ b/trunk/drivers/ata/sata_sil.c @@ -183,7 +183,7 @@ static struct scsi_host_template sil_sht = { }; static struct ata_port_operations sil_ops = { - .inherits = &ata_bmdma32_port_ops, + .inherits = &ata_bmdma_port_ops, .dev_config = sil_dev_config, .set_mode = sil_set_mode, .bmdma_setup = sil_bmdma_setup, diff --git a/trunk/drivers/ata/sata_sx4.c b/trunk/drivers/ata/sata_sx4.c index bbcf970068ad..eb05a3c82a9e 100644 --- a/trunk/drivers/ata/sata_sx4.c +++ b/trunk/drivers/ata/sata_sx4.c @@ -193,7 +193,6 @@ enum { PDC_TIMER_MASK_INT, }; -#define ECC_ERASE_BUF_SZ (128 * 1024) struct pdc_port_priv { u8 dimm_buf[(ATA_PRD_SZ * ATA_MAX_PRD) + 512]; @@ -1281,6 +1280,7 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) { int speed, size, length; u32 addr, spd0, pci_status; + u32 tmp = 0; u32 time_period = 0; u32 tcount = 0; u32 ticks = 0; @@ -1395,17 +1395,14 @@ static unsigned int pdc20621_dimm_init(struct ata_host *host) pdc20621_i2c_read(host, PDC_DIMM0_SPD_DEV_ADDRESS, PDC_DIMM_SPD_TYPE, &spd0); if (spd0 == 0x02) { - void *buf; VPRINTK("Start ECC initialization\n"); addr = 0; length = size * 1024 * 1024; - buf = kzalloc(ECC_ERASE_BUF_SZ, GFP_KERNEL); while (addr < length) { - pdc20621_put_to_dimm(host, buf, addr, - ECC_ERASE_BUF_SZ); - addr += ECC_ERASE_BUF_SZ; + pdc20621_put_to_dimm(host, (void *) &tmp, addr, + sizeof(u32)); + addr += sizeof(u32); } - kfree(buf); VPRINTK("Finish ECC initialization\n"); } return 0; diff --git a/trunk/drivers/char/sysrq.c b/trunk/drivers/char/sysrq.c index 39a05b5fa9cb..d6a807f4077d 100644 --- a/trunk/drivers/char/sysrq.c +++ b/trunk/drivers/char/sysrq.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -244,7 +243,6 @@ static void sysrq_handle_showregs(int key, struct tty_struct 
*tty) struct pt_regs *regs = get_irq_regs(); if (regs) show_regs(regs); - perf_counter_print_debug(); } static struct sysrq_key_op sysrq_showregs_op = { .handler = sysrq_handle_showregs, diff --git a/trunk/drivers/char/vt.c b/trunk/drivers/char/vt.c index de9ebee8657b..08151d4de489 100644 --- a/trunk/drivers/char/vt.c +++ b/trunk/drivers/char/vt.c @@ -95,6 +95,7 @@ #include #include #include +#include #include #include #include @@ -103,7 +104,6 @@ #include #include #include -#include #define MAX_NR_CON_DRIVER 16 @@ -2875,11 +2875,14 @@ static int __init con_init(void) mod_timer(&console_timer, jiffies + blankinterval); } + /* + * kmalloc is not running yet - we use the bootmem allocator. + */ for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { - vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT); + vc_cons[currcons].d = vc = alloc_bootmem(sizeof(struct vc_data)); INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, 1); - vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); + vc->vc_screenbuf = (unsigned short *)alloc_bootmem(vc->vc_screenbuf_size); vc->vc_kmalloced = 0; vc_init(vc, vc->vc_rows, vc->vc_cols, currcons || !vc->vc_sw->con_save_screen); diff --git a/trunk/drivers/firmware/dmi_scan.c b/trunk/drivers/firmware/dmi_scan.c index 24c84ae81527..5f1b5400d96a 100644 --- a/trunk/drivers/firmware/dmi_scan.c +++ b/trunk/drivers/firmware/dmi_scan.c @@ -596,7 +596,6 @@ int dmi_get_year(int field) return year; } -EXPORT_SYMBOL(dmi_get_year); /** * dmi_walk - Walk the DMI table and get called back for every record diff --git a/trunk/drivers/video/console/vgacon.c b/trunk/drivers/video/console/vgacon.c index 59d7d5ec17a4..38e86b84dce0 100644 --- a/trunk/drivers/video/console/vgacon.c +++ b/trunk/drivers/video/console/vgacon.c @@ -180,7 +180,7 @@ static inline void vga_set_mem_top(struct vc_data *c) } #ifdef CONFIG_VGACON_SOFT_SCROLLBACK -#include +#include /* software scrollback */ static void 
*vgacon_scrollback; static int vgacon_scrollback_tail; @@ -210,7 +210,8 @@ static void vgacon_scrollback_init(int pitch) */ static void __init_refok vgacon_scrollback_startup(void) { - vgacon_scrollback = kcalloc(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE, 1024, GFP_NOWAIT); + vgacon_scrollback = alloc_bootmem(CONFIG_VGACON_SOFT_SCROLLBACK_SIZE + * 1024); vgacon_scrollback_init(vga_video_num_columns * 2); } diff --git a/trunk/fs/block_dev.c b/trunk/fs/block_dev.c index 931f6b8c4b2f..2dfc6cdcebbe 100644 --- a/trunk/fs/block_dev.c +++ b/trunk/fs/block_dev.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "internal.h" @@ -493,11 +492,6 @@ void __init bdev_cache_init(void) bd_mnt = kern_mount(&bd_type); if (IS_ERR(bd_mnt)) panic("Cannot create bdev pseudo-fs"); - /* - * This vfsmount structure is only used to obtain the - * blockdev_superblock, so tell kmemleak not to report it. - */ - kmemleak_not_leak(bd_mnt); blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */ } diff --git a/trunk/fs/exec.c b/trunk/fs/exec.c index e639957d7a57..a7fcd975c6b2 100644 --- a/trunk/fs/exec.c +++ b/trunk/fs/exec.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -923,7 +922,6 @@ void set_task_comm(struct task_struct *tsk, char *buf) task_lock(tsk); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_counter_comm(tsk); } int flush_old_exec(struct linux_binprm * bprm) @@ -992,13 +990,6 @@ int flush_old_exec(struct linux_binprm * bprm) current->personality &= ~bprm->per_clear; - /* - * Flush performance counters when crossing a - * security domain: - */ - if (!get_dumpable(current->mm)) - perf_counter_exit_task(current); - /* An exec changes our domain. 
We are no longer part of the thread group */ diff --git a/trunk/fs/jfs/jfs_imap.c b/trunk/fs/jfs/jfs_imap.c index 0fc30407f039..346057218edc 100644 --- a/trunk/fs/jfs/jfs_imap.c +++ b/trunk/fs/jfs/jfs_imap.c @@ -2571,7 +2571,6 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp) txAbort(tid, 0); txEnd(tid); - mutex_unlock(&JFS_IP(ipimap)->commit_mutex); /* release the inode map lock */ IWRITE_UNLOCK(ipimap); diff --git a/trunk/fs/jfs/super.c b/trunk/fs/jfs/super.c index d9b0e92b3602..6f21adf9479a 100644 --- a/trunk/fs/jfs/super.c +++ b/trunk/fs/jfs/super.c @@ -720,10 +720,8 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type, blk++; } out: - if (len == towrite) { - mutex_unlock(&inode->i_mutex); + if (len == towrite) return err; - } if (inode->i_size < off+len-towrite) i_size_write(inode, off+len-towrite); inode->i_version++; diff --git a/trunk/fs/notify/Kconfig b/trunk/fs/notify/Kconfig index 50914d7303c6..31dac7e3b0f1 100644 --- a/trunk/fs/notify/Kconfig +++ b/trunk/fs/notify/Kconfig @@ -1,2 +1,15 @@ +config FSNOTIFY + bool "Filesystem notification backend" + default y + ---help--- + fsnotify is a backend for filesystem notification. fsnotify does + not provide any userspace interface but does provide the basis + needed for other notification schemes such as dnotify, inotify, + and fanotify. + + Say Y here to enable fsnotify support. + + If unsure, say Y.
+ source "fs/notify/dnotify/Kconfig" source "fs/notify/inotify/Kconfig" diff --git a/trunk/fs/notify/Makefile b/trunk/fs/notify/Makefile index 5a95b6010ce7..db5467b5b58d 100644 --- a/trunk/fs/notify/Makefile +++ b/trunk/fs/notify/Makefile @@ -1,2 +1,4 @@ +obj-$(CONFIG_FSNOTIFY) += fsnotify.o notification.o group.o + obj-y += dnotify/ obj-y += inotify/ diff --git a/trunk/fs/notify/fsnotify.c b/trunk/fs/notify/fsnotify.c new file mode 100644 index 000000000000..56bee0f10c38 --- /dev/null +++ b/trunk/fs/notify/fsnotify.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include + +#include +#include "fsnotify.h" + +/* + * This is the main call to fsnotify. The VFS calls into hook specific functions + * in linux/fsnotify.h. Those functions then in turn call here. Here will call + * out to all of the registered fsnotify_group. Those groups can then use the + * notification event in whatever means they feel necessary. + */ +void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +{ + struct fsnotify_group *group; + struct fsnotify_event *event = NULL; + int idx; + + if (list_empty(&fsnotify_groups)) + return; + + if (!(mask & fsnotify_mask)) + return; + + /* + * SRCU!! 
the groups list is very very much read only and the path is + * very hot. The VAST majority of events are not going to need to do + * anything other than walk the list so it's crazy to pre-allocate. + */ + idx = srcu_read_lock(&fsnotify_grp_srcu); + list_for_each_entry_rcu(group, &fsnotify_groups, group_list) { + if (mask & group->mask) { + if (!event) { + event = fsnotify_create_event(to_tell, mask, data, data_is); + /* shit, we OOM'd and now we can't tell, maybe + * someday someone else will want to do something + * here */ + if (!event) + break; + } + group->ops->handle_event(group, event); + } + } + srcu_read_unlock(&fsnotify_grp_srcu, idx); + /* + * fsnotify_create_event() took a reference so the event can't be cleaned + * up while we are still trying to add it to lists, drop that one. + */ + if (event) + fsnotify_put_event(event); +} +EXPORT_SYMBOL_GPL(fsnotify); + +static __init int fsnotify_init(void) +{ + return init_srcu_struct(&fsnotify_grp_srcu); +} +subsys_initcall(fsnotify_init); diff --git a/trunk/fs/notify/fsnotify.h b/trunk/fs/notify/fsnotify.h new file mode 100644 index 000000000000..c6a8bd476572 --- /dev/null +++ b/trunk/fs/notify/fsnotify.h @@ -0,0 +1,15 @@ +#ifndef __FS_NOTIFY_FSNOTIFY_H_ +#define __FS_NOTIFY_FSNOTIFY_H_ + +#include +#include +#include +#include + +/* protects reads of fsnotify_groups */ +extern struct srcu_struct fsnotify_grp_srcu; +/* all groups which receive fsnotify events */ +extern struct list_head fsnotify_groups; +/* all bitwise OR of all event types (FS_*) for all fsnotify_groups */ +extern __u32 fsnotify_mask; +#endif /* __FS_NOTIFY_FSNOTIFY_H_ */ diff --git a/trunk/fs/notify/group.c b/trunk/fs/notify/group.c new file mode 100644 index 000000000000..c6812953b968 --- /dev/null +++ b/trunk/fs/notify/group.c @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as 
published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include "fsnotify.h" + +#include + +/* protects writes to fsnotify_groups and fsnotify_mask */ +static DEFINE_MUTEX(fsnotify_grp_mutex); +/* protects reads while running the fsnotify_groups list */ +struct srcu_struct fsnotify_grp_srcu; +/* all groups registered to receive filesystem notifications */ +LIST_HEAD(fsnotify_groups); +/* bitwise OR of all events (FS_*) interesting to some group on this system */ +__u32 fsnotify_mask; + +/* + * When a new group registers or changes its set of interesting events + * this function updates the fsnotify_mask to contain all interesting events + */ +void fsnotify_recalc_global_mask(void) +{ + struct fsnotify_group *group; + __u32 mask = 0; + int idx; + + idx = srcu_read_lock(&fsnotify_grp_srcu); + list_for_each_entry_rcu(group, &fsnotify_groups, group_list) + mask |= group->mask; + srcu_read_unlock(&fsnotify_grp_srcu, idx); + fsnotify_mask = mask; +} + +/* + * Take a reference to a group so things found under the fsnotify_grp_mutex + * can't get freed under us + */ +static void fsnotify_get_group(struct fsnotify_group *group) +{ + atomic_inc(&group->refcnt); +} + +/* + * Final freeing of a group + */ +static void fsnotify_destroy_group(struct fsnotify_group *group) +{ + if (group->ops->free_group_priv) + group->ops->free_group_priv(group); + + kfree(group); +} + +/* + * Remove
this group from the global list of groups that will get events + * this can be done even if there are still references and things still using + * this group. This just stops the group from getting new events. + */ +static void __fsnotify_evict_group(struct fsnotify_group *group) +{ + BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); + + if (group->on_group_list) + list_del_rcu(&group->group_list); + group->on_group_list = 0; +} + +/* + * Called when a group is no longer interested in getting events. This can be + * used if a group is misbehaving or if for some reason a group should no longer + * get any filesystem events. + */ +void fsnotify_evict_group(struct fsnotify_group *group) +{ + mutex_lock(&fsnotify_grp_mutex); + __fsnotify_evict_group(group); + mutex_unlock(&fsnotify_grp_mutex); +} + +/* + * Drop a reference to a group. Free it if it's through. + */ +void fsnotify_put_group(struct fsnotify_group *group) +{ + if (!atomic_dec_and_mutex_lock(&group->refcnt, &fsnotify_grp_mutex)) + return; + + /* + * OK, now we know that there's no other users *and* we hold mutex, + * so no new references will appear + */ + __fsnotify_evict_group(group); + + /* + * now it's off the list, so the only thing we might care about is + * srcu access.... + */ + mutex_unlock(&fsnotify_grp_mutex); + synchronize_srcu(&fsnotify_grp_srcu); + + /* and now it is really dead. _Nothing_ could be seeing it */ + fsnotify_recalc_global_mask(); + fsnotify_destroy_group(group); +} + +/* + * Simply run the fsnotify_groups list and find a group which matches + * the given parameters. If a group is found we take a reference to that + * group. 
+ */ +static struct fsnotify_group *fsnotify_find_group(unsigned int group_num, __u32 mask, + const struct fsnotify_ops *ops) +{ + struct fsnotify_group *group_iter; + struct fsnotify_group *group = NULL; + + BUG_ON(!mutex_is_locked(&fsnotify_grp_mutex)); + + list_for_each_entry_rcu(group_iter, &fsnotify_groups, group_list) { + if (group_iter->group_num == group_num) { + if ((group_iter->mask == mask) && + (group_iter->ops == ops)) { + fsnotify_get_group(group_iter); + group = group_iter; + } else + group = ERR_PTR(-EEXIST); + } + } + return group; +} + +/* + * Either finds an existing group which matches the group_num, mask, and ops or + * creates a new group and adds it to the global group list. In either case we + * take a reference for the group returned. + */ +struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, __u32 mask, + const struct fsnotify_ops *ops) +{ + struct fsnotify_group *group, *tgroup; + + /* very low use, simpler locking if we just always alloc */ + group = kmalloc(sizeof(struct fsnotify_group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + atomic_set(&group->refcnt, 1); + + group->on_group_list = 0; + group->group_num = group_num; + group->mask = mask; + + group->ops = ops; + + mutex_lock(&fsnotify_grp_mutex); + tgroup = fsnotify_find_group(group_num, mask, ops); + if (tgroup) { + /* group already exists */ + mutex_unlock(&fsnotify_grp_mutex); + /* destroy the new one we made */ + fsnotify_put_group(group); + return tgroup; + } + + /* group not found, add a new one */ + list_add_rcu(&group->group_list, &fsnotify_groups); + group->on_group_list = 1; + + mutex_unlock(&fsnotify_grp_mutex); + + if (mask) + fsnotify_recalc_global_mask(); + + return group; +} diff --git a/trunk/fs/notify/inotify/inotify.c b/trunk/fs/notify/inotify/inotify.c index 220c13f0d73d..40b1cf914ccb 100644 --- a/trunk/fs/notify/inotify/inotify.c +++ b/trunk/fs/notify/inotify/inotify.c @@ -32,6 +32,7 @@ #include #include #include +#include static 
atomic_t inotify_cookie; @@ -905,6 +906,25 @@ EXPORT_SYMBOL_GPL(inotify_rm_watch); */ static int __init inotify_setup(void) { + BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); + BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); + BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); + BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); + BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); + BUILD_BUG_ON(IN_OPEN != FS_OPEN); + BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); + BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); + BUILD_BUG_ON(IN_CREATE != FS_CREATE); + BUILD_BUG_ON(IN_DELETE != FS_DELETE); + BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); + BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); + BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); + + BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); + BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); + BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); + BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); + atomic_set(&inotify_cookie, 0); return 0; diff --git a/trunk/fs/notify/notification.c b/trunk/fs/notify/notification.c new file mode 100644 index 000000000000..b8e9a87f8f58 --- /dev/null +++ b/trunk/fs/notify/notification.c @@ -0,0 +1,121 @@ +/* + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include "fsnotify.h" + +static struct kmem_cache *fsnotify_event_cachep; + +void fsnotify_get_event(struct fsnotify_event *event) +{ + atomic_inc(&event->refcnt); +} + +void fsnotify_put_event(struct fsnotify_event *event) +{ + if (!event) + return; + + if (atomic_dec_and_test(&event->refcnt)) { + if (event->data_type == FSNOTIFY_EVENT_PATH) + path_put(&event->path); + + kmem_cache_free(fsnotify_event_cachep, event); + } +} + +/* + * Allocate a new event which will be sent to each group's handle_event function + * if the group was interested in this particular event. + */ +struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_type) +{ + struct fsnotify_event *event; + + event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL); + if (!event) + return NULL; + + atomic_set(&event->refcnt, 1); + + spin_lock_init(&event->lock); + + event->path.dentry = NULL; + event->path.mnt = NULL; + event->inode = NULL; + + event->to_tell = to_tell; + + switch (data_type) { + case FSNOTIFY_EVENT_FILE: { + struct file *file = data; + struct path *path = &file->f_path; + event->path.dentry = path->dentry; + event->path.mnt = path->mnt; + path_get(&event->path); + event->data_type = FSNOTIFY_EVENT_PATH; + break; + } + case FSNOTIFY_EVENT_PATH: { + struct path *path = data; + event->path.dentry = path->dentry; + event->path.mnt = path->mnt; + path_get(&event->path); + event->data_type = FSNOTIFY_EVENT_PATH; + break; + } + case FSNOTIFY_EVENT_INODE: + event->inode = data; + event->data_type = FSNOTIFY_EVENT_INODE; + break; + case FSNOTIFY_EVENT_NONE: + event->inode = NULL; + event->path.dentry = NULL; + event->path.mnt = NULL; + break; + default: + BUG(); + } + + event->mask = mask; + + return event; +} + +__init int fsnotify_notification_init(void) +{ + fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, 
SLAB_PANIC); + + return 0; +} +subsys_initcall(fsnotify_notification_init); + diff --git a/trunk/include/asm-generic/atomic.h b/trunk/include/asm-generic/atomic.h index 81d3be459efb..3673a13b6703 100644 --- a/trunk/include/asm-generic/atomic.h +++ b/trunk/include/asm-generic/atomic.h @@ -134,7 +134,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_cmpxchg(l, old, new) \ (atomic64_cmpxchg((atomic64_t *)(l), (old), (new))) #define atomic_long_xchg(v, new) \ - (atomic64_xchg((atomic64_t *)(v), (new))) + (atomic64_xchg((atomic64_t *)(l), (new))) #else /* BITS_PER_LONG == 64 */ diff --git a/trunk/include/linux/fsnotify.h b/trunk/include/linux/fsnotify.h index 00fbd5b245c9..6c9ebefdac8e 100644 --- a/trunk/include/linux/fsnotify.h +++ b/trunk/include/linux/fsnotify.h @@ -13,6 +13,7 @@ #include #include +#include #include /* @@ -34,6 +35,16 @@ static inline void fsnotify_d_move(struct dentry *entry) inotify_d_move(entry); } +/* + * fsnotify_link_count - inode's link count changed + */ +static inline void fsnotify_link_count(struct inode *inode) +{ + inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + + fsnotify(inode, FS_ATTRIB, inode, FSNOTIFY_EVENT_INODE); +} + /* * fsnotify_move - file old_name at old_dir was moved to new_name at new_dir */ @@ -43,28 +54,47 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, { struct inode *source = moved->d_inode; u32 cookie = inotify_get_cookie(); + __u32 old_dir_mask = 0; + __u32 new_dir_mask = 0; - if (old_dir == new_dir) + if (old_dir == new_dir) { inode_dir_notify(old_dir, DN_RENAME); - else { + old_dir_mask = FS_DN_RENAME; + } else { inode_dir_notify(old_dir, DN_DELETE); + old_dir_mask = FS_DELETE; inode_dir_notify(new_dir, DN_CREATE); + new_dir_mask = FS_CREATE; } - if (isdir) + if (isdir) { isdir = IN_ISDIR; + old_dir_mask |= FS_IN_ISDIR; + new_dir_mask |= FS_IN_ISDIR; + } + + old_dir_mask |= FS_MOVED_FROM; + new_dir_mask |= FS_MOVED_TO; 
+ inotify_inode_queue_event(old_dir, IN_MOVED_FROM|isdir,cookie,old_name, source); inotify_inode_queue_event(new_dir, IN_MOVED_TO|isdir, cookie, new_name, source); + fsnotify(old_dir, old_dir_mask, old_dir, FSNOTIFY_EVENT_INODE); + fsnotify(new_dir, new_dir_mask, new_dir, FSNOTIFY_EVENT_INODE); + if (target) { inotify_inode_queue_event(target, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(target); + + /* this is really a link_count change not a removal */ + fsnotify_link_count(target); } if (source) { inotify_inode_queue_event(source, IN_MOVE_SELF, 0, NULL, NULL); + fsnotify(source, FS_MOVE_SELF, moved->d_inode, FSNOTIFY_EVENT_INODE); } audit_inode_child(new_name, moved, new_dir); } @@ -74,10 +104,12 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, */ static inline void fsnotify_nameremove(struct dentry *dentry, int isdir) { + __u32 mask = FS_DELETE; + if (isdir) - isdir = IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_DELETE); - inotify_dentry_parent_queue_event(dentry, IN_DELETE|isdir, 0, dentry->d_name.name); + inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); } /* @@ -87,14 +119,8 @@ static inline void fsnotify_inoderemove(struct inode *inode) { inotify_inode_queue_event(inode, IN_DELETE_SELF, 0, NULL, NULL); inotify_inode_is_dead(inode); -} -/* - * fsnotify_link_count - inode's link count changed - */ -static inline void fsnotify_link_count(struct inode *inode) -{ - inotify_inode_queue_event(inode, IN_ATTRIB, 0, NULL, NULL); + fsnotify(inode, FS_DELETE_SELF, inode, FSNOTIFY_EVENT_INODE); } /* @@ -106,6 +132,8 @@ static inline void fsnotify_create(struct inode *inode, struct dentry *dentry) inotify_inode_queue_event(inode, IN_CREATE, 0, dentry->d_name.name, dentry->d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, FS_CREATE, dentry->d_inode, FSNOTIFY_EVENT_INODE); } /* @@ -120,6 +148,8 @@ static inline void fsnotify_link(struct inode *dir, struct inode 
*inode, struct inode); fsnotify_link_count(inode); audit_inode_child(new_dentry->d_name.name, new_dentry, dir); + + fsnotify(dir, FS_CREATE, inode, FSNOTIFY_EVENT_INODE); } /* @@ -127,10 +157,14 @@ static inline void fsnotify_link(struct inode *dir, struct inode *inode, struct */ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) { + __u32 mask = (FS_CREATE | FS_IN_ISDIR); + struct inode *d_inode = dentry->d_inode; + inode_dir_notify(inode, DN_CREATE); - inotify_inode_queue_event(inode, IN_CREATE | IN_ISDIR, 0, - dentry->d_name.name, dentry->d_inode); + inotify_inode_queue_event(inode, mask, 0, dentry->d_name.name, d_inode); audit_inode_child(dentry->d_name.name, dentry, inode); + + fsnotify(inode, mask, d_inode, FSNOTIFY_EVENT_INODE); } /* @@ -139,14 +173,16 @@ static inline void fsnotify_mkdir(struct inode *inode, struct dentry *dentry) static inline void fsnotify_access(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ACCESS; + __u32 mask = FS_ACCESS; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_ACCESS); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -155,14 +191,16 @@ static inline void fsnotify_access(struct dentry *dentry) static inline void fsnotify_modify(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_MODIFY; + __u32 mask = FS_MODIFY; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; dnotify_parent(dentry, DN_MODIFY); inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -171,13 +209,15 @@ static inline void fsnotify_modify(struct dentry *dentry) static inline void fsnotify_open(struct dentry *dentry) { struct inode 
*inode = dentry->d_inode; - u32 mask = IN_OPEN; + __u32 mask = FS_OPEN; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -189,13 +229,15 @@ static inline void fsnotify_close(struct file *file) struct inode *inode = dentry->d_inode; const char *name = dentry->d_name.name; fmode_t mode = file->f_mode; - u32 mask = (mode & FMODE_WRITE) ? IN_CLOSE_WRITE : IN_CLOSE_NOWRITE; + __u32 mask = (mode & FMODE_WRITE) ? FS_CLOSE_WRITE : FS_CLOSE_NOWRITE; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, file, FSNOTIFY_EVENT_FILE); } /* @@ -204,13 +246,15 @@ static inline void fsnotify_close(struct file *file) static inline void fsnotify_xattr(struct dentry *dentry) { struct inode *inode = dentry->d_inode; - u32 mask = IN_ATTRIB; + __u32 mask = FS_ATTRIB; if (S_ISDIR(inode->i_mode)) - mask |= IN_ISDIR; + mask |= FS_IN_ISDIR; inotify_dentry_parent_queue_event(dentry, mask, 0, dentry->d_name.name); inotify_inode_queue_event(inode, mask, 0, NULL, NULL); + + fsnotify(inode, mask, inode, FSNOTIFY_EVENT_INODE); } /* @@ -221,34 +265,34 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) { struct inode *inode = dentry->d_inode; int dn_mask = 0; - u32 in_mask = 0; + __u32 in_mask = 0; if (ia_valid & ATTR_UID) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } if (ia_valid & ATTR_GID) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } if (ia_valid & ATTR_SIZE) { - in_mask |= IN_MODIFY; + in_mask |= FS_MODIFY; dn_mask |= DN_MODIFY; } /* both times implies a utime(s) call */ if ((ia_valid & (ATTR_ATIME | ATTR_MTIME)) == (ATTR_ATIME | ATTR_MTIME)) { 
- in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } else if (ia_valid & ATTR_ATIME) { - in_mask |= IN_ACCESS; + in_mask |= FS_ACCESS; dn_mask |= DN_ACCESS; } else if (ia_valid & ATTR_MTIME) { - in_mask |= IN_MODIFY; + in_mask |= FS_MODIFY; dn_mask |= DN_MODIFY; } if (ia_valid & ATTR_MODE) { - in_mask |= IN_ATTRIB; + in_mask |= FS_ATTRIB; dn_mask |= DN_ATTRIB; } @@ -256,14 +300,15 @@ static inline void fsnotify_change(struct dentry *dentry, unsigned int ia_valid) dnotify_parent(dentry, dn_mask); if (in_mask) { if (S_ISDIR(inode->i_mode)) - in_mask |= IN_ISDIR; + in_mask |= FS_IN_ISDIR; inotify_inode_queue_event(inode, in_mask, 0, NULL, NULL); inotify_dentry_parent_queue_event(dentry, in_mask, 0, dentry->d_name.name); + fsnotify(inode, in_mask, inode, FSNOTIFY_EVENT_INODE); } } -#ifdef CONFIG_INOTIFY /* inotify helpers */ +#if defined(CONFIG_INOTIFY) || defined(CONFIG_FSNOTIFY) /* notify helpers */ /* * fsnotify_oldname_init - save off the old filename before we change it @@ -281,7 +326,7 @@ static inline void fsnotify_oldname_free(const char *old_name) kfree(old_name); } -#else /* CONFIG_INOTIFY */ +#else /* CONFIG_INOTIFY || CONFIG_FSNOTIFY */ static inline const char *fsnotify_oldname_init(const char *name) { diff --git a/trunk/include/linux/fsnotify_backend.h b/trunk/include/linux/fsnotify_backend.h new file mode 100644 index 000000000000..1a55718b38aa --- /dev/null +++ b/trunk/include/linux/fsnotify_backend.h @@ -0,0 +1,177 @@ +/* + * Filesystem access notification for Linux + * + * Copyright (C) 2008 Red Hat, Inc., Eric Paris + */ + +#ifndef __LINUX_FSNOTIFY_BACKEND_H +#define __LINUX_FSNOTIFY_BACKEND_H + +#ifdef __KERNEL__ + +#include /* struct inode */ +#include +#include /* struct path */ +#include +#include + +#include + +/* + * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily + * convert between them. dnotify only needs conversion at watch creation + * so no perf loss there. 
fanotify isn't defined yet, so it can use the + * holes if it needs more events. + */ +#define FS_ACCESS 0x00000001 /* File was accessed */ +#define FS_MODIFY 0x00000002 /* File was modified */ +#define FS_ATTRIB 0x00000004 /* Metadata changed */ +#define FS_CLOSE_WRITE 0x00000008 /* Writable file was closed */ +#define FS_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FS_OPEN 0x00000020 /* File was opened */ +#define FS_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FS_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FS_CREATE 0x00000100 /* Subfile was created */ +#define FS_DELETE 0x00000200 /* Subfile was deleted */ +#define FS_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FS_MOVE_SELF 0x00000800 /* Self was moved */ + +#define FS_UNMOUNT 0x00002000 /* inode on umount fs */ +#define FS_Q_OVERFLOW 0x00004000 /* Event queue overflowed */ +#define FS_IN_IGNORED 0x00008000 /* last inotify event here */ + +#define FS_IN_ISDIR 0x40000000 /* event occurred against dir */ +#define FS_IN_ONESHOT 0x80000000 /* only send event once */ + +#define FS_DN_RENAME 0x10000000 /* file renamed */ +#define FS_DN_MULTISHOT 0x20000000 /* dnotify multishot */ + +struct fsnotify_group; +struct fsnotify_event; + +/* + * Each group must define these ops. The fsnotify infrastructure will call + * these operations for each relevant group. + * + * handle_event - main call for a group to handle an fs event + * free_group_priv - called when a group refcnt hits 0 to clean up the private union + */ +struct fsnotify_ops { + int (*handle_event)(struct fsnotify_group *group, struct fsnotify_event *event); + void (*free_group_priv)(struct fsnotify_group *group); +}; + +/* + * A group is a "thing" that wants to receive notification about filesystem + * events. The mask holds the subset of event types this group cares about. + * refcnt on a group is up to the implementor and at any moment if it goes 0 + * everything will be cleaned up.
+ */ +struct fsnotify_group { + /* + * global list of all groups receiving events from fsnotify. + * anchored by fsnotify_groups and protected by either fsnotify_grp_mutex + * or fsnotify_grp_srcu depending on write vs read. + */ + struct list_head group_list; + + /* + * Defines all of the event types in which this group is interested. + * This mask is a bitwise OR of the FS_* events from above. Each time + * this mask changes for a group (if it changes) the correct functions + * must be called to update the global structures which indicate global + * interest in event types. + */ + __u32 mask; + + /* + * How the refcnt is used is up to each group. When the refcnt hits 0 + * fsnotify will clean up all of the resources associated with this group. + * As an example, the dnotify group will always have a refcnt=1 and that + * will never change. Inotify, on the other hand, has a group per + * inotify_init() and the refcnt will hit 0 only when that fd has been + * closed. + */ + atomic_t refcnt; /* things with interest in this group */ + unsigned int group_num; /* simply prevents accidental group collision */ + + const struct fsnotify_ops *ops; /* how this group handles things */ + + /* prevents double list_del of group_list. protected by global fsnotify_grp_mutex */ + bool on_group_list; + + /* groups can define private fields here or use the void *private */ + union { + void *private; + }; +}; + +/* + * all of the information about the original object we want to now send to + * a group. If you want to carry more info from the accessing task to the + * listener this structure is where you need to be adding fields. + */ +struct fsnotify_event { + spinlock_t lock; /* protection for the associated event_holder and private_list */ + /* to_tell may ONLY be dereferenced during handle_event().
*/ + struct inode *to_tell; /* either the inode the event happened to or its parent */ + /* + * depending on the event type we should have either a path or inode + * We hold a reference on path, but NOT on inode. Since we have the ref on + * the path, it may be dereferenced at any point during this object's + * lifetime. That reference is dropped when this object's refcnt hits + * 0. If this event contains an inode instead of a path, the inode may + * ONLY be used during handle_event(). + */ + union { + struct path path; + struct inode *inode; + }; +/* when calling fsnotify tell it if the data is a path or inode */ +#define FSNOTIFY_EVENT_NONE 0 +#define FSNOTIFY_EVENT_PATH 1 +#define FSNOTIFY_EVENT_INODE 2 +#define FSNOTIFY_EVENT_FILE 3 + int data_type; /* which of the above union we have */ + atomic_t refcnt; /* how many groups still are using/need to send this event */ + __u32 mask; /* the type of access, bitwise OR for FS_* event types */ +}; + +#ifdef CONFIG_FSNOTIFY + +/* called from the vfs helpers */ + +/* main fsnotify call to send events */ +extern void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is); + + +/* called from fsnotify listeners, such as fanotify or dnotify */ + +/* must call when a group changes its ->mask */ +extern void fsnotify_recalc_global_mask(void); +/* get a reference to an existing or create a new group */ +extern struct fsnotify_group *fsnotify_obtain_group(unsigned int group_num, + __u32 mask, + const struct fsnotify_ops *ops); +/* drop reference on a group from fsnotify_obtain_group */ +extern void fsnotify_put_group(struct fsnotify_group *group); + +/* take a reference to an event */ +extern void fsnotify_get_event(struct fsnotify_event *event); +extern void fsnotify_put_event(struct fsnotify_event *event); +/* find private data previously attached to an event */ +extern struct fsnotify_event_private_data *fsnotify_get_priv_from_event(struct fsnotify_group *group, + struct fsnotify_event *event); + +/* put 
here because inotify does some weird stuff when destroying watches */ +extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, + void *data, int data_is); +#else + +static inline void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is) +{} +#endif /* CONFIG_FSNOTIFY */ + +#endif /* __KERNEL __ */ + +#endif /* __LINUX_FSNOTIFY_BACKEND_H */ diff --git a/trunk/include/linux/init_task.h b/trunk/include/linux/init_task.h index 28b1f30601b5..6646bfc7b892 100644 --- a/trunk/include/linux/init_task.h +++ b/trunk/include/linux/init_task.h @@ -108,15 +108,6 @@ extern struct group_info init_groups; extern struct cred init_cred; -#ifdef CONFIG_PERF_COUNTERS -# define INIT_PERF_COUNTERS(tsk) \ - .perf_counter_mutex = \ - __MUTEX_INITIALIZER(tsk.perf_counter_mutex), \ - .perf_counter_list = LIST_HEAD_INIT(tsk.perf_counter_list), -#else -# define INIT_PERF_COUNTERS(tsk) -#endif - /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -180,7 +171,6 @@ extern struct cred init_cred; }, \ .dirties = INIT_PROP_LOCAL_SINGLE(dirties), \ INIT_IDS \ - INIT_PERF_COUNTERS(tsk) \ INIT_TRACE_IRQFLAGS \ INIT_LOCKDEP \ INIT_FTRACE_GRAPH \ diff --git a/trunk/include/linux/irq.h b/trunk/include/linux/irq.h index 1e50c34f0062..eedbb8e5e0cc 100644 --- a/trunk/include/linux/irq.h +++ b/trunk/include/linux/irq.h @@ -430,19 +430,23 @@ extern int set_irq_msi(unsigned int irq, struct msi_desc *entry); * Returns true if successful (or not required). 
*/ static inline bool alloc_desc_masks(struct irq_desc *desc, int node, - bool boot) + bool boot) { - gfp_t gfp = GFP_ATOMIC; +#ifdef CONFIG_CPUMASK_OFFSTACK + if (boot) { + alloc_bootmem_cpumask_var(&desc->affinity); - if (boot) - gfp = GFP_NOWAIT; +#ifdef CONFIG_GENERIC_PENDING_IRQ + alloc_bootmem_cpumask_var(&desc->pending_mask); +#endif + return true; + } -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var_node(&desc->affinity, gfp, node)) + if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) return false; #ifdef CONFIG_GENERIC_PENDING_IRQ - if (!alloc_cpumask_var_node(&desc->pending_mask, gfp, node)) { + if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { free_cpumask_var(desc->affinity); return false; } diff --git a/trunk/include/linux/kernel_stat.h b/trunk/include/linux/kernel_stat.h index a77c6007dc99..0c8b89f28a95 100644 --- a/trunk/include/linux/kernel_stat.h +++ b/trunk/include/linux/kernel_stat.h @@ -81,12 +81,7 @@ static inline unsigned int kstat_irqs(unsigned int irq) return sum; } - -/* - * Lock/unlock the current runqueue - to extract task statistics: - */ extern unsigned long long task_delta_exec(struct task_struct *); - extern void account_user_time(struct task_struct *, cputime_t, cputime_t); extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); extern void account_steal_time(cputime_t); diff --git a/trunk/include/linux/kmemleak.h b/trunk/include/linux/kmemleak.h deleted file mode 100644 index 7796aed6cdd5..000000000000 --- a/trunk/include/linux/kmemleak.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * include/linux/kmemleak.h - * - * Copyright (C) 2008 ARM Limited - * Written by Catalin Marinas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __KMEMLEAK_H -#define __KMEMLEAK_H - -#ifdef CONFIG_DEBUG_KMEMLEAK - -extern void kmemleak_init(void); -extern void kmemleak_alloc(const void *ptr, size_t size, int min_count, - gfp_t gfp); -extern void kmemleak_free(const void *ptr); -extern void kmemleak_padding(const void *ptr, unsigned long offset, - size_t size); -extern void kmemleak_not_leak(const void *ptr); -extern void kmemleak_ignore(const void *ptr); -extern void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp); -extern void kmemleak_no_scan(const void *ptr); - -static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, - int min_count, unsigned long flags, - gfp_t gfp) -{ - if (!(flags & SLAB_NOLEAKTRACE)) - kmemleak_alloc(ptr, size, min_count, gfp); -} - -static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) -{ - if (!(flags & SLAB_NOLEAKTRACE)) - kmemleak_free(ptr); -} - -static inline void kmemleak_erase(void **ptr) -{ - *ptr = NULL; -} - -#else - -static inline void kmemleak_init(void) -{ -} -static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, - gfp_t gfp) -{ -} -static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, - int min_count, unsigned long flags, - gfp_t gfp) -{ -} -static inline void kmemleak_free(const void *ptr) -{ -} -static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) -{ -} -static inline void kmemleak_not_leak(const void *ptr) -{ -} -static inline void 
kmemleak_ignore(const void *ptr) -{ -} -static inline void kmemleak_scan_area(const void *ptr, unsigned long offset, - size_t length, gfp_t gfp) -{ -} -static inline void kmemleak_erase(void **ptr) -{ -} -static inline void kmemleak_no_scan(const void *ptr) -{ -} - -#endif /* CONFIG_DEBUG_KMEMLEAK */ - -#endif /* __KMEMLEAK_H */ diff --git a/trunk/include/linux/percpu.h b/trunk/include/linux/percpu.h index 26fd9d12f050..1581ff235c7e 100644 --- a/trunk/include/linux/percpu.h +++ b/trunk/include/linux/percpu.h @@ -86,12 +86,7 @@ struct percpu_data { void *ptrs[1]; }; -/* pointer disguising messes up the kmemleak objects tracking */ -#ifndef CONFIG_DEBUG_KMEMLEAK #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) -#else -#define __percpu_disguise(pdata) (struct percpu_data *)(pdata) -#endif #define per_cpu_ptr(ptr, cpu) \ ({ \ diff --git a/trunk/include/linux/perf_counter.h b/trunk/include/linux/perf_counter.h deleted file mode 100644 index 6e133954e2e4..000000000000 --- a/trunk/include/linux/perf_counter.h +++ /dev/null @@ -1,697 +0,0 @@ -/* - * Performance counters: - * - * Copyright (C) 2008-2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009, Red Hat, Inc., Peter Zijlstra - * - * Data type definitions, declarations, prototypes. 
- * - * Started by: Thomas Gleixner and Ingo Molnar - * - * For licencing details see kernel-base/COPYING - */ -#ifndef _LINUX_PERF_COUNTER_H -#define _LINUX_PERF_COUNTER_H - -#include -#include -#include - -/* - * User-space ABI bits: - */ - -/* - * attr.type - */ -enum perf_type_id { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, - PERF_TYPE_HW_CACHE = 3, - PERF_TYPE_RAW = 4, - - PERF_TYPE_MAX, /* non-ABI */ -}; - -/* - * Generalized performance counter event types, used by the - * attr.event_id parameter of the sys_perf_counter_open() - * syscall: - */ -enum perf_hw_id { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_HW_CPU_CYCLES = 0, - PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, - PERF_COUNT_HW_CACHE_MISSES = 3, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, - PERF_COUNT_HW_BUS_CYCLES = 6, - - PERF_COUNT_HW_MAX, /* non-ABI */ -}; - -/* - * Generalized hardware cache counters: - * - * { L1-D, L1-I, LLC, ITLB, DTLB, BPU } x - * { read, write, prefetch } x - * { accesses, misses } - */ -enum perf_hw_cache_id { - PERF_COUNT_HW_CACHE_L1D = 0, - PERF_COUNT_HW_CACHE_L1I = 1, - PERF_COUNT_HW_CACHE_LL = 2, - PERF_COUNT_HW_CACHE_DTLB = 3, - PERF_COUNT_HW_CACHE_ITLB = 4, - PERF_COUNT_HW_CACHE_BPU = 5, - - PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_id { - PERF_COUNT_HW_CACHE_OP_READ = 0, - PERF_COUNT_HW_CACHE_OP_WRITE = 1, - PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, - - PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ -}; - -enum perf_hw_cache_op_result_id { - PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, - PERF_COUNT_HW_CACHE_RESULT_MISS = 1, - - PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ -}; - -/* - * Special "software" counters provided by the kernel, even if the hardware - * does not support performance counters. 
These counters measure various - * physical and sw events of the kernel (and allow the profiling of them as - * well): - */ -enum perf_sw_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, - - PERF_COUNT_SW_MAX, /* non-ABI */ -}; - -/* - * Bits that can be set in attr.sample_type to request information - * in the overflow packets. - */ -enum perf_counter_sample_format { - PERF_SAMPLE_IP = 1U << 0, - PERF_SAMPLE_TID = 1U << 1, - PERF_SAMPLE_TIME = 1U << 2, - PERF_SAMPLE_ADDR = 1U << 3, - PERF_SAMPLE_GROUP = 1U << 4, - PERF_SAMPLE_CALLCHAIN = 1U << 5, - PERF_SAMPLE_ID = 1U << 6, - PERF_SAMPLE_CPU = 1U << 7, - PERF_SAMPLE_PERIOD = 1U << 8, -}; - -/* - * Bits that can be set in attr.read_format to request that - * reads on the counter should return the indicated quantities, - * in increasing order of bit value, after the counter value. - */ -enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, - PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, - PERF_FORMAT_ID = 1U << 2, -}; - -/* - * Hardware event to monitor via a performance monitoring counter: - */ -struct perf_counter_attr { - /* - * Major type: hardware/software/tracepoint/etc. - */ - __u32 type; - __u32 __reserved_1; - - /* - * Type specific configuration information. 
- */ - __u64 config; - - union { - __u64 sample_period; - __u64 sample_freq; - }; - - __u64 sample_type; - __u64 read_format; - - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - comm : 1, /* include comm data */ - freq : 1, /* use freq, not period */ - - __reserved_2 : 53; - - __u32 wakeup_events; /* wakeup every n events */ - __u32 __reserved_3; - - __u64 __reserved_4; -}; - -/* - * Ioctls that can be done on a perf counter fd: - */ -#define PERF_COUNTER_IOC_ENABLE _IO ('$', 0) -#define PERF_COUNTER_IOC_DISABLE _IO ('$', 1) -#define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) -#define PERF_COUNTER_IOC_RESET _IO ('$', 3) -#define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) - -enum perf_counter_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, -}; - -/* - * Structure of the page that can be mapped via mmap - */ -struct perf_counter_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - - /* - * Bits needed to read the hw counters in user-space. - * - * u32 seq; - * s64 count; - * - * do { - * seq = pc->lock; - * - * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; - * - * barrier(); - * } while (pc->lock != seq); - * - * NOTE: for obvious reason this only works on self-monitoring - * processes. - */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - - /* - * Control data for the mmap() data buffer. 
- * - * User-space reading this value should issue an rmb(), on SMP capable - * platforms, after reading this value -- see perf_counter_wakeup(). - */ - __u64 data_head; /* head in the data section */ -}; - -#define PERF_EVENT_MISC_CPUMODE_MASK (3 << 0) -#define PERF_EVENT_MISC_CPUMODE_UNKNOWN (0 << 0) -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (2 << 0) -#define PERF_EVENT_MISC_HYPERVISOR (3 << 0) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) - -struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; -}; - -enum perf_event_type { - - /* - * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: - * - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; - * }; - */ - PERF_EVENT_MMAP = 1, - - /* - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * char comm[]; - * }; - */ - PERF_EVENT_COMM = 3, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; - * u64 sample_period; - * }; - */ - PERF_EVENT_PERIOD = 4, - - /* - * struct { - * struct perf_event_header header; - * u64 time; - * u64 id; - * }; - */ - PERF_EVENT_THROTTLE = 5, - PERF_EVENT_UNTHROTTLE = 6, - - /* - * struct { - * struct perf_event_header header; - * u32 pid, ppid; - * }; - */ - PERF_EVENT_FORK = 7, - - /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_RECORD_* - * - * struct { - * struct perf_event_header header; - * - * { u64 ip; } && PERF_RECORD_IP - * { u32 pid, tid; } && PERF_RECORD_TID - * { u64 time; } && PERF_RECORD_TIME - * { u64 addr; } && PERF_RECORD_ADDR - * { u64 config; } && PERF_RECORD_CONFIG - * { u32 cpu, res; } && PERF_RECORD_CPU - * - * { u64 nr; - * { u64 id, val; } cnt[nr]; } && PERF_RECORD_GROUP - * - * { u16 nr, - * hv, - * kernel, - * user; - * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN - * }; - */ 
-}; - -#ifdef __KERNEL__ -/* - * Kernel-internal data types and definitions: - */ - -#ifdef CONFIG_PERF_COUNTERS -# include -#endif - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct task_struct; - -/** - * struct hw_perf_counter - performance counter hardware details: - */ -struct hw_perf_counter { -#ifdef CONFIG_PERF_COUNTERS - union { - struct { /* hardware */ - u64 config; - unsigned long config_base; - unsigned long counter_base; - int idx; - }; - union { /* software */ - atomic64_t count; - struct hrtimer hrtimer; - }; - }; - atomic64_t prev_count; - u64 sample_period; - u64 last_period; - atomic64_t period_left; - u64 interrupts; - - u64 freq_count; - u64 freq_interrupts; - u64 freq_stamp; -#endif -}; - -struct perf_counter; - -/** - * struct pmu - generic performance monitoring unit - */ -struct pmu { - int (*enable) (struct perf_counter *counter); - void (*disable) (struct perf_counter *counter); - void (*read) (struct perf_counter *counter); - void (*unthrottle) (struct perf_counter *counter); -}; - -/** - * enum perf_counter_active_state - the states of a counter - */ -enum perf_counter_active_state { - PERF_COUNTER_STATE_ERROR = -2, - PERF_COUNTER_STATE_OFF = -1, - PERF_COUNTER_STATE_INACTIVE = 0, - PERF_COUNTER_STATE_ACTIVE = 1, -}; - -struct file; - -struct perf_mmap_data { - struct rcu_head rcu_head; - int nr_pages; /* nr of data pages */ - int nr_locked; /* nr pages mlocked */ - - atomic_t poll; /* POLL_ for wakeups */ - atomic_t events; /* event limit */ - - atomic_long_t head; /* write position */ - atomic_long_t done_head; /* completed head */ - - atomic_t lock; /* concurrent writes */ - - atomic_t wakeup; /* needs a wakeup */ - - struct perf_counter_mmap_page *user_page; - void *data_pages[0]; -}; - -struct perf_pending_entry { - struct perf_pending_entry *next; - void (*func)(struct perf_pending_entry *); -}; - -/** - * struct perf_counter - performance counter kernel representation: - */ 
-struct perf_counter { -#ifdef CONFIG_PERF_COUNTERS - struct list_head list_entry; - struct list_head event_entry; - struct list_head sibling_list; - int nr_siblings; - struct perf_counter *group_leader; - const struct pmu *pmu; - - enum perf_counter_active_state state; - atomic64_t count; - - /* - * These are the total time in nanoseconds that the counter - * has been enabled (i.e. eligible to run, and the task has - * been scheduled in, if this is a per-task counter) - * and running (scheduled onto the CPU), respectively. - * - * They are computed from tstamp_enabled, tstamp_running and - * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. - */ - u64 total_time_enabled; - u64 total_time_running; - - /* - * These are timestamps used for computing total_time_enabled - * and total_time_running when the counter is in INACTIVE or - * ACTIVE state, measured in nanoseconds from an arbitrary point - * in time. - * tstamp_enabled: the notional time when the counter was enabled - * tstamp_running: the notional time when the counter was scheduled on - * tstamp_stopped: in INACTIVE state, the notional time when the - * counter was scheduled off. - */ - u64 tstamp_enabled; - u64 tstamp_running; - u64 tstamp_stopped; - - struct perf_counter_attr attr; - struct hw_perf_counter hw; - - struct perf_counter_context *ctx; - struct file *filp; - - /* - * These accumulate total time (in nanoseconds) that children - * counters have been enabled and running, respectively. 
- */ - atomic64_t child_total_time_enabled; - atomic64_t child_total_time_running; - - /* - * Protect attach/detach and child_list: - */ - struct mutex child_mutex; - struct list_head child_list; - struct perf_counter *parent; - - int oncpu; - int cpu; - - struct list_head owner_entry; - struct task_struct *owner; - - /* mmap bits */ - struct mutex mmap_mutex; - atomic_t mmap_count; - struct perf_mmap_data *data; - - /* poll related */ - wait_queue_head_t waitq; - struct fasync_struct *fasync; - - /* delayed work for NMIs and such */ - int pending_wakeup; - int pending_kill; - int pending_disable; - struct perf_pending_entry pending; - - atomic_t event_limit; - - void (*destroy)(struct perf_counter *); - struct rcu_head rcu_head; - - struct pid_namespace *ns; - u64 id; -#endif -}; - -/** - * struct perf_counter_context - counter context structure - * - * Used as a container for task counters and CPU counters as well: - */ -struct perf_counter_context { - /* - * Protect the states of the counters in the list, - * nr_active, and the list: - */ - spinlock_t lock; - /* - * Protect the list of counters. Locking either mutex or lock - * is sufficient to ensure the list doesn't change; to change - * the list you need to lock both the mutex and the spinlock. - */ - struct mutex mutex; - - struct list_head counter_list; - struct list_head event_list; - int nr_counters; - int nr_active; - int is_active; - atomic_t refcount; - struct task_struct *task; - - /* - * Context clock, runs when context enabled. - */ - u64 time; - u64 timestamp; - - /* - * These fields let us detect when two contexts have both - * been cloned (inherited) from a common ancestor. 
- */ - struct perf_counter_context *parent_ctx; - u64 parent_gen; - u64 generation; - int pin_count; - struct rcu_head rcu_head; -}; - -/** - * struct perf_counter_cpu_context - per cpu counter context structure - */ -struct perf_cpu_context { - struct perf_counter_context ctx; - struct perf_counter_context *task_ctx; - int active_oncpu; - int max_pertask; - int exclusive; - - /* - * Recursion avoidance: - * - * task, softirq, irq, nmi context - */ - int recursion[4]; -}; - -#ifdef CONFIG_PERF_COUNTERS - -/* - * Set by architecture code: - */ -extern int perf_max_counters; - -extern const struct pmu *hw_perf_counter_init(struct perf_counter *counter); - -extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); -extern void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu); -extern void perf_counter_task_tick(struct task_struct *task, int cpu); -extern int perf_counter_init_task(struct task_struct *child); -extern void perf_counter_exit_task(struct task_struct *child); -extern void perf_counter_free_task(struct task_struct *task); -extern void perf_counter_do_pending(void); -extern void perf_counter_print_debug(void); -extern void __perf_disable(void); -extern bool __perf_enable(void); -extern void perf_disable(void); -extern void perf_enable(void); -extern int perf_counter_task_disable(void); -extern int perf_counter_task_enable(void); -extern int hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu); -extern void perf_counter_update_userpage(struct perf_counter *counter); - -struct perf_sample_data { - struct pt_regs *regs; - u64 addr; - u64 period; -}; - -extern int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data); - -/* - * Return 1 for a software counter, 0 for a hardware counter - */ -static inline int is_software_counter(struct perf_counter *counter) -{ - return 
(counter->attr.type != PERF_TYPE_RAW) && - (counter->attr.type != PERF_TYPE_HARDWARE); -} - -extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64); - -extern void __perf_counter_mmap(struct vm_area_struct *vma); - -static inline void perf_counter_mmap(struct vm_area_struct *vma) -{ - if (vma->vm_flags & VM_EXEC) - __perf_counter_mmap(vma); -} - -extern void perf_counter_comm(struct task_struct *tsk); -extern void perf_counter_fork(struct task_struct *tsk); - -extern void perf_counter_task_migration(struct task_struct *task, int cpu); - -#define MAX_STACK_DEPTH 255 - -struct perf_callchain_entry { - u16 nr; - u16 hv; - u16 kernel; - u16 user; - u64 ip[MAX_STACK_DEPTH]; -}; - -extern struct perf_callchain_entry *perf_callchain(struct pt_regs *regs); - -extern int sysctl_perf_counter_paranoid; -extern int sysctl_perf_counter_mlock; -extern int sysctl_perf_counter_sample_rate; - -extern void perf_counter_init(void); - -#ifndef perf_misc_flags -#define perf_misc_flags(regs) (user_mode(regs) ? 
PERF_EVENT_MISC_USER : \ - PERF_EVENT_MISC_KERNEL) -#define perf_instruction_pointer(regs) instruction_pointer(regs) -#endif - -#else -static inline void -perf_counter_task_sched_in(struct task_struct *task, int cpu) { } -static inline void -perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) { } -static inline void -perf_counter_task_tick(struct task_struct *task, int cpu) { } -static inline int perf_counter_init_task(struct task_struct *child) { return 0; } -static inline void perf_counter_exit_task(struct task_struct *child) { } -static inline void perf_counter_free_task(struct task_struct *task) { } -static inline void perf_counter_do_pending(void) { } -static inline void perf_counter_print_debug(void) { } -static inline void perf_disable(void) { } -static inline void perf_enable(void) { } -static inline int perf_counter_task_disable(void) { return -EINVAL; } -static inline int perf_counter_task_enable(void) { return -EINVAL; } - -static inline void -perf_swcounter_event(u32 event, u64 nr, int nmi, - struct pt_regs *regs, u64 addr) { } - -static inline void perf_counter_mmap(struct vm_area_struct *vma) { } -static inline void perf_counter_comm(struct task_struct *tsk) { } -static inline void perf_counter_fork(struct task_struct *tsk) { } -static inline void perf_counter_init(void) { } -static inline void perf_counter_task_migration(struct task_struct *task, - int cpu) { } -#endif - -#endif /* __KERNEL__ */ -#endif /* _LINUX_PERF_COUNTER_H */ diff --git a/trunk/include/linux/prctl.h b/trunk/include/linux/prctl.h index b00df4c79c63..48d887e3c6e7 100644 --- a/trunk/include/linux/prctl.h +++ b/trunk/include/linux/prctl.h @@ -85,7 +85,4 @@ #define PR_SET_TIMERSLACK 29 #define PR_GET_TIMERSLACK 30 -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - #endif /* _LINUX_PRCTL_H */ diff --git a/trunk/include/linux/sched.h b/trunk/include/linux/sched.h index 4896fdfec913..42bf2766111e 100644 --- 
a/trunk/include/linux/sched.h +++ b/trunk/include/linux/sched.h @@ -99,7 +99,6 @@ struct robust_list_head; struct bio; struct fs_struct; struct bts_context; -struct perf_counter_context; /* * List of flags we want to share for kernel threads, @@ -140,7 +139,6 @@ extern unsigned long nr_running(void); extern unsigned long nr_uninterruptible(void); extern unsigned long nr_iowait(void); extern void calc_global_load(void); -extern u64 cpu_nr_migrations(int cpu); extern unsigned long get_parent_ip(unsigned long addr); @@ -676,10 +674,6 @@ struct user_struct { struct work_struct work; #endif #endif - -#ifdef CONFIG_PERF_COUNTERS - atomic_long_t locked_vm; -#endif }; extern int uids_sysfs_init(void); @@ -1079,10 +1073,9 @@ struct sched_entity { u64 last_wakeup; u64 avg_overlap; - u64 nr_migrations; - u64 start_runtime; u64 avg_wakeup; + u64 nr_migrations; #ifdef CONFIG_SCHEDSTATS u64 wait_start; @@ -1403,11 +1396,6 @@ struct task_struct { struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; #endif -#ifdef CONFIG_PERF_COUNTERS - struct perf_counter_context *perf_counter_ctxp; - struct mutex perf_counter_mutex; - struct list_head perf_counter_list; -#endif #ifdef CONFIG_NUMA struct mempolicy *mempolicy; short il_next; @@ -2422,13 +2410,6 @@ static inline void inc_syscw(struct task_struct *tsk) #define TASK_SIZE_OF(tsk) TASK_SIZE #endif -/* - * Call the function if the target task is executing on a CPU right now: - */ -extern void task_oncpu_function_call(struct task_struct *p, - void (*func) (void *info), void *info); - - #ifdef CONFIG_MM_OWNER extern void mm_update_next_owner(struct mm_struct *mm); extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); diff --git a/trunk/include/linux/slab.h b/trunk/include/linux/slab.h index 48803064cedf..24c5602bee99 100644 --- a/trunk/include/linux/slab.h +++ b/trunk/include/linux/slab.h @@ -62,8 +62,6 @@ # define SLAB_DEBUG_OBJECTS 0x00000000UL #endif -#define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid 
kmemleak tracing */ - /* The following flags affect the page allocator grouping pages by mobility */ #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ diff --git a/trunk/include/linux/syscalls.h b/trunk/include/linux/syscalls.h index c6c84ad8bd71..30520844b8da 100644 --- a/trunk/include/linux/syscalls.h +++ b/trunk/include/linux/syscalls.h @@ -55,7 +55,6 @@ struct compat_timeval; struct robust_list_head; struct getcpu_cache; struct old_linux_dirent; -struct perf_counter_attr; #include #include @@ -756,8 +755,4 @@ asmlinkage long sys_pipe(int __user *); int kernel_execve(const char *filename, char *const argv[], char *const envp[]); - -asmlinkage long sys_perf_counter_open( - const struct perf_counter_attr __user *attr_uptr, - pid_t pid, int cpu, int group_fd, unsigned long flags); #endif diff --git a/trunk/init/Kconfig b/trunk/init/Kconfig index 9b68fee8d79e..d4e9671347ee 100644 --- a/trunk/init/Kconfig +++ b/trunk/init/Kconfig @@ -933,40 +933,6 @@ config AIO by some high performance threaded applications. Disabling this option saves about 7k. -config HAVE_PERF_COUNTERS - bool - -menu "Performance Counters" - -config PERF_COUNTERS - bool "Kernel Performance Counters" - depends on HAVE_PERF_COUNTERS - select ANON_INODES - help - Enable kernel support for performance counter hardware. - - Performance counters are special hardware registers available - on most modern CPUs. These registers count the number of certain - types of hw events: such as instructions executed, cachemisses - suffered, or branches mis-predicted - without slowing down the - kernel or applications. These registers can also trigger interrupts - when a threshold number of events have passed - and can thus be - used to profile the code that runs on that CPU. - - The Linux Performance Counter subsystem provides an abstraction of - these hardware capabilities, available via a system call. 
It - provides per task and per CPU counters, and it provides event - capabilities on top of those. - - Say Y if unsure. - -config EVENT_PROFILE - bool "Tracepoint profile sources" - depends on PERF_COUNTERS && EVENT_TRACER - default y - -endmenu - config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EMBEDDED diff --git a/trunk/init/main.c b/trunk/init/main.c index 5616661eac01..bb7dc57eee36 100644 --- a/trunk/init/main.c +++ b/trunk/init/main.c @@ -56,7 +56,6 @@ #include #include #include -#include #include #include #include @@ -534,16 +533,6 @@ void __init __weak thread_info_cache_init(void) { } -/* - * Set up kernel memory allocators - */ -static void __init mm_init(void) -{ - mem_init(); - kmem_cache_init(); - vmalloc_init(); -} - asmlinkage void __init start_kernel(void) { char * command_line; @@ -585,23 +574,6 @@ asmlinkage void __init start_kernel(void) setup_nr_cpu_ids(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ - build_all_zonelists(); - page_alloc_init(); - - printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); - parse_early_param(); - parse_args("Booting kernel", static_command_line, __start___param, - __stop___param - __start___param, - &unknown_bootoption); - /* - * These use large bootmem allocations and must precede - * kmem_cache_init() - */ - pidhash_init(); - vfs_caches_init_early(); - sort_main_extable(); - trap_init(); - mm_init(); /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() @@ -613,16 +585,25 @@ asmlinkage void __init start_kernel(void) * fragile until we cpu_idle() for the first time. 
*/ preempt_disable(); + build_all_zonelists(); + page_alloc_init(); + printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + parse_early_param(); + parse_args("Booting kernel", static_command_line, __start___param, + __stop___param - __start___param, + &unknown_bootoption); if (!irqs_disabled()) { printk(KERN_WARNING "start_kernel(): bug: interrupts were " "enabled *very* early, fixing it\n"); local_irq_disable(); } + sort_main_extable(); + trap_init(); rcu_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); - prio_tree_init(); + pidhash_init(); init_timers(); hrtimers_init(); softirq_init(); @@ -664,12 +645,15 @@ asmlinkage void __init start_kernel(void) initrd_start = 0; } #endif + vmalloc_init(); + vfs_caches_init_early(); cpuset_init_early(); page_cgroup_init(); + mem_init(); enable_debug_pagealloc(); cpu_hotplug_init(); + kmem_cache_init(); kmemtrace_init(); - kmemleak_init(); debug_objects_mem_init(); idr_init_cache(); setup_per_cpu_pageset(); @@ -679,6 +663,7 @@ asmlinkage void __init start_kernel(void) calibrate_delay(); pidmap_init(); pgtable_cache_init(); + prio_tree_init(); anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled) diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile index 90b53f6dc226..a35eee3436de 100644 --- a/trunk/kernel/Makefile +++ b/trunk/kernel/Makefile @@ -96,7 +96,6 @@ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o -obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/trunk/kernel/cpuset.c b/trunk/kernel/cpuset.c index d5a7e17474ee..026faccca869 100644 --- a/trunk/kernel/cpuset.c +++ b/trunk/kernel/cpuset.c @@ -1857,7 +1857,7 @@ struct cgroup_subsys cpuset_subsys = { int __init cpuset_init_early(void) { - alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_NOWAIT); + 
alloc_bootmem_cpumask_var(&top_cpuset.cpus_allowed); top_cpuset.mems_generation = cpuset_mems_generation++; return 0; diff --git a/trunk/kernel/exit.c b/trunk/kernel/exit.c index b6c90b5ef509..51d1fe3fb7ad 100644 --- a/trunk/kernel/exit.c +++ b/trunk/kernel/exit.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include @@ -155,9 +154,6 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_COUNTERS - WARN_ON_ONCE(tsk->perf_counter_ctxp); -#endif trace_sched_process_free(tsk); put_task_struct(tsk); } @@ -174,7 +170,6 @@ void release_task(struct task_struct * p) atomic_dec(&__task_cred(p)->user->processes); proc_flush_task(p); - write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); __exit_signal(p); @@ -976,19 +971,16 @@ NORET_TYPE void do_exit(long code) module_put(tsk->binfmt->module); proc_exit_connector(tsk); - - /* - * Flush inherited counters to the parent - before the parent - * gets woken up by child-exit notifications. - */ - perf_counter_exit_task(tsk); - exit_notify(tsk, group_dead); #ifdef CONFIG_NUMA mpol_put(tsk->mempolicy); tsk->mempolicy = NULL; #endif #ifdef CONFIG_FUTEX + /* + * This must happen late, after the PID is not + * hashed anymore: + */ if (unlikely(!list_empty(&tsk->pi_state_list))) exit_pi_state_list(tsk); if (unlikely(current->pi_state_cache)) diff --git a/trunk/kernel/fork.c b/trunk/kernel/fork.c index 4430eb1376f2..bb762b4dd217 100644 --- a/trunk/kernel/fork.c +++ b/trunk/kernel/fork.c @@ -62,7 +62,6 @@ #include #include #include -#include #include #include @@ -1097,10 +1096,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, /* Perform scheduler related setup. Assign this task to a CPU. 
*/ sched_fork(p, clone_flags); - retval = perf_counter_init_task(p); - if (retval) - goto bad_fork_cleanup_policy; - if ((retval = audit_alloc(p))) goto bad_fork_cleanup_policy; /* copy all the process information */ @@ -1295,7 +1290,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, bad_fork_cleanup_audit: audit_free(p); bad_fork_cleanup_policy: - perf_counter_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_cgroup: @@ -1409,12 +1403,6 @@ long do_fork(unsigned long clone_flags, if (clone_flags & CLONE_VFORK) { p->vfork_done = &vfork; init_completion(&vfork); - } else if (!(clone_flags & CLONE_VM)) { - /* - * vfork will do an exec which will call - * set_task_comm() - */ - perf_counter_fork(p); } audit_finish_fork(p); diff --git a/trunk/kernel/irq/handle.c b/trunk/kernel/irq/handle.c index 104578541230..a60018402f42 100644 --- a/trunk/kernel/irq/handle.c +++ b/trunk/kernel/irq/handle.c @@ -150,7 +150,6 @@ int __init early_irq_init(void) { struct irq_desc *desc; int legacy_count; - int node; int i; init_irq_default_affinity(); @@ -161,20 +160,20 @@ int __init early_irq_init(void) desc = irq_desc_legacy; legacy_count = ARRAY_SIZE(irq_desc_legacy); - node = first_online_node; /* allocate irq_desc_ptrs array based on nr_irqs */ - irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT); + irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); /* allocate based on nr_cpu_ids */ - kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * - sizeof(int), GFP_NOWAIT, node); + /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ + kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * + sizeof(int)); for (i = 0; i < legacy_count; i++) { desc[i].irq = i; desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); - alloc_desc_masks(&desc[i], node, true); + alloc_desc_masks(&desc[i], 0, true); init_desc_masks(&desc[i]); irq_desc_ptrs[i] = desc + 
i; } diff --git a/trunk/kernel/module.c b/trunk/kernel/module.c index 35f7de00bf0d..278e9b6762bb 100644 --- a/trunk/kernel/module.c +++ b/trunk/kernel/module.c @@ -53,7 +53,6 @@ #include #include #include -#include #if 0 #define DEBUGP printk @@ -434,7 +433,6 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, unsigned long extra; unsigned int i; void *ptr; - int cpu; if (align > PAGE_SIZE) { printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", @@ -464,11 +462,6 @@ static void *percpu_modalloc(unsigned long size, unsigned long align, if (!split_block(i, size)) return NULL; - /* add the per-cpu scanning areas */ - for_each_possible_cpu(cpu) - kmemleak_alloc(ptr + per_cpu_offset(cpu), size, 0, - GFP_KERNEL); - /* Mark allocated */ pcpu_size[i] = -pcpu_size[i]; return ptr; @@ -483,7 +476,6 @@ static void percpu_modfree(void *freeme) { unsigned int i; void *ptr = __per_cpu_start + block_size(pcpu_size[0]); - int cpu; /* First entry is core kernel percpu data. */ for (i = 1; i < pcpu_num_used; ptr += block_size(pcpu_size[i]), i++) { @@ -495,10 +487,6 @@ static void percpu_modfree(void *freeme) BUG(); free: - /* remove the per-cpu scanning areas */ - for_each_possible_cpu(cpu) - kmemleak_free(freeme + per_cpu_offset(cpu)); - /* Merge with previous? 
*/ if (pcpu_size[i-1] >= 0) { pcpu_size[i-1] += pcpu_size[i]; @@ -1891,36 +1879,6 @@ static void *module_alloc_update_bounds(unsigned long size) return ret; } -#ifdef CONFIG_DEBUG_KMEMLEAK -static void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, char *secstrings) -{ - unsigned int i; - - /* only scan the sections containing data */ - kmemleak_scan_area(mod->module_core, (unsigned long)mod - - (unsigned long)mod->module_core, - sizeof(struct module), GFP_KERNEL); - - for (i = 1; i < hdr->e_shnum; i++) { - if (!(sechdrs[i].sh_flags & SHF_ALLOC)) - continue; - if (strncmp(secstrings + sechdrs[i].sh_name, ".data", 5) != 0 - && strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) != 0) - continue; - - kmemleak_scan_area(mod->module_core, sechdrs[i].sh_addr - - (unsigned long)mod->module_core, - sechdrs[i].sh_size, GFP_KERNEL); - } -} -#else -static inline void kmemleak_load_module(struct module *mod, Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, char *secstrings) -{ -} -#endif - /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ static noinline struct module *load_module(void __user *umod, @@ -2091,12 +2049,6 @@ static noinline struct module *load_module(void __user *umod, /* Do the allocs. */ ptr = module_alloc_update_bounds(mod->core_size); - /* - * The pointer to this block is stored in the module structure - * which is inside the block. Just mark it as not being a - * leak. - */ - kmemleak_not_leak(ptr); if (!ptr) { err = -ENOMEM; goto free_percpu; @@ -2105,13 +2057,6 @@ static noinline struct module *load_module(void __user *umod, mod->module_core = ptr; ptr = module_alloc_update_bounds(mod->init_size); - /* - * The pointer to this block is stored in the module structure - * which is inside the block. This block doesn't need to be - * scanned as it contains data and code that will be freed - * after the module is initialized. 
- */ - kmemleak_ignore(ptr); if (!ptr && mod->init_size) { err = -ENOMEM; goto free_core; @@ -2142,7 +2087,6 @@ static noinline struct module *load_module(void __user *umod, } /* Module has been moved. */ mod = (void *)sechdrs[modindex].sh_addr; - kmemleak_load_module(mod, hdr, sechdrs, secstrings); #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t), diff --git a/trunk/kernel/mutex.c b/trunk/kernel/mutex.c index 947b3ad551f8..e5cc0cd28d54 100644 --- a/trunk/kernel/mutex.c +++ b/trunk/kernel/mutex.c @@ -89,7 +89,7 @@ __mutex_lock_slowpath(atomic_t *lock_count); * * This function is similar to (but not equivalent to) down(). */ -void __sched mutex_lock(struct mutex *lock) +void inline __sched mutex_lock(struct mutex *lock) { might_sleep(); /* diff --git a/trunk/kernel/perf_counter.c b/trunk/kernel/perf_counter.c deleted file mode 100644 index ef5d8a5b2453..000000000000 --- a/trunk/kernel/perf_counter.c +++ /dev/null @@ -1,4260 +0,0 @@ -/* - * Performance counter core code - * - * Copyright (C) 2008 Thomas Gleixner - * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar - * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra - * Copyright © 2009 Paul Mackerras, IBM Corp. 
- * - * For licensing details see kernel-base/COPYING - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * Each CPU has a list of per CPU counters: - */ -DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - -int perf_max_counters __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - -static atomic_t nr_counters __read_mostly; -static atomic_t nr_mmap_counters __read_mostly; -static atomic_t nr_comm_counters __read_mostly; - -/* - * perf counter paranoia level: - * 0 - not paranoid - * 1 - disallow cpu counters to unpriv - * 2 - disallow kernel profiling to unpriv - */ -int sysctl_perf_counter_paranoid __read_mostly; - -static inline bool perf_paranoid_cpu(void) -{ - return sysctl_perf_counter_paranoid > 0; -} - -static inline bool perf_paranoid_kernel(void) -{ - return sysctl_perf_counter_paranoid > 1; -} - -int sysctl_perf_counter_mlock __read_mostly = 512; /* 'free' kb per user */ - -/* - * max perf counter sample rate - */ -int sysctl_perf_counter_sample_rate __read_mostly = 100000; - -static atomic64_t perf_counter_id; - -/* - * Lock for (sysadmin-configurable) counter reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); - -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak const struct pmu *hw_perf_counter_init(struct perf_counter *counter) -{ - return NULL; -} - -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - -void __weak hw_perf_counter_setup(int cpu) { barrier(); } - -int __weak -hw_perf_group_sched_in(struct perf_counter *group_leader, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, int cpu) -{ - return 0; -} - -void __weak perf_counter_print_debug(void) { } - -static DEFINE_PER_CPU(int, disable_count); - -void 
__perf_disable(void) -{ - __get_cpu_var(disable_count)++; -} - -bool __perf_enable(void) -{ - return !--__get_cpu_var(disable_count); -} - -void perf_disable(void) -{ - __perf_disable(); - hw_perf_disable(); -} - -void perf_enable(void) -{ - if (__perf_enable()) - hw_perf_enable(); -} - -static void get_ctx(struct perf_counter_context *ctx) -{ - atomic_inc(&ctx->refcount); -} - -static void free_ctx(struct rcu_head *head) -{ - struct perf_counter_context *ctx; - - ctx = container_of(head, struct perf_counter_context, rcu_head); - kfree(ctx); -} - -static void put_ctx(struct perf_counter_context *ctx) -{ - if (atomic_dec_and_test(&ctx->refcount)) { - if (ctx->parent_ctx) - put_ctx(ctx->parent_ctx); - if (ctx->task) - put_task_struct(ctx->task); - call_rcu(&ctx->rcu_head, free_ctx); - } -} - -/* - * Get the perf_counter_context for a task and lock it. - * This has to cope with with the fact that until it is locked, - * the context could get moved to another task. - */ -static struct perf_counter_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) -{ - struct perf_counter_context *ctx; - - rcu_read_lock(); - retry: - ctx = rcu_dereference(task->perf_counter_ctxp); - if (ctx) { - /* - * If this context is a clone of another, it might - * get swapped for another underneath us by - * perf_counter_task_sched_out, though the - * rcu_read_lock() protects us from any context - * getting freed. Lock the context and check if it - * got swapped before we could get the lock, and retry - * if so. If we locked the right context, then it - * can't get swapped on us any more. - */ - spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_counter_ctxp)) { - spin_unlock_irqrestore(&ctx->lock, *flags); - goto retry; - } - } - rcu_read_unlock(); - return ctx; -} - -/* - * Get the context for a task and increment its pin_count so it - * can't get swapped to another task. 
This also increments its - * reference count so that the context can't get freed. - */ -static struct perf_counter_context *perf_pin_task_context(struct task_struct *task) -{ - struct perf_counter_context *ctx; - unsigned long flags; - - ctx = perf_lock_task_context(task, &flags); - if (ctx) { - ++ctx->pin_count; - get_ctx(ctx); - spin_unlock_irqrestore(&ctx->lock, flags); - } - return ctx; -} - -static void perf_unpin_context(struct perf_counter_context *ctx) -{ - unsigned long flags; - - spin_lock_irqsave(&ctx->lock, flags); - --ctx->pin_count; - spin_unlock_irqrestore(&ctx->lock, flags); - put_ctx(ctx); -} - -/* - * Add a counter from the lists for its context. - * Must be called with ctx->mutex and ctx->lock held. - */ -static void -list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) -{ - struct perf_counter *group_leader = counter->group_leader; - - /* - * Depending on whether it is a standalone or sibling counter, - * add it straight to the context's counter list, or to the group - * leader's sibling list: - */ - if (group_leader == counter) - list_add_tail(&counter->list_entry, &ctx->counter_list); - else { - list_add_tail(&counter->list_entry, &group_leader->sibling_list); - group_leader->nr_siblings++; - } - - list_add_rcu(&counter->event_entry, &ctx->event_list); - ctx->nr_counters++; -} - -/* - * Remove a counter from the lists for its context. - * Must be called with ctx->mutex and ctx->lock held. 
- */ -static void -list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) -{ - struct perf_counter *sibling, *tmp; - - if (list_empty(&counter->list_entry)) - return; - ctx->nr_counters--; - - list_del_init(&counter->list_entry); - list_del_rcu(&counter->event_entry); - - if (counter->group_leader != counter) - counter->group_leader->nr_siblings--; - - /* - * If this was a group counter with sibling counters then - * upgrade the siblings to singleton counters by adding them - * to the context list directly: - */ - list_for_each_entry_safe(sibling, tmp, - &counter->sibling_list, list_entry) { - - list_move_tail(&sibling->list_entry, &ctx->counter_list); - sibling->group_leader = sibling; - } -} - -static void -counter_sched_out(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_stopped = ctx->time; - counter->pmu->disable(counter); - counter->oncpu = -1; - - if (!is_software_counter(counter)) - cpuctx->active_oncpu--; - ctx->nr_active--; - if (counter->attr.exclusive || !cpuctx->active_oncpu) - cpuctx->exclusive = 0; -} - -static void -group_sched_out(struct perf_counter *group_counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - - if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) - return; - - counter_sched_out(group_counter, cpuctx, ctx); - - /* - * Schedule out siblings (if any): - */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) - counter_sched_out(counter, cpuctx, ctx); - - if (group_counter->attr.exclusive) - cpuctx->exclusive = 0; -} - -/* - * Cross CPU call to remove a performance counter - * - * We disable the counter on the hardware level first. After that we - * remove it from the context list. 
- */ -static void __perf_counter_remove_from_context(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - - spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * counters on a global level. - */ - perf_disable(); - - counter_sched_out(counter, cpuctx, ctx); - - list_del_counter(counter, ctx); - - if (!ctx->task) { - /* - * Allow more per task counters with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_counters - ctx->nr_counters, - perf_max_counters - perf_reserved_percpu); - } - - perf_enable(); - spin_unlock(&ctx->lock); -} - - -/* - * Remove the counter from a task's (or a CPU's) list of counters. - * - * Must be called with ctx->mutex held. - * - * CPU counters are removed with a smp call. For task counters we only - * call when the task is on a CPU. - * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. This is OK when called from perf_release since - * that only calls us on the top-level context, which can't be a clone. - * When called from perf_counter_exit_task, it's OK because the - * context has been detached from its task. - */ -static void perf_counter_remove_from_context(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Per cpu counters are removed via an smp call and - * the removal is always sucessful. 
- */ - smp_call_function_single(counter->cpu, - __perf_counter_remove_from_context, - counter, 1); - return; - } - -retry: - task_oncpu_function_call(task, __perf_counter_remove_from_context, - counter); - - spin_lock_irq(&ctx->lock); - /* - * If the context is active we need to retry the smp call. - */ - if (ctx->nr_active && !list_empty(&counter->list_entry)) { - spin_unlock_irq(&ctx->lock); - goto retry; - } - - /* - * The lock prevents that this context is scheduled in so we - * can remove the counter safely, if the call above did not - * succeed. - */ - if (!list_empty(&counter->list_entry)) { - list_del_counter(counter, ctx); - } - spin_unlock_irq(&ctx->lock); -} - -static inline u64 perf_clock(void) -{ - return cpu_clock(smp_processor_id()); -} - -/* - * Update the record of the current time in a context. - */ -static void update_context_time(struct perf_counter_context *ctx) -{ - u64 now = perf_clock(); - - ctx->time += now - ctx->timestamp; - ctx->timestamp = now; -} - -/* - * Update the total_time_enabled and total_time_running fields for a counter. - */ -static void update_counter_times(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - u64 run_end; - - if (counter->state < PERF_COUNTER_STATE_INACTIVE) - return; - - counter->total_time_enabled = ctx->time - counter->tstamp_enabled; - - if (counter->state == PERF_COUNTER_STATE_INACTIVE) - run_end = counter->tstamp_stopped; - else - run_end = ctx->time; - - counter->total_time_running = run_end - counter->tstamp_running; -} - -/* - * Update total_time_enabled and total_time_running for all counters in a group. 
- */ -static void update_group_times(struct perf_counter *leader) -{ - struct perf_counter *counter; - - update_counter_times(leader); - list_for_each_entry(counter, &leader->sibling_list, list_entry) - update_counter_times(counter); -} - -/* - * Cross CPU call to disable a performance counter - */ -static void __perf_counter_disable(void *info) -{ - struct perf_counter *counter = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = counter->ctx; - - /* - * If this is a per-task counter, need to check whether this - * counter's task is the current task on this cpu. - */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - - spin_lock(&ctx->lock); - - /* - * If the counter is on, turn it off. - * If it is in error state, leave it in error state. - */ - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { - update_context_time(ctx); - update_counter_times(counter); - if (counter == counter->group_leader) - group_sched_out(counter, cpuctx, ctx); - else - counter_sched_out(counter, cpuctx, ctx); - counter->state = PERF_COUNTER_STATE_OFF; - } - - spin_unlock(&ctx->lock); -} - -/* - * Disable a counter. - * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. This condition is satisifed when called through - * perf_counter_for_each_child or perf_counter_for_each because they - * hold the top-level counter's child_mutex, so any descendant that - * goes to exit will block in sync_child_counter. - * When called from perf_pending_counter it's OK because counter->ctx - * is the current context on this CPU and preemption is disabled, - * hence we can't get into perf_counter_task_sched_out for this context. 
- */ -static void perf_counter_disable(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Disable the counter on the cpu that it's on - */ - smp_call_function_single(counter->cpu, __perf_counter_disable, - counter, 1); - return; - } - - retry: - task_oncpu_function_call(task, __perf_counter_disable, counter); - - spin_lock_irq(&ctx->lock); - /* - * If the counter is still active, we need to retry the cross-call. - */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - spin_unlock_irq(&ctx->lock); - goto retry; - } - - /* - * Since we have the lock this context can't be scheduled - * in, so we can change the state safely. - */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); - counter->state = PERF_COUNTER_STATE_OFF; - } - - spin_unlock_irq(&ctx->lock); -} - -static int -counter_sched_in(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, - int cpu) -{ - if (counter->state <= PERF_COUNTER_STATE_OFF) - return 0; - - counter->state = PERF_COUNTER_STATE_ACTIVE; - counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ - /* - * The new state must be visible before we turn it on in the hardware: - */ - smp_wmb(); - - if (counter->pmu->enable(counter)) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->oncpu = -1; - return -EAGAIN; - } - - counter->tstamp_running += ctx->time - counter->tstamp_stopped; - - if (!is_software_counter(counter)) - cpuctx->active_oncpu++; - ctx->nr_active++; - - if (counter->attr.exclusive) - cpuctx->exclusive = 1; - - return 0; -} - -static int -group_sched_in(struct perf_counter *group_counter, - struct perf_cpu_context *cpuctx, - struct perf_counter_context *ctx, - int cpu) -{ - struct perf_counter *counter, *partial_group; - int ret; - - if (group_counter->state == PERF_COUNTER_STATE_OFF) - return 0; - - ret = 
hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); - if (ret) - return ret < 0 ? ret : 0; - - if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) - return -EAGAIN; - - /* - * Schedule in siblings as one group (if any): - */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - if (counter_sched_in(counter, cpuctx, ctx, cpu)) { - partial_group = counter; - goto group_error; - } - } - - return 0; - -group_error: - /* - * Groups can be scheduled in as one unit only, so undo any - * partial group before returning: - */ - list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { - if (counter == partial_group) - break; - counter_sched_out(counter, cpuctx, ctx); - } - counter_sched_out(group_counter, cpuctx, ctx); - - return -EAGAIN; -} - -/* - * Return 1 for a group consisting entirely of software counters, - * 0 if the group contains any hardware counters. - */ -static int is_software_only_group(struct perf_counter *leader) -{ - struct perf_counter *counter; - - if (!is_software_counter(leader)) - return 0; - - list_for_each_entry(counter, &leader->sibling_list, list_entry) - if (!is_software_counter(counter)) - return 0; - - return 1; -} - -/* - * Work out whether we can put this counter group on the CPU now. - */ -static int group_can_go_on(struct perf_counter *counter, - struct perf_cpu_context *cpuctx, - int can_add_hw) -{ - /* - * Groups consisting entirely of software counters can always go on. - */ - if (is_software_only_group(counter)) - return 1; - /* - * If an exclusive group is already on, no other hardware - * counters can go on. - */ - if (cpuctx->exclusive) - return 0; - /* - * If this group is exclusive and there are already - * counters on the CPU, it can't go on. - */ - if (counter->attr.exclusive && cpuctx->active_oncpu) - return 0; - /* - * Otherwise, try to add it if all previous groups were able - * to go on. 
- */ - return can_add_hw; -} - -static void add_counter_to_ctx(struct perf_counter *counter, - struct perf_counter_context *ctx) -{ - list_add_counter(counter, ctx); - counter->tstamp_enabled = ctx->time; - counter->tstamp_running = ctx->time; - counter->tstamp_stopped = ctx->time; -} - -/* - * Cross CPU call to install and enable a performance counter - * - * Must be called with ctx->mutex held - */ -static void __perf_install_in_context(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter *counter = info; - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *leader = counter->group_leader; - int cpu = smp_processor_id(); - int err; - - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. - * Or possibly this is the right context but it isn't - * on this cpu because it had no counters. - */ - if (ctx->task && cpuctx->task_ctx != ctx) { - if (cpuctx->task_ctx || ctx->task != current) - return; - cpuctx->task_ctx = ctx; - } - - spin_lock(&ctx->lock); - ctx->is_active = 1; - update_context_time(ctx); - - /* - * Protect the list operation against NMI by disabling the - * counters on a global level. NOP for non NMI based counters. - */ - perf_disable(); - - add_counter_to_ctx(counter, ctx); - - /* - * Don't put the counter on if it is disabled or if - * it is in a group and the group isn't on. - */ - if (counter->state != PERF_COUNTER_STATE_INACTIVE || - (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) - goto unlock; - - /* - * An exclusive counter can't go on if there are already active - * hardware counters, and no hardware counter can go on if there - * is already an exclusive counter on. 
- */ - if (!group_can_go_on(counter, cpuctx, 1)) - err = -EEXIST; - else - err = counter_sched_in(counter, cpuctx, ctx, cpu); - - if (err) { - /* - * This counter couldn't go on. If it is in a group - * then we have to pull the whole group off. - * If the counter group is pinned then put it in error state. - */ - if (leader != counter) - group_sched_out(leader, cpuctx, ctx); - if (leader->attr.pinned) { - update_group_times(leader); - leader->state = PERF_COUNTER_STATE_ERROR; - } - } - - if (!err && !ctx->task && cpuctx->max_pertask) - cpuctx->max_pertask--; - - unlock: - perf_enable(); - - spin_unlock(&ctx->lock); -} - -/* - * Attach a performance counter to a context - * - * First we add the counter to the list with the hardware enable bit - * in counter->hw_config cleared. - * - * If the counter is attached to a task which is on a CPU we use a smp - * call to enable it in the task context. The task might have been - * scheduled away, but we check this in the smp call again. - * - * Must be called with ctx->mutex held. - */ -static void -perf_install_in_context(struct perf_counter_context *ctx, - struct perf_counter *counter, - int cpu) -{ - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Per cpu counters are installed via an smp call and - * the install is always sucessful. - */ - smp_call_function_single(cpu, __perf_install_in_context, - counter, 1); - return; - } - -retry: - task_oncpu_function_call(task, __perf_install_in_context, - counter); - - spin_lock_irq(&ctx->lock); - /* - * we need to retry the smp call. - */ - if (ctx->is_active && list_empty(&counter->list_entry)) { - spin_unlock_irq(&ctx->lock); - goto retry; - } - - /* - * The lock prevents that this context is scheduled in so we - * can add the counter safely, if it the call above did not - * succeed. 
- */ - if (list_empty(&counter->list_entry)) - add_counter_to_ctx(counter, ctx); - spin_unlock_irq(&ctx->lock); -} - -/* - * Cross CPU call to enable a performance counter - */ -static void __perf_counter_enable(void *info) -{ - struct perf_counter *counter = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *leader = counter->group_leader; - int err; - - /* - * If this is a per-task counter, need to check whether this - * counter's task is the current task on this cpu. - */ - if (ctx->task && cpuctx->task_ctx != ctx) { - if (cpuctx->task_ctx || ctx->task != current) - return; - cpuctx->task_ctx = ctx; - } - - spin_lock(&ctx->lock); - ctx->is_active = 1; - update_context_time(ctx); - - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - goto unlock; - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time - counter->total_time_enabled; - - /* - * If the counter is in a group and isn't the group leader, - * then don't put it on unless the group is on. - */ - if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) - goto unlock; - - if (!group_can_go_on(counter, cpuctx, 1)) { - err = -EEXIST; - } else { - perf_disable(); - if (counter == leader) - err = group_sched_in(counter, cpuctx, ctx, - smp_processor_id()); - else - err = counter_sched_in(counter, cpuctx, ctx, - smp_processor_id()); - perf_enable(); - } - - if (err) { - /* - * If this counter can't go on and it's part of a - * group, then the whole group has to come off. - */ - if (leader != counter) - group_sched_out(leader, cpuctx, ctx); - if (leader->attr.pinned) { - update_group_times(leader); - leader->state = PERF_COUNTER_STATE_ERROR; - } - } - - unlock: - spin_unlock(&ctx->lock); -} - -/* - * Enable a counter. 
- * - * If counter->ctx is a cloned context, callers must make sure that - * every task struct that counter->ctx->task could possibly point to - * remains valid. This condition is satisfied when called through - * perf_counter_for_each_child or perf_counter_for_each as described - * for perf_counter_disable. - */ -static void perf_counter_enable(struct perf_counter *counter) -{ - struct perf_counter_context *ctx = counter->ctx; - struct task_struct *task = ctx->task; - - if (!task) { - /* - * Enable the counter on the cpu that it's on - */ - smp_call_function_single(counter->cpu, __perf_counter_enable, - counter, 1); - return; - } - - spin_lock_irq(&ctx->lock); - if (counter->state >= PERF_COUNTER_STATE_INACTIVE) - goto out; - - /* - * If the counter is in error state, clear that first. - * That way, if we see the counter in error state below, we - * know that it has gone back into error state, as distinct - * from the task having been scheduled away before the - * cross-call arrived. - */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - counter->state = PERF_COUNTER_STATE_OFF; - - retry: - spin_unlock_irq(&ctx->lock); - task_oncpu_function_call(task, __perf_counter_enable, counter); - - spin_lock_irq(&ctx->lock); - - /* - * If the context is active and the counter is still off, - * we need to retry the cross-call. - */ - if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) - goto retry; - - /* - * Since we have the lock this context can't be scheduled - * in, so we can change the state safely. 
- */ - if (counter->state == PERF_COUNTER_STATE_OFF) { - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = - ctx->time - counter->total_time_enabled; - } - out: - spin_unlock_irq(&ctx->lock); -} - -static int perf_counter_refresh(struct perf_counter *counter, int refresh) -{ - /* - * not supported on inherited counters - */ - if (counter->attr.inherit) - return -EINVAL; - - atomic_add(refresh, &counter->event_limit); - perf_counter_enable(counter); - - return 0; -} - -void __perf_counter_sched_out(struct perf_counter_context *ctx, - struct perf_cpu_context *cpuctx) -{ - struct perf_counter *counter; - - spin_lock(&ctx->lock); - ctx->is_active = 0; - if (likely(!ctx->nr_counters)) - goto out; - update_context_time(ctx); - - perf_disable(); - if (ctx->nr_active) { - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter != counter->group_leader) - counter_sched_out(counter, cpuctx, ctx); - else - group_sched_out(counter, cpuctx, ctx); - } - } - perf_enable(); - out: - spin_unlock(&ctx->lock); -} - -/* - * Test whether two contexts are equivalent, i.e. whether they - * have both been cloned from the same version of the same context - * and they both have the same number of enabled counters. - * If the number of enabled counters is the same, then the set - * of enabled counters should be the same, because these are both - * inherited contexts, therefore we can't access individual counters - * in them directly with an fd; we can only enable/disable all - * counters via prctl, or enable/disable all counters in a family - * via ioctl, which will have the same effect on both contexts. 
- */ -static int context_equiv(struct perf_counter_context *ctx1, - struct perf_counter_context *ctx2) -{ - return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx - && ctx1->parent_gen == ctx2->parent_gen - && !ctx1->pin_count && !ctx2->pin_count; -} - -/* - * Called from scheduler to remove the counters of the current task, - * with interrupts disabled. - * - * We stop each counter and update the counter value in counter->count. - * - * This does not protect us against NMI, but disable() - * sets the disabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * not restart the counter. - */ -void perf_counter_task_sched_out(struct task_struct *task, - struct task_struct *next, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter_context *next_ctx; - struct perf_counter_context *parent; - struct pt_regs *regs; - int do_switch = 1; - - regs = task_pt_regs(task); - perf_swcounter_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, regs, 0); - - if (likely(!ctx || !cpuctx->task_ctx)) - return; - - update_context_time(ctx); - - rcu_read_lock(); - parent = rcu_dereference(ctx->parent_ctx); - next_ctx = next->perf_counter_ctxp; - if (parent && next_ctx && - rcu_dereference(next_ctx->parent_ctx) == parent) { - /* - * Looks like the two contexts are clones, so we might be - * able to optimize the context switch. We lock both - * contexts and check that they are clones under the - * lock (including re-checking that neither has been - * uncloned in the meantime). It doesn't matter which - * order we take the locks because no other cpu could - * be trying to lock both of these tasks. 
- */ - spin_lock(&ctx->lock); - spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING); - if (context_equiv(ctx, next_ctx)) { - /* - * XXX do we need a memory barrier of sorts - * wrt to rcu_dereference() of perf_counter_ctxp - */ - task->perf_counter_ctxp = next_ctx; - next->perf_counter_ctxp = ctx; - ctx->task = next; - next_ctx->task = task; - do_switch = 0; - } - spin_unlock(&next_ctx->lock); - spin_unlock(&ctx->lock); - } - rcu_read_unlock(); - - if (do_switch) { - __perf_counter_sched_out(ctx, cpuctx); - cpuctx->task_ctx = NULL; - } -} - -/* - * Called with IRQs disabled - */ -static void __perf_counter_task_sched_out(struct perf_counter_context *ctx) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - - if (!cpuctx->task_ctx) - return; - - if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) - return; - - __perf_counter_sched_out(ctx, cpuctx); - cpuctx->task_ctx = NULL; -} - -/* - * Called with IRQs disabled - */ -static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) -{ - __perf_counter_sched_out(&cpuctx->ctx, cpuctx); -} - -static void -__perf_counter_sched_in(struct perf_counter_context *ctx, - struct perf_cpu_context *cpuctx, int cpu) -{ - struct perf_counter *counter; - int can_add_hw = 1; - - spin_lock(&ctx->lock); - ctx->is_active = 1; - if (likely(!ctx->nr_counters)) - goto out; - - ctx->timestamp = perf_clock(); - - perf_disable(); - - /* - * First go through the list and put on any pinned groups - * in order to give them the best chance of going on. 
- */ - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state <= PERF_COUNTER_STATE_OFF || - !counter->attr.pinned) - continue; - if (counter->cpu != -1 && counter->cpu != cpu) - continue; - - if (counter != counter->group_leader) - counter_sched_in(counter, cpuctx, ctx, cpu); - else { - if (group_can_go_on(counter, cpuctx, 1)) - group_sched_in(counter, cpuctx, ctx, cpu); - } - - /* - * If this pinned group hasn't been scheduled, - * put it in error state. - */ - if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_group_times(counter); - counter->state = PERF_COUNTER_STATE_ERROR; - } - } - - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - /* - * Ignore counters in OFF or ERROR state, and - * ignore pinned counters since we did them already. - */ - if (counter->state <= PERF_COUNTER_STATE_OFF || - counter->attr.pinned) - continue; - - /* - * Listen to the 'cpu' scheduling filter constraint - * of counters: - */ - if (counter->cpu != -1 && counter->cpu != cpu) - continue; - - if (counter != counter->group_leader) { - if (counter_sched_in(counter, cpuctx, ctx, cpu)) - can_add_hw = 0; - } else { - if (group_can_go_on(counter, cpuctx, can_add_hw)) { - if (group_sched_in(counter, cpuctx, ctx, cpu)) - can_add_hw = 0; - } - } - } - perf_enable(); - out: - spin_unlock(&ctx->lock); -} - -/* - * Called from scheduler to add the counters of the current task - * with interrupts disabled. - * - * We restore the counter value and then enable it. - * - * This does not protect us against NMI, but enable() - * sets the enabled bit in the control field of counter _before_ - * accessing the counter control register. If a NMI hits, then it will - * keep the counter running. 
- */ -void perf_counter_task_sched_in(struct task_struct *task, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = task->perf_counter_ctxp; - - if (likely(!ctx)) - return; - if (cpuctx->task_ctx == ctx) - return; - __perf_counter_sched_in(ctx, cpuctx, cpu); - cpuctx->task_ctx = ctx; -} - -static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) -{ - struct perf_counter_context *ctx = &cpuctx->ctx; - - __perf_counter_sched_in(ctx, cpuctx, cpu); -} - -#define MAX_INTERRUPTS (~0ULL) - -static void perf_log_throttle(struct perf_counter *counter, int enable); -static void perf_log_period(struct perf_counter *counter, u64 period); - -static void perf_adjust_period(struct perf_counter *counter, u64 events) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 period, sample_period; - s64 delta; - - events *= hwc->sample_period; - period = div64_u64(events, counter->attr.sample_freq); - - delta = (s64)(period - hwc->sample_period); - delta = (delta + 7) / 8; /* low pass filter */ - - sample_period = hwc->sample_period + delta; - - if (!sample_period) - sample_period = 1; - - perf_log_period(counter, sample_period); - - hwc->sample_period = sample_period; -} - -static void perf_ctx_adjust_freq(struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - struct hw_perf_counter *hwc; - u64 interrupts, freq; - - spin_lock(&ctx->lock); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - if (counter->state != PERF_COUNTER_STATE_ACTIVE) - continue; - - hwc = &counter->hw; - - interrupts = hwc->interrupts; - hwc->interrupts = 0; - - /* - * unthrottle counters on the tick - */ - if (interrupts == MAX_INTERRUPTS) { - perf_log_throttle(counter, 1); - counter->pmu->unthrottle(counter); - interrupts = 2*sysctl_perf_counter_sample_rate/HZ; - } - - if (!counter->attr.freq || !counter->attr.sample_freq) - continue; - - /* - * if the specified freq < HZ then we need to skip 
ticks - */ - if (counter->attr.sample_freq < HZ) { - freq = counter->attr.sample_freq; - - hwc->freq_count += freq; - hwc->freq_interrupts += interrupts; - - if (hwc->freq_count < HZ) - continue; - - interrupts = hwc->freq_interrupts; - hwc->freq_interrupts = 0; - hwc->freq_count -= HZ; - } else - freq = HZ; - - perf_adjust_period(counter, freq * interrupts); - - /* - * In order to avoid being stalled by an (accidental) huge - * sample period, force reset the sample period if we didn't - * get any events in this freq period. - */ - if (!interrupts) { - perf_disable(); - counter->pmu->disable(counter); - atomic_set(&hwc->period_left, 0); - counter->pmu->enable(counter); - perf_enable(); - } - } - spin_unlock(&ctx->lock); -} - -/* - * Round-robin a context's counters: - */ -static void rotate_ctx(struct perf_counter_context *ctx) -{ - struct perf_counter *counter; - - if (!ctx->nr_counters) - return; - - spin_lock(&ctx->lock); - /* - * Rotate the first entry last (works just fine for group counters too): - */ - perf_disable(); - list_for_each_entry(counter, &ctx->counter_list, list_entry) { - list_move_tail(&counter->list_entry, &ctx->counter_list); - break; - } - perf_enable(); - - spin_unlock(&ctx->lock); -} - -void perf_counter_task_tick(struct task_struct *curr, int cpu) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - - if (!atomic_read(&nr_counters)) - return; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = curr->perf_counter_ctxp; - - perf_ctx_adjust_freq(&cpuctx->ctx); - if (ctx) - perf_ctx_adjust_freq(ctx); - - perf_counter_cpu_sched_out(cpuctx); - if (ctx) - __perf_counter_task_sched_out(ctx); - - rotate_ctx(&cpuctx->ctx); - if (ctx) - rotate_ctx(ctx); - - perf_counter_cpu_sched_in(cpuctx, cpu); - if (ctx) - perf_counter_task_sched_in(curr, cpu); -} - -/* - * Cross CPU call to read the hardware counter - */ -static void __read(void *info) -{ - struct perf_counter *counter = info; - struct perf_counter_context *ctx = 
counter->ctx; - unsigned long flags; - - local_irq_save(flags); - if (ctx->is_active) - update_context_time(ctx); - counter->pmu->read(counter); - update_counter_times(counter); - local_irq_restore(flags); -} - -static u64 perf_counter_read(struct perf_counter *counter) -{ - /* - * If counter is enabled and currently active on a CPU, update the - * value in the counter structure: - */ - if (counter->state == PERF_COUNTER_STATE_ACTIVE) { - smp_call_function_single(counter->oncpu, - __read, counter, 1); - } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_counter_times(counter); - } - - return atomic64_read(&counter->count); -} - -/* - * Initialize the perf_counter context in a task_struct: - */ -static void -__perf_counter_init_context(struct perf_counter_context *ctx, - struct task_struct *task) -{ - memset(ctx, 0, sizeof(*ctx)); - spin_lock_init(&ctx->lock); - mutex_init(&ctx->mutex); - INIT_LIST_HEAD(&ctx->counter_list); - INIT_LIST_HEAD(&ctx->event_list); - atomic_set(&ctx->refcount, 1); - ctx->task = task; -} - -static struct perf_counter_context *find_get_context(pid_t pid, int cpu) -{ - struct perf_counter_context *parent_ctx; - struct perf_counter_context *ctx; - struct perf_cpu_context *cpuctx; - struct task_struct *task; - unsigned long flags; - int err; - - /* - * If cpu is not a wildcard then this is a percpu counter: - */ - if (cpu != -1) { - /* Must be root to operate on a CPU counter: */ - if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return ERR_PTR(-EACCES); - - if (cpu < 0 || cpu > num_possible_cpus()) - return ERR_PTR(-EINVAL); - - /* - * We could be clever and allow to attach a counter to an - * offline CPU and activate it when the CPU comes up, but - * that's for later. 
- */ - if (!cpu_isset(cpu, cpu_online_map)) - return ERR_PTR(-ENODEV); - - cpuctx = &per_cpu(perf_cpu_context, cpu); - ctx = &cpuctx->ctx; - get_ctx(ctx); - - return ctx; - } - - rcu_read_lock(); - if (!pid) - task = current; - else - task = find_task_by_vpid(pid); - if (task) - get_task_struct(task); - rcu_read_unlock(); - - if (!task) - return ERR_PTR(-ESRCH); - - /* - * Can't attach counters to a dying task. - */ - err = -ESRCH; - if (task->flags & PF_EXITING) - goto errout; - - /* Reuse ptrace permission checks for now. */ - err = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto errout; - - retry: - ctx = perf_lock_task_context(task, &flags); - if (ctx) { - parent_ctx = ctx->parent_ctx; - if (parent_ctx) { - put_ctx(parent_ctx); - ctx->parent_ctx = NULL; /* no longer a clone */ - } - /* - * Get an extra reference before dropping the lock so that - * this context won't get freed if the task exits. - */ - get_ctx(ctx); - spin_unlock_irqrestore(&ctx->lock, flags); - } - - if (!ctx) { - ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - err = -ENOMEM; - if (!ctx) - goto errout; - __perf_counter_init_context(ctx, task); - get_ctx(ctx); - if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) { - /* - * We raced with some other task; use - * the context they set. 
- */ - kfree(ctx); - goto retry; - } - get_task_struct(task); - } - - put_task_struct(task); - return ctx; - - errout: - put_task_struct(task); - return ERR_PTR(err); -} - -static void free_counter_rcu(struct rcu_head *head) -{ - struct perf_counter *counter; - - counter = container_of(head, struct perf_counter, rcu_head); - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); -} - -static void perf_pending_sync(struct perf_counter *counter); - -static void free_counter(struct perf_counter *counter) -{ - perf_pending_sync(counter); - - atomic_dec(&nr_counters); - if (counter->attr.mmap) - atomic_dec(&nr_mmap_counters); - if (counter->attr.comm) - atomic_dec(&nr_comm_counters); - - if (counter->destroy) - counter->destroy(counter); - - put_ctx(counter->ctx); - call_rcu(&counter->rcu_head, free_counter_rcu); -} - -/* - * Called when the last reference to the file is gone. - */ -static int perf_release(struct inode *inode, struct file *file) -{ - struct perf_counter *counter = file->private_data; - struct perf_counter_context *ctx = counter->ctx; - - file->private_data = NULL; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_counter_remove_from_context(counter); - mutex_unlock(&ctx->mutex); - - mutex_lock(&counter->owner->perf_counter_mutex); - list_del_init(&counter->owner_entry); - mutex_unlock(&counter->owner->perf_counter_mutex); - put_task_struct(counter->owner); - - free_counter(counter); - - return 0; -} - -/* - * Read the performance counter - simple non blocking version for now - */ -static ssize_t -perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) -{ - u64 values[3]; - int n; - - /* - * Return end-of-file for a read on a counter that is in - * error state (i.e. because it was pinned but it couldn't be - * scheduled on to the CPU at some point). 
- */ - if (counter->state == PERF_COUNTER_STATE_ERROR) - return 0; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - values[0] = perf_counter_read(counter); - n = 1; - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = counter->total_time_enabled + - atomic64_read(&counter->child_total_time_enabled); - if (counter->attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = counter->total_time_running + - atomic64_read(&counter->child_total_time_running); - if (counter->attr.read_format & PERF_FORMAT_ID) - values[n++] = counter->id; - mutex_unlock(&counter->child_mutex); - - if (count < n * sizeof(u64)) - return -EINVAL; - count = n * sizeof(u64); - - if (copy_to_user(buf, values, count)) - return -EFAULT; - - return count; -} - -static ssize_t -perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) -{ - struct perf_counter *counter = file->private_data; - - return perf_read_hw(counter, buf, count); -} - -static unsigned int perf_poll(struct file *file, poll_table *wait) -{ - struct perf_counter *counter = file->private_data; - struct perf_mmap_data *data; - unsigned int events = POLL_HUP; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (data) - events = atomic_xchg(&data->poll, 0); - rcu_read_unlock(); - - poll_wait(file, &counter->waitq, wait); - - return events; -} - -static void perf_counter_reset(struct perf_counter *counter) -{ - (void)perf_counter_read(counter); - atomic64_set(&counter->count, 0); - perf_counter_update_userpage(counter); -} - -static void perf_counter_for_each_sibling(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter_context *ctx = counter->ctx; - struct perf_counter *sibling; - - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - counter = counter->group_leader; - - func(counter); - list_for_each_entry(sibling, &counter->sibling_list, list_entry) - func(sibling); - 
mutex_unlock(&ctx->mutex); -} - -/* - * Holding the top-level counter's child_mutex means that any - * descendant process that has inherited this counter will block - * in sync_child_counter if it goes to exit, thus satisfying the - * task existence requirements of perf_counter_enable/disable. - */ -static void perf_counter_for_each_child(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter *child; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - func(counter); - list_for_each_entry(child, &counter->child_list, child_list) - func(child); - mutex_unlock(&counter->child_mutex); -} - -static void perf_counter_for_each(struct perf_counter *counter, - void (*func)(struct perf_counter *)) -{ - struct perf_counter *child; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->child_mutex); - perf_counter_for_each_sibling(counter, func); - list_for_each_entry(child, &counter->child_list, child_list) - perf_counter_for_each_sibling(child, func); - mutex_unlock(&counter->child_mutex); -} - -static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) -{ - struct perf_counter_context *ctx = counter->ctx; - unsigned long size; - int ret = 0; - u64 value; - - if (!counter->attr.sample_period) - return -EINVAL; - - size = copy_from_user(&value, arg, sizeof(value)); - if (size != sizeof(value)) - return -EFAULT; - - if (!value) - return -EINVAL; - - spin_lock_irq(&ctx->lock); - if (counter->attr.freq) { - if (value > sysctl_perf_counter_sample_rate) { - ret = -EINVAL; - goto unlock; - } - - counter->attr.sample_freq = value; - } else { - perf_log_period(counter, value); - - counter->attr.sample_period = value; - counter->hw.sample_period = value; - } -unlock: - spin_unlock_irq(&ctx->lock); - - return ret; -} - -static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) -{ - struct perf_counter *counter = file->private_data; - void (*func)(struct perf_counter 
*); - u32 flags = arg; - - switch (cmd) { - case PERF_COUNTER_IOC_ENABLE: - func = perf_counter_enable; - break; - case PERF_COUNTER_IOC_DISABLE: - func = perf_counter_disable; - break; - case PERF_COUNTER_IOC_RESET: - func = perf_counter_reset; - break; - - case PERF_COUNTER_IOC_REFRESH: - return perf_counter_refresh(counter, arg); - - case PERF_COUNTER_IOC_PERIOD: - return perf_counter_period(counter, (u64 __user *)arg); - - default: - return -ENOTTY; - } - - if (flags & PERF_IOC_FLAG_GROUP) - perf_counter_for_each(counter, func); - else - perf_counter_for_each_child(counter, func); - - return 0; -} - -int perf_counter_task_enable(void) -{ - struct perf_counter *counter; - - mutex_lock(¤t->perf_counter_mutex); - list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) - perf_counter_for_each_child(counter, perf_counter_enable); - mutex_unlock(¤t->perf_counter_mutex); - - return 0; -} - -int perf_counter_task_disable(void) -{ - struct perf_counter *counter; - - mutex_lock(¤t->perf_counter_mutex); - list_for_each_entry(counter, ¤t->perf_counter_list, owner_entry) - perf_counter_for_each_child(counter, perf_counter_disable); - mutex_unlock(¤t->perf_counter_mutex); - - return 0; -} - -/* - * Callers need to ensure there can be no nesting of this function, otherwise - * the seqlock logic goes bad. We can not serialize this because the arch - * code calls this from NMI context. - */ -void perf_counter_update_userpage(struct perf_counter *counter) -{ - struct perf_counter_mmap_page *userpg; - struct perf_mmap_data *data; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto unlock; - - userpg = data->user_page; - - /* - * Disable preemption so as to not let the corresponding user-space - * spin too long if we get preempted. 
- */ - preempt_disable(); - ++userpg->lock; - barrier(); - userpg->index = counter->hw.idx; - userpg->offset = atomic64_read(&counter->count); - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - userpg->offset -= atomic64_read(&counter->hw.prev_count); - - barrier(); - ++userpg->lock; - preempt_enable(); -unlock: - rcu_read_unlock(); -} - -static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct perf_counter *counter = vma->vm_file->private_data; - struct perf_mmap_data *data; - int ret = VM_FAULT_SIGBUS; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto unlock; - - if (vmf->pgoff == 0) { - vmf->page = virt_to_page(data->user_page); - } else { - int nr = vmf->pgoff - 1; - - if ((unsigned)nr > data->nr_pages) - goto unlock; - - vmf->page = virt_to_page(data->data_pages[nr]); - } - get_page(vmf->page); - ret = 0; -unlock: - rcu_read_unlock(); - - return ret; -} - -static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) -{ - struct perf_mmap_data *data; - unsigned long size; - int i; - - WARN_ON(atomic_read(&counter->mmap_count)); - - size = sizeof(struct perf_mmap_data); - size += nr_pages * sizeof(void *); - - data = kzalloc(size, GFP_KERNEL); - if (!data) - goto fail; - - data->user_page = (void *)get_zeroed_page(GFP_KERNEL); - if (!data->user_page) - goto fail_user_page; - - for (i = 0; i < nr_pages; i++) { - data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); - if (!data->data_pages[i]) - goto fail_data_pages; - } - - data->nr_pages = nr_pages; - atomic_set(&data->lock, -1); - - rcu_assign_pointer(counter->data, data); - - return 0; - -fail_data_pages: - for (i--; i >= 0; i--) - free_page((unsigned long)data->data_pages[i]); - - free_page((unsigned long)data->user_page); - -fail_user_page: - kfree(data); - -fail: - return -ENOMEM; -} - -static void __perf_mmap_data_free(struct rcu_head *rcu_head) -{ - struct perf_mmap_data *data; - int i; - - data = container_of(rcu_head, 
struct perf_mmap_data, rcu_head); - - free_page((unsigned long)data->user_page); - for (i = 0; i < data->nr_pages; i++) - free_page((unsigned long)data->data_pages[i]); - kfree(data); -} - -static void perf_mmap_data_free(struct perf_counter *counter) -{ - struct perf_mmap_data *data = counter->data; - - WARN_ON(atomic_read(&counter->mmap_count)); - - rcu_assign_pointer(counter->data, NULL); - call_rcu(&data->rcu_head, __perf_mmap_data_free); -} - -static void perf_mmap_open(struct vm_area_struct *vma) -{ - struct perf_counter *counter = vma->vm_file->private_data; - - atomic_inc(&counter->mmap_count); -} - -static void perf_mmap_close(struct vm_area_struct *vma) -{ - struct perf_counter *counter = vma->vm_file->private_data; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - if (atomic_dec_and_mutex_lock(&counter->mmap_count, &counter->mmap_mutex)) { - struct user_struct *user = current_user(); - - atomic_long_sub(counter->data->nr_pages + 1, &user->locked_vm); - vma->vm_mm->locked_vm -= counter->data->nr_locked; - perf_mmap_data_free(counter); - mutex_unlock(&counter->mmap_mutex); - } -} - -static struct vm_operations_struct perf_mmap_vmops = { - .open = perf_mmap_open, - .close = perf_mmap_close, - .fault = perf_mmap_fault, -}; - -static int perf_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct perf_counter *counter = file->private_data; - unsigned long user_locked, user_lock_limit; - struct user_struct *user = current_user(); - unsigned long locked, lock_limit; - unsigned long vma_size; - unsigned long nr_pages; - long user_extra, extra; - int ret = 0; - - if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) - return -EINVAL; - - vma_size = vma->vm_end - vma->vm_start; - nr_pages = (vma_size / PAGE_SIZE) - 1; - - /* - * If we have data pages ensure they're a power-of-two number, so we - * can do bitmasks instead of modulo. 
- */ - if (nr_pages != 0 && !is_power_of_2(nr_pages)) - return -EINVAL; - - if (vma_size != PAGE_SIZE * (1 + nr_pages)) - return -EINVAL; - - if (vma->vm_pgoff != 0) - return -EINVAL; - - WARN_ON_ONCE(counter->ctx->parent_ctx); - mutex_lock(&counter->mmap_mutex); - if (atomic_inc_not_zero(&counter->mmap_count)) { - if (nr_pages != counter->data->nr_pages) - ret = -EINVAL; - goto unlock; - } - - user_extra = nr_pages + 1; - user_lock_limit = sysctl_perf_counter_mlock >> (PAGE_SHIFT - 10); - - /* - * Increase the limit linearly with more CPUs: - */ - user_lock_limit *= num_online_cpus(); - - user_locked = atomic_long_read(&user->locked_vm) + user_extra; - - extra = 0; - if (user_locked > user_lock_limit) - extra = user_locked - user_lock_limit; - - lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; - lock_limit >>= PAGE_SHIFT; - locked = vma->vm_mm->locked_vm + extra; - - if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { - ret = -EPERM; - goto unlock; - } - - WARN_ON(counter->data); - ret = perf_mmap_data_alloc(counter, nr_pages); - if (ret) - goto unlock; - - atomic_set(&counter->mmap_count, 1); - atomic_long_add(user_extra, &user->locked_vm); - vma->vm_mm->locked_vm += extra; - counter->data->nr_locked = extra; -unlock: - mutex_unlock(&counter->mmap_mutex); - - vma->vm_flags &= ~VM_MAYWRITE; - vma->vm_flags |= VM_RESERVED; - vma->vm_ops = &perf_mmap_vmops; - - return ret; -} - -static int perf_fasync(int fd, struct file *filp, int on) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - struct perf_counter *counter = filp->private_data; - int retval; - - mutex_lock(&inode->i_mutex); - retval = fasync_helper(fd, filp, on, &counter->fasync); - mutex_unlock(&inode->i_mutex); - - if (retval < 0) - return retval; - - return 0; -} - -static const struct file_operations perf_fops = { - .release = perf_release, - .read = perf_read, - .poll = perf_poll, - .unlocked_ioctl = perf_ioctl, - .compat_ioctl = perf_ioctl, - .mmap = perf_mmap, - .fasync = 
perf_fasync, -}; - -/* - * Perf counter wakeup - * - * If there's data, ensure we set the poll() state and publish everything - * to user-space before waking everybody up. - */ - -void perf_counter_wakeup(struct perf_counter *counter) -{ - wake_up_all(&counter->waitq); - - if (counter->pending_kill) { - kill_fasync(&counter->fasync, SIGIO, counter->pending_kill); - counter->pending_kill = 0; - } -} - -/* - * Pending wakeups - * - * Handle the case where we need to wakeup up from NMI (or rq->lock) context. - * - * The NMI bit means we cannot possibly take locks. Therefore, maintain a - * single linked list and use cmpxchg() to add entries lockless. - */ - -static void perf_pending_counter(struct perf_pending_entry *entry) -{ - struct perf_counter *counter = container_of(entry, - struct perf_counter, pending); - - if (counter->pending_disable) { - counter->pending_disable = 0; - perf_counter_disable(counter); - } - - if (counter->pending_wakeup) { - counter->pending_wakeup = 0; - perf_counter_wakeup(counter); - } -} - -#define PENDING_TAIL ((struct perf_pending_entry *)-1UL) - -static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = { - PENDING_TAIL, -}; - -static void perf_pending_queue(struct perf_pending_entry *entry, - void (*func)(struct perf_pending_entry *)) -{ - struct perf_pending_entry **head; - - if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL) - return; - - entry->func = func; - - head = &get_cpu_var(perf_pending_head); - - do { - entry->next = *head; - } while (cmpxchg(head, entry->next, entry) != entry->next); - - set_perf_counter_pending(); - - put_cpu_var(perf_pending_head); -} - -static int __perf_pending_run(void) -{ - struct perf_pending_entry *list; - int nr = 0; - - list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL); - while (list != PENDING_TAIL) { - void (*func)(struct perf_pending_entry *); - struct perf_pending_entry *entry = list; - - list = list->next; - - func = entry->func; - entry->next = NULL; - /* - * 
Ensure we observe the unqueue before we issue the wakeup, - * so that we won't be waiting forever. - * -- see perf_not_pending(). - */ - smp_wmb(); - - func(entry); - nr++; - } - - return nr; -} - -static inline int perf_not_pending(struct perf_counter *counter) -{ - /* - * If we flush on whatever cpu we run, there is a chance we don't - * need to wait. - */ - get_cpu(); - __perf_pending_run(); - put_cpu(); - - /* - * Ensure we see the proper queue state before going to sleep - * so that we do not miss the wakeup. -- see perf_pending_handle() - */ - smp_rmb(); - return counter->pending.next == NULL; -} - -static void perf_pending_sync(struct perf_counter *counter) -{ - wait_event(counter->waitq, perf_not_pending(counter)); -} - -void perf_counter_do_pending(void) -{ - __perf_pending_run(); -} - -/* - * Callchain support -- arch specific - */ - -__weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - return NULL; -} - -/* - * Output - */ - -struct perf_output_handle { - struct perf_counter *counter; - struct perf_mmap_data *data; - unsigned long head; - unsigned long offset; - int nmi; - int overflow; - int locked; - unsigned long flags; -}; - -static void perf_output_wakeup(struct perf_output_handle *handle) -{ - atomic_set(&handle->data->poll, POLL_IN); - - if (handle->nmi) { - handle->counter->pending_wakeup = 1; - perf_pending_queue(&handle->counter->pending, - perf_pending_counter); - } else - perf_counter_wakeup(handle->counter); -} - -/* - * Curious locking construct. - * - * We need to ensure a later event doesn't publish a head when a former - * event isn't done writing. However since we need to deal with NMIs we - * cannot fully serialize things. - * - * What we do is serialize between CPUs so we only have to deal with NMI - * nesting on a single CPU. - * - * We only publish the head (and generate a wakeup) when the outer-most - * event completes. 
- */ -static void perf_output_lock(struct perf_output_handle *handle) -{ - struct perf_mmap_data *data = handle->data; - int cpu; - - handle->locked = 0; - - local_irq_save(handle->flags); - cpu = smp_processor_id(); - - if (in_nmi() && atomic_read(&data->lock) == cpu) - return; - - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) - cpu_relax(); - - handle->locked = 1; -} - -static void perf_output_unlock(struct perf_output_handle *handle) -{ - struct perf_mmap_data *data = handle->data; - unsigned long head; - int cpu; - - data->done_head = data->head; - - if (!handle->locked) - goto out; - -again: - /* - * The xchg implies a full barrier that ensures all writes are done - * before we publish the new head, matched by a rmb() in userspace when - * reading this position. - */ - while ((head = atomic_long_xchg(&data->done_head, 0))) - data->user_page->data_head = head; - - /* - * NMI can happen here, which means we can miss a done_head update. - */ - - cpu = atomic_xchg(&data->lock, -1); - WARN_ON_ONCE(cpu != smp_processor_id()); - - /* - * Therefore we have to validate we did not indeed do so. - */ - if (unlikely(atomic_long_read(&data->done_head))) { - /* - * Since we had it locked, we can lock it again. - */ - while (atomic_cmpxchg(&data->lock, -1, cpu) != -1) - cpu_relax(); - - goto again; - } - - if (atomic_xchg(&data->wakeup, 0)) - perf_output_wakeup(handle); -out: - local_irq_restore(handle->flags); -} - -static int perf_output_begin(struct perf_output_handle *handle, - struct perf_counter *counter, unsigned int size, - int nmi, int overflow) -{ - struct perf_mmap_data *data; - unsigned int offset, head; - - /* - * For inherited counters we send all the output towards the parent. 
- */ - if (counter->parent) - counter = counter->parent; - - rcu_read_lock(); - data = rcu_dereference(counter->data); - if (!data) - goto out; - - handle->data = data; - handle->counter = counter; - handle->nmi = nmi; - handle->overflow = overflow; - - if (!data->nr_pages) - goto fail; - - perf_output_lock(handle); - - do { - offset = head = atomic_long_read(&data->head); - head += size; - } while (atomic_long_cmpxchg(&data->head, offset, head) != offset); - - handle->offset = offset; - handle->head = head; - - if ((offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT)) - atomic_set(&data->wakeup, 1); - - return 0; - -fail: - perf_output_wakeup(handle); -out: - rcu_read_unlock(); - - return -ENOSPC; -} - -static void perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len) -{ - unsigned int pages_mask; - unsigned int offset; - unsigned int size; - void **pages; - - offset = handle->offset; - pages_mask = handle->data->nr_pages - 1; - pages = handle->data->data_pages; - - do { - unsigned int page_offset; - int nr; - - nr = (offset >> PAGE_SHIFT) & pages_mask; - page_offset = offset & (PAGE_SIZE - 1); - size = min_t(unsigned int, PAGE_SIZE - page_offset, len); - - memcpy(pages[nr] + page_offset, buf, size); - - len -= size; - buf += size; - offset += size; - } while (len); - - handle->offset = offset; - - /* - * Check we didn't copy past our reservation window, taking the - * possible unsigned int wrap into account. 
- */ - WARN_ON_ONCE(((long)(handle->head - handle->offset)) < 0); -} - -#define perf_output_put(handle, x) \ - perf_output_copy((handle), &(x), sizeof(x)) - -static void perf_output_end(struct perf_output_handle *handle) -{ - struct perf_counter *counter = handle->counter; - struct perf_mmap_data *data = handle->data; - - int wakeup_events = counter->attr.wakeup_events; - - if (handle->overflow && wakeup_events) { - int events = atomic_inc_return(&data->events); - if (events >= wakeup_events) { - atomic_sub(wakeup_events, &data->events); - atomic_set(&data->wakeup, 1); - } - } - - perf_output_unlock(handle); - rcu_read_unlock(); -} - -static u32 perf_counter_pid(struct perf_counter *counter, struct task_struct *p) -{ - /* - * only top level counters have the pid namespace they were created in - */ - if (counter->parent) - counter = counter->parent; - - return task_tgid_nr_ns(p, counter->ns); -} - -static u32 perf_counter_tid(struct perf_counter *counter, struct task_struct *p) -{ - /* - * only top level counters have the pid namespace they were created in - */ - if (counter->parent) - counter = counter->parent; - - return task_pid_nr_ns(p, counter->ns); -} - -static void perf_counter_output(struct perf_counter *counter, int nmi, - struct perf_sample_data *data) -{ - int ret; - u64 sample_type = counter->attr.sample_type; - struct perf_output_handle handle; - struct perf_event_header header; - u64 ip; - struct { - u32 pid, tid; - } tid_entry; - struct { - u64 id; - u64 counter; - } group_entry; - struct perf_callchain_entry *callchain = NULL; - int callchain_size = 0; - u64 time; - struct { - u32 cpu, reserved; - } cpu_entry; - - header.type = 0; - header.size = sizeof(header); - - header.misc = PERF_EVENT_MISC_OVERFLOW; - header.misc |= perf_misc_flags(data->regs); - - if (sample_type & PERF_SAMPLE_IP) { - ip = perf_instruction_pointer(data->regs); - header.type |= PERF_SAMPLE_IP; - header.size += sizeof(ip); - } - - if (sample_type & PERF_SAMPLE_TID) { - /* 
namespace issues */ - tid_entry.pid = perf_counter_pid(counter, current); - tid_entry.tid = perf_counter_tid(counter, current); - - header.type |= PERF_SAMPLE_TID; - header.size += sizeof(tid_entry); - } - - if (sample_type & PERF_SAMPLE_TIME) { - /* - * Maybe do better on x86 and provide cpu_clock_nmi() - */ - time = sched_clock(); - - header.type |= PERF_SAMPLE_TIME; - header.size += sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_ADDR) { - header.type |= PERF_SAMPLE_ADDR; - header.size += sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_ID) { - header.type |= PERF_SAMPLE_ID; - header.size += sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_CPU) { - header.type |= PERF_SAMPLE_CPU; - header.size += sizeof(cpu_entry); - - cpu_entry.cpu = raw_smp_processor_id(); - } - - if (sample_type & PERF_SAMPLE_PERIOD) { - header.type |= PERF_SAMPLE_PERIOD; - header.size += sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_GROUP) { - header.type |= PERF_SAMPLE_GROUP; - header.size += sizeof(u64) + - counter->nr_siblings * sizeof(group_entry); - } - - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - callchain = perf_callchain(data->regs); - - if (callchain) { - callchain_size = (1 + callchain->nr) * sizeof(u64); - - header.type |= PERF_SAMPLE_CALLCHAIN; - header.size += callchain_size; - } - } - - ret = perf_output_begin(&handle, counter, header.size, nmi, 1); - if (ret) - return; - - perf_output_put(&handle, header); - - if (sample_type & PERF_SAMPLE_IP) - perf_output_put(&handle, ip); - - if (sample_type & PERF_SAMPLE_TID) - perf_output_put(&handle, tid_entry); - - if (sample_type & PERF_SAMPLE_TIME) - perf_output_put(&handle, time); - - if (sample_type & PERF_SAMPLE_ADDR) - perf_output_put(&handle, data->addr); - - if (sample_type & PERF_SAMPLE_ID) - perf_output_put(&handle, counter->id); - - if (sample_type & PERF_SAMPLE_CPU) - perf_output_put(&handle, cpu_entry); - - if (sample_type & PERF_SAMPLE_PERIOD) - perf_output_put(&handle, data->period); - - /* - * XXX 
PERF_SAMPLE_GROUP vs inherited counters seems difficult. - */ - if (sample_type & PERF_SAMPLE_GROUP) { - struct perf_counter *leader, *sub; - u64 nr = counter->nr_siblings; - - perf_output_put(&handle, nr); - - leader = counter->group_leader; - list_for_each_entry(sub, &leader->sibling_list, list_entry) { - if (sub != counter) - sub->pmu->read(sub); - - group_entry.id = sub->id; - group_entry.counter = atomic64_read(&sub->count); - - perf_output_put(&handle, group_entry); - } - } - - if (callchain) - perf_output_copy(&handle, callchain, callchain_size); - - perf_output_end(&handle); -} - -/* - * fork tracking - */ - -struct perf_fork_event { - struct task_struct *task; - - struct { - struct perf_event_header header; - - u32 pid; - u32 ppid; - } event; -}; - -static void perf_counter_fork_output(struct perf_counter *counter, - struct perf_fork_event *fork_event) -{ - struct perf_output_handle handle; - int size = fork_event->event.header.size; - struct task_struct *task = fork_event->task; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - fork_event->event.pid = perf_counter_pid(counter, task); - fork_event->event.ppid = perf_counter_pid(counter, task->real_parent); - - perf_output_put(&handle, fork_event->event); - perf_output_end(&handle); -} - -static int perf_counter_fork_match(struct perf_counter *counter) -{ - if (counter->attr.comm || counter->attr.mmap) - return 1; - - return 0; -} - -static void perf_counter_fork_ctx(struct perf_counter_context *ctx, - struct perf_fork_event *fork_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_fork_match(counter)) - perf_counter_fork_output(counter, fork_event); - } - rcu_read_unlock(); -} - -static void perf_counter_fork_event(struct perf_fork_event *fork_event) -{ - struct perf_cpu_context *cpuctx; - 
struct perf_counter_context *ctx; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_fork_ctx(&cpuctx->ctx, fork_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_fork_ctx(ctx, fork_event); - rcu_read_unlock(); -} - -void perf_counter_fork(struct task_struct *task) -{ - struct perf_fork_event fork_event; - - if (!atomic_read(&nr_comm_counters) && - !atomic_read(&nr_mmap_counters)) - return; - - fork_event = (struct perf_fork_event){ - .task = task, - .event = { - .header = { - .type = PERF_EVENT_FORK, - .size = sizeof(fork_event.event), - }, - }, - }; - - perf_counter_fork_event(&fork_event); -} - -/* - * comm tracking - */ - -struct perf_comm_event { - struct task_struct *task; - char *comm; - int comm_size; - - struct { - struct perf_event_header header; - - u32 pid; - u32 tid; - } event; -}; - -static void perf_counter_comm_output(struct perf_counter *counter, - struct perf_comm_event *comm_event) -{ - struct perf_output_handle handle; - int size = comm_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - comm_event->event.pid = perf_counter_pid(counter, comm_event->task); - comm_event->event.tid = perf_counter_tid(counter, comm_event->task); - - perf_output_put(&handle, comm_event->event); - perf_output_copy(&handle, comm_event->comm, - comm_event->comm_size); - perf_output_end(&handle); -} - -static int perf_counter_comm_match(struct perf_counter *counter) -{ - if (counter->attr.comm) - return 1; - - return 0; -} - -static void perf_counter_comm_ctx(struct perf_counter_context *ctx, - struct perf_comm_event *comm_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, 
event_entry) { - if (perf_counter_comm_match(counter)) - perf_counter_comm_output(counter, comm_event); - } - rcu_read_unlock(); -} - -static void perf_counter_comm_event(struct perf_comm_event *comm_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - unsigned int size; - char *comm = comm_event->task->comm; - - size = ALIGN(strlen(comm)+1, sizeof(u64)); - - comm_event->comm = comm; - comm_event->comm_size = size; - - comm_event->event.header.size = sizeof(comm_event->event) + size; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_comm_ctx(&cpuctx->ctx, comm_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_comm_ctx(ctx, comm_event); - rcu_read_unlock(); -} - -void perf_counter_comm(struct task_struct *task) -{ - struct perf_comm_event comm_event; - - if (!atomic_read(&nr_comm_counters)) - return; - - comm_event = (struct perf_comm_event){ - .task = task, - .event = { - .header = { .type = PERF_EVENT_COMM, }, - }, - }; - - perf_counter_comm_event(&comm_event); -} - -/* - * mmap tracking - */ - -struct perf_mmap_event { - struct vm_area_struct *vma; - - const char *file_name; - int file_size; - - struct { - struct perf_event_header header; - - u32 pid; - u32 tid; - u64 start; - u64 len; - u64 pgoff; - } event; -}; - -static void perf_counter_mmap_output(struct perf_counter *counter, - struct perf_mmap_event *mmap_event) -{ - struct perf_output_handle handle; - int size = mmap_event->event.header.size; - int ret = perf_output_begin(&handle, counter, size, 0, 0); - - if (ret) - return; - - mmap_event->event.pid = perf_counter_pid(counter, current); - mmap_event->event.tid = perf_counter_tid(counter, current); - - perf_output_put(&handle, mmap_event->event); - perf_output_copy(&handle, mmap_event->file_name, - mmap_event->file_size); - 
perf_output_end(&handle); -} - -static int perf_counter_mmap_match(struct perf_counter *counter, - struct perf_mmap_event *mmap_event) -{ - if (counter->attr.mmap) - return 1; - - return 0; -} - -static void perf_counter_mmap_ctx(struct perf_counter_context *ctx, - struct perf_mmap_event *mmap_event) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { - if (perf_counter_mmap_match(counter, mmap_event)) - perf_counter_mmap_output(counter, mmap_event); - } - rcu_read_unlock(); -} - -static void perf_counter_mmap_event(struct perf_mmap_event *mmap_event) -{ - struct perf_cpu_context *cpuctx; - struct perf_counter_context *ctx; - struct vm_area_struct *vma = mmap_event->vma; - struct file *file = vma->vm_file; - unsigned int size; - char tmp[16]; - char *buf = NULL; - const char *name; - - if (file) { - buf = kzalloc(PATH_MAX, GFP_KERNEL); - if (!buf) { - name = strncpy(tmp, "//enomem", sizeof(tmp)); - goto got_name; - } - name = d_path(&file->f_path, buf, PATH_MAX); - if (IS_ERR(name)) { - name = strncpy(tmp, "//toolong", sizeof(tmp)); - goto got_name; - } - } else { - name = arch_vma_name(mmap_event->vma); - if (name) - goto got_name; - - if (!vma->vm_mm) { - name = strncpy(tmp, "[vdso]", sizeof(tmp)); - goto got_name; - } - - name = strncpy(tmp, "//anon", sizeof(tmp)); - goto got_name; - } - -got_name: - size = ALIGN(strlen(name)+1, sizeof(u64)); - - mmap_event->file_name = name; - mmap_event->file_size = size; - - mmap_event->event.header.size = sizeof(mmap_event->event) + size; - - cpuctx = &get_cpu_var(perf_cpu_context); - perf_counter_mmap_ctx(&cpuctx->ctx, mmap_event); - put_cpu_var(perf_cpu_context); - - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. 
- */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_counter_mmap_ctx(ctx, mmap_event); - rcu_read_unlock(); - - kfree(buf); -} - -void __perf_counter_mmap(struct vm_area_struct *vma) -{ - struct perf_mmap_event mmap_event; - - if (!atomic_read(&nr_mmap_counters)) - return; - - mmap_event = (struct perf_mmap_event){ - .vma = vma, - .event = { - .header = { .type = PERF_EVENT_MMAP, }, - .start = vma->vm_start, - .len = vma->vm_end - vma->vm_start, - .pgoff = vma->vm_pgoff, - }, - }; - - perf_counter_mmap_event(&mmap_event); -} - -/* - * Log sample_period changes so that analyzing tools can re-normalize the - * event flow. - */ - -struct freq_event { - struct perf_event_header header; - u64 time; - u64 id; - u64 period; -}; - -static void perf_log_period(struct perf_counter *counter, u64 period) -{ - struct perf_output_handle handle; - struct freq_event event; - int ret; - - if (counter->hw.sample_period == period) - return; - - if (counter->attr.sample_type & PERF_SAMPLE_PERIOD) - return; - - event = (struct freq_event) { - .header = { - .type = PERF_EVENT_PERIOD, - .misc = 0, - .size = sizeof(event), - }, - .time = sched_clock(), - .id = counter->id, - .period = period, - }; - - ret = perf_output_begin(&handle, counter, sizeof(event), 1, 0); - if (ret) - return; - - perf_output_put(&handle, event); - perf_output_end(&handle); -} - -/* - * IRQ throttle logging - */ - -static void perf_log_throttle(struct perf_counter *counter, int enable) -{ - struct perf_output_handle handle; - int ret; - - struct { - struct perf_event_header header; - u64 time; - u64 id; - } throttle_event = { - .header = { - .type = PERF_EVENT_THROTTLE + 1, - .misc = 0, - .size = sizeof(throttle_event), - }, - .time = sched_clock(), - .id = counter->id, - }; - - ret = perf_output_begin(&handle, counter, sizeof(throttle_event), 1, 0); - if (ret) - return; - - perf_output_put(&handle, throttle_event); - perf_output_end(&handle); -} - -/* - * Generic counter overflow handling. 
- */ - -int perf_counter_overflow(struct perf_counter *counter, int nmi, - struct perf_sample_data *data) -{ - int events = atomic_read(&counter->event_limit); - int throttle = counter->pmu->unthrottle != NULL; - struct hw_perf_counter *hwc = &counter->hw; - int ret = 0; - - if (!throttle) { - hwc->interrupts++; - } else { - if (hwc->interrupts != MAX_INTERRUPTS) { - hwc->interrupts++; - if (HZ * hwc->interrupts > - (u64)sysctl_perf_counter_sample_rate) { - hwc->interrupts = MAX_INTERRUPTS; - perf_log_throttle(counter, 0); - ret = 1; - } - } else { - /* - * Keep re-disabling counters even though on the previous - * pass we disabled it - just in case we raced with a - * sched-in and the counter got enabled again: - */ - ret = 1; - } - } - - if (counter->attr.freq) { - u64 now = sched_clock(); - s64 delta = now - hwc->freq_stamp; - - hwc->freq_stamp = now; - - if (delta > 0 && delta < TICK_NSEC) - perf_adjust_period(counter, NSEC_PER_SEC / (int)delta); - } - - /* - * XXX event_limit might not quite work as expected on inherited - * counters - */ - - counter->pending_kill = POLL_IN; - if (events && atomic_dec_and_test(&counter->event_limit)) { - ret = 1; - counter->pending_kill = POLL_HUP; - if (nmi) { - counter->pending_disable = 1; - perf_pending_queue(&counter->pending, - perf_pending_counter); - } else - perf_counter_disable(counter); - } - - perf_counter_output(counter, nmi, data); - return ret; -} - -/* - * Generic software counter infrastructure - */ - -static void perf_swcounter_update(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 prev, now; - s64 delta; - -again: - prev = atomic64_read(&hwc->prev_count); - now = atomic64_read(&hwc->count); - if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) - goto again; - - delta = now - prev; - - atomic64_add(delta, &counter->count); - atomic64_sub(delta, &hwc->period_left); -} - -static void perf_swcounter_set_period(struct perf_counter *counter) -{ - struct hw_perf_counter 
*hwc = &counter->hw; - s64 left = atomic64_read(&hwc->period_left); - s64 period = hwc->sample_period; - - if (unlikely(left <= -period)) { - left = period; - atomic64_set(&hwc->period_left, left); - hwc->last_period = period; - } - - if (unlikely(left <= 0)) { - left += period; - atomic64_add(period, &hwc->period_left); - hwc->last_period = period; - } - - atomic64_set(&hwc->prev_count, -left); - atomic64_set(&hwc->count, -left); -} - -static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) -{ - enum hrtimer_restart ret = HRTIMER_RESTART; - struct perf_sample_data data; - struct perf_counter *counter; - u64 period; - - counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); - counter->pmu->read(counter); - - data.addr = 0; - data.regs = get_irq_regs(); - /* - * In case we exclude kernel IPs or are somehow not in interrupt - * context, provide the next best thing, the user IP. - */ - if ((counter->attr.exclude_kernel || !data.regs) && - !counter->attr.exclude_user) - data.regs = task_pt_regs(current); - - if (data.regs) { - if (perf_counter_overflow(counter, 0, &data)) - ret = HRTIMER_NORESTART; - } - - period = max_t(u64, 10000, counter->hw.sample_period); - hrtimer_forward_now(hrtimer, ns_to_ktime(period)); - - return ret; -} - -static void perf_swcounter_overflow(struct perf_counter *counter, - int nmi, struct pt_regs *regs, u64 addr) -{ - struct perf_sample_data data = { - .regs = regs, - .addr = addr, - .period = counter->hw.last_period, - }; - - perf_swcounter_update(counter); - perf_swcounter_set_period(counter); - if (perf_counter_overflow(counter, nmi, &data)) - /* soft-disable the counter */ - ; - -} - -static int perf_swcounter_is_counting(struct perf_counter *counter) -{ - struct perf_counter_context *ctx; - unsigned long flags; - int count; - - if (counter->state == PERF_COUNTER_STATE_ACTIVE) - return 1; - - if (counter->state != PERF_COUNTER_STATE_INACTIVE) - return 0; - - /* - * If the counter is inactive, it could be 
just because - * its task is scheduled out, or because it's in a group - * which could not go on the PMU. We want to count in - * the first case but not the second. If the context is - * currently active then an inactive software counter must - * be the second case. If it's not currently active then - * we need to know whether the counter was active when the - * context was last active, which we can determine by - * comparing counter->tstamp_stopped with ctx->time. - * - * We are within an RCU read-side critical section, - * which protects the existence of *ctx. - */ - ctx = counter->ctx; - spin_lock_irqsave(&ctx->lock, flags); - count = 1; - /* Re-check state now we have the lock */ - if (counter->state < PERF_COUNTER_STATE_INACTIVE || - counter->ctx->is_active || - counter->tstamp_stopped < ctx->time) - count = 0; - spin_unlock_irqrestore(&ctx->lock, flags); - return count; -} - -static int perf_swcounter_match(struct perf_counter *counter, - enum perf_type_id type, - u32 event, struct pt_regs *regs) -{ - if (!perf_swcounter_is_counting(counter)) - return 0; - - if (counter->attr.type != type) - return 0; - if (counter->attr.config != event) - return 0; - - if (regs) { - if (counter->attr.exclude_user && user_mode(regs)) - return 0; - - if (counter->attr.exclude_kernel && !user_mode(regs)) - return 0; - } - - return 1; -} - -static void perf_swcounter_add(struct perf_counter *counter, u64 nr, - int nmi, struct pt_regs *regs, u64 addr) -{ - int neg = atomic64_add_negative(nr, &counter->hw.count); - - if (counter->hw.sample_period && !neg && regs) - perf_swcounter_overflow(counter, nmi, regs, addr); -} - -static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, - enum perf_type_id type, u32 event, - u64 nr, int nmi, struct pt_regs *regs, - u64 addr) -{ - struct perf_counter *counter; - - if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) - return; - - rcu_read_lock(); - list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { 
- if (perf_swcounter_match(counter, type, event, regs)) - perf_swcounter_add(counter, nr, nmi, regs, addr); - } - rcu_read_unlock(); -} - -static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) -{ - if (in_nmi()) - return &cpuctx->recursion[3]; - - if (in_irq()) - return &cpuctx->recursion[2]; - - if (in_softirq()) - return &cpuctx->recursion[1]; - - return &cpuctx->recursion[0]; -} - -static void __perf_swcounter_event(enum perf_type_id type, u32 event, - u64 nr, int nmi, struct pt_regs *regs, - u64 addr) -{ - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int *recursion = perf_swcounter_recursion_context(cpuctx); - struct perf_counter_context *ctx; - - if (*recursion) - goto out; - - (*recursion)++; - barrier(); - - perf_swcounter_ctx_event(&cpuctx->ctx, type, event, - nr, nmi, regs, addr); - rcu_read_lock(); - /* - * doesn't really matter which of the child contexts the - * events ends up in. - */ - ctx = rcu_dereference(current->perf_counter_ctxp); - if (ctx) - perf_swcounter_ctx_event(ctx, type, event, nr, nmi, regs, addr); - rcu_read_unlock(); - - barrier(); - (*recursion)--; - -out: - put_cpu_var(perf_cpu_context); -} - -void -perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr) -{ - __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr); -} - -static void perf_swcounter_read(struct perf_counter *counter) -{ - perf_swcounter_update(counter); -} - -static int perf_swcounter_enable(struct perf_counter *counter) -{ - perf_swcounter_set_period(counter); - return 0; -} - -static void perf_swcounter_disable(struct perf_counter *counter) -{ - perf_swcounter_update(counter); -} - -static const struct pmu perf_ops_generic = { - .enable = perf_swcounter_enable, - .disable = perf_swcounter_disable, - .read = perf_swcounter_read, -}; - -/* - * Software counter: cpu wall time clock - */ - -static void cpu_clock_perf_counter_update(struct perf_counter *counter) -{ - int cpu = 
raw_smp_processor_id(); - s64 prev; - u64 now; - - now = cpu_clock(cpu); - prev = atomic64_read(&counter->hw.prev_count); - atomic64_set(&counter->hw.prev_count, now); - atomic64_add(now - prev, &counter->count); -} - -static int cpu_clock_perf_counter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - int cpu = raw_smp_processor_id(); - - atomic64_set(&hwc->prev_count, cpu_clock(cpu)); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - HRTIMER_MODE_REL, 0); - } - - return 0; -} - -static void cpu_clock_perf_counter_disable(struct perf_counter *counter) -{ - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - cpu_clock_perf_counter_update(counter); -} - -static void cpu_clock_perf_counter_read(struct perf_counter *counter) -{ - cpu_clock_perf_counter_update(counter); -} - -static const struct pmu perf_ops_cpu_clock = { - .enable = cpu_clock_perf_counter_enable, - .disable = cpu_clock_perf_counter_disable, - .read = cpu_clock_perf_counter_read, -}; - -/* - * Software counter: task time clock - */ - -static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) -{ - u64 prev; - s64 delta; - - prev = atomic64_xchg(&counter->hw.prev_count, now); - delta = now - prev; - atomic64_add(delta, &counter->count); -} - -static int task_clock_perf_counter_enable(struct perf_counter *counter) -{ - struct hw_perf_counter *hwc = &counter->hw; - u64 now; - - now = counter->ctx->time; - - atomic64_set(&hwc->prev_count, now); - hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - hwc->hrtimer.function = perf_swcounter_hrtimer; - if (hwc->sample_period) { - u64 period = max_t(u64, 10000, hwc->sample_period); - __hrtimer_start_range_ns(&hwc->hrtimer, - ns_to_ktime(period), 0, - 
HRTIMER_MODE_REL, 0); - } - - return 0; -} - -static void task_clock_perf_counter_disable(struct perf_counter *counter) -{ - if (counter->hw.sample_period) - hrtimer_cancel(&counter->hw.hrtimer); - task_clock_perf_counter_update(counter, counter->ctx->time); - -} - -static void task_clock_perf_counter_read(struct perf_counter *counter) -{ - u64 time; - - if (!in_nmi()) { - update_context_time(counter->ctx); - time = counter->ctx->time; - } else { - u64 now = perf_clock(); - u64 delta = now - counter->ctx->timestamp; - time = counter->ctx->time + delta; - } - - task_clock_perf_counter_update(counter, time); -} - -static const struct pmu perf_ops_task_clock = { - .enable = task_clock_perf_counter_enable, - .disable = task_clock_perf_counter_disable, - .read = task_clock_perf_counter_read, -}; - -/* - * Software counter: cpu migrations - */ -void perf_counter_task_migration(struct task_struct *task, int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx; - - perf_swcounter_ctx_event(&cpuctx->ctx, PERF_TYPE_SOFTWARE, - PERF_COUNT_SW_CPU_MIGRATIONS, - 1, 1, NULL, 0); - - ctx = perf_pin_task_context(task); - if (ctx) { - perf_swcounter_ctx_event(ctx, PERF_TYPE_SOFTWARE, - PERF_COUNT_SW_CPU_MIGRATIONS, - 1, 1, NULL, 0); - perf_unpin_context(ctx); - } -} - -#ifdef CONFIG_EVENT_PROFILE -void perf_tpcounter_event(int event_id) -{ - struct pt_regs *regs = get_irq_regs(); - - if (!regs) - regs = task_pt_regs(current); - - __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0); -} -EXPORT_SYMBOL_GPL(perf_tpcounter_event); - -extern int ftrace_profile_enable(int); -extern void ftrace_profile_disable(int); - -static void tp_perf_counter_destroy(struct perf_counter *counter) -{ - ftrace_profile_disable(perf_event_id(&counter->attr)); -} - -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) -{ - int event_id = perf_event_id(&counter->attr); - int ret; - - ret = 
ftrace_profile_enable(event_id); - if (ret) - return NULL; - - counter->destroy = tp_perf_counter_destroy; - - return &perf_ops_generic; -} -#else -static const struct pmu *tp_perf_counter_init(struct perf_counter *counter) -{ - return NULL; -} -#endif - -static const struct pmu *sw_perf_counter_init(struct perf_counter *counter) -{ - const struct pmu *pmu = NULL; - - /* - * Software counters (currently) can't in general distinguish - * between user, kernel and hypervisor events. - * However, context switches and cpu migrations are considered - * to be kernel events, and page faults are never hypervisor - * events. - */ - switch (counter->attr.config) { - case PERF_COUNT_SW_CPU_CLOCK: - pmu = &perf_ops_cpu_clock; - - break; - case PERF_COUNT_SW_TASK_CLOCK: - /* - * If the user instantiates this as a per-cpu counter, - * use the cpu_clock counter instead. - */ - if (counter->ctx->task) - pmu = &perf_ops_task_clock; - else - pmu = &perf_ops_cpu_clock; - - break; - case PERF_COUNT_SW_PAGE_FAULTS: - case PERF_COUNT_SW_PAGE_FAULTS_MIN: - case PERF_COUNT_SW_PAGE_FAULTS_MAJ: - case PERF_COUNT_SW_CONTEXT_SWITCHES: - case PERF_COUNT_SW_CPU_MIGRATIONS: - pmu = &perf_ops_generic; - break; - } - - return pmu; -} - -/* - * Allocate and initialize a counter structure - */ -static struct perf_counter * -perf_counter_alloc(struct perf_counter_attr *attr, - int cpu, - struct perf_counter_context *ctx, - struct perf_counter *group_leader, - gfp_t gfpflags) -{ - const struct pmu *pmu; - struct perf_counter *counter; - struct hw_perf_counter *hwc; - long err; - - counter = kzalloc(sizeof(*counter), gfpflags); - if (!counter) - return ERR_PTR(-ENOMEM); - - /* - * Single counters are their own group leaders, with an - * empty sibling list: - */ - if (!group_leader) - group_leader = counter; - - mutex_init(&counter->child_mutex); - INIT_LIST_HEAD(&counter->child_list); - - INIT_LIST_HEAD(&counter->list_entry); - INIT_LIST_HEAD(&counter->event_entry); - 
INIT_LIST_HEAD(&counter->sibling_list); - init_waitqueue_head(&counter->waitq); - - mutex_init(&counter->mmap_mutex); - - counter->cpu = cpu; - counter->attr = *attr; - counter->group_leader = group_leader; - counter->pmu = NULL; - counter->ctx = ctx; - counter->oncpu = -1; - - counter->ns = get_pid_ns(current->nsproxy->pid_ns); - counter->id = atomic64_inc_return(&perf_counter_id); - - counter->state = PERF_COUNTER_STATE_INACTIVE; - - if (attr->disabled) - counter->state = PERF_COUNTER_STATE_OFF; - - pmu = NULL; - - hwc = &counter->hw; - hwc->sample_period = attr->sample_period; - if (attr->freq && attr->sample_freq) - hwc->sample_period = 1; - - atomic64_set(&hwc->period_left, hwc->sample_period); - - /* - * we currently do not support PERF_SAMPLE_GROUP on inherited counters - */ - if (attr->inherit && (attr->sample_type & PERF_SAMPLE_GROUP)) - goto done; - - if (attr->type == PERF_TYPE_RAW) { - pmu = hw_perf_counter_init(counter); - goto done; - } - - switch (attr->type) { - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - pmu = hw_perf_counter_init(counter); - break; - - case PERF_TYPE_SOFTWARE: - pmu = sw_perf_counter_init(counter); - break; - - case PERF_TYPE_TRACEPOINT: - pmu = tp_perf_counter_init(counter); - break; - } -done: - err = 0; - if (!pmu) - err = -EINVAL; - else if (IS_ERR(pmu)) - err = PTR_ERR(pmu); - - if (err) { - if (counter->ns) - put_pid_ns(counter->ns); - kfree(counter); - return ERR_PTR(err); - } - - counter->pmu = pmu; - - atomic_inc(&nr_counters); - if (counter->attr.mmap) - atomic_inc(&nr_mmap_counters); - if (counter->attr.comm) - atomic_inc(&nr_comm_counters); - - return counter; -} - -/** - * sys_perf_counter_open - open a performance counter, associate it to a task/cpu - * - * @attr_uptr: event type attributes for monitoring/sampling - * @pid: target pid - * @cpu: target cpu - * @group_fd: group leader counter fd - */ -SYSCALL_DEFINE5(perf_counter_open, - const struct perf_counter_attr __user *, attr_uptr, - pid_t, pid, int, 
cpu, int, group_fd, unsigned long, flags) -{ - struct perf_counter *counter, *group_leader; - struct perf_counter_attr attr; - struct perf_counter_context *ctx; - struct file *counter_file = NULL; - struct file *group_file = NULL; - int fput_needed = 0; - int fput_needed2 = 0; - int ret; - - /* for future expandability... */ - if (flags) - return -EINVAL; - - if (copy_from_user(&attr, attr_uptr, sizeof(attr)) != 0) - return -EFAULT; - - if (!attr.exclude_kernel) { - if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN)) - return -EACCES; - } - - if (attr.freq) { - if (attr.sample_freq > sysctl_perf_counter_sample_rate) - return -EINVAL; - } - - /* - * Get the target context (task or percpu): - */ - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - /* - * Look up the group leader (we will attach this counter to it): - */ - group_leader = NULL; - if (group_fd != -1) { - ret = -EINVAL; - group_file = fget_light(group_fd, &fput_needed); - if (!group_file) - goto err_put_context; - if (group_file->f_op != &perf_fops) - goto err_put_context; - - group_leader = group_file->private_data; - /* - * Do not allow a recursive hierarchy (this new sibling - * becoming part of another group-sibling): - */ - if (group_leader->group_leader != group_leader) - goto err_put_context; - /* - * Do not allow to attach to a group in a different - * task or CPU context: - */ - if (group_leader->ctx != ctx) - goto err_put_context; - /* - * Only a group leader can be exclusive or pinned - */ - if (attr.exclusive || attr.pinned) - goto err_put_context; - } - - counter = perf_counter_alloc(&attr, cpu, ctx, group_leader, - GFP_KERNEL); - ret = PTR_ERR(counter); - if (IS_ERR(counter)) - goto err_put_context; - - ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); - if (ret < 0) - goto err_free_put_context; - - counter_file = fget_light(ret, &fput_needed2); - if (!counter_file) - goto err_free_put_context; - - counter->filp = counter_file; - 
WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_install_in_context(ctx, counter, cpu); - ++ctx->generation; - mutex_unlock(&ctx->mutex); - - counter->owner = current; - get_task_struct(current); - mutex_lock(¤t->perf_counter_mutex); - list_add_tail(&counter->owner_entry, ¤t->perf_counter_list); - mutex_unlock(¤t->perf_counter_mutex); - - fput_light(counter_file, fput_needed2); - -out_fput: - fput_light(group_file, fput_needed); - - return ret; - -err_free_put_context: - kfree(counter); - -err_put_context: - put_ctx(ctx); - - goto out_fput; -} - -/* - * inherit a counter from parent task to child task: - */ -static struct perf_counter * -inherit_counter(struct perf_counter *parent_counter, - struct task_struct *parent, - struct perf_counter_context *parent_ctx, - struct task_struct *child, - struct perf_counter *group_leader, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *child_counter; - - /* - * Instead of creating recursive hierarchies of counters, - * we link inherited counters back to the original parent, - * which has a filp for sure, which we use as the reference - * count: - */ - if (parent_counter->parent) - parent_counter = parent_counter->parent; - - child_counter = perf_counter_alloc(&parent_counter->attr, - parent_counter->cpu, child_ctx, - group_leader, GFP_KERNEL); - if (IS_ERR(child_counter)) - return child_counter; - get_ctx(child_ctx); - - /* - * Make the child state follow the state of the parent counter, - * not its attr.disabled bit. We hold the parent's mutex, - * so we won't race with perf_counter_{en, dis}able_family. 
- */ - if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) - child_counter->state = PERF_COUNTER_STATE_INACTIVE; - else - child_counter->state = PERF_COUNTER_STATE_OFF; - - if (parent_counter->attr.freq) - child_counter->hw.sample_period = parent_counter->hw.sample_period; - - /* - * Link it up in the child's context: - */ - add_counter_to_ctx(child_counter, child_ctx); - - child_counter->parent = parent_counter; - /* - * inherit into child's child as well: - */ - child_counter->attr.inherit = 1; - - /* - * Get a reference to the parent filp - we will fput it - * when the child counter exits. This is safe to do because - * we are in the parent and we know that the filp still - * exists and has a nonzero count: - */ - atomic_long_inc(&parent_counter->filp->f_count); - - /* - * Link this into the parent counter's child list - */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_add_tail(&child_counter->child_list, &parent_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); - - return child_counter; -} - -static int inherit_group(struct perf_counter *parent_counter, - struct task_struct *parent, - struct perf_counter_context *parent_ctx, - struct task_struct *child, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *leader; - struct perf_counter *sub; - struct perf_counter *child_ctr; - - leader = inherit_counter(parent_counter, parent, parent_ctx, - child, NULL, child_ctx); - if (IS_ERR(leader)) - return PTR_ERR(leader); - list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { - child_ctr = inherit_counter(sub, parent, parent_ctx, - child, leader, child_ctx); - if (IS_ERR(child_ctr)) - return PTR_ERR(child_ctr); - } - return 0; -} - -static void sync_child_counter(struct perf_counter *child_counter, - struct perf_counter *parent_counter) -{ - u64 child_val; - - child_val = atomic64_read(&child_counter->count); - - /* - * Add back the child's count to the parent's 
count: - */ - atomic64_add(child_val, &parent_counter->count); - atomic64_add(child_counter->total_time_enabled, - &parent_counter->child_total_time_enabled); - atomic64_add(child_counter->total_time_running, - &parent_counter->child_total_time_running); - - /* - * Remove this counter from the parent's list - */ - WARN_ON_ONCE(parent_counter->ctx->parent_ctx); - mutex_lock(&parent_counter->child_mutex); - list_del_init(&child_counter->child_list); - mutex_unlock(&parent_counter->child_mutex); - - /* - * Release the parent counter, if this was the last - * reference to it. - */ - fput(parent_counter->filp); -} - -static void -__perf_counter_exit_task(struct perf_counter *child_counter, - struct perf_counter_context *child_ctx) -{ - struct perf_counter *parent_counter; - - update_counter_times(child_counter); - perf_counter_remove_from_context(child_counter); - - parent_counter = child_counter->parent; - /* - * It can happen that parent exits first, and has counters - * that are still around due to the child reference. These - * counters need to be zapped - but otherwise linger. - */ - if (parent_counter) { - sync_child_counter(child_counter, parent_counter); - free_counter(child_counter); - } -} - -/* - * When a child task exits, feed back counter values to parent counters. - */ -void perf_counter_exit_task(struct task_struct *child) -{ - struct perf_counter *child_counter, *tmp; - struct perf_counter_context *child_ctx; - unsigned long flags; - - if (likely(!child->perf_counter_ctxp)) - return; - - local_irq_save(flags); - /* - * We can't reschedule here because interrupts are disabled, - * and either child is current or it is a task that can't be - * scheduled, so we are now safe from rescheduling changing - * our context. 
- */ - child_ctx = child->perf_counter_ctxp; - __perf_counter_task_sched_out(child_ctx); - - /* - * Take the context lock here so that if find_get_context is - * reading child->perf_counter_ctxp, we wait until it has - * incremented the context's refcount before we do put_ctx below. - */ - spin_lock(&child_ctx->lock); - child->perf_counter_ctxp = NULL; - if (child_ctx->parent_ctx) { - /* - * This context is a clone; unclone it so it can't get - * swapped to another process while we're removing all - * the counters from it. - */ - put_ctx(child_ctx->parent_ctx); - child_ctx->parent_ctx = NULL; - } - spin_unlock(&child_ctx->lock); - local_irq_restore(flags); - - /* - * We can recurse on the same lock type through: - * - * __perf_counter_exit_task() - * sync_child_counter() - * fput(parent_counter->filp) - * perf_release() - * mutex_lock(&ctx->mutex) - * - * But since its the parent context it won't be the same instance. - */ - mutex_lock_nested(&child_ctx->mutex, SINGLE_DEPTH_NESTING); - -again: - list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, - list_entry) - __perf_counter_exit_task(child_counter, child_ctx); - - /* - * If the last counter was a group counter, it will have appended all - * its siblings to the list, but we obtained 'tmp' before that which - * will still point to the list head terminating the iteration. - */ - if (!list_empty(&child_ctx->counter_list)) - goto again; - - mutex_unlock(&child_ctx->mutex); - - put_ctx(child_ctx); -} - -/* - * free an unexposed, unused context as created by inheritance by - * init_task below, used by fork() in case of fail. 
- */ -void perf_counter_free_task(struct task_struct *task) -{ - struct perf_counter_context *ctx = task->perf_counter_ctxp; - struct perf_counter *counter, *tmp; - - if (!ctx) - return; - - mutex_lock(&ctx->mutex); -again: - list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) { - struct perf_counter *parent = counter->parent; - - if (WARN_ON_ONCE(!parent)) - continue; - - mutex_lock(&parent->child_mutex); - list_del_init(&counter->child_list); - mutex_unlock(&parent->child_mutex); - - fput(parent->filp); - - list_del_counter(counter, ctx); - free_counter(counter); - } - - if (!list_empty(&ctx->counter_list)) - goto again; - - mutex_unlock(&ctx->mutex); - - put_ctx(ctx); -} - -/* - * Initialize the perf_counter context in task_struct - */ -int perf_counter_init_task(struct task_struct *child) -{ - struct perf_counter_context *child_ctx, *parent_ctx; - struct perf_counter_context *cloned_ctx; - struct perf_counter *counter; - struct task_struct *parent = current; - int inherited_all = 1; - int ret = 0; - - child->perf_counter_ctxp = NULL; - - mutex_init(&child->perf_counter_mutex); - INIT_LIST_HEAD(&child->perf_counter_list); - - if (likely(!parent->perf_counter_ctxp)) - return 0; - - /* - * This is executed from the parent task context, so inherit - * counters that have been marked for cloning. - * First allocate and initialize a context for the child. - */ - - child_ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL); - if (!child_ctx) - return -ENOMEM; - - __perf_counter_init_context(child_ctx, child); - child->perf_counter_ctxp = child_ctx; - get_task_struct(child); - - /* - * If the parent's context is a clone, pin it so it won't get - * swapped under us. 
- */ - parent_ctx = perf_pin_task_context(parent); - - /* - * No need to check if parent_ctx != NULL here; since we saw - * it non-NULL earlier, the only reason for it to become NULL - * is if we exit, and since we're currently in the middle of - * a fork we can't be exiting at the same time. - */ - - /* - * Lock the parent list. No need to lock the child - not PID - * hashed yet and not running, so nobody can access it. - */ - mutex_lock(&parent_ctx->mutex); - - /* - * We dont have to disable NMIs - we are only looking at - * the list, not manipulating it: - */ - list_for_each_entry_rcu(counter, &parent_ctx->event_list, event_entry) { - if (counter != counter->group_leader) - continue; - - if (!counter->attr.inherit) { - inherited_all = 0; - continue; - } - - ret = inherit_group(counter, parent, parent_ctx, - child, child_ctx); - if (ret) { - inherited_all = 0; - break; - } - } - - if (inherited_all) { - /* - * Mark the child context as a clone of the parent - * context, or of whatever the parent is a clone of. - * Note that if the parent is a clone, it could get - * uncloned at any point, but that doesn't matter - * because the list of counters and the generation - * count can't have changed since we took the mutex. 
- */ - cloned_ctx = rcu_dereference(parent_ctx->parent_ctx); - if (cloned_ctx) { - child_ctx->parent_ctx = cloned_ctx; - child_ctx->parent_gen = parent_ctx->parent_gen; - } else { - child_ctx->parent_ctx = parent_ctx; - child_ctx->parent_gen = parent_ctx->generation; - } - get_ctx(child_ctx->parent_ctx); - } - - mutex_unlock(&parent_ctx->mutex); - - perf_unpin_context(parent_ctx); - - return ret; -} - -static void __cpuinit perf_counter_init_cpu(int cpu) -{ - struct perf_cpu_context *cpuctx; - - cpuctx = &per_cpu(perf_cpu_context, cpu); - __perf_counter_init_context(&cpuctx->ctx, NULL); - - spin_lock(&perf_resource_lock); - cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; - spin_unlock(&perf_resource_lock); - - hw_perf_counter_setup(cpu); -} - -#ifdef CONFIG_HOTPLUG_CPU -static void __perf_counter_exit_cpu(void *info) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - struct perf_counter_context *ctx = &cpuctx->ctx; - struct perf_counter *counter, *tmp; - - list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) - __perf_counter_remove_from_context(counter); -} -static void perf_counter_exit_cpu(int cpu) -{ - struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); - struct perf_counter_context *ctx = &cpuctx->ctx; - - mutex_lock(&ctx->mutex); - smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); - mutex_unlock(&ctx->mutex); -} -#else -static inline void perf_counter_exit_cpu(int cpu) { } -#endif - -static int __cpuinit -perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) -{ - unsigned int cpu = (long)hcpu; - - switch (action) { - - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - perf_counter_init_cpu(cpu); - break; - - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - perf_counter_exit_cpu(cpu); - break; - - default: - break; - } - - return NOTIFY_OK; -} - -/* - * This has to have a higher priority than migration_notifier in sched.c. 
- */ -static struct notifier_block __cpuinitdata perf_cpu_nb = { - .notifier_call = perf_cpu_notify, - .priority = 20, -}; - -void __init perf_counter_init(void) -{ - perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, - (void *)(long)smp_processor_id()); - register_cpu_notifier(&perf_cpu_nb); -} - -static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) -{ - return sprintf(buf, "%d\n", perf_reserved_percpu); -} - -static ssize_t -perf_set_reserve_percpu(struct sysdev_class *class, - const char *buf, - size_t count) -{ - struct perf_cpu_context *cpuctx; - unsigned long val; - int err, cpu, mpt; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > perf_max_counters) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_reserved_percpu = val; - for_each_online_cpu(cpu) { - cpuctx = &per_cpu(perf_cpu_context, cpu); - spin_lock_irq(&cpuctx->ctx.lock); - mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, - perf_max_counters - perf_reserved_percpu); - cpuctx->max_pertask = mpt; - spin_unlock_irq(&cpuctx->ctx.lock); - } - spin_unlock(&perf_resource_lock); - - return count; -} - -static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) -{ - return sprintf(buf, "%d\n", perf_overcommit); -} - -static ssize_t -perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) -{ - unsigned long val; - int err; - - err = strict_strtoul(buf, 10, &val); - if (err) - return err; - if (val > 1) - return -EINVAL; - - spin_lock(&perf_resource_lock); - perf_overcommit = val; - spin_unlock(&perf_resource_lock); - - return count; -} - -static SYSDEV_CLASS_ATTR( - reserve_percpu, - 0644, - perf_show_reserve_percpu, - perf_set_reserve_percpu - ); - -static SYSDEV_CLASS_ATTR( - overcommit, - 0644, - perf_show_overcommit, - perf_set_overcommit - ); - -static struct attribute *perfclass_attrs[] = { - &attr_reserve_percpu.attr, - &attr_overcommit.attr, - NULL -}; - -static struct attribute_group 
perfclass_attr_group = { - .attrs = perfclass_attrs, - .name = "perf_counters", -}; - -static int __init perf_counter_sysfs_init(void) -{ - return sysfs_create_group(&cpu_sysdev_class.kset.kobj, - &perfclass_attr_group); -} -device_initcall(perf_counter_sysfs_init); diff --git a/trunk/kernel/profile.c b/trunk/kernel/profile.c index 28cf26ad2d24..7724e0409bae 100644 --- a/trunk/kernel/profile.c +++ b/trunk/kernel/profile.c @@ -111,6 +111,12 @@ int __ref profile_init(void) /* only text is profiled */ prof_len = (_etext - _stext) >> prof_shift; buffer_bytes = prof_len*sizeof(atomic_t); + if (!slab_is_available()) { + prof_buffer = alloc_bootmem(buffer_bytes); + alloc_bootmem_cpumask_var(&prof_cpu_mask); + cpumask_copy(prof_cpu_mask, cpu_possible_mask); + return 0; + } if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) return -ENOMEM; diff --git a/trunk/kernel/sched.c b/trunk/kernel/sched.c index f04aa9664504..14c447ae5d53 100644 --- a/trunk/kernel/sched.c +++ b/trunk/kernel/sched.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -69,6 +68,7 @@ #include #include #include +#include #include #include #include @@ -580,7 +580,6 @@ struct rq { struct load_weight load; unsigned long nr_load_updates; u64 nr_switches; - u64 nr_migrations_in; struct cfs_rq cfs; struct rt_rq rt; @@ -693,7 +692,7 @@ static inline int cpu_of(struct rq *rq) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -inline void update_rq_clock(struct rq *rq) +static inline void update_rq_clock(struct rq *rq) { rq->clock = sched_clock_cpu(cpu_of(rq)); } @@ -1970,16 +1969,12 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu) p->se.sleep_start -= clock_offset; if (p->se.block_start) p->se.block_start -= clock_offset; -#endif if (old_cpu != new_cpu) { - p->se.nr_migrations++; - new_rq->nr_migrations_in++; -#ifdef CONFIG_SCHEDSTATS + schedstat_inc(p, se.nr_migrations); if (task_hot(p, old_rq->clock, NULL)) schedstat_inc(p, 
se.nr_forced2_migrations); -#endif - perf_counter_task_migration(p, new_cpu); } +#endif p->se.vruntime -= old_cfsrq->min_vruntime - new_cfsrq->min_vruntime; @@ -2374,27 +2369,6 @@ static int sched_balance_self(int cpu, int flag) #endif /* CONFIG_SMP */ -/** - * task_oncpu_function_call - call a function on the cpu on which a task runs - * @p: the task to evaluate - * @func: the function to be called - * @info: the function call argument - * - * Calls the function @func when the task is currently running. This might - * be on the current CPU, which just calls the function directly - */ -void task_oncpu_function_call(struct task_struct *p, - void (*func) (void *info), void *info) -{ - int cpu; - - preempt_disable(); - cpu = task_cpu(p); - if (task_curr(p)) - smp_call_function_single(cpu, func, info, 1); - preempt_enable(); -} - /*** * try_to_wake_up - wake up a thread * @p: the to-be-woken-up thread @@ -2562,7 +2536,6 @@ static void __sched_fork(struct task_struct *p) p->se.exec_start = 0; p->se.sum_exec_runtime = 0; p->se.prev_sum_exec_runtime = 0; - p->se.nr_migrations = 0; p->se.last_wakeup = 0; p->se.avg_overlap = 0; p->se.start_runtime = 0; @@ -2793,7 +2766,6 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev) */ prev_state = prev->state; finish_arch_switch(prev); - perf_counter_task_sched_in(current, cpu_of(rq)); finish_lock_switch(rq, prev); #ifdef CONFIG_SMP if (post_schedule) @@ -3008,15 +2980,6 @@ static void calc_load_account_active(struct rq *this_rq) } } -/* - * Externally visible per-cpu scheduler statistics: - * cpu_nr_migrations(cpu) - number of migrations into that cpu - */ -u64 cpu_nr_migrations(int cpu) -{ - return cpu_rq(cpu)->nr_migrations_in; -} - /* * Update rq->cpu_load[] statistics. This function is usually called every * scheduler tick (TICK_NSEC). 
@@ -5115,8 +5078,6 @@ void scheduler_tick(void) curr->sched_class->task_tick(rq, curr, 0); spin_unlock(&rq->lock); - perf_counter_task_tick(curr, cpu); - #ifdef CONFIG_SMP rq->idle_at_tick = idle_cpu(cpu); trigger_load_balance(rq, cpu); @@ -5332,7 +5293,6 @@ asmlinkage void __sched schedule(void) if (likely(prev != next)) { sched_info_switch(prev, next); - perf_counter_task_sched_out(prev, next, cpu); rq->nr_switches++; rq->curr = next; @@ -7576,10 +7536,8 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu) return NOTIFY_OK; } -/* - * Register at high priority so that task migration (migrate_all_tasks) - * happens before everything else. This has to be lower priority than - * the notifier in the perf_counter subsystem, though. +/* Register at highest priority so that task migration (migrate_all_tasks) + * happens before everything else. */ static struct notifier_block __cpuinitdata migration_notifier = { .notifier_call = migration_call, @@ -7824,21 +7782,24 @@ static void rq_attach_root(struct rq *rq, struct root_domain *rd) static int __init_refok init_rootdomain(struct root_domain *rd, bool bootmem) { - gfp_t gfp = GFP_KERNEL; - memset(rd, 0, sizeof(*rd)); - if (bootmem) - gfp = GFP_NOWAIT; + if (bootmem) { + alloc_bootmem_cpumask_var(&def_root_domain.span); + alloc_bootmem_cpumask_var(&def_root_domain.online); + alloc_bootmem_cpumask_var(&def_root_domain.rto_mask); + cpupri_init(&rd->cpupri, true); + return 0; + } - if (!alloc_cpumask_var(&rd->span, gfp)) + if (!alloc_cpumask_var(&rd->span, GFP_KERNEL)) goto out; - if (!alloc_cpumask_var(&rd->online, gfp)) + if (!alloc_cpumask_var(&rd->online, GFP_KERNEL)) goto free_span; - if (!alloc_cpumask_var(&rd->rto_mask, gfp)) + if (!alloc_cpumask_var(&rd->rto_mask, GFP_KERNEL)) goto free_online; - if (cpupri_init(&rd->cpupri, bootmem) != 0) + if (cpupri_init(&rd->cpupri, false) != 0) goto free_rto_mask; return 0; @@ -9162,7 +9123,7 @@ void __init sched_init(void) * we use alloc_bootmem(). 
*/ if (alloc_size) { - ptr = (unsigned long)kzalloc(alloc_size, GFP_NOWAIT); + ptr = (unsigned long)alloc_bootmem(alloc_size); #ifdef CONFIG_FAIR_GROUP_SCHED init_task_group.se = (struct sched_entity **)ptr; @@ -9257,7 +9218,7 @@ void __init sched_init(void) * 1024) and two child groups A0 and A1 (of weight 1024 each), * then A0's share of the cpu resource is: * - * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% + * A0's bandwidth = 1024 / (10*1024 + 1024 + 1024) = 8.33% * * We achieve this by letting init_task_group's tasks sit * directly in rq->cfs (i.e init_task_group->se[] = NULL). @@ -9353,17 +9314,15 @@ void __init sched_init(void) current->sched_class = &fair_sched_class; /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */ - alloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT); + alloc_bootmem_cpumask_var(&nohz_cpu_mask); #ifdef CONFIG_SMP #ifdef CONFIG_NO_HZ - alloc_cpumask_var(&nohz.cpu_mask, GFP_NOWAIT); - alloc_cpumask_var(&nohz.ilb_grp_nohz_mask, GFP_NOWAIT); + alloc_bootmem_cpumask_var(&nohz.cpu_mask); + alloc_bootmem_cpumask_var(&nohz.ilb_grp_nohz_mask); #endif - alloc_cpumask_var(&cpu_isolated_map, GFP_NOWAIT); + alloc_bootmem_cpumask_var(&cpu_isolated_map); #endif /* SMP */ - perf_counter_init(); - scheduler_running = 1; } diff --git a/trunk/kernel/sched_cpupri.c b/trunk/kernel/sched_cpupri.c index 7deffc9f0e5f..344712a5e3ed 100644 --- a/trunk/kernel/sched_cpupri.c +++ b/trunk/kernel/sched_cpupri.c @@ -154,12 +154,8 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) */ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) { - gfp_t gfp = GFP_KERNEL; int i; - if (bootmem) - gfp = GFP_NOWAIT; - memset(cp, 0, sizeof(*cp)); for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { @@ -167,7 +163,9 @@ int __init_refok cpupri_init(struct cpupri *cp, bool bootmem) spin_lock_init(&vec->lock); vec->count = 0; - if (!zalloc_cpumask_var(&vec->mask, gfp)) + if (bootmem) + alloc_bootmem_cpumask_var(&vec->mask); + else if 
(!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) goto cleanup; } diff --git a/trunk/kernel/slow-work.c b/trunk/kernel/slow-work.c index 521ed2004d63..b28d19135f43 100644 --- a/trunk/kernel/slow-work.c +++ b/trunk/kernel/slow-work.c @@ -372,8 +372,8 @@ static int slow_work_thread(void *_data) vsmax *= atomic_read(&slow_work_thread_count); vsmax /= 100; - prepare_to_wait_exclusive(&slow_work_thread_wq, &wait, - TASK_INTERRUPTIBLE); + prepare_to_wait(&slow_work_thread_wq, &wait, + TASK_INTERRUPTIBLE); if (!freezing(current) && !slow_work_threads_should_exit && !slow_work_available(vsmax) && diff --git a/trunk/kernel/sys.c b/trunk/kernel/sys.c index 438d99a38c87..e7998cf31498 100644 --- a/trunk/kernel/sys.c +++ b/trunk/kernel/sys.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -1794,12 +1793,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; - case PR_TASK_PERF_COUNTERS_DISABLE: - error = perf_counter_task_disable(); - break; - case PR_TASK_PERF_COUNTERS_ENABLE: - error = perf_counter_task_enable(); - break; case PR_GET_TIMERSLACK: error = current->timer_slack_ns; break; diff --git a/trunk/kernel/sys_ni.c b/trunk/kernel/sys_ni.c index 68320f6b07b5..27dad2967387 100644 --- a/trunk/kernel/sys_ni.c +++ b/trunk/kernel/sys_ni.c @@ -175,6 +175,3 @@ cond_syscall(compat_sys_timerfd_settime); cond_syscall(compat_sys_timerfd_gettime); cond_syscall(sys_eventfd); cond_syscall(sys_eventfd2); - -/* performance counters: */ -cond_syscall(sys_perf_counter_open); diff --git a/trunk/kernel/sysctl.c b/trunk/kernel/sysctl.c index ce664f98e3fb..944ba03cae19 100644 --- a/trunk/kernel/sysctl.c +++ b/trunk/kernel/sysctl.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include @@ -933,32 +932,6 @@ static struct ctl_table kern_table[] = { .child = slow_work_sysctls, }, #endif -#ifdef CONFIG_PERF_COUNTERS - { - .ctl_name = CTL_UNNUMBERED, - .procname = 
"perf_counter_paranoid", - .data = &sysctl_perf_counter_paranoid, - .maxlen = sizeof(sysctl_perf_counter_paranoid), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_mlock_kb", - .data = &sysctl_perf_counter_mlock, - .maxlen = sizeof(sysctl_perf_counter_mlock), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = CTL_UNNUMBERED, - .procname = "perf_counter_max_sample_rate", - .data = &sysctl_perf_counter_sample_rate, - .maxlen = sizeof(sysctl_perf_counter_sample_rate), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif /* * NOTE: do not add new entries to this table unless you have read * Documentation/sysctl/ctl_unnumbered.txt diff --git a/trunk/kernel/timer.c b/trunk/kernel/timer.c index c01e568935ea..a26ed294f938 100644 --- a/trunk/kernel/timer.c +++ b/trunk/kernel/timer.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include @@ -1130,8 +1129,6 @@ static void run_timer_softirq(struct softirq_action *h) { struct tvec_base *base = __get_cpu_var(tvec_bases); - perf_counter_do_pending(); - hrtimer_run_pending(); if (time_after_eq(jiffies, base->timer_jiffies)) diff --git a/trunk/lib/Kconfig.debug b/trunk/lib/Kconfig.debug index 116a35051be6..6cdcf38f2da9 100644 --- a/trunk/lib/Kconfig.debug +++ b/trunk/lib/Kconfig.debug @@ -336,38 +336,6 @@ config SLUB_STATS out which slabs are relevant to a particular load. Try running: slabinfo -DA -config DEBUG_KMEMLEAK - bool "Kernel memory leak detector" - depends on DEBUG_KERNEL && EXPERIMENTAL && (X86 || ARM) && \ - !MEMORY_HOTPLUG - select DEBUG_SLAB if SLAB - select SLUB_DEBUG if SLUB - select DEBUG_FS if SYSFS - select STACKTRACE if STACKTRACE_SUPPORT - select KALLSYMS - help - Say Y here if you want to enable the memory leak - detector. 
The memory allocation/freeing is traced in a way - similar to the Boehm's conservative garbage collector, the - difference being that the orphan objects are not freed but - only shown in /sys/kernel/debug/kmemleak. Enabling this - feature will introduce an overhead to memory - allocations. See Documentation/kmemleak.txt for more - details. - - In order to access the kmemleak file, debugfs needs to be - mounted (usually at /sys/kernel/debug). - -config DEBUG_KMEMLEAK_TEST - tristate "Simple test for the kernel memory leak detector" - depends on DEBUG_KMEMLEAK - help - Say Y or M here to build a test for the kernel memory leak - detector. This option enables a module that explicitly leaks - memory. - - If unsure, say N. - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPT && (TRACE_IRQFLAGS_SUPPORT || PPC64) diff --git a/trunk/lib/cpumask.c b/trunk/lib/cpumask.c index 7bb4142a502f..eb23aaa0c7b8 100644 --- a/trunk/lib/cpumask.c +++ b/trunk/lib/cpumask.c @@ -92,8 +92,15 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) */ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { - *mask = kmalloc_node(cpumask_size(), flags, node); - + if (likely(slab_is_available())) + *mask = kmalloc_node(cpumask_size(), flags, node); + else { +#ifdef CONFIG_DEBUG_PER_CPU_MAPS + printk(KERN_ERR + "=> alloc_cpumask_var: kmalloc not available!\n"); +#endif + *mask = NULL; + } #ifdef CONFIG_DEBUG_PER_CPU_MAPS if (!*mask) { printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); diff --git a/trunk/mm/Makefile b/trunk/mm/Makefile index e89acb090b4d..ec73c68b6015 100644 --- a/trunk/mm/Makefile +++ b/trunk/mm/Makefile @@ -38,5 +38,3 @@ obj-$(CONFIG_SMP) += allocpercpu.o endif obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o -obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o -obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o diff --git a/trunk/mm/bootmem.c b/trunk/mm/bootmem.c 
index 282df0a09e6f..daf92713f7de 100644 --- a/trunk/mm/bootmem.c +++ b/trunk/mm/bootmem.c @@ -532,9 +532,6 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc(size, GFP_NOWAIT); - #ifdef CONFIG_HAVE_ARCH_BOOTMEM bootmem_data_t *p_bdata; @@ -665,9 +662,6 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); } @@ -699,9 +693,6 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, { void *ptr; - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; @@ -754,9 +745,6 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { - if (WARN_ON_ONCE(slab_is_available())) - return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); - return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } diff --git a/trunk/mm/kmemleak-test.c b/trunk/mm/kmemleak-test.c deleted file mode 100644 index d5292fc6f523..000000000000 --- a/trunk/mm/kmemleak-test.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * mm/kmemleak-test.c - * - * Copyright (C) 2008 ARM Limited - * Written by Catalin Marinas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -struct test_node { - long header[25]; - struct list_head list; - long footer[25]; -}; - -static LIST_HEAD(test_list); -static DEFINE_PER_CPU(void *, test_pointer); - -/* - * Some very simple testing. This function needs to be extended for - * proper testing. - */ -static int __init kmemleak_test_init(void) -{ - struct test_node *elem; - int i; - - printk(KERN_INFO "Kmemleak testing\n"); - - /* make some orphan objects */ - pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(32) = %p\n", kmalloc(32, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(1024) = %p\n", kmalloc(1024, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(2048) = %p\n", kmalloc(2048, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); - pr_info("kmemleak: kmalloc(4096) = %p\n", kmalloc(4096, GFP_KERNEL)); -#ifndef CONFIG_MODULES - pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n", - kmem_cache_alloc(files_cachep, GFP_KERNEL)); - pr_info("kmemleak: kmem_cache_alloc(files_cachep) = %p\n", - kmem_cache_alloc(files_cachep, GFP_KERNEL)); -#endif - pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); - pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); - pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); - pr_info("kmemleak: 
vmalloc(64) = %p\n", vmalloc(64)); - pr_info("kmemleak: vmalloc(64) = %p\n", vmalloc(64)); - - /* - * Add elements to a list. They should only appear as orphan - * after the module is removed. - */ - for (i = 0; i < 10; i++) { - elem = kmalloc(sizeof(*elem), GFP_KERNEL); - pr_info("kmemleak: kmalloc(sizeof(*elem)) = %p\n", elem); - if (!elem) - return -ENOMEM; - memset(elem, 0, sizeof(*elem)); - INIT_LIST_HEAD(&elem->list); - - list_add_tail(&elem->list, &test_list); - } - - for_each_possible_cpu(i) { - per_cpu(test_pointer, i) = kmalloc(129, GFP_KERNEL); - pr_info("kmemleak: kmalloc(129) = %p\n", - per_cpu(test_pointer, i)); - } - - return 0; -} -module_init(kmemleak_test_init); - -static void __exit kmemleak_test_exit(void) -{ - struct test_node *elem, *tmp; - - /* - * Remove the list elements without actually freeing the - * memory. - */ - list_for_each_entry_safe(elem, tmp, &test_list, list) - list_del(&elem->list); -} -module_exit(kmemleak_test_exit); - -MODULE_LICENSE("GPL"); diff --git a/trunk/mm/kmemleak.c b/trunk/mm/kmemleak.c deleted file mode 100644 index 58ec86c9e58a..000000000000 --- a/trunk/mm/kmemleak.c +++ /dev/null @@ -1,1498 +0,0 @@ -/* - * mm/kmemleak.c - * - * Copyright (C) 2008 ARM Limited - * Written by Catalin Marinas - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * - * For more information on the algorithm and kmemleak usage, please see - * Documentation/kmemleak.txt. - * - * Notes on locking - * ---------------- - * - * The following locks and mutexes are used by kmemleak: - * - * - kmemleak_lock (rwlock): protects the object_list modifications and - * accesses to the object_tree_root. The object_list is the main list - * holding the metadata (struct kmemleak_object) for the allocated memory - * blocks. The object_tree_root is a priority search tree used to look-up - * metadata based on a pointer to the corresponding memory block. The - * kmemleak_object structures are added to the object_list and - * object_tree_root in the create_object() function called from the - * kmemleak_alloc() callback and removed in delete_object() called from the - * kmemleak_free() callback - * - kmemleak_object.lock (spinlock): protects a kmemleak_object. Accesses to - * the metadata (e.g. count) are protected by this lock. Note that some - * members of this structure may be protected by other means (atomic or - * kmemleak_lock). This lock is also held when scanning the corresponding - * memory block to avoid the kernel freeing it via the kmemleak_free() - * callback. This is less heavyweight than holding a global lock like - * kmemleak_lock during scanning - * - scan_mutex (mutex): ensures that only one thread may scan the memory for - * unreferenced objects at a time. The gray_list contains the objects which - * are already referenced or marked as false positives and need to be - * scanned. This list is only modified during a scanning episode when the - * scan_mutex is held. At the end of a scan, the gray_list is always empty. 
- * Note that the kmemleak_object.use_count is incremented when an object is - * added to the gray_list and therefore cannot be freed - * - kmemleak_mutex (mutex): prevents multiple users of the "kmemleak" debugfs - * file together with modifications to the memory scanning parameters - * including the scan_thread pointer - * - * The kmemleak_object structures have a use_count incremented or decremented - * using the get_object()/put_object() functions. When the use_count becomes - * 0, this count can no longer be incremented and put_object() schedules the - * kmemleak_object freeing via an RCU callback. All calls to the get_object() - * function must be protected by rcu_read_lock() to avoid accessing a freed - * structure. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include - -/* - * Kmemleak configuration and common defines. - */ -#define MAX_TRACE 16 /* stack trace length */ -#define REPORTS_NR 50 /* maximum number of reported leaks */ -#define MSECS_MIN_AGE 5000 /* minimum object age for reporting */ -#define MSECS_SCAN_YIELD 10 /* CPU yielding period */ -#define SECS_FIRST_SCAN 60 /* delay before the first scan */ -#define SECS_SCAN_WAIT 600 /* subsequent auto scanning delay */ - -#define BYTES_PER_POINTER sizeof(void *) - -/* scanning area inside a memory block */ -struct kmemleak_scan_area { - struct hlist_node node; - unsigned long offset; - size_t length; -}; - -/* - * Structure holding the metadata for each allocated memory block. - * Modifications to such objects should be made while holding the - * object->lock. 
Insertions or deletions from object_list, gray_list or - * tree_node are already protected by the corresponding locks or mutex (see - * the notes on locking above). These objects are reference-counted - * (use_count) and freed using the RCU mechanism. - */ -struct kmemleak_object { - spinlock_t lock; - unsigned long flags; /* object status flags */ - struct list_head object_list; - struct list_head gray_list; - struct prio_tree_node tree_node; - struct rcu_head rcu; /* object_list lockless traversal */ - /* object usage count; object freed when use_count == 0 */ - atomic_t use_count; - unsigned long pointer; - size_t size; - /* minimum number of a pointers found before it is considered leak */ - int min_count; - /* the total number of pointers found pointing to this object */ - int count; - /* memory ranges to be scanned inside an object (empty for all) */ - struct hlist_head area_list; - unsigned long trace[MAX_TRACE]; - unsigned int trace_len; - unsigned long jiffies; /* creation timestamp */ - pid_t pid; /* pid of the current task */ - char comm[TASK_COMM_LEN]; /* executable name */ -}; - -/* flag representing the memory block allocation status */ -#define OBJECT_ALLOCATED (1 << 0) -/* flag set after the first reporting of an unreference object */ -#define OBJECT_REPORTED (1 << 1) -/* flag set to not scan the object */ -#define OBJECT_NO_SCAN (1 << 2) - -/* the list of all allocated objects */ -static LIST_HEAD(object_list); -/* the list of gray-colored objects (see color_gray comment below) */ -static LIST_HEAD(gray_list); -/* prio search tree for object boundaries */ -static struct prio_tree_root object_tree_root; -/* rw_lock protecting the access to object_list and prio_tree_root */ -static DEFINE_RWLOCK(kmemleak_lock); - -/* allocation caches for kmemleak internal data */ -static struct kmem_cache *object_cache; -static struct kmem_cache *scan_area_cache; - -/* set if tracing memory operations is enabled */ -static atomic_t kmemleak_enabled = ATOMIC_INIT(0); 
-/* set in the late_initcall if there were no errors */ -static atomic_t kmemleak_initialized = ATOMIC_INIT(0); -/* enables or disables early logging of the memory operations */ -static atomic_t kmemleak_early_log = ATOMIC_INIT(1); -/* set if a fata kmemleak error has occurred */ -static atomic_t kmemleak_error = ATOMIC_INIT(0); - -/* minimum and maximum address that may be valid pointers */ -static unsigned long min_addr = ULONG_MAX; -static unsigned long max_addr; - -/* used for yielding the CPU to other tasks during scanning */ -static unsigned long next_scan_yield; -static struct task_struct *scan_thread; -static unsigned long jiffies_scan_yield; -static unsigned long jiffies_min_age; -/* delay between automatic memory scannings */ -static signed long jiffies_scan_wait; -/* enables or disables the task stacks scanning */ -static int kmemleak_stack_scan; -/* mutex protecting the memory scanning */ -static DEFINE_MUTEX(scan_mutex); -/* mutex protecting the access to the /sys/kernel/debug/kmemleak file */ -static DEFINE_MUTEX(kmemleak_mutex); - -/* number of leaks reported (for limitation purposes) */ -static int reported_leaks; - -/* - * Early object allocation/freeing logging. Kkmemleak is initialized after the - * kernel allocator. However, both the kernel allocator and kmemleak may - * allocate memory blocks which need to be tracked. Kkmemleak defines an - * arbitrary buffer to hold the allocation/freeing information before it is - * fully initialized. - */ - -/* kmemleak operation type for early logging */ -enum { - KMEMLEAK_ALLOC, - KMEMLEAK_FREE, - KMEMLEAK_NOT_LEAK, - KMEMLEAK_IGNORE, - KMEMLEAK_SCAN_AREA, - KMEMLEAK_NO_SCAN -}; - -/* - * Structure holding the information passed to kmemleak callbacks during the - * early logging. 
- */ -struct early_log { - int op_type; /* kmemleak operation type */ - const void *ptr; /* allocated/freed memory block */ - size_t size; /* memory block size */ - int min_count; /* minimum reference count */ - unsigned long offset; /* scan area offset */ - size_t length; /* scan area length */ -}; - -/* early logging buffer and current position */ -static struct early_log early_log[200]; -static int crt_early_log; - -static void kmemleak_disable(void); - -/* - * Print a warning and dump the stack trace. - */ -#define kmemleak_warn(x...) do { \ - pr_warning(x); \ - dump_stack(); \ -} while (0) - -/* - * Macro invoked when a serious kmemleak condition occured and cannot be - * recovered from. Kkmemleak will be disabled and further allocation/freeing - * tracing no longer available. - */ -#define kmemleak_panic(x...) do { \ - kmemleak_warn(x); \ - kmemleak_disable(); \ -} while (0) - -/* - * Object colors, encoded with count and min_count: - * - white - orphan object, not enough references to it (count < min_count) - * - gray - not orphan, not marked as false positive (min_count == 0) or - * sufficient references to it (count >= min_count) - * - black - ignore, it doesn't contain references (e.g. text section) - * (min_count == -1). No function defined for this color. - * Newly created objects don't have any color assigned (object->count == -1) - * before the next memory scan when they become white. - */ -static int color_white(const struct kmemleak_object *object) -{ - return object->count != -1 && object->count < object->min_count; -} - -static int color_gray(const struct kmemleak_object *object) -{ - return object->min_count != -1 && object->count >= object->min_count; -} - -/* - * Objects are considered referenced if their color is gray and they have not - * been deleted. 
- */ -static int referenced_object(struct kmemleak_object *object) -{ - return (object->flags & OBJECT_ALLOCATED) && color_gray(object); -} - -/* - * Objects are considered unreferenced only if their color is white, they have - * not be deleted and have a minimum age to avoid false positives caused by - * pointers temporarily stored in CPU registers. - */ -static int unreferenced_object(struct kmemleak_object *object) -{ - return (object->flags & OBJECT_ALLOCATED) && color_white(object) && - time_is_before_eq_jiffies(object->jiffies + jiffies_min_age); -} - -/* - * Printing of the (un)referenced objects information, either to the seq file - * or to the kernel log. The print_referenced/print_unreferenced functions - * must be called with the object->lock held. - */ -#define print_helper(seq, x...) do { \ - struct seq_file *s = (seq); \ - if (s) \ - seq_printf(s, x); \ - else \ - pr_info(x); \ -} while (0) - -static void print_referenced(struct kmemleak_object *object) -{ - pr_info("kmemleak: referenced object 0x%08lx (size %zu)\n", - object->pointer, object->size); -} - -static void print_unreferenced(struct seq_file *seq, - struct kmemleak_object *object) -{ - int i; - - print_helper(seq, "kmemleak: unreferenced object 0x%08lx (size %zu):\n", - object->pointer, object->size); - print_helper(seq, " comm \"%s\", pid %d, jiffies %lu\n", - object->comm, object->pid, object->jiffies); - print_helper(seq, " backtrace:\n"); - - for (i = 0; i < object->trace_len; i++) { - void *ptr = (void *)object->trace[i]; - print_helper(seq, " [<%p>] %pS\n", ptr, ptr); - } -} - -/* - * Print the kmemleak_object information. This function is used mainly for - * debugging special cases when kmemleak operations. It must be called with - * the object->lock held. 
- */ -static void dump_object_info(struct kmemleak_object *object) -{ - struct stack_trace trace; - - trace.nr_entries = object->trace_len; - trace.entries = object->trace; - - pr_notice("kmemleak: Object 0x%08lx (size %zu):\n", - object->tree_node.start, object->size); - pr_notice(" comm \"%s\", pid %d, jiffies %lu\n", - object->comm, object->pid, object->jiffies); - pr_notice(" min_count = %d\n", object->min_count); - pr_notice(" count = %d\n", object->count); - pr_notice(" backtrace:\n"); - print_stack_trace(&trace, 4); -} - -/* - * Look-up a memory block metadata (kmemleak_object) in the priority search - * tree based on a pointer value. If alias is 0, only values pointing to the - * beginning of the memory block are allowed. The kmemleak_lock must be held - * when calling this function. - */ -static struct kmemleak_object *lookup_object(unsigned long ptr, int alias) -{ - struct prio_tree_node *node; - struct prio_tree_iter iter; - struct kmemleak_object *object; - - prio_tree_iter_init(&iter, &object_tree_root, ptr, ptr); - node = prio_tree_next(&iter); - if (node) { - object = prio_tree_entry(node, struct kmemleak_object, - tree_node); - if (!alias && object->pointer != ptr) { - kmemleak_warn("kmemleak: Found object by alias"); - object = NULL; - } - } else - object = NULL; - - return object; -} - -/* - * Increment the object use_count. Return 1 if successful or 0 otherwise. Note - * that once an object's use_count reached 0, the RCU freeing was already - * registered and the object should no longer be used. This function must be - * called under the protection of rcu_read_lock(). - */ -static int get_object(struct kmemleak_object *object) -{ - return atomic_inc_not_zero(&object->use_count); -} - -/* - * RCU callback to free a kmemleak_object. 
- */ -static void free_object_rcu(struct rcu_head *rcu) -{ - struct hlist_node *elem, *tmp; - struct kmemleak_scan_area *area; - struct kmemleak_object *object = - container_of(rcu, struct kmemleak_object, rcu); - - /* - * Once use_count is 0 (guaranteed by put_object), there is no other - * code accessing this object, hence no need for locking. - */ - hlist_for_each_entry_safe(area, elem, tmp, &object->area_list, node) { - hlist_del(elem); - kmem_cache_free(scan_area_cache, area); - } - kmem_cache_free(object_cache, object); -} - -/* - * Decrement the object use_count. Once the count is 0, free the object using - * an RCU callback. Since put_object() may be called via the kmemleak_free() -> - * delete_object() path, the delayed RCU freeing ensures that there is no - * recursive call to the kernel allocator. Lock-less RCU object_list traversal - * is also possible. - */ -static void put_object(struct kmemleak_object *object) -{ - if (!atomic_dec_and_test(&object->use_count)) - return; - - /* should only get here after delete_object was called */ - WARN_ON(object->flags & OBJECT_ALLOCATED); - - call_rcu(&object->rcu, free_object_rcu); -} - -/* - * Look up an object in the prio search tree and increase its use_count. - */ -static struct kmemleak_object *find_and_get_object(unsigned long ptr, int alias) -{ - unsigned long flags; - struct kmemleak_object *object = NULL; - - rcu_read_lock(); - read_lock_irqsave(&kmemleak_lock, flags); - if (ptr >= min_addr && ptr < max_addr) - object = lookup_object(ptr, alias); - read_unlock_irqrestore(&kmemleak_lock, flags); - - /* check whether the object is still available */ - if (object && !get_object(object)) - object = NULL; - rcu_read_unlock(); - - return object; -} - -/* - * Create the metadata (struct kmemleak_object) corresponding to an allocated - * memory block and add it to the object_list and object_tree_root. 
- */ -static void create_object(unsigned long ptr, size_t size, int min_count, - gfp_t gfp) -{ - unsigned long flags; - struct kmemleak_object *object; - struct prio_tree_node *node; - struct stack_trace trace; - - object = kmem_cache_alloc(object_cache, gfp & ~GFP_SLAB_BUG_MASK); - if (!object) { - kmemleak_panic("kmemleak: Cannot allocate a kmemleak_object " - "structure\n"); - return; - } - - INIT_LIST_HEAD(&object->object_list); - INIT_LIST_HEAD(&object->gray_list); - INIT_HLIST_HEAD(&object->area_list); - spin_lock_init(&object->lock); - atomic_set(&object->use_count, 1); - object->flags = OBJECT_ALLOCATED; - object->pointer = ptr; - object->size = size; - object->min_count = min_count; - object->count = -1; /* no color initially */ - object->jiffies = jiffies; - - /* task information */ - if (in_irq()) { - object->pid = 0; - strncpy(object->comm, "hardirq", sizeof(object->comm)); - } else if (in_softirq()) { - object->pid = 0; - strncpy(object->comm, "softirq", sizeof(object->comm)); - } else { - object->pid = current->pid; - /* - * There is a small chance of a race with set_task_comm(), - * however using get_task_comm() here may cause locking - * dependency issues with current->alloc_lock. In the worst - * case, the command line is not correct. - */ - strncpy(object->comm, current->comm, sizeof(object->comm)); - } - - /* kernel backtrace */ - trace.max_entries = MAX_TRACE; - trace.nr_entries = 0; - trace.entries = object->trace; - trace.skip = 1; - save_stack_trace(&trace); - object->trace_len = trace.nr_entries; - - INIT_PRIO_TREE_NODE(&object->tree_node); - object->tree_node.start = ptr; - object->tree_node.last = ptr + size - 1; - - write_lock_irqsave(&kmemleak_lock, flags); - min_addr = min(min_addr, ptr); - max_addr = max(max_addr, ptr + size); - node = prio_tree_insert(&object_tree_root, &object->tree_node); - /* - * The code calling the kernel does not yet have the pointer to the - * memory block to be able to free it. 
However, we still hold the - * kmemleak_lock here in case parts of the kernel started freeing - * random memory blocks. - */ - if (node != &object->tree_node) { - unsigned long flags; - - kmemleak_panic("kmemleak: Cannot insert 0x%lx into the object " - "search tree (already existing)\n", ptr); - object = lookup_object(ptr, 1); - spin_lock_irqsave(&object->lock, flags); - dump_object_info(object); - spin_unlock_irqrestore(&object->lock, flags); - - goto out; - } - list_add_tail_rcu(&object->object_list, &object_list); -out: - write_unlock_irqrestore(&kmemleak_lock, flags); -} - -/* - * Remove the metadata (struct kmemleak_object) for a memory block from the - * object_list and object_tree_root and decrement its use_count. - */ -static void delete_object(unsigned long ptr) -{ - unsigned long flags; - struct kmemleak_object *object; - - write_lock_irqsave(&kmemleak_lock, flags); - object = lookup_object(ptr, 0); - if (!object) { - kmemleak_warn("kmemleak: Freeing unknown object at 0x%08lx\n", - ptr); - write_unlock_irqrestore(&kmemleak_lock, flags); - return; - } - prio_tree_remove(&object_tree_root, &object->tree_node); - list_del_rcu(&object->object_list); - write_unlock_irqrestore(&kmemleak_lock, flags); - - WARN_ON(!(object->flags & OBJECT_ALLOCATED)); - WARN_ON(atomic_read(&object->use_count) < 1); - - /* - * Locking here also ensures that the corresponding memory block - * cannot be freed when it is being scanned. - */ - spin_lock_irqsave(&object->lock, flags); - if (object->flags & OBJECT_REPORTED) - print_referenced(object); - object->flags &= ~OBJECT_ALLOCATED; - spin_unlock_irqrestore(&object->lock, flags); - put_object(object); -} - -/* - * Make a object permanently as gray-colored so that it can no longer be - * reported as a leak. This is used in general to mark a false positive. 
- */ -static void make_gray_object(unsigned long ptr) -{ - unsigned long flags; - struct kmemleak_object *object; - - object = find_and_get_object(ptr, 0); - if (!object) { - kmemleak_warn("kmemleak: Graying unknown object at 0x%08lx\n", - ptr); - return; - } - - spin_lock_irqsave(&object->lock, flags); - object->min_count = 0; - spin_unlock_irqrestore(&object->lock, flags); - put_object(object); -} - -/* - * Mark the object as black-colored so that it is ignored from scans and - * reporting. - */ -static void make_black_object(unsigned long ptr) -{ - unsigned long flags; - struct kmemleak_object *object; - - object = find_and_get_object(ptr, 0); - if (!object) { - kmemleak_warn("kmemleak: Blacking unknown object at 0x%08lx\n", - ptr); - return; - } - - spin_lock_irqsave(&object->lock, flags); - object->min_count = -1; - spin_unlock_irqrestore(&object->lock, flags); - put_object(object); -} - -/* - * Add a scanning area to the object. If at least one such area is added, - * kmemleak will only scan these ranges rather than the whole memory block. 
- */ -static void add_scan_area(unsigned long ptr, unsigned long offset, - size_t length, gfp_t gfp) -{ - unsigned long flags; - struct kmemleak_object *object; - struct kmemleak_scan_area *area; - - object = find_and_get_object(ptr, 0); - if (!object) { - kmemleak_warn("kmemleak: Adding scan area to unknown " - "object at 0x%08lx\n", ptr); - return; - } - - area = kmem_cache_alloc(scan_area_cache, gfp & ~GFP_SLAB_BUG_MASK); - if (!area) { - kmemleak_warn("kmemleak: Cannot allocate a scan area\n"); - goto out; - } - - spin_lock_irqsave(&object->lock, flags); - if (offset + length > object->size) { - kmemleak_warn("kmemleak: Scan area larger than object " - "0x%08lx\n", ptr); - dump_object_info(object); - kmem_cache_free(scan_area_cache, area); - goto out_unlock; - } - - INIT_HLIST_NODE(&area->node); - area->offset = offset; - area->length = length; - - hlist_add_head(&area->node, &object->area_list); -out_unlock: - spin_unlock_irqrestore(&object->lock, flags); -out: - put_object(object); -} - -/* - * Set the OBJECT_NO_SCAN flag for the object corresponding to the give - * pointer. Such object will not be scanned by kmemleak but references to it - * are searched. - */ -static void object_no_scan(unsigned long ptr) -{ - unsigned long flags; - struct kmemleak_object *object; - - object = find_and_get_object(ptr, 0); - if (!object) { - kmemleak_warn("kmemleak: Not scanning unknown object at " - "0x%08lx\n", ptr); - return; - } - - spin_lock_irqsave(&object->lock, flags); - object->flags |= OBJECT_NO_SCAN; - spin_unlock_irqrestore(&object->lock, flags); - put_object(object); -} - -/* - * Log an early kmemleak_* call to the early_log buffer. These calls will be - * processed later once kmemleak is fully initialized. 
- */ -static void log_early(int op_type, const void *ptr, size_t size, - int min_count, unsigned long offset, size_t length) -{ - unsigned long flags; - struct early_log *log; - - if (crt_early_log >= ARRAY_SIZE(early_log)) { - kmemleak_panic("kmemleak: Early log buffer exceeded\n"); - return; - } - - /* - * There is no need for locking since the kernel is still in UP mode - * at this stage. Disabling the IRQs is enough. - */ - local_irq_save(flags); - log = &early_log[crt_early_log]; - log->op_type = op_type; - log->ptr = ptr; - log->size = size; - log->min_count = min_count; - log->offset = offset; - log->length = length; - crt_early_log++; - local_irq_restore(flags); -} - -/* - * Memory allocation function callback. This function is called from the - * kernel allocators when a new block is allocated (kmem_cache_alloc, kmalloc, - * vmalloc etc.). - */ -void kmemleak_alloc(const void *ptr, size_t size, int min_count, gfp_t gfp) -{ - pr_debug("%s(0x%p, %zu, %d)\n", __func__, ptr, size, min_count); - - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - create_object((unsigned long)ptr, size, min_count, gfp); - else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_ALLOC, ptr, size, min_count, 0, 0); -} -EXPORT_SYMBOL_GPL(kmemleak_alloc); - -/* - * Memory freeing function callback. This function is called from the kernel - * allocators when a block is freed (kmem_cache_free, kfree, vfree etc.). - */ -void kmemleak_free(const void *ptr) -{ - pr_debug("%s(0x%p)\n", __func__, ptr); - - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - delete_object((unsigned long)ptr); - else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_FREE, ptr, 0, 0, 0, 0); -} -EXPORT_SYMBOL_GPL(kmemleak_free); - -/* - * Mark an already allocated memory block as a false positive. This will cause - * the block to no longer be reported as leak and always be scanned. 
- */ -void kmemleak_not_leak(const void *ptr) -{ - pr_debug("%s(0x%p)\n", __func__, ptr); - - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - make_gray_object((unsigned long)ptr); - else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_NOT_LEAK, ptr, 0, 0, 0, 0); -} -EXPORT_SYMBOL(kmemleak_not_leak); - -/* - * Ignore a memory block. This is usually done when it is known that the - * corresponding block is not a leak and does not contain any references to - * other allocated memory blocks. - */ -void kmemleak_ignore(const void *ptr) -{ - pr_debug("%s(0x%p)\n", __func__, ptr); - - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - make_black_object((unsigned long)ptr); - else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_IGNORE, ptr, 0, 0, 0, 0); -} -EXPORT_SYMBOL(kmemleak_ignore); - -/* - * Limit the range to be scanned in an allocated memory block. - */ -void kmemleak_scan_area(const void *ptr, unsigned long offset, size_t length, - gfp_t gfp) -{ - pr_debug("%s(0x%p)\n", __func__, ptr); - - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - add_scan_area((unsigned long)ptr, offset, length, gfp); - else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_SCAN_AREA, ptr, 0, 0, offset, length); -} -EXPORT_SYMBOL(kmemleak_scan_area); - -/* - * Inform kmemleak not to scan the given memory block. - */ -void kmemleak_no_scan(const void *ptr) -{ - pr_debug("%s(0x%p)\n", __func__, ptr); - - if (atomic_read(&kmemleak_enabled) && ptr && !IS_ERR(ptr)) - object_no_scan((unsigned long)ptr); - else if (atomic_read(&kmemleak_early_log)) - log_early(KMEMLEAK_NO_SCAN, ptr, 0, 0, 0, 0); -} -EXPORT_SYMBOL(kmemleak_no_scan); - -/* - * Yield the CPU so that other tasks get a chance to run. The yielding is - * rate-limited to avoid excessive number of calls to the schedule() function - * during memory scanning. 
- */ -static void scan_yield(void) -{ - might_sleep(); - - if (time_is_before_eq_jiffies(next_scan_yield)) { - schedule(); - next_scan_yield = jiffies + jiffies_scan_yield; - } -} - -/* - * Memory scanning is a long process and it needs to be interruptable. This - * function checks whether such interrupt condition occured. - */ -static int scan_should_stop(void) -{ - if (!atomic_read(&kmemleak_enabled)) - return 1; - - /* - * This function may be called from either process or kthread context, - * hence the need to check for both stop conditions. - */ - if (current->mm) - return signal_pending(current); - else - return kthread_should_stop(); - - return 0; -} - -/* - * Scan a memory block (exclusive range) for valid pointers and add those - * found to the gray list. - */ -static void scan_block(void *_start, void *_end, - struct kmemleak_object *scanned) -{ - unsigned long *ptr; - unsigned long *start = PTR_ALIGN(_start, BYTES_PER_POINTER); - unsigned long *end = _end - (BYTES_PER_POINTER - 1); - - for (ptr = start; ptr < end; ptr++) { - unsigned long flags; - unsigned long pointer = *ptr; - struct kmemleak_object *object; - - if (scan_should_stop()) - break; - - /* - * When scanning a memory block with a corresponding - * kmemleak_object, the CPU yielding is handled in the calling - * code since it holds the object->lock to avoid the block - * freeing. - */ - if (!scanned) - scan_yield(); - - object = find_and_get_object(pointer, 1); - if (!object) - continue; - if (object == scanned) { - /* self referenced, ignore */ - put_object(object); - continue; - } - - /* - * Avoid the lockdep recursive warning on object->lock being - * previously acquired in scan_object(). These locks are - * enclosed by scan_mutex. 
- */ - spin_lock_irqsave_nested(&object->lock, flags, - SINGLE_DEPTH_NESTING); - if (!color_white(object)) { - /* non-orphan, ignored or new */ - spin_unlock_irqrestore(&object->lock, flags); - put_object(object); - continue; - } - - /* - * Increase the object's reference count (number of pointers - * to the memory block). If this count reaches the required - * minimum, the object's color will become gray and it will be - * added to the gray_list. - */ - object->count++; - if (color_gray(object)) - list_add_tail(&object->gray_list, &gray_list); - else - put_object(object); - spin_unlock_irqrestore(&object->lock, flags); - } -} - -/* - * Scan a memory block corresponding to a kmemleak_object. A condition is - * that object->use_count >= 1. - */ -static void scan_object(struct kmemleak_object *object) -{ - struct kmemleak_scan_area *area; - struct hlist_node *elem; - unsigned long flags; - - /* - * Once the object->lock is aquired, the corresponding memory block - * cannot be freed (the same lock is aquired in delete_object). - */ - spin_lock_irqsave(&object->lock, flags); - if (object->flags & OBJECT_NO_SCAN) - goto out; - if (!(object->flags & OBJECT_ALLOCATED)) - /* already freed object */ - goto out; - if (hlist_empty(&object->area_list)) - scan_block((void *)object->pointer, - (void *)(object->pointer + object->size), object); - else - hlist_for_each_entry(area, elem, &object->area_list, node) - scan_block((void *)(object->pointer + area->offset), - (void *)(object->pointer + area->offset - + area->length), object); -out: - spin_unlock_irqrestore(&object->lock, flags); -} - -/* - * Scan data sections and all the referenced memory blocks allocated via the - * kernel's standard allocators. This function must be called with the - * scan_mutex held. 
- */ -static void kmemleak_scan(void) -{ - unsigned long flags; - struct kmemleak_object *object, *tmp; - struct task_struct *task; - int i; - - /* prepare the kmemleak_object's */ - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { - spin_lock_irqsave(&object->lock, flags); -#ifdef DEBUG - /* - * With a few exceptions there should be a maximum of - * 1 reference to any object at this point. - */ - if (atomic_read(&object->use_count) > 1) { - pr_debug("kmemleak: object->use_count = %d\n", - atomic_read(&object->use_count)); - dump_object_info(object); - } -#endif - /* reset the reference count (whiten the object) */ - object->count = 0; - if (color_gray(object) && get_object(object)) - list_add_tail(&object->gray_list, &gray_list); - - spin_unlock_irqrestore(&object->lock, flags); - } - rcu_read_unlock(); - - /* data/bss scanning */ - scan_block(_sdata, _edata, NULL); - scan_block(__bss_start, __bss_stop, NULL); - -#ifdef CONFIG_SMP - /* per-cpu sections scanning */ - for_each_possible_cpu(i) - scan_block(__per_cpu_start + per_cpu_offset(i), - __per_cpu_end + per_cpu_offset(i), NULL); -#endif - - /* - * Struct page scanning for each node. The code below is not yet safe - * with MEMORY_HOTPLUG. - */ - for_each_online_node(i) { - pg_data_t *pgdat = NODE_DATA(i); - unsigned long start_pfn = pgdat->node_start_pfn; - unsigned long end_pfn = start_pfn + pgdat->node_spanned_pages; - unsigned long pfn; - - for (pfn = start_pfn; pfn < end_pfn; pfn++) { - struct page *page; - - if (!pfn_valid(pfn)) - continue; - page = pfn_to_page(pfn); - /* only scan if page is in use */ - if (page_count(page) == 0) - continue; - scan_block(page, page + 1, NULL); - } - } - - /* - * Scanning the task stacks may introduce false negatives and it is - * not enabled by default. 
- */ - if (kmemleak_stack_scan) { - read_lock(&tasklist_lock); - for_each_process(task) - scan_block(task_stack_page(task), - task_stack_page(task) + THREAD_SIZE, NULL); - read_unlock(&tasklist_lock); - } - - /* - * Scan the objects already referenced from the sections scanned - * above. More objects will be referenced and, if there are no memory - * leaks, all the objects will be scanned. The list traversal is safe - * for both tail additions and removals from inside the loop. The - * kmemleak objects cannot be freed from outside the loop because their - * use_count was increased. - */ - object = list_entry(gray_list.next, typeof(*object), gray_list); - while (&object->gray_list != &gray_list) { - scan_yield(); - - /* may add new objects to the list */ - if (!scan_should_stop()) - scan_object(object); - - tmp = list_entry(object->gray_list.next, typeof(*object), - gray_list); - - /* remove the object from the list and release it */ - list_del(&object->gray_list); - put_object(object); - - object = tmp; - } - WARN_ON(!list_empty(&gray_list)); -} - -/* - * Thread function performing automatic memory scanning. Unreferenced objects - * at the end of a memory scan are reported but only the first time. - */ -static int kmemleak_scan_thread(void *arg) -{ - static int first_run = 1; - - pr_info("kmemleak: Automatic memory scanning thread started\n"); - - /* - * Wait before the first scan to allow the system to fully initialize. 
- */ - if (first_run) { - first_run = 0; - ssleep(SECS_FIRST_SCAN); - } - - while (!kthread_should_stop()) { - struct kmemleak_object *object; - signed long timeout = jiffies_scan_wait; - - mutex_lock(&scan_mutex); - - kmemleak_scan(); - reported_leaks = 0; - - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { - unsigned long flags; - - if (reported_leaks >= REPORTS_NR) - break; - spin_lock_irqsave(&object->lock, flags); - if (!(object->flags & OBJECT_REPORTED) && - unreferenced_object(object)) { - print_unreferenced(NULL, object); - object->flags |= OBJECT_REPORTED; - reported_leaks++; - } else if ((object->flags & OBJECT_REPORTED) && - referenced_object(object)) { - print_referenced(object); - object->flags &= ~OBJECT_REPORTED; - } - spin_unlock_irqrestore(&object->lock, flags); - } - rcu_read_unlock(); - - mutex_unlock(&scan_mutex); - /* wait before the next scan */ - while (timeout && !kthread_should_stop()) - timeout = schedule_timeout_interruptible(timeout); - } - - pr_info("kmemleak: Automatic memory scanning thread ended\n"); - - return 0; -} - -/* - * Start the automatic memory scanning thread. This function must be called - * with the kmemleak_mutex held. - */ -void start_scan_thread(void) -{ - if (scan_thread) - return; - scan_thread = kthread_run(kmemleak_scan_thread, NULL, "kmemleak"); - if (IS_ERR(scan_thread)) { - pr_warning("kmemleak: Failed to create the scan thread\n"); - scan_thread = NULL; - } -} - -/* - * Stop the automatic memory scanning thread. This function must be called - * with the kmemleak_mutex held. - */ -void stop_scan_thread(void) -{ - if (scan_thread) { - kthread_stop(scan_thread); - scan_thread = NULL; - } -} - -/* - * Iterate over the object_list and return the first valid object at or after - * the required position with its use_count incremented. The function triggers - * a memory scanning when the pos argument points to the first position. 
- */ -static void *kmemleak_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct kmemleak_object *object; - loff_t n = *pos; - - if (!n) { - kmemleak_scan(); - reported_leaks = 0; - } - if (reported_leaks >= REPORTS_NR) - return NULL; - - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) { - if (n-- > 0) - continue; - if (get_object(object)) - goto out; - } - object = NULL; -out: - rcu_read_unlock(); - return object; -} - -/* - * Return the next object in the object_list. The function decrements the - * use_count of the previous object and increases that of the next one. - */ -static void *kmemleak_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct kmemleak_object *prev_obj = v; - struct kmemleak_object *next_obj = NULL; - struct list_head *n = &prev_obj->object_list; - - ++(*pos); - if (reported_leaks >= REPORTS_NR) - goto out; - - rcu_read_lock(); - list_for_each_continue_rcu(n, &object_list) { - next_obj = list_entry(n, struct kmemleak_object, object_list); - if (get_object(next_obj)) - break; - } - rcu_read_unlock(); -out: - put_object(prev_obj); - return next_obj; -} - -/* - * Decrement the use_count of the last object required, if any. - */ -static void kmemleak_seq_stop(struct seq_file *seq, void *v) -{ - if (v) - put_object(v); -} - -/* - * Print the information for an unreferenced object to the seq file. 
- */ -static int kmemleak_seq_show(struct seq_file *seq, void *v) -{ - struct kmemleak_object *object = v; - unsigned long flags; - - spin_lock_irqsave(&object->lock, flags); - if (!unreferenced_object(object)) - goto out; - print_unreferenced(seq, object); - reported_leaks++; -out: - spin_unlock_irqrestore(&object->lock, flags); - return 0; -} - -static const struct seq_operations kmemleak_seq_ops = { - .start = kmemleak_seq_start, - .next = kmemleak_seq_next, - .stop = kmemleak_seq_stop, - .show = kmemleak_seq_show, -}; - -static int kmemleak_open(struct inode *inode, struct file *file) -{ - int ret = 0; - - if (!atomic_read(&kmemleak_enabled)) - return -EBUSY; - - ret = mutex_lock_interruptible(&kmemleak_mutex); - if (ret < 0) - goto out; - if (file->f_mode & FMODE_READ) { - ret = mutex_lock_interruptible(&scan_mutex); - if (ret < 0) - goto kmemleak_unlock; - ret = seq_open(file, &kmemleak_seq_ops); - if (ret < 0) - goto scan_unlock; - } - return ret; - -scan_unlock: - mutex_unlock(&scan_mutex); -kmemleak_unlock: - mutex_unlock(&kmemleak_mutex); -out: - return ret; -} - -static int kmemleak_release(struct inode *inode, struct file *file) -{ - int ret = 0; - - if (file->f_mode & FMODE_READ) { - seq_release(inode, file); - mutex_unlock(&scan_mutex); - } - mutex_unlock(&kmemleak_mutex); - - return ret; -} - -/* - * File write operation to configure kmemleak at run-time. The following - * commands can be written to the /sys/kernel/debug/kmemleak file: - * off - disable kmemleak (irreversible) - * stack=on - enable the task stacks scanning - * stack=off - disable the tasks stacks scanning - * scan=on - start the automatic memory scanning thread - * scan=off - stop the automatic memory scanning thread - * scan=... 
- set the automatic memory scanning period in seconds (0 to - * disable it) - */ -static ssize_t kmemleak_write(struct file *file, const char __user *user_buf, - size_t size, loff_t *ppos) -{ - char buf[64]; - int buf_size; - - if (!atomic_read(&kmemleak_enabled)) - return -EBUSY; - - buf_size = min(size, (sizeof(buf) - 1)); - if (strncpy_from_user(buf, user_buf, buf_size) < 0) - return -EFAULT; - buf[buf_size] = 0; - - if (strncmp(buf, "off", 3) == 0) - kmemleak_disable(); - else if (strncmp(buf, "stack=on", 8) == 0) - kmemleak_stack_scan = 1; - else if (strncmp(buf, "stack=off", 9) == 0) - kmemleak_stack_scan = 0; - else if (strncmp(buf, "scan=on", 7) == 0) - start_scan_thread(); - else if (strncmp(buf, "scan=off", 8) == 0) - stop_scan_thread(); - else if (strncmp(buf, "scan=", 5) == 0) { - unsigned long secs; - int err; - - err = strict_strtoul(buf + 5, 0, &secs); - if (err < 0) - return err; - stop_scan_thread(); - if (secs) { - jiffies_scan_wait = msecs_to_jiffies(secs * 1000); - start_scan_thread(); - } - } else - return -EINVAL; - - /* ignore the rest of the buffer, only one command at a time */ - *ppos += size; - return size; -} - -static const struct file_operations kmemleak_fops = { - .owner = THIS_MODULE, - .open = kmemleak_open, - .read = seq_read, - .write = kmemleak_write, - .llseek = seq_lseek, - .release = kmemleak_release, -}; - -/* - * Perform the freeing of the kmemleak internal objects after waiting for any - * current memory scan to complete. - */ -static int kmemleak_cleanup_thread(void *arg) -{ - struct kmemleak_object *object; - - mutex_lock(&kmemleak_mutex); - stop_scan_thread(); - mutex_unlock(&kmemleak_mutex); - - mutex_lock(&scan_mutex); - rcu_read_lock(); - list_for_each_entry_rcu(object, &object_list, object_list) - delete_object(object->pointer); - rcu_read_unlock(); - mutex_unlock(&scan_mutex); - - return 0; -} - -/* - * Start the clean-up thread. 
- */ -static void kmemleak_cleanup(void) -{ - struct task_struct *cleanup_thread; - - cleanup_thread = kthread_run(kmemleak_cleanup_thread, NULL, - "kmemleak-clean"); - if (IS_ERR(cleanup_thread)) - pr_warning("kmemleak: Failed to create the clean-up thread\n"); -} - -/* - * Disable kmemleak. No memory allocation/freeing will be traced once this - * function is called. Disabling kmemleak is an irreversible operation. - */ -static void kmemleak_disable(void) -{ - /* atomically check whether it was already invoked */ - if (atomic_cmpxchg(&kmemleak_error, 0, 1)) - return; - - /* stop any memory operation tracing */ - atomic_set(&kmemleak_early_log, 0); - atomic_set(&kmemleak_enabled, 0); - - /* check whether it is too early for a kernel thread */ - if (atomic_read(&kmemleak_initialized)) - kmemleak_cleanup(); - - pr_info("Kernel memory leak detector disabled\n"); -} - -/* - * Allow boot-time kmemleak disabling (enabled by default). - */ -static int kmemleak_boot_config(char *str) -{ - if (!str) - return -EINVAL; - if (strcmp(str, "off") == 0) - kmemleak_disable(); - else if (strcmp(str, "on") != 0) - return -EINVAL; - return 0; -} -early_param("kmemleak", kmemleak_boot_config); - -/* - * Kkmemleak initialization. - */ -void __init kmemleak_init(void) -{ - int i; - unsigned long flags; - - jiffies_scan_yield = msecs_to_jiffies(MSECS_SCAN_YIELD); - jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE); - jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000); - - object_cache = KMEM_CACHE(kmemleak_object, SLAB_NOLEAKTRACE); - scan_area_cache = KMEM_CACHE(kmemleak_scan_area, SLAB_NOLEAKTRACE); - INIT_PRIO_TREE_ROOT(&object_tree_root); - - /* the kernel is still in UP mode, so disabling the IRQs is enough */ - local_irq_save(flags); - if (!atomic_read(&kmemleak_error)) { - atomic_set(&kmemleak_enabled, 1); - atomic_set(&kmemleak_early_log, 0); - } - local_irq_restore(flags); - - /* - * This is the point where tracking allocations is safe. 
Automatic - * scanning is started during the late initcall. Add the early logged - * callbacks to the kmemleak infrastructure. - */ - for (i = 0; i < crt_early_log; i++) { - struct early_log *log = &early_log[i]; - - switch (log->op_type) { - case KMEMLEAK_ALLOC: - kmemleak_alloc(log->ptr, log->size, log->min_count, - GFP_KERNEL); - break; - case KMEMLEAK_FREE: - kmemleak_free(log->ptr); - break; - case KMEMLEAK_NOT_LEAK: - kmemleak_not_leak(log->ptr); - break; - case KMEMLEAK_IGNORE: - kmemleak_ignore(log->ptr); - break; - case KMEMLEAK_SCAN_AREA: - kmemleak_scan_area(log->ptr, log->offset, log->length, - GFP_KERNEL); - break; - case KMEMLEAK_NO_SCAN: - kmemleak_no_scan(log->ptr); - break; - default: - WARN_ON(1); - } - } -} - -/* - * Late initialization function. - */ -static int __init kmemleak_late_init(void) -{ - struct dentry *dentry; - - atomic_set(&kmemleak_initialized, 1); - - if (atomic_read(&kmemleak_error)) { - /* - * Some error occured and kmemleak was disabled. There is a - * small chance that kmemleak_disable() was called immediately - * after setting kmemleak_initialized and we may end up with - * two clean-up threads but serialized by scan_mutex. 
- */ - kmemleak_cleanup(); - return -ENOMEM; - } - - dentry = debugfs_create_file("kmemleak", S_IRUGO, NULL, NULL, - &kmemleak_fops); - if (!dentry) - pr_warning("kmemleak: Failed to create the debugfs kmemleak " - "file\n"); - mutex_lock(&kmemleak_mutex); - start_scan_thread(); - mutex_unlock(&kmemleak_mutex); - - pr_info("Kernel memory leak detector initialized\n"); - - return 0; -} -late_initcall(kmemleak_late_init); diff --git a/trunk/mm/mmap.c b/trunk/mm/mmap.c index 34579b23ebd5..2b43fa1aa3c8 100644 --- a/trunk/mm/mmap.c +++ b/trunk/mm/mmap.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -1223,8 +1222,6 @@ unsigned long mmap_region(struct file *file, unsigned long addr, if (correct_wcount) atomic_inc(&inode->i_writecount); out: - perf_counter_mmap(vma); - mm->total_vm += len >> PAGE_SHIFT; vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT); if (vm_flags & VM_LOCKED) { @@ -2311,8 +2308,6 @@ int install_special_mapping(struct mm_struct *mm, mm->total_vm += len >> PAGE_SHIFT; - perf_counter_mmap(vma); - return 0; } diff --git a/trunk/mm/mprotect.c b/trunk/mm/mprotect.c index d80311baeb2d..258197b76fb4 100644 --- a/trunk/mm/mprotect.c +++ b/trunk/mm/mprotect.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include @@ -300,7 +299,6 @@ SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len, error = mprotect_fixup(vma, &prev, nstart, tmp, newflags); if (error) goto out; - perf_counter_mmap(vma); nstart = tmp; if (nstart < prev->vm_end) diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c index 17d5f539a9aa..474c7e9dd51a 100644 --- a/trunk/mm/page_alloc.c +++ b/trunk/mm/page_alloc.c @@ -46,7 +46,6 @@ #include #include #include -#include #include #include @@ -4547,16 +4546,6 @@ void *__init alloc_large_system_hash(const char *tablename, if (_hash_mask) *_hash_mask = (1 << log2qty) - 1; - /* - * If hashdist is set, the table allocation is done with __vmalloc() - * which invokes the kmemleak_alloc() 
callback. This function may also - * be called before the slab and kmemleak are initialised when - * kmemleak simply buffers the request to be executed later - * (GFP_ATOMIC flag ignored in this case). - */ - if (!hashdist) - kmemleak_alloc(table, size, 1, GFP_ATOMIC); - return table; } diff --git a/trunk/mm/page_cgroup.c b/trunk/mm/page_cgroup.c index 3dd4a909a1de..791905c991df 100644 --- a/trunk/mm/page_cgroup.c +++ b/trunk/mm/page_cgroup.c @@ -47,8 +47,6 @@ static int __init alloc_node_page_cgroup(int nid) struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; - struct page *page; - unsigned int order; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; @@ -57,13 +55,11 @@ static int __init alloc_node_page_cgroup(int nid) return 0; table_size = sizeof(struct page_cgroup) * nr_pages; - order = get_order(table_size); - page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order); - if (!page) - page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order); - if (!page) + + base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), + table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); + if (!base) return -ENOMEM; - base = page_address(page); for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); diff --git a/trunk/mm/slab.c b/trunk/mm/slab.c index f46b65d124e5..f85831da9080 100644 --- a/trunk/mm/slab.c +++ b/trunk/mm/slab.c @@ -107,7 +107,6 @@ #include #include #include -#include #include #include #include @@ -179,13 +178,13 @@ SLAB_STORE_USER | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ - SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) + SLAB_DEBUG_OBJECTS) #else # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ - SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE) + SLAB_DEBUG_OBJECTS) #endif /* @@ -316,7 +315,7 @@ static int 
drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); -static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); +static int enable_cpucache(struct kmem_cache *cachep); static void cache_reap(struct work_struct *unused); /* @@ -959,20 +958,12 @@ static void __cpuinit start_cpu_timer(int cpu) } static struct array_cache *alloc_arraycache(int node, int entries, - int batchcount, gfp_t gfp) + int batchcount) { int memsize = sizeof(void *) * entries + sizeof(struct array_cache); struct array_cache *nc = NULL; - nc = kmalloc_node(memsize, gfp, node); - /* - * The array_cache structures contain pointers to free object. - * However, when such objects are allocated or transfered to another - * cache the pointers are not cleared and they could be counted as - * valid references during a kmemleak scan. Therefore, kmemleak must - * not scan such objects. - */ - kmemleak_no_scan(nc); + nc = kmalloc_node(memsize, GFP_KERNEL, node); if (nc) { nc->avail = 0; nc->limit = entries; @@ -1012,7 +1003,7 @@ static int transfer_objects(struct array_cache *to, #define drain_alien_cache(cachep, alien) do { } while (0) #define reap_alien(cachep, l3) do { } while (0) -static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) +static inline struct array_cache **alloc_alien_cache(int node, int limit) { return (struct array_cache **)BAD_ALIEN_MAGIC; } @@ -1043,7 +1034,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); -static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) +static struct array_cache **alloc_alien_cache(int node, int limit) { struct array_cache **ac_ptr; int memsize = sizeof(void *) * nr_node_ids; @@ -1051,14 +1042,14 @@ static struct array_cache 
**alloc_alien_cache(int node, int limit, gfp_t gfp) if (limit > 1) limit = 12; - ac_ptr = kmalloc_node(memsize, gfp, node); + ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); if (ac_ptr) { for_each_node(i) { if (i == node || !node_online(i)) { ac_ptr[i] = NULL; continue; } - ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); + ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); if (!ac_ptr[i]) { for (i--; i >= 0; i--) kfree(ac_ptr[i]); @@ -1291,20 +1282,20 @@ static int __cpuinit cpuup_prepare(long cpu) struct array_cache **alien = NULL; nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount, GFP_KERNEL); + cachep->batchcount); if (!nc) goto bad; if (cachep->shared) { shared = alloc_arraycache(node, cachep->shared * cachep->batchcount, - 0xbaadf00d, GFP_KERNEL); + 0xbaadf00d); if (!shared) { kfree(nc); goto bad; } } if (use_alien_caches) { - alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL); + alien = alloc_alien_cache(node, cachep->limit); if (!alien) { kfree(shared); kfree(nc); @@ -1408,9 +1399,10 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, { struct kmem_list3 *ptr; - ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); + ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); BUG_ON(!ptr); + local_irq_disable(); memcpy(ptr, list, sizeof(struct kmem_list3)); /* * Do not assume that spinlocks can be initialized via memcpy: @@ -1419,6 +1411,7 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; + local_irq_enable(); } /* @@ -1582,8 +1575,9 @@ void __init kmem_cache_init(void) { struct array_cache *ptr; - ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + local_irq_disable(); BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); @@ 
-1593,9 +1587,11 @@ void __init kmem_cache_init(void) spin_lock_init(&ptr->lock); cache_cache.array[smp_processor_id()] = ptr; + local_irq_enable(); - ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + local_irq_disable(); BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), @@ -1607,6 +1603,7 @@ void __init kmem_cache_init(void) malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; + local_irq_enable(); } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1630,7 +1627,7 @@ void __init kmem_cache_init(void) struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) - if (enable_cpucache(cachep, GFP_NOWAIT)) + if (enable_cpucache(cachep)) BUG(); mutex_unlock(&cache_chain_mutex); } @@ -2067,10 +2064,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, return left_over; } -static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) +static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) { if (g_cpucache_up == FULL) - return enable_cpucache(cachep, gfp); + return enable_cpucache(cachep); if (g_cpucache_up == NONE) { /* @@ -2092,7 +2089,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) g_cpucache_up = PARTIAL_AC; } else { cachep->array[smp_processor_id()] = - kmalloc(sizeof(struct arraycache_init), gfp); + kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); if (g_cpucache_up == PARTIAL_AC) { set_up_list3s(cachep, SIZE_L3); @@ -2156,7 +2153,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; - gfp_t gfp; /* * Sanity checks... these are all serious usage bugs. 
@@ -2172,10 +2168,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, * We use cache_chain_mutex to ensure a consistent view of * cpu_online_mask as well. Please see cpuup_callback */ - if (slab_is_available()) { - get_online_cpus(); - mutex_lock(&cache_chain_mutex); - } + get_online_cpus(); + mutex_lock(&cache_chain_mutex); list_for_each_entry(pc, &cache_chain, next) { char tmp; @@ -2284,13 +2278,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ align = ralign; - if (slab_is_available()) - gfp = GFP_KERNEL; - else - gfp = GFP_NOWAIT; - /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, gfp); + cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); if (!cachep) goto oops; @@ -2393,7 +2382,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->ctor = ctor; cachep->name = name; - if (setup_cpu_cache(cachep, gfp)) { + if (setup_cpu_cache(cachep)) { __kmem_cache_destroy(cachep); cachep = NULL; goto oops; @@ -2405,10 +2394,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); - if (slab_is_available()) { - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); - } + mutex_unlock(&cache_chain_mutex); + put_online_cpus(); return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -2634,14 +2621,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, /* Slab management obj is off-slab. */ slabp = kmem_cache_alloc_node(cachep->slabp_cache, local_flags, nodeid); - /* - * If the first object in the slab is leaked (it's allocated - * but no one has a reference to it), we want to make sure - * kmemleak does not treat the ->s_mem pointer as a reference - * to the object. Otherwise we will not report the leak. 
- */ - kmemleak_scan_area(slabp, offsetof(struct slab, list), - sizeof(struct list_head), local_flags); if (!slabp) return NULL; } else { @@ -3162,12 +3141,6 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) STATS_INC_ALLOCMISS(cachep); objp = cache_alloc_refill(cachep, flags); } - /* - * To avoid a false negative, if an object that is in one of the - * per-CPU caches is leaked, we need to make sure kmemleak doesn't - * treat the array pointers as a reference to the object. - */ - kmemleak_erase(&ac->entry[ac->avail]); return objp; } @@ -3387,8 +3360,6 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, out: local_irq_restore(save_flags); ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); - kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, - flags); if (unlikely((flags & __GFP_ZERO) && ptr)) memset(ptr, 0, obj_size(cachep)); @@ -3444,8 +3415,6 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) objp = __do_cache_alloc(cachep, flags); local_irq_restore(save_flags); objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); - kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, - flags); prefetchw(objp); if (unlikely((flags & __GFP_ZERO) && objp)) @@ -3561,7 +3530,6 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) struct array_cache *ac = cpu_cache_get(cachep); check_irq_off(); - kmemleak_free_recursive(objp, cachep->flags); objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); /* @@ -3834,7 +3802,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); /* * This initializes kmem_list3 or resizes various caches for all nodes. 
*/ -static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) +static int alloc_kmemlist(struct kmem_cache *cachep) { int node; struct kmem_list3 *l3; @@ -3844,7 +3812,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) for_each_online_node(node) { if (use_alien_caches) { - new_alien = alloc_alien_cache(node, cachep->limit, gfp); + new_alien = alloc_alien_cache(node, cachep->limit); if (!new_alien) goto fail; } @@ -3853,7 +3821,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) if (cachep->shared) { new_shared = alloc_arraycache(node, cachep->shared*cachep->batchcount, - 0xbaadf00d, gfp); + 0xbaadf00d); if (!new_shared) { free_alien_cache(new_alien); goto fail; @@ -3882,7 +3850,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); + l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); if (!l3) { free_alien_cache(new_alien); kfree(new_shared); @@ -3938,18 +3906,18 @@ static void do_ccupdate_local(void *info) /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, - int batchcount, int shared, gfp_t gfp) + int batchcount, int shared) { struct ccupdate_struct *new; int i; - new = kzalloc(sizeof(*new), gfp); + new = kzalloc(sizeof(*new), GFP_KERNEL); if (!new) return -ENOMEM; for_each_online_cpu(i) { new->new[i] = alloc_arraycache(cpu_to_node(i), limit, - batchcount, gfp); + batchcount); if (!new->new[i]) { for (i--; i >= 0; i--) kfree(new->new[i]); @@ -3976,11 +3944,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, kfree(ccold); } kfree(new); - return alloc_kmemlist(cachep, gfp); + return alloc_kmemlist(cachep); } /* Called with cache_chain_mutex held always */ -static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) +static int enable_cpucache(struct kmem_cache *cachep) { int err; int limit, shared; @@ -4026,7 
+3994,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) if (limit > 32) limit = 32; #endif - err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp); + err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); if (err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", cachep->name, -err); @@ -4332,8 +4300,7 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, res = 0; } else { res = do_tune_cpucache(cachep, limit, - batchcount, shared, - GFP_KERNEL); + batchcount, shared); } break; } diff --git a/trunk/mm/slob.c b/trunk/mm/slob.c index 12f261499925..9b1737b0787b 100644 --- a/trunk/mm/slob.c +++ b/trunk/mm/slob.c @@ -67,7 +67,6 @@ #include #include #include -#include #include /* @@ -510,7 +509,6 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node) size, PAGE_SIZE << order, gfp, node); } - kmemleak_alloc(ret, size, 1, gfp); return ret; } EXPORT_SYMBOL(__kmalloc_node); @@ -523,7 +521,6 @@ void kfree(const void *block) if (unlikely(ZERO_OR_NULL_PTR(block))) return; - kmemleak_free(block); sp = slob_page(block); if (is_slob_page(sp)) { @@ -587,14 +584,12 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, } else if (flags & SLAB_PANIC) panic("Cannot create slab cache %s\n", name); - kmemleak_alloc(c, sizeof(struct kmem_cache), 1, GFP_KERNEL); return c; } EXPORT_SYMBOL(kmem_cache_create); void kmem_cache_destroy(struct kmem_cache *c) { - kmemleak_free(c); slob_free(c, sizeof(struct kmem_cache)); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -618,7 +613,6 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node) if (c->ctor) c->ctor(b); - kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags); return b; } EXPORT_SYMBOL(kmem_cache_alloc_node); @@ -641,7 +635,6 @@ static void kmem_rcu_free(struct rcu_head *head) void kmem_cache_free(struct kmem_cache *c, void *b) { - kmemleak_free_recursive(b, c->flags); if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { struct slob_rcu 
*slob_rcu; slob_rcu = b + (c->size - sizeof(struct slob_rcu)); diff --git a/trunk/mm/slub.c b/trunk/mm/slub.c index 3964d3ce4c15..5e805a6fe36c 100644 --- a/trunk/mm/slub.c +++ b/trunk/mm/slub.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -144,7 +143,7 @@ * Set of flags that will prevent slab merging */ #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ - SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE) + SLAB_TRACE | SLAB_DESTROY_BY_RCU) #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ SLAB_CACHE_DMA) @@ -1618,7 +1617,6 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, if (unlikely((gfpflags & __GFP_ZERO) && object)) memset(object, 0, objsize); - kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags); return object; } @@ -1748,7 +1746,6 @@ static __always_inline void slab_free(struct kmem_cache *s, struct kmem_cache_cpu *c; unsigned long flags; - kmemleak_free_recursive(x, s->flags); local_irq_save(flags); c = get_cpu_slab(s, smp_processor_id()); debug_check_no_locks_freed(object, c->objsize); @@ -2560,16 +2557,13 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, if (gfp_flags & SLUB_DMA) flags = SLAB_CACHE_DMA; - /* - * This function is called with IRQs disabled during early-boot on - * single CPU so there's no need to take slub_lock here. - */ + down_write(&slub_lock); if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, flags, NULL)) goto panic; list_add(&s->list, &slab_caches); - + up_write(&slub_lock); if (sysfs_slab_add(s)) goto panic; return s; @@ -3027,7 +3021,7 @@ void __init kmem_cache_init(void) * kmem_cache_open for slab_state == DOWN. 
*/ create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), GFP_NOWAIT); + sizeof(struct kmem_cache_node), GFP_KERNEL); kmalloc_caches[0].refcount = -1; caches++; @@ -3040,16 +3034,16 @@ void __init kmem_cache_init(void) /* Caches that are not of the two-to-the-power-of size */ if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, GFP_NOWAIT); + "kmalloc-96", 96, GFP_KERNEL); caches++; create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, GFP_NOWAIT); + "kmalloc-192", 192, GFP_KERNEL); caches++; } for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, GFP_NOWAIT); + "kmalloc", 1 << i, GFP_KERNEL); caches++; } @@ -3086,7 +3080,7 @@ void __init kmem_cache_init(void) /* Provide the correct kmalloc names now that the caches are up */ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) kmalloc_caches[i]. name = - kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); + kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); diff --git a/trunk/mm/vmalloc.c b/trunk/mm/vmalloc.c index f8189a4b3e13..083716ea38c9 100644 --- a/trunk/mm/vmalloc.c +++ b/trunk/mm/vmalloc.c @@ -23,8 +23,8 @@ #include #include #include +#include #include -#include #include #include @@ -1032,7 +1032,7 @@ void __init vmalloc_init(void) /* Import existing vmlist entries. 
*/ for (tmp = vmlist; tmp; tmp = tmp->next) { - va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); + va = alloc_bootmem(sizeof(struct vmap_area)); va->flags = tmp->flags | VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; @@ -1327,9 +1327,6 @@ static void __vunmap(const void *addr, int deallocate_pages) void vfree(const void *addr) { BUG_ON(in_interrupt()); - - kmemleak_free(addr); - __vunmap(addr, 1); } EXPORT_SYMBOL(vfree); @@ -1442,17 +1439,8 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask, void *__vmalloc_area(struct vm_struct *area, gfp_t gfp_mask, pgprot_t prot) { - void *addr = __vmalloc_area_node(area, gfp_mask, prot, -1, - __builtin_return_address(0)); - - /* - * A ref_count = 3 is needed because the vm_struct and vmap_area - * structures allocated in the __get_vm_area_node() function contain - * references to the virtual address of the vmalloc'ed block. - */ - kmemleak_alloc(addr, area->size - PAGE_SIZE, 3, gfp_mask); - - return addr; + return __vmalloc_area_node(area, gfp_mask, prot, -1, + __builtin_return_address(0)); } /** @@ -1471,8 +1459,6 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, int node, void *caller) { struct vm_struct *area; - void *addr; - unsigned long real_size = size; size = PAGE_ALIGN(size); if (!size || (size >> PAGE_SHIFT) > num_physpages) @@ -1484,16 +1470,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, if (!area) return NULL; - addr = __vmalloc_area_node(area, gfp_mask, prot, node, caller); - - /* - * A ref_count = 3 is needed because the vm_struct and vmap_area - * structures allocated in the __get_vm_area_node() function contain - * references to the virtual address of the vmalloc'ed block. 
- */ - kmemleak_alloc(addr, real_size, 3, gfp_mask); - - return addr; + return __vmalloc_area_node(area, gfp_mask, prot, node, caller); } void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) diff --git a/trunk/tools/perf/.gitignore b/trunk/tools/perf/.gitignore deleted file mode 100644 index d69a759a1046..000000000000 --- a/trunk/tools/perf/.gitignore +++ /dev/null @@ -1,16 +0,0 @@ -PERF-BUILD-OPTIONS -PERF-CFLAGS -PERF-GUI-VARS -PERF-VERSION-FILE -perf -perf-help -perf-record -perf-report -perf-stat -perf-top -perf*.1 -perf*.xml -common-cmds.h -tags -TAGS -cscope* diff --git a/trunk/tools/perf/Documentation/Makefile b/trunk/tools/perf/Documentation/Makefile deleted file mode 100644 index 5457192e1b41..000000000000 --- a/trunk/tools/perf/Documentation/Makefile +++ /dev/null @@ -1,300 +0,0 @@ -MAN1_TXT= \ - $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \ - $(wildcard perf-*.txt)) \ - perf.txt -MAN5_TXT= -MAN7_TXT= - -MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT) -MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) -MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) - -DOC_HTML=$(MAN_HTML) - -ARTICLES = -# with their own formatting rules. 
-SP_ARTICLES = -API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt))) -SP_ARTICLES += $(API_DOCS) -SP_ARTICLES += technical/api-index - -DOC_HTML += $(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES)) - -DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT)) -DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT)) -DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT)) - -prefix?=$(HOME) -bindir?=$(prefix)/bin -htmldir?=$(prefix)/share/doc/perf-doc -pdfdir?=$(prefix)/share/doc/perf-doc -mandir?=$(prefix)/share/man -man1dir=$(mandir)/man1 -man5dir=$(mandir)/man5 -man7dir=$(mandir)/man7 -# DESTDIR= - -ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = -MANPAGE_XSL = manpage-normal.xsl -XMLTO_EXTRA = -INSTALL?=install -RM ?= rm -f -DOC_REF = origin/man -HTML_REF = origin/html - -infodir?=$(prefix)/share/info -MAKEINFO=makeinfo -INSTALL_INFO=install-info -DOCBOOK2X_TEXI=docbook2x-texi -DBLATEX=dblatex -ifndef PERL_PATH - PERL_PATH = /usr/bin/perl -endif - --include ../config.mak.autogen --include ../config.mak - -# -# For asciidoc ... -# -7.1.2, no extra settings are needed. -# 8.0-, set ASCIIDOC8. -# - -# -# For docbook-xsl ... -# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) -# 1.69.0, no extra settings are needed? -# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? -# 1.71.1, no extra settings are needed? -# 1.72.0, set DOCBOOK_XSL_172. -# 1.73.0-, set ASCIIDOC_NO_ROFF -# - -# -# If you had been using DOCBOOK_XSL_172 in an attempt to get rid -# of 'the ".ft C" problem' in your generated manpages, and you -# instead ended up with weird characters around callouts, try -# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). 
-# - -ifdef ASCIIDOC8 -ASCIIDOC_EXTRA += -a asciidoc7compatible -endif -ifdef DOCBOOK_XSL_172 -ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff -MANPAGE_XSL = manpage-1.72.xsl -else - ifdef ASCIIDOC_NO_ROFF - # docbook-xsl after 1.72 needs the regular XSL, but will not - # pass-thru raw roff codes from asciidoc.conf, so turn them off. - ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff - endif -endif -ifdef MAN_BOLD_LITERAL -XMLTO_EXTRA += -m manpage-bold-literal.xsl -endif -ifdef DOCBOOK_SUPPRESS_SP -XMLTO_EXTRA += -m manpage-suppress-sp.xsl -endif - -SHELL_PATH ?= $(SHELL) -# Shell quote; -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) - -# -# Please note that there is a minor bug in asciidoc. -# The version after 6.0.3 _will_ include the patch found here: -# http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2 -# -# Until that version is released you may have to apply the patch -# yourself - yes, all 6 characters of it! -# - -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir -QUIET_SUBDIR1 = - -ifneq ($(findstring $(MAKEFLAGS),w),w) -PRINT_DIR = --no-print-directory -else # "make -w" -NO_SUBDIR = : -endif - -ifneq ($(findstring $(MAKEFLAGS),s),s) -ifndef V - QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@; - QUIET_XMLTO = @echo ' ' XMLTO $@; - QUIET_DB2TEXI = @echo ' ' DB2TEXI $@; - QUIET_MAKEINFO = @echo ' ' MAKEINFO $@; - QUIET_DBLATEX = @echo ' ' DBLATEX $@; - QUIET_XSLTPROC = @echo ' ' XSLTPROC $@; - QUIET_GEN = @echo ' ' GEN $@; - QUIET_STDERR = 2> /dev/null - QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ - $(MAKE) $(PRINT_DIR) -C $$subdir - export V -endif -endif - -all: html man - -html: $(DOC_HTML) - -$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf - -man: man1 man5 man7 -man1: $(DOC_MAN1) -man5: $(DOC_MAN5) -man7: $(DOC_MAN7) - -info: perf.info perfman.info - -pdf: user-manual.pdf - -install: install-man - -install-man: man - $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir) -# $(INSTALL) -d -m 755 
$(DESTDIR)$(man5dir) -# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir) - $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir) -# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir) -# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir) - -install-info: info - $(INSTALL) -d -m 755 $(DESTDIR)$(infodir) - $(INSTALL) -m 644 perf.info perfman.info $(DESTDIR)$(infodir) - if test -r $(DESTDIR)$(infodir)/dir; then \ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\ - $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\ - else \ - echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \ - fi - -install-pdf: pdf - $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir) - $(INSTALL) -m 644 user-manual.pdf $(DESTDIR)$(pdfdir) - -install-html: html - '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir) - -../PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) PERF-VERSION-FILE - --include ../PERF-VERSION-FILE - -# -# Determine "include::" file references in asciidoc files. 
-# -doc.dep : $(wildcard *.txt) build-docdep.perl - $(QUIET_GEN)$(RM) $@+ $@ && \ - $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \ - mv $@+ $@ - --include doc.dep - -cmds_txt = cmds-ancillaryinterrogators.txt \ - cmds-ancillarymanipulators.txt \ - cmds-mainporcelain.txt \ - cmds-plumbinginterrogators.txt \ - cmds-plumbingmanipulators.txt \ - cmds-synchingrepositories.txt \ - cmds-synchelpers.txt \ - cmds-purehelpers.txt \ - cmds-foreignscminterface.txt - -$(cmds_txt): cmd-list.made - -cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT) - $(QUIET_GEN)$(RM) $@ && \ - $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \ - date >$@ - -clean: - $(RM) *.xml *.xml+ *.html *.html+ *.1 *.5 *.7 - $(RM) *.texi *.texi+ *.texi++ perf.info perfman.info - $(RM) howto-index.txt howto/*.html doc.dep - $(RM) technical/api-*.html technical/api-index.txt - $(RM) $(cmds_txt) *.made - -$(MAN_HTML): %.html : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \ - $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ - mv $@+ $@ - -%.1 %.5 %.7 : %.xml - $(QUIET_XMLTO)$(RM) $@ && \ - xmlto -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $< - -%.xml : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \ - $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \ - mv $@+ $@ - -XSLT = docbook.xsl -XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css - -user-manual.html: user-manual.xml - $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $< - -perf.info: user-manual.texi - $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ user-manual.texi - -user-manual.texi: user-manual.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \ - $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ - -user-manual.pdf: user-manual.xml - $(QUIET_DBLATEX)$(RM) $@+ $@ && \ - $(DBLATEX) -o $@+ -p 
/etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ - mv $@+ $@ - -perfman.texi: $(MAN_XML) cat-texi.perl - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \ - --to-stdout $(xml) &&) true) > $@++ && \ - $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \ - rm $@++ && \ - mv $@+ $@ - -perfman.info: perfman.texi - $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi - -$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml - $(QUIET_DB2TEXI)$(RM) $@+ $@ && \ - $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \ - mv $@+ $@ - -howto-index.txt: howto-index.sh $(wildcard howto/*.txt) - $(QUIET_GEN)$(RM) $@+ $@ && \ - '$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \ - mv $@+ $@ - -$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt - $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt - -WEBDOC_DEST = /pub/software/tools/perf/docs - -$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt - $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ - sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \ - mv $@+ $@ - -install-webdoc : html - '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST) - -quick-install: quick-install-man - -quick-install-man: - '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir) - -quick-install-html: - '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir) - -.PHONY: .FORCE-PERF-VERSION-FILE diff --git a/trunk/tools/perf/Documentation/asciidoc.conf b/trunk/tools/perf/Documentation/asciidoc.conf deleted file mode 100644 index 356b23a40339..000000000000 --- a/trunk/tools/perf/Documentation/asciidoc.conf +++ /dev/null @@ -1,91 +0,0 @@ -## linkperf: macro -# -# Usage: linkperf:command[manpage-section] -# -# Note, {0} is the manpage section, while {target} is the command. -# -# Show PERF link as: (
); if section is defined, else just show -# the command. - -[macros] -(?su)[\\]?(?Plinkperf):(?P\S*?)\[(?P.*?)\]= - -[attributes] -asterisk=* -plus=+ -caret=^ -startsb=[ -endsb=] -tilde=~ - -ifdef::backend-docbook[] -[linkperf-inlinemacro] -{0%{target}} -{0#} -{0#{target}{0}} -{0#} -endif::backend-docbook[] - -ifdef::backend-docbook[] -ifndef::perf-asciidoc-no-roff[] -# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. -# v1.72 breaks with this because it replaces dots not in roff requests. -[listingblock] -{title} - -ifdef::doctype-manpage[] - .ft C -endif::doctype-manpage[] -| -ifdef::doctype-manpage[] - .ft -endif::doctype-manpage[] - -{title#} -endif::perf-asciidoc-no-roff[] - -ifdef::perf-asciidoc-no-roff[] -ifdef::doctype-manpage[] -# The following two small workarounds insert a simple paragraph after screen -[listingblock] -{title} - -| - -{title#} - -[verseblock] -{title} -{title%} -{title#} -| - -{title#} -{title%} -endif::doctype-manpage[] -endif::perf-asciidoc-no-roff[] -endif::backend-docbook[] - -ifdef::doctype-manpage[] -ifdef::backend-docbook[] -[header] -template::[header-declarations] - - -{mantitle} -{manvolnum} -perf -{perf_version} -perf Manual - - - {manname} - {manpurpose} - -endif::backend-docbook[] -endif::doctype-manpage[] - -ifdef::backend-xhtml11[] -[linkperf-inlinemacro] -{target}{0?({0})} -endif::backend-xhtml11[] diff --git a/trunk/tools/perf/Documentation/manpage-1.72.xsl b/trunk/tools/perf/Documentation/manpage-1.72.xsl deleted file mode 100644 index b4d315cb8c47..000000000000 --- a/trunk/tools/perf/Documentation/manpage-1.72.xsl +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - diff --git a/trunk/tools/perf/Documentation/manpage-base.xsl b/trunk/tools/perf/Documentation/manpage-base.xsl deleted file mode 100644 index a264fa616093..000000000000 --- a/trunk/tools/perf/Documentation/manpage-base.xsl +++ /dev/null @@ -1,35 +0,0 @@ - - - - - - - - - - - - - - sp - - - - - - - - br - - - diff --git 
a/trunk/tools/perf/Documentation/manpage-bold-literal.xsl b/trunk/tools/perf/Documentation/manpage-bold-literal.xsl deleted file mode 100644 index 608eb5df6281..000000000000 --- a/trunk/tools/perf/Documentation/manpage-bold-literal.xsl +++ /dev/null @@ -1,17 +0,0 @@ - - - - - - - fB - - - fR - - - diff --git a/trunk/tools/perf/Documentation/manpage-normal.xsl b/trunk/tools/perf/Documentation/manpage-normal.xsl deleted file mode 100644 index a48f5b11f3dc..000000000000 --- a/trunk/tools/perf/Documentation/manpage-normal.xsl +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - -\ -. - - diff --git a/trunk/tools/perf/Documentation/manpage-suppress-sp.xsl b/trunk/tools/perf/Documentation/manpage-suppress-sp.xsl deleted file mode 100644 index a63c7632a87d..000000000000 --- a/trunk/tools/perf/Documentation/manpage-suppress-sp.xsl +++ /dev/null @@ -1,21 +0,0 @@ - - - - - - - - - - - - - - - diff --git a/trunk/tools/perf/Documentation/perf-annotate.txt b/trunk/tools/perf/Documentation/perf-annotate.txt deleted file mode 100644 index c9dcade06831..000000000000 --- a/trunk/tools/perf/Documentation/perf-annotate.txt +++ /dev/null @@ -1,29 +0,0 @@ -perf-annotate(1) -============== - -NAME ----- -perf-annotate - Read perf.data (created by perf record) and display annotated code - -SYNOPSIS --------- -[verse] -'perf annotate' [-i | --input=file] symbol_name - -DESCRIPTION ------------ -This command reads the input file and displays an annotated version of the -code. If the object file has debug symbols then the source code will be -displayed alongside assembly code. - -If there is no debug info in the object, then annotated assembly is displayed. - -OPTIONS -------- --i:: ---input=:: - Input file name. 
(default: perf.data) - -SEE ALSO --------- -linkperf:perf-record[1] diff --git a/trunk/tools/perf/Documentation/perf-help.txt b/trunk/tools/perf/Documentation/perf-help.txt deleted file mode 100644 index 514391818d1f..000000000000 --- a/trunk/tools/perf/Documentation/perf-help.txt +++ /dev/null @@ -1,38 +0,0 @@ -perf-help(1) -============ - -NAME ----- -perf-help - display help information about perf - -SYNOPSIS --------- -'perf help' [-a|--all] [COMMAND] - -DESCRIPTION ------------ - -With no options and no COMMAND given, the synopsis of the 'perf' -command and a list of the most commonly used perf commands are printed -on the standard output. - -If the option '--all' or '-a' is given, then all available commands are -printed on the standard output. - -If a perf command is named, a manual page for that command is brought -up. The 'man' program is used by default for this purpose, but this -can be overridden by other options or configuration variables. - -Note that `perf --help ...` is identical to `perf help ...` because the -former is internally converted into the latter. - -OPTIONS -------- --a:: ---all:: - Prints all the available commands on the standard output. This - option supersedes any other option. - -PERF ----- -Part of the linkperf:perf[1] suite diff --git a/trunk/tools/perf/Documentation/perf-list.txt b/trunk/tools/perf/Documentation/perf-list.txt deleted file mode 100644 index 8290b9422668..000000000000 --- a/trunk/tools/perf/Documentation/perf-list.txt +++ /dev/null @@ -1,25 +0,0 @@ -perf-list(1) -============ - -NAME ----- -perf-list - List all symbolic event types - -SYNOPSIS --------- -[verse] -'perf list' - -DESCRIPTION ------------ -This command displays the symbolic event types which can be selected in the -various perf commands with the -e option. 
- -OPTIONS -------- -None - -SEE ALSO --------- -linkperf:perf-stat[1], linkperf:perf-top[1], -linkperf:perf-record[1] diff --git a/trunk/tools/perf/Documentation/perf-record.txt b/trunk/tools/perf/Documentation/perf-record.txt deleted file mode 100644 index 1dbc1eeb4c01..000000000000 --- a/trunk/tools/perf/Documentation/perf-record.txt +++ /dev/null @@ -1,42 +0,0 @@ -perf-record(1) -============== - -NAME ----- -perf-record - Run a command and record its profile into perf.data - -SYNOPSIS --------- -[verse] -'perf record' [-e | --event=EVENT] [-l] [-a] -'perf record' [-e | --event=EVENT] [-l] [-a] -- [] - -DESCRIPTION ------------ -This command runs a command and gathers a performance counter profile -from it, into perf.data - without displaying anything. - -This file can then be inspected later on, using 'perf report'. - - -OPTIONS -------- -...:: - Any command you can specify in a shell. - --e:: ---event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. - --a:: - system-wide collection - --l:: - scale counter values - -SEE ALSO --------- -linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/trunk/tools/perf/Documentation/perf-report.txt b/trunk/tools/perf/Documentation/perf-report.txt deleted file mode 100644 index 52d3fc6846a9..000000000000 --- a/trunk/tools/perf/Documentation/perf-report.txt +++ /dev/null @@ -1,26 +0,0 @@ -perf-report(1) -============== - -NAME ----- -perf-report - Read perf.data (created by perf record) and display the profile - -SYNOPSIS --------- -[verse] -'perf report' [-i | --input=file] - -DESCRIPTION ------------ -This command displays the performance counter profile information recorded -via perf report. - -OPTIONS -------- --i:: ---input=:: - Input file name. 
(default: perf.data) - -SEE ALSO --------- -linkperf:perf-stat[1] diff --git a/trunk/tools/perf/Documentation/perf-stat.txt b/trunk/tools/perf/Documentation/perf-stat.txt deleted file mode 100644 index c368a72721d7..000000000000 --- a/trunk/tools/perf/Documentation/perf-stat.txt +++ /dev/null @@ -1,66 +0,0 @@ -perf-stat(1) -============ - -NAME ----- -perf-stat - Run a command and gather performance counter statistics - -SYNOPSIS --------- -[verse] -'perf stat' [-e | --event=EVENT] [-l] [-a] -'perf stat' [-e | --event=EVENT] [-l] [-a] -- [] - -DESCRIPTION ------------ -This command runs a command and gathers performance counter statistics -from it. - - -OPTIONS -------- -...:: - Any command you can specify in a shell. - - --e:: ---event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. - --i:: ---inherit:: - child tasks inherit counters --p:: ---pid=:: - stat events on existing pid - --a:: - system-wide collection - --l:: - scale counter values - -EXAMPLES --------- - -$ perf stat -- make -j - - Performance counter stats for 'make -j': - - 8117.370256 task clock ticks # 11.281 CPU utilization factor - 678 context switches # 0.000 M/sec - 133 CPU migrations # 0.000 M/sec - 235724 pagefaults # 0.029 M/sec - 24821162526 CPU cycles # 3057.784 M/sec - 18687303457 instructions # 2302.138 M/sec - 172158895 cache references # 21.209 M/sec - 27075259 cache misses # 3.335 M/sec - - Wall-clock time elapsed: 719.554352 msecs - -SEE ALSO --------- -linkperf:perf-top[1], linkperf:perf-list[1] diff --git a/trunk/tools/perf/Documentation/perf-top.txt b/trunk/tools/perf/Documentation/perf-top.txt deleted file mode 100644 index 539d01289725..000000000000 --- a/trunk/tools/perf/Documentation/perf-top.txt +++ /dev/null @@ -1,39 +0,0 @@ -perf-top(1) -=========== - -NAME ----- -perf-top - Run a command and profile it - -SYNOPSIS 
--------- -[verse] -'perf top' [-e | --event=EVENT] [-l] [-a] - -DESCRIPTION ------------ -This command runs a command and gathers a performance counter profile -from it. - - -OPTIONS -------- -...:: - Any command you can specify in a shell. - --e:: ---event=:: - Select the PMU event. Selection can be a symbolic event name - (use 'perf list' to list all events) or a raw PMU - event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. - --a:: - system-wide collection - --l:: - scale counter values - -SEE ALSO --------- -linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/trunk/tools/perf/Documentation/perf.txt b/trunk/tools/perf/Documentation/perf.txt deleted file mode 100644 index 69c832557199..000000000000 --- a/trunk/tools/perf/Documentation/perf.txt +++ /dev/null @@ -1,24 +0,0 @@ -perf(1) -======= - -NAME ----- -perf - Performance analysis tools for Linux - -SYNOPSIS --------- -[verse] -'perf' [--version] [--help] COMMAND [ARGS] - -DESCRIPTION ------------ -Performance counters for Linux are are a new kernel-based subsystem -that provide a framework for all things performance analysis. It -covers hardware level (CPU/PMU, Performance Monitoring Unit) features -and software features (software counters, tracepoints) as well. - -SEE ALSO --------- -linkperf:perf-stat[1], linkperf:perf-top[1], -linkperf:perf-record[1], linkperf:perf-report[1], -linkperf:perf-list[1] diff --git a/trunk/tools/perf/Makefile b/trunk/tools/perf/Makefile deleted file mode 100644 index 0cbd5d6874ec..000000000000 --- a/trunk/tools/perf/Makefile +++ /dev/null @@ -1,929 +0,0 @@ -# The default target of this Makefile is... -all:: - -# Define V=1 to have a more verbose compile. -# -# Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() -# or vsnprintf() return -1 instead of number of characters which would -# have been written to the final string if enough space had been available. 
-# -# Define FREAD_READS_DIRECTORIES if your are on a system which succeeds -# when attempting to read from an fopen'ed directory. -# -# Define NO_OPENSSL environment variable if you do not have OpenSSL. -# This also implies MOZILLA_SHA1. -# -# Define CURLDIR=/foo/bar if your curl header and library files are in -# /foo/bar/include and /foo/bar/lib directories. -# -# Define EXPATDIR=/foo/bar if your expat header and library files are in -# /foo/bar/include and /foo/bar/lib directories. -# -# Define NO_D_INO_IN_DIRENT if you don't have d_ino in your struct dirent. -# -# Define NO_D_TYPE_IN_DIRENT if your platform defines DT_UNKNOWN but lacks -# d_type in struct dirent (latest Cygwin -- will be fixed soonish). -# -# Define NO_C99_FORMAT if your formatted IO functions (printf/scanf et.al.) -# do not support the 'size specifiers' introduced by C99, namely ll, hh, -# j, z, t. (representing long long int, char, intmax_t, size_t, ptrdiff_t). -# some C compilers supported these specifiers prior to C99 as an extension. -# -# Define NO_STRCASESTR if you don't have strcasestr. -# -# Define NO_MEMMEM if you don't have memmem. -# -# Define NO_STRTOUMAX if you don't have strtoumax in the C library. -# If your compiler also does not support long long or does not have -# strtoull, define NO_STRTOULL. -# -# Define NO_SETENV if you don't have setenv in the C library. -# -# Define NO_UNSETENV if you don't have unsetenv in the C library. -# -# Define NO_MKDTEMP if you don't have mkdtemp in the C library. -# -# Define NO_SYS_SELECT_H if you don't have sys/select.h. -# -# Define NO_SYMLINK_HEAD if you never want .perf/HEAD to be a symbolic link. -# Enable it on Windows. By default, symrefs are still used. -# -# Define NO_SVN_TESTS if you want to skip time-consuming SVN interoperability -# tests. These tests take up a significant amount of the total test time -# but are not needed unless you plan to talk to SVN repos. 
-# -# Define NO_FINK if you are building on Darwin/Mac OS X, have Fink -# installed in /sw, but don't want PERF to link against any libraries -# installed there. If defined you may specify your own (or Fink's) -# include directories and library directories by defining CFLAGS -# and LDFLAGS appropriately. -# -# Define NO_DARWIN_PORTS if you are building on Darwin/Mac OS X, -# have DarwinPorts installed in /opt/local, but don't want PERF to -# link against any libraries installed there. If defined you may -# specify your own (or DarwinPort's) include directories and -# library directories by defining CFLAGS and LDFLAGS appropriately. -# -# Define PPC_SHA1 environment variable when running make to make use of -# a bundled SHA1 routine optimized for PowerPC. -# -# Define ARM_SHA1 environment variable when running make to make use of -# a bundled SHA1 routine optimized for ARM. -# -# Define MOZILLA_SHA1 environment variable when running make to make use of -# a bundled SHA1 routine coming from Mozilla. It is GPL'd and should be fast -# on non-x86 architectures (e.g. PowerPC), while the OpenSSL version (default -# choice) has very fast version optimized for i586. -# -# Define NEEDS_SSL_WITH_CRYPTO if you need -lcrypto with -lssl (Darwin). -# -# Define NEEDS_LIBICONV if linking with libc is not enough (Darwin). -# -# Define NEEDS_SOCKET if linking with libc is not enough (SunOS, -# Patrick Mauritz). -# -# Define NO_MMAP if you want to avoid mmap. -# -# Define NO_PTHREADS if you do not have or do not want to use Pthreads. -# -# Define NO_PREAD if you have a problem with pread() system call (e.g. -# cygwin.dll before v1.5.22). -# -# Define NO_FAST_WORKING_DIRECTORY if accessing objects in pack files is -# generally faster on your platform than accessing the working directory. -# -# Define NO_TRUSTABLE_FILEMODE if your filesystem may claim to support -# the executable mode bit, but doesn't really do so. -# -# Define NO_IPV6 if you lack IPv6 support and getaddrinfo(). 
-# -# Define NO_SOCKADDR_STORAGE if your platform does not have struct -# sockaddr_storage. -# -# Define NO_ICONV if your libc does not properly support iconv. -# -# Define OLD_ICONV if your library has an old iconv(), where the second -# (input buffer pointer) parameter is declared with type (const char **). -# -# Define NO_DEFLATE_BOUND if your zlib does not have deflateBound. -# -# Define NO_R_TO_GCC_LINKER if your gcc does not like "-R/path/lib" -# that tells runtime paths to dynamic libraries; -# "-Wl,-rpath=/path/lib" is used instead. -# -# Define USE_NSEC below if you want perf to care about sub-second file mtimes -# and ctimes. Note that you need recent glibc (at least 2.2.4) for this, and -# it will BREAK YOUR LOCAL DIFFS! show-diff and anything using it will likely -# randomly break unless your underlying filesystem supports those sub-second -# times (my ext3 doesn't). -# -# Define USE_ST_TIMESPEC if your "struct stat" uses "st_ctimespec" instead of -# "st_ctim" -# -# Define NO_NSEC if your "struct stat" does not have "st_ctim.tv_nsec" -# available. This automatically turns USE_NSEC off. -# -# Define USE_STDEV below if you want perf to care about the underlying device -# change being considered an inode change from the update-index perspective. -# -# Define NO_ST_BLOCKS_IN_STRUCT_STAT if your platform does not have st_blocks -# field that counts the on-disk footprint in 512-byte blocks. -# -# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8 -# -# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72. -# -# Define NO_PERL_MAKEMAKER if you cannot use Makefiles generated by perl's -# MakeMaker (e.g. using ActiveState under Cygwin). -# -# Define NO_PERL if you do not want Perl scripts or libraries at all. -# -# Define INTERNAL_QSORT to use Git's implementation of qsort(), which -# is a simplified version of the merge sort used in glibc. 
This is -# recommended if Git triggers O(n^2) behavior in your platform's qsort(). -# -# Define NO_EXTERNAL_GREP if you don't want "perf grep" to ever call -# your external grep (e.g., if your system lacks grep, if its grep is -# broken, or spawning external process is slower than built-in grep perf has). - -PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE - @$(SHELL_PATH) util/PERF-VERSION-GEN --include PERF-VERSION-FILE - -uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not') -uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not') -uname_O := $(shell sh -c 'uname -o 2>/dev/null || echo not') -uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') -uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') -uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') - -# CFLAGS and LDFLAGS are for the users to override from the command line. - -CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -LDFLAGS = -lpthread -lrt -lelf -ALL_CFLAGS = $(CFLAGS) -ALL_LDFLAGS = $(LDFLAGS) -STRIP ?= strip - -# Among the variables below, these: -# perfexecdir -# template_dir -# mandir -# infodir -# htmldir -# ETC_PERFCONFIG (but not sysconfdir) -# can be specified as a relative path some/where/else; -# this is interpreted as relative to $(prefix) and "perf" at -# runtime figures out where they are based on the path to the executable. -# This can help installing the suite in a relocatable way. 
- -prefix = $(HOME) -bindir_relative = bin -bindir = $(prefix)/$(bindir_relative) -mandir = share/man -infodir = share/info -perfexecdir = libexec/perf-core -sharedir = $(prefix)/share -template_dir = share/perf-core/templates -htmldir = share/doc/perf-doc -ifeq ($(prefix),/usr) -sysconfdir = /etc -ETC_PERFCONFIG = $(sysconfdir)/perfconfig -else -sysconfdir = $(prefix)/etc -ETC_PERFCONFIG = etc/perfconfig -endif -lib = lib -# DESTDIR= - -export prefix bindir sharedir sysconfdir - -CC = gcc -AR = ar -RM = rm -f -TAR = tar -FIND = find -INSTALL = install -RPMBUILD = rpmbuild -PTHREAD_LIBS = -lpthread - -# sparse is architecture-neutral, which means that we need to tell it -# explicitly what architecture to check for. Fix this up for yours.. -SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ - - - -### --- END CONFIGURATION SECTION --- - -# Those must not be GNU-specific; they are shared with perl/ which may -# be built by a different compiler. (Note that this is an artifact now -# but it still might be nice to keep that distinction.) -BASIC_CFLAGS = -BASIC_LDFLAGS = - -# Guard against environment variables -BUILTIN_OBJS = -BUILT_INS = -COMPAT_CFLAGS = -COMPAT_OBJS = -LIB_H = -LIB_OBJS = -SCRIPT_PERL = -SCRIPT_SH = -TEST_PROGRAMS = - -# -# No scripts right now: -# - -# SCRIPT_SH += perf-am.sh - -# -# No Perl scripts right now: -# - -# SCRIPT_PERL += perf-add--interactive.perl - -SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH)) \ - $(patsubst %.perl,%,$(SCRIPT_PERL)) - -# Empty... -EXTRA_PROGRAMS = - -# ... and all the rest that could be moved out of bindir to perfexecdir -PROGRAMS += $(EXTRA_PROGRAMS) - -# -# Single 'perf' binary right now: -# -PROGRAMS += perf - -# List built-in command $C whose implementation cmd_$C() is not in -# builtin-$C.o but is linked in as part of some other command. 
-# -# None right now: -# -# BUILT_INS += perf-init $X - -# what 'all' will build and 'install' will install, in perfexecdir -ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS) - -# what 'all' will build but not install in perfexecdir -OTHER_PROGRAMS = perf$X - -# Set paths to tools early so that they can be used for version tests. -ifndef SHELL_PATH - SHELL_PATH = /bin/sh -endif -ifndef PERL_PATH - PERL_PATH = /usr/bin/perl -endif - -export PERL_PATH - -LIB_FILE=libperf.a - -LIB_H += ../../include/linux/perf_counter.h -LIB_H += perf.h -LIB_H += util/list.h -LIB_H += util/rbtree.h -LIB_H += util/levenshtein.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/help.h -LIB_H += util/strbuf.h -LIB_H += util/string.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/symbol.h -LIB_H += util/color.h - -LIB_OBJS += util/abspath.o -LIB_OBJS += util/alias.o -LIB_OBJS += util/config.o -LIB_OBJS += util/ctype.o -LIB_OBJS += util/environment.o -LIB_OBJS += util/exec_cmd.o -LIB_OBJS += util/help.o -LIB_OBJS += util/levenshtein.o -LIB_OBJS += util/parse-options.o -LIB_OBJS += util/parse-events.o -LIB_OBJS += util/path.o -LIB_OBJS += util/rbtree.o -LIB_OBJS += util/run-command.o -LIB_OBJS += util/quote.o -LIB_OBJS += util/strbuf.o -LIB_OBJS += util/string.o -LIB_OBJS += util/usage.o -LIB_OBJS += util/wrapper.o -LIB_OBJS += util/sigchain.o -LIB_OBJS += util/symbol.o -LIB_OBJS += util/color.o -LIB_OBJS += util/pager.o - -BUILTIN_OBJS += builtin-annotate.o -BUILTIN_OBJS += builtin-help.o -BUILTIN_OBJS += builtin-list.o -BUILTIN_OBJS += builtin-record.o -BUILTIN_OBJS += builtin-report.o -BUILTIN_OBJS += builtin-stat.o -BUILTIN_OBJS += builtin-top.o - -PERFLIBS = $(LIB_FILE) -EXTLIBS = - -# -# Platform specific tweaks -# - -# We choose to avoid "if .. else if .. else .. endif endif" -# because maintaining the nesting to match is a pain. If -# we had "elif" things would have been much nicer... 
- --include config.mak.autogen --include config.mak - -ifeq ($(uname_S),Darwin) - ifndef NO_FINK - ifeq ($(shell test -d /sw/lib && echo y),y) - BASIC_CFLAGS += -I/sw/include - BASIC_LDFLAGS += -L/sw/lib - endif - endif - ifndef NO_DARWIN_PORTS - ifeq ($(shell test -d /opt/local/lib && echo y),y) - BASIC_CFLAGS += -I/opt/local/include - BASIC_LDFLAGS += -L/opt/local/lib - endif - endif - PTHREAD_LIBS = -endif - -ifndef CC_LD_DYNPATH - ifdef NO_R_TO_GCC_LINKER - # Some gcc does not accept and pass -R to the linker to specify - # the runtime dynamic library path. - CC_LD_DYNPATH = -Wl,-rpath, - else - CC_LD_DYNPATH = -R - endif -endif - -ifdef ZLIB_PATH - BASIC_CFLAGS += -I$(ZLIB_PATH)/include - EXTLIBS += -L$(ZLIB_PATH)/$(lib) $(CC_LD_DYNPATH)$(ZLIB_PATH)/$(lib) -endif -EXTLIBS += -lz - -ifdef NEEDS_SOCKET - EXTLIBS += -lsocket -endif -ifdef NEEDS_NSL - EXTLIBS += -lnsl -endif -ifdef NO_D_TYPE_IN_DIRENT - BASIC_CFLAGS += -DNO_D_TYPE_IN_DIRENT -endif -ifdef NO_D_INO_IN_DIRENT - BASIC_CFLAGS += -DNO_D_INO_IN_DIRENT -endif -ifdef NO_ST_BLOCKS_IN_STRUCT_STAT - BASIC_CFLAGS += -DNO_ST_BLOCKS_IN_STRUCT_STAT -endif -ifdef USE_NSEC - BASIC_CFLAGS += -DUSE_NSEC -endif -ifdef USE_ST_TIMESPEC - BASIC_CFLAGS += -DUSE_ST_TIMESPEC -endif -ifdef NO_NSEC - BASIC_CFLAGS += -DNO_NSEC -endif -ifdef NO_C99_FORMAT - BASIC_CFLAGS += -DNO_C99_FORMAT -endif -ifdef SNPRINTF_RETURNS_BOGUS - COMPAT_CFLAGS += -DSNPRINTF_RETURNS_BOGUS - COMPAT_OBJS += compat/snprintf.o -endif -ifdef FREAD_READS_DIRECTORIES - COMPAT_CFLAGS += -DFREAD_READS_DIRECTORIES - COMPAT_OBJS += compat/fopen.o -endif -ifdef NO_SYMLINK_HEAD - BASIC_CFLAGS += -DNO_SYMLINK_HEAD -endif -ifdef NO_STRCASESTR - COMPAT_CFLAGS += -DNO_STRCASESTR - COMPAT_OBJS += compat/strcasestr.o -endif -ifdef NO_STRTOUMAX - COMPAT_CFLAGS += -DNO_STRTOUMAX - COMPAT_OBJS += compat/strtoumax.o -endif -ifdef NO_STRTOULL - COMPAT_CFLAGS += -DNO_STRTOULL -endif -ifdef NO_SETENV - COMPAT_CFLAGS += -DNO_SETENV - COMPAT_OBJS += compat/setenv.o -endif 
-ifdef NO_MKDTEMP - COMPAT_CFLAGS += -DNO_MKDTEMP - COMPAT_OBJS += compat/mkdtemp.o -endif -ifdef NO_UNSETENV - COMPAT_CFLAGS += -DNO_UNSETENV - COMPAT_OBJS += compat/unsetenv.o -endif -ifdef NO_SYS_SELECT_H - BASIC_CFLAGS += -DNO_SYS_SELECT_H -endif -ifdef NO_MMAP - COMPAT_CFLAGS += -DNO_MMAP - COMPAT_OBJS += compat/mmap.o -else - ifdef USE_WIN32_MMAP - COMPAT_CFLAGS += -DUSE_WIN32_MMAP - COMPAT_OBJS += compat/win32mmap.o - endif -endif -ifdef NO_PREAD - COMPAT_CFLAGS += -DNO_PREAD - COMPAT_OBJS += compat/pread.o -endif -ifdef NO_FAST_WORKING_DIRECTORY - BASIC_CFLAGS += -DNO_FAST_WORKING_DIRECTORY -endif -ifdef NO_TRUSTABLE_FILEMODE - BASIC_CFLAGS += -DNO_TRUSTABLE_FILEMODE -endif -ifdef NO_IPV6 - BASIC_CFLAGS += -DNO_IPV6 -endif -ifdef NO_UINTMAX_T - BASIC_CFLAGS += -Duintmax_t=uint32_t -endif -ifdef NO_SOCKADDR_STORAGE -ifdef NO_IPV6 - BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in -else - BASIC_CFLAGS += -Dsockaddr_storage=sockaddr_in6 -endif -endif -ifdef NO_INET_NTOP - LIB_OBJS += compat/inet_ntop.o -endif -ifdef NO_INET_PTON - LIB_OBJS += compat/inet_pton.o -endif - -ifdef NO_ICONV - BASIC_CFLAGS += -DNO_ICONV -endif - -ifdef OLD_ICONV - BASIC_CFLAGS += -DOLD_ICONV -endif - -ifdef NO_DEFLATE_BOUND - BASIC_CFLAGS += -DNO_DEFLATE_BOUND -endif - -ifdef PPC_SHA1 - SHA1_HEADER = "ppc/sha1.h" - LIB_OBJS += ppc/sha1.o ppc/sha1ppc.o -else -ifdef ARM_SHA1 - SHA1_HEADER = "arm/sha1.h" - LIB_OBJS += arm/sha1.o arm/sha1_arm.o -else -ifdef MOZILLA_SHA1 - SHA1_HEADER = "mozilla-sha1/sha1.h" - LIB_OBJS += mozilla-sha1/sha1.o -else - SHA1_HEADER = - EXTLIBS += $(LIB_4_CRYPTO) -endif -endif -endif -ifdef NO_PERL_MAKEMAKER - export NO_PERL_MAKEMAKER -endif -ifdef NO_HSTRERROR - COMPAT_CFLAGS += -DNO_HSTRERROR - COMPAT_OBJS += compat/hstrerror.o -endif -ifdef NO_MEMMEM - COMPAT_CFLAGS += -DNO_MEMMEM - COMPAT_OBJS += compat/memmem.o -endif -ifdef INTERNAL_QSORT - COMPAT_CFLAGS += -DINTERNAL_QSORT - COMPAT_OBJS += compat/qsort.o -endif -ifdef RUNTIME_PREFIX - COMPAT_CFLAGS += 
-DRUNTIME_PREFIX -endif - -ifdef DIR_HAS_BSD_GROUP_SEMANTICS - COMPAT_CFLAGS += -DDIR_HAS_BSD_GROUP_SEMANTICS -endif -ifdef NO_EXTERNAL_GREP - BASIC_CFLAGS += -DNO_EXTERNAL_GREP -endif - -ifeq ($(PERL_PATH),) -NO_PERL=NoThanks -endif - -QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir -QUIET_SUBDIR1 = - -ifneq ($(findstring $(MAKEFLAGS),w),w) -PRINT_DIR = --no-print-directory -else # "make -w" -NO_SUBDIR = : -endif - -ifneq ($(findstring $(MAKEFLAGS),s),s) -ifndef V - QUIET_CC = @echo ' ' CC $@; - QUIET_AR = @echo ' ' AR $@; - QUIET_LINK = @echo ' ' LINK $@; - QUIET_BUILT_IN = @echo ' ' BUILTIN $@; - QUIET_GEN = @echo ' ' GEN $@; - QUIET_SUBDIR0 = +@subdir= - QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \ - $(MAKE) $(PRINT_DIR) -C $$subdir - export V - export QUIET_GEN - export QUIET_BUILT_IN -endif -endif - -ifdef ASCIIDOC8 - export ASCIIDOC8 -endif - -# Shell quote (do not use $(call) to accommodate ancient setups); - -SHA1_HEADER_SQ = $(subst ','\'',$(SHA1_HEADER)) -ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG)) - -DESTDIR_SQ = $(subst ','\'',$(DESTDIR)) -bindir_SQ = $(subst ','\'',$(bindir)) -bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) -mandir_SQ = $(subst ','\'',$(mandir)) -infodir_SQ = $(subst ','\'',$(infodir)) -perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) -template_dir_SQ = $(subst ','\'',$(template_dir)) -htmldir_SQ = $(subst ','\'',$(htmldir)) -prefix_SQ = $(subst ','\'',$(prefix)) - -SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH)) -PERL_PATH_SQ = $(subst ','\'',$(PERL_PATH)) - -LIBS = $(PERFLIBS) $(EXTLIBS) - -BASIC_CFLAGS += -DSHA1_HEADER='$(SHA1_HEADER_SQ)' \ - $(COMPAT_CFLAGS) -LIB_OBJS += $(COMPAT_OBJS) - -ALL_CFLAGS += $(BASIC_CFLAGS) -ALL_LDFLAGS += $(BASIC_LDFLAGS) - -export TAR INSTALL DESTDIR SHELL_PATH - - -### Build rules - -SHELL = $(SHELL_PATH) - -all:: shell_compatibility_test $(ALL_PROGRAMS) $(BUILT_INS) $(OTHER_PROGRAMS) PERF-BUILD-OPTIONS -ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter 
%$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), test '$p' -ef '$p$X' || $(RM) '$p';) -endif - -all:: - -please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) - -shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell - -strip: $(PROGRAMS) perf$X - $(STRIP) $(STRIP_OPTS) $(PROGRAMS) perf$X - -perf.o: perf.c common-cmds.h PERF-CFLAGS - $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(ALL_CFLAGS) -c $(filter %.c,$^) - -perf$X: perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ perf.o \ - $(BUILTIN_OBJS) $(ALL_LDFLAGS) $(LIBS) - -builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< - -$(BUILT_INS): perf$X - $(QUIET_BUILT_IN)$(RM) $@ && \ - ln perf$X $@ 2>/dev/null || \ - ln -s perf$X $@ 2>/dev/null || \ - cp perf$X $@ - -common-cmds.h: util/generate-cmdlist.sh command-list.txt - -common-cmds.h: $(wildcard Documentation/perf-*.txt) - $(QUIET_GEN)util/generate-cmdlist.sh > $@+ && mv $@+ $@ - -$(patsubst %.sh,%,$(SCRIPT_SH)) : % : %.sh - $(QUIET_GEN)$(RM) $@ $@+ && \ - sed -e '1s|#!.*/sh|#!$(SHELL_PATH_SQ)|' \ - -e 's|@SHELL_PATH@|$(SHELL_PATH_SQ)|' \ - -e 's|@@PERL@@|$(PERL_PATH_SQ)|g' \ - -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ - -e 's/@@NO_CURL@@/$(NO_CURL)/g' \ - $@.sh >$@+ && \ - chmod +x $@+ && \ - mv $@+ $@ - -configure: configure.ac - $(QUIET_GEN)$(RM) $@ $<+ && \ - sed -e 's/@@PERF_VERSION@@/$(PERF_VERSION)/g' \ - $< > $<+ && \ - autoconf -o $@ $<+ && \ - $(RM) $<+ - -# These can record PERF_VERSION -perf.o perf.spec \ - $(patsubst %.sh,%,$(SCRIPT_SH)) \ - $(patsubst %.perl,%,$(SCRIPT_PERL)) \ - : PERF-VERSION-FILE - -%.o: %.c PERF-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< -%.s: %.c PERF-CFLAGS - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< -%.o: %.S - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) $< - 
-util/exec_cmd.o: util/exec_cmd.c PERF-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DBINDIR="$(bindir_relative_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -builtin-init-db.o: builtin-init-db.c PERF-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DDEFAULT_PERF_TEMPLATE_DIR='"$(template_dir_SQ)"' $< - -util/config.o: util/config.c PERF-CFLAGS - $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -perf-%$X: %.o $(PERFLIBS) - $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS) - -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%$X,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) -builtin-revert.o wt-status.o: wt-status.h - -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) - -doc: - $(MAKE) -C Documentation all - -man: - $(MAKE) -C Documentation man - -html: - $(MAKE) -C Documentation html - -info: - $(MAKE) -C Documentation info - -pdf: - $(MAKE) -C Documentation pdf - -TAGS: - $(RM) TAGS - $(FIND) . -name '*.[hcS]' -print | xargs etags -a - -tags: - $(RM) tags - $(FIND) . -name '*.[hcS]' -print | xargs ctags -a - -cscope: - $(RM) cscope* - $(FIND) . -name '*.[hcS]' -print | xargs cscope -b - -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ) - -PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " * new build flags or prefix"; \ - echo "$$FLAGS" >PERF-CFLAGS; \ - fi - -# We need to apply sq twice, once to protect from the shell -# that runs PERF-BUILD-OPTIONS, and then again to protect it -# and the first level quoting from the shell that runs "echo". 
-PERF-BUILD-OPTIONS: .FORCE-PERF-BUILD-OPTIONS - @echo SHELL_PATH=\''$(subst ','\'',$(SHELL_PATH_SQ))'\' >$@ - @echo TAR=\''$(subst ','\'',$(subst ','\'',$(TAR)))'\' >>$@ - @echo NO_CURL=\''$(subst ','\'',$(subst ','\'',$(NO_CURL)))'\' >>$@ - @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ - -### Testing rules - -# -# None right now: -# -# TEST_PROGRAMS += test-something$X - -all:: $(TEST_PROGRAMS) - -# GNU make supports exporting all variables by "export" without parameters. -# However, the environment gets quite big, and some programs have problems -# with that. - -export NO_SVN_TESTS - -check: common-cmds.h - if sparse; \ - then \ - for i in *.c */*.c; \ - do \ - sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \ - done; \ - else \ - echo 2>&1 "Did you mean 'make test'?"; \ - exit 1; \ - fi - -remove-dashes: - ./fixup-builtins $(BUILT_INS) $(PROGRAMS) $(SCRIPTS) - -### Installation rules - -ifneq ($(filter /%,$(firstword $(template_dir))),) -template_instdir = $(template_dir) -else -template_instdir = $(prefix)/$(template_dir) -endif -export template_instdir - -ifneq ($(filter /%,$(firstword $(perfexecdir))),) -perfexec_instdir = $(perfexecdir) -else -perfexec_instdir = $(prefix)/$(perfexecdir) -endif -perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir)) -export perfexec_instdir - -install: all - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)' - $(INSTALL) perf$X '$(DESTDIR_SQ)$(bindir_SQ)' -ifdef BUILT_INS - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' - $(INSTALL) $(BUILT_INS) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' -ifneq (,$X) - $(foreach p,$(patsubst %$X,%,$(filter %$X,$(ALL_PROGRAMS) $(BUILT_INS) perf$X)), $(RM) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$p';) -endif -endif - -install-doc: - $(MAKE) -C Documentation install - -install-man: - $(MAKE) -C Documentation install-man - -install-html: - $(MAKE) -C Documentation install-html - -install-info: - $(MAKE) -C Documentation install-info - -install-pdf: - $(MAKE) -C 
Documentation install-pdf - -quick-install-doc: - $(MAKE) -C Documentation quick-install - -quick-install-man: - $(MAKE) -C Documentation quick-install-man - -quick-install-html: - $(MAKE) -C Documentation quick-install-html - - -### Maintainer's dist rules -# -# None right now -# -# -# perf.spec: perf.spec.in -# sed -e 's/@@VERSION@@/$(PERF_VERSION)/g' < $< > $@+ -# mv $@+ $@ -# -# PERF_TARNAME=perf-$(PERF_VERSION) -# dist: perf.spec perf-archive$(X) configure -# ./perf-archive --format=tar \ -# --prefix=$(PERF_TARNAME)/ HEAD^{tree} > $(PERF_TARNAME).tar -# @mkdir -p $(PERF_TARNAME) -# @cp perf.spec configure $(PERF_TARNAME) -# @echo $(PERF_VERSION) > $(PERF_TARNAME)/version -# $(TAR) rf $(PERF_TARNAME).tar \ -# $(PERF_TARNAME)/perf.spec \ -# $(PERF_TARNAME)/configure \ -# $(PERF_TARNAME)/version -# @$(RM) -r $(PERF_TARNAME) -# gzip -f -9 $(PERF_TARNAME).tar -# -# htmldocs = perf-htmldocs-$(PERF_VERSION) -# manpages = perf-manpages-$(PERF_VERSION) -# dist-doc: -# $(RM) -r .doc-tmp-dir -# mkdir .doc-tmp-dir -# $(MAKE) -C Documentation WEBDOC_DEST=../.doc-tmp-dir install-webdoc -# cd .doc-tmp-dir && $(TAR) cf ../$(htmldocs).tar . -# gzip -n -9 -f $(htmldocs).tar -# : -# $(RM) -r .doc-tmp-dir -# mkdir -p .doc-tmp-dir/man1 .doc-tmp-dir/man5 .doc-tmp-dir/man7 -# $(MAKE) -C Documentation DESTDIR=./ \ -# man1dir=../.doc-tmp-dir/man1 \ -# man5dir=../.doc-tmp-dir/man5 \ -# man7dir=../.doc-tmp-dir/man7 \ -# install -# cd .doc-tmp-dir && $(TAR) cf ../$(manpages).tar . 
-# gzip -n -9 -f $(manpages).tar -# $(RM) -r .doc-tmp-dir -# -# rpm: dist -# $(RPMBUILD) -ta $(PERF_TARNAME).tar.gz - -### Cleaning rules - -distclean: clean -# $(RM) configure - -clean: - $(RM) *.o */*.o $(LIB_FILE) - $(RM) $(ALL_PROGRAMS) $(BUILT_INS) perf$X - $(RM) $(TEST_PROGRAMS) - $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo common-cmds.h TAGS tags cscope* - $(RM) -r autom4te.cache - $(RM) config.log config.mak.autogen config.mak.append config.status config.cache - $(RM) -r $(PERF_TARNAME) .doc-tmp-dir - $(RM) $(PERF_TARNAME).tar.gz perf-core_$(PERF_VERSION)-*.tar.gz - $(RM) $(htmldocs).tar.gz $(manpages).tar.gz - $(MAKE) -C Documentation/ clean - $(RM) PERF-VERSION-FILE PERF-CFLAGS PERF-BUILD-OPTIONS - -.PHONY: all install clean strip -.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS -.PHONY: .FORCE-PERF-BUILD-OPTIONS - -### Make sure built-ins do not have dups and listed in perf.c -# -check-builtins:: - ./check-builtins.sh - -### Test suite coverage testing -# -# None right now -# -# .PHONY: coverage coverage-clean coverage-build coverage-report -# -# coverage: -# $(MAKE) coverage-build -# $(MAKE) coverage-report -# -# coverage-clean: -# rm -f *.gcda *.gcno -# -# COVERAGE_CFLAGS = $(CFLAGS) -O0 -ftest-coverage -fprofile-arcs -# COVERAGE_LDFLAGS = $(CFLAGS) -O0 -lgcov -# -# coverage-build: coverage-clean -# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" all -# $(MAKE) CFLAGS="$(COVERAGE_CFLAGS)" LDFLAGS="$(COVERAGE_LDFLAGS)" \ -# -j1 test -# -# coverage-report: -# gcov -b *.c */*.c -# grep '^function.*called 0 ' *.c.gcov */*.c.gcov \ -# | sed -e 's/\([^:]*\)\.gcov: *function \([^ ]*\) called.*/\1: \2/' \ -# | tee coverage-untested-functions diff --git a/trunk/tools/perf/builtin-annotate.c b/trunk/tools/perf/builtin-annotate.c deleted file mode 100644 index b1ed5f766cb3..000000000000 --- a/trunk/tools/perf/builtin-annotate.c +++ /dev/null @@ 
-1,1356 +0,0 @@ -/* - * builtin-annotate.c - * - * Builtin annotate command: Analyze the perf.data input file, - * look up and read DSOs and symbol information and display - * a histogram of results, along various sorting keys. - */ -#include "builtin.h" - -#include "util/util.h" - -#include "util/color.h" -#include "util/list.h" -#include "util/cache.h" -#include "util/rbtree.h" -#include "util/symbol.h" -#include "util/string.h" - -#include "perf.h" - -#include "util/parse-options.h" -#include "util/parse-events.h" - -#define SHOW_KERNEL 1 -#define SHOW_USER 2 -#define SHOW_HV 4 - -static char const *input_name = "perf.data"; -static char *vmlinux = "vmlinux"; - -static char default_sort_order[] = "comm,symbol"; -static char *sort_order = default_sort_order; - -static int input; -static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; - -static int dump_trace = 0; -#define dprintf(x...) do { if (dump_trace) printf(x); } while (0) - -static int verbose; - -static unsigned long page_size; -static unsigned long mmap_window = 32; - -struct ip_event { - struct perf_event_header header; - __u64 ip; - __u32 pid, tid; -}; - -struct mmap_event { - struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - __u32 pid, tid; - char comm[16]; -}; - -struct fork_event { - struct perf_event_header header; - __u32 pid, ppid; -}; - -struct period_event { - struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; -}; - -typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - struct comm_event comm; - struct fork_event fork; - struct period_event period; -} event_t; - -static LIST_HEAD(dsos); -static struct dso *kernel_dso; -static struct dso *vdso; - - -static void dsos__add(struct dso *dso) -{ - list_add_tail(&dso->node, &dsos); -} - -static struct dso *dsos__find(const char 
*name) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - if (strcmp(pos->name, name) == 0) - return pos; - return NULL; -} - -static struct dso *dsos__findnew(const char *name) -{ - struct dso *dso = dsos__find(name); - int nr; - - if (dso) - return dso; - - dso = dso__new(name, 0); - if (!dso) - goto out_delete_dso; - - nr = dso__load(dso, NULL, verbose); - if (nr < 0) { - if (verbose) - fprintf(stderr, "Failed to open: %s\n", name); - goto out_delete_dso; - } - if (!nr && verbose) { - fprintf(stderr, - "No symbols found in: %s, maybe install a debug package?\n", - name); - } - - dsos__add(dso); - - return dso; - -out_delete_dso: - dso__delete(dso); - return NULL; -} - -static void dsos__fprintf(FILE *fp) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - dso__fprintf(pos, fp); -} - -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) -{ - return dso__find_symbol(kernel_dso, ip); -} - -static int load_kernel(void) -{ - int err; - - kernel_dso = dso__new("[kernel]", 0); - if (!kernel_dso) - return -1; - - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); - if (err) { - dso__delete(kernel_dso); - kernel_dso = NULL; - } else - dsos__add(kernel_dso); - - vdso = dso__new("[vdso]", 0); - if (!vdso) - return -1; - - vdso->find_symbol = vdso__find_symbol; - - dsos__add(vdso); - - return err; -} - -struct map { - struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); - struct dso *dso; -}; - -static __u64 map__map_ip(struct map *map, __u64 ip) -{ - return ip - map->start + map->pgoff; -} - -static __u64 vdso__map_ip(struct map *map, __u64 ip) -{ - return ip; -} - -static struct map *map__new(struct mmap_event *event) -{ - struct map *self = malloc(sizeof(*self)); - - if (self != NULL) { - const char *filename = event->filename; - - self->start = event->start; - self->end = event->start + event->len; - self->pgoff = event->pgoff; - - self->dso = 
dsos__findnew(filename); - if (self->dso == NULL) - goto out_delete; - - if (self->dso == vdso) - self->map_ip = vdso__map_ip; - else - self->map_ip = map__map_ip; - } - return self; -out_delete: - free(self); - return NULL; -} - -static struct map *map__clone(struct map *self) -{ - struct map *map = malloc(sizeof(*self)); - - if (!map) - return NULL; - - memcpy(map, self, sizeof(*self)); - - return map; -} - -static int map__overlap(struct map *l, struct map *r) -{ - if (l->start > r->start) { - struct map *t = l; - l = r; - r = t; - } - - if (l->end > r->start) - return 1; - - return 0; -} - -static size_t map__fprintf(struct map *self, FILE *fp) -{ - return fprintf(fp, " %Lx-%Lx %Lx %s\n", - self->start, self->end, self->pgoff, self->dso->name); -} - - -struct thread { - struct rb_node rb_node; - struct list_head maps; - pid_t pid; - char *comm; -}; - -static struct thread *thread__new(pid_t pid) -{ - struct thread *self = malloc(sizeof(*self)); - - if (self != NULL) { - self->pid = pid; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); - } - - return self; -} - -static int thread__set_comm(struct thread *self, const char *comm) -{ - if (self->comm) - free(self->comm); - self->comm = strdup(comm); - return self->comm ? 
0 : -ENOMEM; -} - -static size_t thread__fprintf(struct thread *self, FILE *fp) -{ - struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); - - list_for_each_entry(pos, &self->maps, node) - ret += map__fprintf(pos, fp); - - return ret; -} - - -static struct rb_root threads; -static struct thread *last_match; - -static struct thread *threads__findnew(pid_t pid) -{ - struct rb_node **p = &threads.rb_node; - struct rb_node *parent = NULL; - struct thread *th; - - /* - * Font-end cache - PID lookups come in blocks, - * so most of the time we dont have to look up - * the full rbtree: - */ - if (last_match && last_match->pid == pid) - return last_match; - - while (*p != NULL) { - parent = *p; - th = rb_entry(parent, struct thread, rb_node); - - if (th->pid == pid) { - last_match = th; - return th; - } - - if (pid < th->pid) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - th = thread__new(pid); - if (th != NULL) { - rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &threads); - last_match = th; - } - - return th; -} - -static void thread__insert_map(struct thread *self, struct map *map) -{ - struct map *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); - } - } - - list_add_tail(&map->node, &self->maps); -} - -static int thread__fork(struct thread *self, struct thread *parent) -{ - struct map *map; - - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) - return -ENOMEM; - - list_for_each_entry(map, &parent->maps, node) { - struct map *new = map__clone(map); - if (!new) - return -ENOMEM; - thread__insert_map(self, new); - } - - return 0; -} - -static struct map *thread__find_map(struct thread *self, __u64 ip) -{ - struct map *pos; - - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) 
- return pos; - - return NULL; -} - -static size_t threads__fprintf(FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - - for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); - - ret += thread__fprintf(pos, fp); - } - - return ret; -} - -/* - * histogram, sorted on item, collects counts - */ - -static struct rb_root hist; - -struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - __u64 ip; - char level; - - uint32_t count; -}; - -/* - * configurable sorting bits - */ - -struct sort_entry { - struct list_head list; - - char *header; - - int64_t (*cmp)(struct hist_entry *, struct hist_entry *); - int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *); -}; - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static size_t -sort__thread_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); -} - -static struct sort_entry sort_thread = { - .header = " Command: Pid", - .cmp = sort__thread_cmp, - .print = sort__thread_print, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } - - return strcmp(comm_l, comm_r); -} - -static size_t -sort__comm_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s", self->thread->comm); -} - -static struct sort_entry sort_comm = { - .header = " Command", 
- .cmp = sort__comm_cmp, - .collapse = sort__comm_collapse, - .print = sort__comm_print, -}; - -/* --sort dso */ - -static int64_t -sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; - - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } - - return strcmp(dso_l->name, dso_r->name); -} - -static size_t -sort__dso_print(FILE *fp, struct hist_entry *self) -{ - if (self->dso) - return fprintf(fp, "%-25s", self->dso->name); - - return fprintf(fp, "%016llx ", (__u64)self->ip); -} - -static struct sort_entry sort_dso = { - .header = "Shared Object ", - .cmp = sort__dso_cmp, - .print = sort__dso_print, -}; - -/* --sort symbol */ - -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - __u64 ip_l, ip_r; - - if (left->sym == right->sym) - return 0; - - ip_l = left->sym ? left->sym->start : left->ip; - ip_r = right->sym ? right->sym->start : right->ip; - - return (int64_t)(ip_r - ip_l); -} - -static size_t -sort__sym_print(FILE *fp, struct hist_entry *self) -{ - size_t ret = 0; - - if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); - - if (self->sym) { - ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 
'k' : '.', self->sym->name); - } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); - } - - return ret; -} - -static struct sort_entry sort_sym = { - .header = "Symbol", - .cmp = sort__sym_cmp, - .print = sort__sym_print, -}; - -static int sort__need_collapse = 0; - -struct sort_dimension { - char *name; - struct sort_entry *entry; - int taken; -}; - -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, -}; - -static LIST_HEAD(hist_entry__sort_list); - -static int sort_dimension__add(char *tok) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; - - if (sd->taken) - continue; - - if (strncasecmp(tok, sd->name, strlen(tok))) - continue; - - if (sd->entry->collapse) - sort__need_collapse = 1; - - list_add_tail(&sd->entry->list, &hist_entry__sort_list); - sd->taken = 1; - - return 0; - } - - return -ESRCH; -} - -static int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - cmp = se->cmp(left, right); - if (cmp) - break; - } - - return cmp; -} - -static int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - int64_t (*f)(struct hist_entry *, struct hist_entry *); - - f = se->collapse ?: se->cmp; - - cmp = f(left, right); - if (cmp) - break; - } - - return cmp; -} - -/* - * collect histogram counts - */ -static void hist_hit(struct hist_entry *he, __u64 ip) -{ - unsigned int sym_size, offset; - struct symbol *sym = he->sym; - - he->count++; - - if (!sym || !sym->hist) - return; - - sym_size = sym->end - sym->start; - offset = ip - sym->start; - - if (offset >= 
sym_size) - return; - - sym->hist_sum++; - sym->hist[offset]++; - - if (verbose >= 3) - printf("%p %s: count++ [ip: %p, %08Lx] => %Ld\n", - (void *)(unsigned long)he->sym->start, - he->sym->name, - (void *)(unsigned long)ip, ip - he->sym->start, - sym->hist[offset]); -} - -static int -hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level) -{ - struct rb_node **p = &hist.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *he; - struct hist_entry entry = { - .thread = thread, - .map = map, - .dso = dso, - .sym = sym, - .ip = ip, - .level = level, - .count = 1, - }; - int cmp; - - while (*p != NULL) { - parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__cmp(&entry, he); - - if (!cmp) { - hist_hit(he, ip); - - return 0; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - he = malloc(sizeof(*he)); - if (!he) - return -ENOMEM; - *he = entry; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hist); - - return 0; -} - -static void hist_entry__free(struct hist_entry *he) -{ - free(he); -} - -/* - * collapse the histogram - */ - -static struct rb_root collapse_hists; - -static void collapse__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &collapse_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - int64_t cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__collapse(iter, he); - - if (!cmp) { - iter->count += he->count; - hist_entry__free(he); - return; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &collapse_hists); -} - -static void collapse__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - - if (!sort__need_collapse) - return; - - next = rb_first(&hist); - while (next) { - n = 
rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hist); - collapse__insert_entry(n); - } -} - -/* - * reverse the map, sort on count. - */ - -static struct rb_root output_hists; - -static void output__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &output_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - if (he->count > iter->count) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &output_hists); -} - -static void output__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - struct rb_root *tree = &hist; - - if (sort__need_collapse) - tree = &collapse_hists; - - next = rb_first(tree); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, tree); - output__insert_entry(n); - } -} - -static void register_idle_thread(void) -{ - struct thread *thread = threads__findnew(0); - - if (thread == NULL || - thread__set_comm(thread, "[idle]")) { - fprintf(stderr, "problem inserting idle task.\n"); - exit(-1); - } -} - -static unsigned long total = 0, - total_mmap = 0, - total_comm = 0, - total_fork = 0, - total_unknown = 0; - -static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) -{ - char level; - int show = 0; - struct dso *dso = NULL; - struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; - struct map *map = NULL; - - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.misc, - event->ip.pid, - (void *)(long)ip); - - dprintf(" ... 
thread: %s:%d\n", thread->comm, thread->pid); - - if (thread == NULL) { - fprintf(stderr, "problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } - - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dprintf(" ...... dso: %s\n", dso->name); - - } else if (event->header.misc & PERF_EVENT_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - map = thread__find_map(thread, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dprintf(" ...... dso: %s\n", dso ? dso->name : ""); - - } else { - show = SHOW_HV; - level = 'H'; - dprintf(" ...... dso: [hypervisor]\n"); - } - - if (show & show_mask) { - struct symbol *sym = NULL; - - if (dso) - sym = dso->find_symbol(dso, ip); - - if (hist_entry__add(thread, map, dso, sym, ip, level)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); - return -1; - } - } - total++; - - return 0; -} - -static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->mmap.pid); - struct map *map = map__new(&event->mmap); - - dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct 
thread *thread = threads__findnew(event->comm.pid); - - dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dprintf("problem processing PERF_EVENT_COMM, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - -static int -process_fork_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); - - dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->fork.pid, event->fork.ppid); - - if (!thread || !parent || thread__fork(thread, parent)) { - dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - -static int -process_period_event(event_t *event, unsigned long offset, unsigned long head) -{ - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->period.time, - event->period.id, - event->period.sample_period); - - return 0; -} - -static int -process_event(event_t *event, unsigned long offset, unsigned long head) -{ - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - - switch (event->header.type) { - case PERF_EVENT_MMAP: - return process_mmap_event(event, offset, head); - - case PERF_EVENT_COMM: - return process_comm_event(event, offset, head); - - case PERF_EVENT_FORK: - return process_fork_event(event, offset, head); - - case PERF_EVENT_PERIOD: - return process_period_event(event, offset, head); - /* - * We dont process them right now but they are fine: - */ - - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: - return 0; - - default: - return -1; - } - - return 0; -} - 
-static int -parse_line(FILE *file, struct symbol *sym, __u64 start, __u64 len) -{ - char *line = NULL, *tmp, *tmp2; - unsigned int offset; - size_t line_len; - __u64 line_ip; - int ret; - char *c; - - if (getline(&line, &line_len, file) < 0) - return -1; - if (!line) - return -1; - - c = strchr(line, '\n'); - if (c) - *c = 0; - - line_ip = -1; - offset = 0; - ret = -2; - - /* - * Strip leading spaces: - */ - tmp = line; - while (*tmp) { - if (*tmp != ' ') - break; - tmp++; - } - - if (*tmp) { - /* - * Parse hexa addresses followed by ':' - */ - line_ip = strtoull(tmp, &tmp2, 16); - if (*tmp2 != ':') - line_ip = -1; - } - - if (line_ip != -1) { - unsigned int hits = 0; - double percent = 0.0; - char *color = PERF_COLOR_NORMAL; - - offset = line_ip - start; - if (offset < len) - hits = sym->hist[offset]; - - if (sym->hist_sum) - percent = 100.0 * hits / sym->hist_sum; - - /* - * We color high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= 5.0) - color = PERF_COLOR_RED; - else { - if (percent > 0.5) - color = PERF_COLOR_GREEN; - } - - color_fprintf(stdout, color, " %7.2f", percent); - printf(" : "); - color_fprintf(stdout, PERF_COLOR_BLUE, "%s\n", line); - } else { - if (!*line) - printf(" :\n"); - else - printf(" : %s\n", line); - } - - return 0; -} - -static void annotate_sym(struct dso *dso, struct symbol *sym) -{ - char *filename = dso->name; - __u64 start, end, len; - char command[PATH_MAX*2]; - FILE *file; - - if (!filename) - return; - if (dso == kernel_dso) - filename = vmlinux; - - printf("\n------------------------------------------------\n"); - printf(" Percent | Source code & Disassembly of %s\n", filename); - printf("------------------------------------------------\n"); - - if (verbose >= 2) - printf("annotating [%p] %30s : [%p] %30s\n", dso, dso->name, sym, sym->name); - - start = sym->obj_start; - if (!start) - start = sym->start; - - end = start + sym->end - sym->start + 
1; - len = sym->end - sym->start; - - sprintf(command, "objdump --start-address=0x%016Lx --stop-address=0x%016Lx -dS %s", (__u64)start, (__u64)end, filename); - - if (verbose >= 3) - printf("doing: %s\n", command); - - file = popen(command, "r"); - if (!file) - return; - - while (!feof(file)) { - if (parse_line(file, sym, start, len) < 0) - break; - } - - pclose(file); -} - -static void find_annotations(void) -{ - struct rb_node *nd; - struct dso *dso; - int count = 0; - - list_for_each_entry(dso, &dsos, node) { - - for (nd = rb_first(&dso->syms); nd; nd = rb_next(nd)) { - struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - - if (sym->hist) { - annotate_sym(dso, sym); - count++; - } - } - } - - if (!count) - printf(" Error: symbol '%s' not present amongst the samples.\n", sym_hist_filter); -} - -static int __cmd_annotate(void) -{ - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat stat; - event_t *event; - uint32_t size; - char *buf; - - register_idle_thread(); - - input = open(input_name, O_RDONLY); - if (input < 0) { - perror("failed to open file"); - exit(-1); - } - - ret = fstat(input, &stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int ret; - - ret = munmap(buf, page_size * mmap_window); - assert(ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - 
dprintf("%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || process_event(event, offset, head) < 0) { - - dprintf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - - total_unknown++; - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. - */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head < stat.st_size) - goto more; - - rc = EXIT_SUCCESS; - close(input); - - dprintf(" IP events: %10ld\n", total); - dprintf(" mmap events: %10ld\n", total_mmap); - dprintf(" comm events: %10ld\n", total_comm); - dprintf(" fork events: %10ld\n", total_fork); - dprintf(" unknown events: %10ld\n", total_unknown); - - if (dump_trace) - return 0; - - if (verbose >= 3) - threads__fprintf(stdout); - - if (verbose >= 2) - dsos__fprintf(stdout); - - collapse__resort(); - output__resort(); - - find_annotations(); - - return rc; -} - -static const char * const annotate_usage[] = { - "perf annotate [] ", - NULL -}; - -static const struct option options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", - "symbol to annotate"), - OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), - OPT_END() -}; - -static void setup_sorting(void) -{ - char *tmp, *tok, *str = strdup(sort_order); - - for (tok = strtok_r(str, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - if (sort_dimension__add(tok) < 0) { - error("Unknown --sort key: `%s'", tok); - usage_with_options(annotate_usage, options); - } - } - - free(str); -} - -int cmd_annotate(int argc, const 
char **argv, const char *prefix) -{ - symbol__init(); - - page_size = getpagesize(); - - argc = parse_options(argc, argv, options, annotate_usage, 0); - - setup_sorting(); - - if (argc) { - /* - * Special case: if there's an argument left then assume tha - * it's a symbol filter: - */ - if (argc > 1) - usage_with_options(annotate_usage, options); - - sym_hist_filter = argv[0]; - } - - if (!sym_hist_filter) - usage_with_options(annotate_usage, options); - - setup_pager(); - - return __cmd_annotate(); -} diff --git a/trunk/tools/perf/builtin-help.c b/trunk/tools/perf/builtin-help.c deleted file mode 100644 index 0f32dc3f3c4c..000000000000 --- a/trunk/tools/perf/builtin-help.c +++ /dev/null @@ -1,461 +0,0 @@ -/* - * builtin-help.c - * - * Builtin help command - */ -#include "util/cache.h" -#include "builtin.h" -#include "util/exec_cmd.h" -#include "common-cmds.h" -#include "util/parse-options.h" -#include "util/run-command.h" -#include "util/help.h" - -static struct man_viewer_list { - struct man_viewer_list *next; - char name[FLEX_ARRAY]; -} *man_viewer_list; - -static struct man_viewer_info_list { - struct man_viewer_info_list *next; - const char *info; - char name[FLEX_ARRAY]; -} *man_viewer_info_list; - -enum help_format { - HELP_FORMAT_MAN, - HELP_FORMAT_INFO, - HELP_FORMAT_WEB, -}; - -static int show_all = 0; -static enum help_format help_format = HELP_FORMAT_MAN; -static struct option builtin_help_options[] = { - OPT_BOOLEAN('a', "all", &show_all, "print all available commands"), - OPT_SET_INT('m', "man", &help_format, "show man page", HELP_FORMAT_MAN), - OPT_SET_INT('w', "web", &help_format, "show manual in web browser", - HELP_FORMAT_WEB), - OPT_SET_INT('i', "info", &help_format, "show info page", - HELP_FORMAT_INFO), - OPT_END(), -}; - -static const char * const builtin_help_usage[] = { - "perf help [--all] [--man|--web|--info] [command]", - NULL -}; - -static enum help_format parse_help_format(const char *format) -{ - if (!strcmp(format, "man")) - return 
HELP_FORMAT_MAN; - if (!strcmp(format, "info")) - return HELP_FORMAT_INFO; - if (!strcmp(format, "web") || !strcmp(format, "html")) - return HELP_FORMAT_WEB; - die("unrecognized help format '%s'", format); -} - -static const char *get_man_viewer_info(const char *name) -{ - struct man_viewer_info_list *viewer; - - for (viewer = man_viewer_info_list; viewer; viewer = viewer->next) - { - if (!strcasecmp(name, viewer->name)) - return viewer->info; - } - return NULL; -} - -static int check_emacsclient_version(void) -{ - struct strbuf buffer = STRBUF_INIT; - struct child_process ec_process; - const char *argv_ec[] = { "emacsclient", "--version", NULL }; - int version; - - /* emacsclient prints its version number on stderr */ - memset(&ec_process, 0, sizeof(ec_process)); - ec_process.argv = argv_ec; - ec_process.err = -1; - ec_process.stdout_to_stderr = 1; - if (start_command(&ec_process)) { - fprintf(stderr, "Failed to start emacsclient.\n"); - return -1; - } - strbuf_read(&buffer, ec_process.err, 20); - close(ec_process.err); - - /* - * Don't bother checking return value, because "emacsclient --version" - * seems to always exits with code 1. - */ - finish_command(&ec_process); - - if (prefixcmp(buffer.buf, "emacsclient")) { - fprintf(stderr, "Failed to parse emacsclient version.\n"); - strbuf_release(&buffer); - return -1; - } - - strbuf_remove(&buffer, 0, strlen("emacsclient")); - version = atoi(buffer.buf); - - if (version < 22) { - fprintf(stderr, - "emacsclient version '%d' too old (< 22).\n", - version); - strbuf_release(&buffer); - return -1; - } - - strbuf_release(&buffer); - return 0; -} - -static void exec_woman_emacs(const char* path, const char *page) -{ - if (!check_emacsclient_version()) { - /* This works only with emacsclient version >= 22. 
*/ - struct strbuf man_page = STRBUF_INIT; - - if (!path) - path = "emacsclient"; - strbuf_addf(&man_page, "(woman \"%s\")", page); - execlp(path, "emacsclient", "-e", man_page.buf, NULL); - warning("failed to exec '%s': %s", path, strerror(errno)); - } -} - -static void exec_man_konqueror(const char* path, const char *page) -{ - const char *display = getenv("DISPLAY"); - if (display && *display) { - struct strbuf man_page = STRBUF_INIT; - const char *filename = "kfmclient"; - - /* It's simpler to launch konqueror using kfmclient. */ - if (path) { - const char *file = strrchr(path, '/'); - if (file && !strcmp(file + 1, "konqueror")) { - char *new = strdup(path); - char *dest = strrchr(new, '/'); - - /* strlen("konqueror") == strlen("kfmclient") */ - strcpy(dest + 1, "kfmclient"); - path = new; - } - if (file) - filename = file; - } else - path = "kfmclient"; - strbuf_addf(&man_page, "man:%s(1)", page); - execlp(path, filename, "newTab", man_page.buf, NULL); - warning("failed to exec '%s': %s", path, strerror(errno)); - } -} - -static void exec_man_man(const char* path, const char *page) -{ - if (!path) - path = "man"; - execlp(path, "man", page, NULL); - warning("failed to exec '%s': %s", path, strerror(errno)); -} - -static void exec_man_cmd(const char *cmd, const char *page) -{ - struct strbuf shell_cmd = STRBUF_INIT; - strbuf_addf(&shell_cmd, "%s %s", cmd, page); - execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL); - warning("failed to exec '%s': %s", cmd, strerror(errno)); -} - -static void add_man_viewer(const char *name) -{ - struct man_viewer_list **p = &man_viewer_list; - size_t len = strlen(name); - - while (*p) - p = &((*p)->next); - *p = calloc(1, (sizeof(**p) + len + 1)); - strncpy((*p)->name, name, len); -} - -static int supported_man_viewer(const char *name, size_t len) -{ - return (!strncasecmp("man", name, len) || - !strncasecmp("woman", name, len) || - !strncasecmp("konqueror", name, len)); -} - -static void do_add_man_viewer_info(const char *name, 
- size_t len, - const char *value) -{ - struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); - - strncpy(new->name, name, len); - new->info = strdup(value); - new->next = man_viewer_info_list; - man_viewer_info_list = new; -} - -static int add_man_viewer_path(const char *name, - size_t len, - const char *value) -{ - if (supported_man_viewer(name, len)) - do_add_man_viewer_info(name, len, value); - else - warning("'%s': path for unsupported man viewer.\n" - "Please consider using 'man..cmd' instead.", - name); - - return 0; -} - -static int add_man_viewer_cmd(const char *name, - size_t len, - const char *value) -{ - if (supported_man_viewer(name, len)) - warning("'%s': cmd for supported man viewer.\n" - "Please consider using 'man..path' instead.", - name); - else - do_add_man_viewer_info(name, len, value); - - return 0; -} - -static int add_man_viewer_info(const char *var, const char *value) -{ - const char *name = var + 4; - const char *subkey = strrchr(name, '.'); - - if (!subkey) - return error("Config with no key for man viewer: %s", name); - - if (!strcmp(subkey, ".path")) { - if (!value) - return config_error_nonbool(var); - return add_man_viewer_path(name, subkey - name, value); - } - if (!strcmp(subkey, ".cmd")) { - if (!value) - return config_error_nonbool(var); - return add_man_viewer_cmd(name, subkey - name, value); - } - - warning("'%s': unsupported man viewer sub key.", subkey); - return 0; -} - -static int perf_help_config(const char *var, const char *value, void *cb) -{ - if (!strcmp(var, "help.format")) { - if (!value) - return config_error_nonbool(var); - help_format = parse_help_format(value); - return 0; - } - if (!strcmp(var, "man.viewer")) { - if (!value) - return config_error_nonbool(var); - add_man_viewer(value); - return 0; - } - if (!prefixcmp(var, "man.")) - return add_man_viewer_info(var, value); - - return perf_default_config(var, value, cb); -} - -static struct cmdnames main_cmds, other_cmds; - -void 
list_common_cmds_help(void) -{ - int i, longest = 0; - - for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { - if (longest < strlen(common_cmds[i].name)) - longest = strlen(common_cmds[i].name); - } - - puts(" The most commonly used perf commands are:"); - for (i = 0; i < ARRAY_SIZE(common_cmds); i++) { - printf(" %s ", common_cmds[i].name); - mput_char(' ', longest - strlen(common_cmds[i].name)); - puts(common_cmds[i].help); - } -} - -static int is_perf_command(const char *s) -{ - return is_in_cmdlist(&main_cmds, s) || - is_in_cmdlist(&other_cmds, s); -} - -static const char *prepend(const char *prefix, const char *cmd) -{ - size_t pre_len = strlen(prefix); - size_t cmd_len = strlen(cmd); - char *p = malloc(pre_len + cmd_len + 1); - memcpy(p, prefix, pre_len); - strcpy(p + pre_len, cmd); - return p; -} - -static const char *cmd_to_page(const char *perf_cmd) -{ - if (!perf_cmd) - return "perf"; - else if (!prefixcmp(perf_cmd, "perf")) - return perf_cmd; - else if (is_perf_command(perf_cmd)) - return prepend("perf-", perf_cmd); - else - return prepend("perf-", perf_cmd); -} - -static void setup_man_path(void) -{ - struct strbuf new_path = STRBUF_INIT; - const char *old_path = getenv("MANPATH"); - - /* We should always put ':' after our path. If there is no - * old_path, the ':' at the end will let 'man' to try - * system-wide paths after ours to find the manual page. If - * there is old_path, we need ':' as delimiter. 
*/ - strbuf_addstr(&new_path, system_path(PERF_MAN_PATH)); - strbuf_addch(&new_path, ':'); - if (old_path) - strbuf_addstr(&new_path, old_path); - - setenv("MANPATH", new_path.buf, 1); - - strbuf_release(&new_path); -} - -static void exec_viewer(const char *name, const char *page) -{ - const char *info = get_man_viewer_info(name); - - if (!strcasecmp(name, "man")) - exec_man_man(info, page); - else if (!strcasecmp(name, "woman")) - exec_woman_emacs(info, page); - else if (!strcasecmp(name, "konqueror")) - exec_man_konqueror(info, page); - else if (info) - exec_man_cmd(info, page); - else - warning("'%s': unknown man viewer.", name); -} - -static void show_man_page(const char *perf_cmd) -{ - struct man_viewer_list *viewer; - const char *page = cmd_to_page(perf_cmd); - const char *fallback = getenv("PERF_MAN_VIEWER"); - - setup_man_path(); - for (viewer = man_viewer_list; viewer; viewer = viewer->next) - { - exec_viewer(viewer->name, page); /* will return when unable */ - } - if (fallback) - exec_viewer(fallback, page); - exec_viewer("man", page); - die("no man viewer handled the request"); -} - -static void show_info_page(const char *perf_cmd) -{ - const char *page = cmd_to_page(perf_cmd); - setenv("INFOPATH", system_path(PERF_INFO_PATH), 1); - execlp("info", "info", "perfman", page, NULL); -} - -static void get_html_page_path(struct strbuf *page_path, const char *page) -{ - struct stat st; - const char *html_path = system_path(PERF_HTML_PATH); - - /* Check that we have a perf documentation directory. */ - if (stat(mkpath("%s/perf.html", html_path), &st) - || !S_ISREG(st.st_mode)) - die("'%s': not a documentation directory.", html_path); - - strbuf_init(page_path, 0); - strbuf_addf(page_path, "%s/%s.html", html_path, page); -} - -/* - * If open_html is not defined in a platform-specific way (see for - * example compat/mingw.h), we use the script web--browse to display - * HTML. 
- */ -#ifndef open_html -static void open_html(const char *path) -{ - execl_perf_cmd("web--browse", "-c", "help.browser", path, NULL); -} -#endif - -static void show_html_page(const char *perf_cmd) -{ - const char *page = cmd_to_page(perf_cmd); - struct strbuf page_path; /* it leaks but we exec bellow */ - - get_html_page_path(&page_path, page); - - open_html(page_path.buf); -} - -int cmd_help(int argc, const char **argv, const char *prefix) -{ - const char *alias; - load_command_list("perf-", &main_cmds, &other_cmds); - - perf_config(perf_help_config, NULL); - - argc = parse_options(argc, argv, builtin_help_options, - builtin_help_usage, 0); - - if (show_all) { - printf("\n usage: %s\n\n", perf_usage_string); - list_commands("perf commands", &main_cmds, &other_cmds); - printf(" %s\n\n", perf_more_info_string); - return 0; - } - - if (!argv[0]) { - printf("\n usage: %s\n\n", perf_usage_string); - list_common_cmds_help(); - printf("\n %s\n\n", perf_more_info_string); - return 0; - } - - alias = alias_lookup(argv[0]); - if (alias && !is_perf_command(argv[0])) { - printf("`perf %s' is aliased to `%s'\n", argv[0], alias); - return 0; - } - - switch (help_format) { - case HELP_FORMAT_MAN: - show_man_page(argv[0]); - break; - case HELP_FORMAT_INFO: - show_info_page(argv[0]); - break; - case HELP_FORMAT_WEB: - show_html_page(argv[0]); - break; - } - - return 0; -} diff --git a/trunk/tools/perf/builtin-list.c b/trunk/tools/perf/builtin-list.c deleted file mode 100644 index fe60e37c96ef..000000000000 --- a/trunk/tools/perf/builtin-list.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * builtin-list.c - * - * Builtin list command: list all event types - * - * Copyright (C) 2009, Thomas Gleixner - * Copyright (C) 2008-2009, Red Hat Inc, Ingo Molnar - */ -#include "builtin.h" - -#include "perf.h" - -#include "util/parse-options.h" -#include "util/parse-events.h" - -int cmd_list(int argc, const char **argv, const char *prefix) -{ - print_events(); - return 0; -} diff --git 
a/trunk/tools/perf/builtin-record.c b/trunk/tools/perf/builtin-record.c deleted file mode 100644 index 29259e74dcfa..000000000000 --- a/trunk/tools/perf/builtin-record.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * builtin-record.c - * - * Builtin record command: Record the profile of a workload - * (or a CPU, or a PID) into the perf.data output file - for - * later analysis via perf report. - */ -#include "builtin.h" - -#include "perf.h" - -#include "util/util.h" -#include "util/parse-options.h" -#include "util/parse-events.h" -#include "util/string.h" - -#include -#include - -#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) -#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) - -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static long default_interval = 100000; - -static int nr_cpus = 0; -static unsigned int page_size; -static unsigned int mmap_pages = 128; -static int freq = 0; -static int output; -static const char *output_name = "perf.data"; -static int group = 0; -static unsigned int realtime_prio = 0; -static int system_wide = 0; -static pid_t target_pid = -1; -static int inherit = 1; -static int force = 0; -static int append_file = 0; -static int verbose = 0; - -static long samples; -static struct timeval last_read; -static struct timeval this_read; - -static __u64 bytes_written; - -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; - -static int nr_poll; -static int nr_cpu; - -struct mmap_event { - struct perf_event_header header; - __u32 pid; - __u32 tid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - __u32 pid; - __u32 tid; - char comm[16]; -}; - - -struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; -}; - -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; - -static unsigned int mmap_read_head(struct mmap_data *md) -{ - struct perf_counter_mmap_page *pc = md->base; - int head; - - head = pc->data_head; - 
rmb(); - - return head; -} - -static void mmap_read(struct mmap_data *md) -{ - unsigned int head = mmap_read_head(md); - unsigned int old = md->prev; - unsigned char *data = md->base + page_size; - unsigned long size; - void *buf; - int diff; - - gettimeofday(&this_read, NULL); - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. - */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); - - /* - * head points to a known good entry, start there. - */ - old = head; - } - - last_read = this_read; - - if (old != head) - samples++; - - size = head - old; - - if ((old & md->mask) + size != (head & md->mask)) { - buf = &data[old & md->mask]; - size = md->mask + 1 - (old & md->mask); - old += size; - - while (size) { - int ret = write(output, buf, size); - - if (ret < 0) - die("failed to write"); - - size -= ret; - buf += ret; - - bytes_written += ret; - } - } - - buf = &data[old & md->mask]; - size = head - old; - old += size; - - while (size) { - int ret = write(output, buf, size); - - if (ret < 0) - die("failed to write"); - - size -= ret; - buf += ret; - - bytes_written += ret; - } - - md->prev = old; -} - -static volatile int done = 0; -static volatile int signr = -1; - -static void sig_handler(int sig) -{ - done = 1; - signr = sig; -} - -static void sig_atexit(void) -{ - if (signr == -1) - return; - - signal(signr, SIG_DFL); - kill(getpid(), signr); -} - -static void pid_synthesize_comm_event(pid_t pid, int full) -{ - struct comm_event comm_ev; - char filename[PATH_MAX]; - char bf[BUFSIZ]; - int fd, ret; - size_t size; 
- char *field, *sep; - DIR *tasks; - struct dirent dirent, *next; - - snprintf(filename, sizeof(filename), "/proc/%d/stat", pid); - - fd = open(filename, O_RDONLY); - if (fd < 0) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); - } - if (read(fd, bf, sizeof(bf)) < 0) { - fprintf(stderr, "couldn't read %s\n", filename); - exit(EXIT_FAILURE); - } - close(fd); - - /* 9027 (cat) R 6747 9027 6747 34816 9027 ... */ - memset(&comm_ev, 0, sizeof(comm_ev)); - field = strchr(bf, '('); - if (field == NULL) - goto out_failure; - sep = strchr(++field, ')'); - if (sep == NULL) - goto out_failure; - size = sep - field; - memcpy(comm_ev.comm, field, size++); - - comm_ev.pid = pid; - comm_ev.header.type = PERF_EVENT_COMM; - size = ALIGN(size, sizeof(__u64)); - comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); - - if (!full) { - comm_ev.tid = pid; - - ret = write(output, &comm_ev, comm_ev.header.size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } - return; - } - - snprintf(filename, sizeof(filename), "/proc/%d/task", pid); - - tasks = opendir(filename); - while (!readdir_r(tasks, &dirent, &next) && next) { - char *end; - pid = strtol(dirent.d_name, &end, 10); - if (*end) - continue; - - comm_ev.tid = pid; - - ret = write(output, &comm_ev, comm_ev.header.size); - if (ret < 0) { - perror("failed to write"); - exit(-1); - } - } - closedir(tasks); - return; - -out_failure: - fprintf(stderr, "couldn't get COMM and pgid, malformed %s\n", - filename); - exit(EXIT_FAILURE); -} - -static void pid_synthesize_mmap_samples(pid_t pid) -{ - char filename[PATH_MAX]; - FILE *fp; - - snprintf(filename, sizeof(filename), "/proc/%d/maps", pid); - - fp = fopen(filename, "r"); - if (fp == NULL) { - fprintf(stderr, "couldn't open %s\n", filename); - exit(EXIT_FAILURE); - } - while (1) { - char bf[BUFSIZ], *pbf = bf; - struct mmap_event mmap_ev = { - .header.type = PERF_EVENT_MMAP, - }; - int n; - size_t size; - if (fgets(bf, sizeof(bf), fp) 
== NULL) - break; - - /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */ - n = hex2u64(pbf, &mmap_ev.start); - if (n < 0) - continue; - pbf += n + 1; - n = hex2u64(pbf, &mmap_ev.len); - if (n < 0) - continue; - pbf += n + 3; - if (*pbf == 'x') { /* vm_exec */ - char *execname = strrchr(bf, ' '); - - if (execname == NULL || execname[1] != '/') - continue; - - execname += 1; - size = strlen(execname); - execname[size - 1] = '\0'; /* Remove \n */ - memcpy(mmap_ev.filename, execname, size); - size = ALIGN(size, sizeof(__u64)); - mmap_ev.len -= mmap_ev.start; - mmap_ev.header.size = (sizeof(mmap_ev) - - (sizeof(mmap_ev.filename) - size)); - mmap_ev.pid = pid; - mmap_ev.tid = pid; - - if (write(output, &mmap_ev, mmap_ev.header.size) < 0) { - perror("failed to write"); - exit(-1); - } - } - } - - fclose(fp); -} - -static void synthesize_samples(void) -{ - DIR *proc; - struct dirent dirent, *next; - - proc = opendir("/proc"); - - while (!readdir_r(proc, &dirent, &next) && next) { - char *end; - pid_t pid; - - pid = strtol(dirent.d_name, &end, 10); - if (*end) /* only interested in proper numerical dirents */ - continue; - - pid_synthesize_comm_event(pid, 1); - pid_synthesize_mmap_samples(pid); - } - - closedir(proc); -} - -static int group_fd; - -static void create_counter(int counter, int cpu, pid_t pid) -{ - struct perf_counter_attr *attr = attrs + counter; - int track = 1; - - attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - if (freq) { - attr->sample_type |= PERF_SAMPLE_PERIOD; - attr->freq = 1; - attr->sample_freq = freq; - } - attr->mmap = track; - attr->comm = track; - attr->inherit = (cpu < 0) && inherit; - attr->disabled = 1; - - track = 0; /* only the first counter needs these */ - -try_again: - fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); - - if (fd[nr_cpu][counter] < 0) { - int err = errno; - - if (err == EPERM) - die("Permission error - are you root?\n"); - - /* - * If it's cycles then fall back to hrtimer - * based 
cpu-clock-tick sw counter, which - * is always available even if no PMU support: - */ - if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { - - if (verbose) - warning(" ... trying to fall back to cpu-clock-ticks\n"); - attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_SW_CPU_CLOCK; - goto try_again; - } - printf("\n"); - error("perfcounter syscall returned with %d (%s)\n", - fd[nr_cpu][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); - exit(-1); - } - - assert(fd[nr_cpu][counter] >= 0); - fcntl(fd[nr_cpu][counter], F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[nr_cpu][counter]; - - event_array[nr_poll].fd = fd[nr_cpu][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[nr_cpu][counter].counter = counter; - mmap_array[nr_cpu][counter].prev = 0; - mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; - mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[nr_cpu][counter], 0); - if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); - exit(-1); - } - - ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); -} - -static void open_counters(int cpu, pid_t pid) -{ - int counter; - - if (pid > 0) { - pid_synthesize_comm_event(pid, 0); - pid_synthesize_mmap_samples(pid); - } - - group_fd = -1; - for (counter = 0; counter < nr_counters; counter++) - create_counter(counter, cpu, pid); - - nr_cpu++; -} - -static int __cmd_record(int argc, const char **argv) -{ - int i, counter; - struct stat st; - pid_t pid; - int flags; - int ret; - - page_size = sysconf(_SC_PAGE_SIZE); - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - - if (!stat(output_name, &st) && !force && !append_file) { - fprintf(stderr, "Error, output file %s 
exists, use -A to append or -f to overwrite.\n", - output_name); - exit(-1); - } - - flags = O_CREAT|O_RDWR; - if (append_file) - flags |= O_APPEND; - else - flags |= O_TRUNC; - - output = open(output_name, flags, S_IRUSR|S_IWUSR); - if (output < 0) { - perror("failed to create output file"); - exit(-1); - } - - if (!system_wide) { - open_counters(-1, target_pid != -1 ? target_pid : getpid()); - } else for (i = 0; i < nr_cpus; i++) - open_counters(i, target_pid); - - atexit(sig_atexit); - signal(SIGCHLD, sig_handler); - signal(SIGINT, sig_handler); - - if (target_pid == -1 && argc) { - pid = fork(); - if (pid < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } - } - } - - if (realtime_prio) { - struct sched_param param; - - param.sched_priority = realtime_prio; - if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); - exit(-1); - } - } - - if (system_wide) - synthesize_samples(); - - while (!done) { - int hits = samples; - - for (i = 0; i < nr_cpu; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read(&mmap_array[i][counter]); - } - - if (hits == samples) - ret = poll(event_array, nr_poll, 100); - } - - /* - * Approximate RIP event size: 24 bytes. - */ - fprintf(stderr, - "[ perf record: Captured and wrote %.3f MB %s (~%lld samples) ]\n", - (double)bytes_written / 1024.0 / 1024.0, - output_name, - bytes_written / 24); - - return 0; -} - -static const char * const record_usage[] = { - "perf record [] []", - "perf record [] -- []", - NULL -}; - -static const struct option options[] = { - OPT_CALLBACK('e', "event", NULL, "event", - "event selector. 
use 'perf list' to list available events", - parse_events), - OPT_INTEGER('p', "pid", &target_pid, - "record events on existing pid"), - OPT_INTEGER('r', "realtime", &realtime_prio, - "collect data with this RT SCHED_FIFO priority"), - OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), - OPT_BOOLEAN('A', "append", &append_file, - "append to the output file to do incremental profiling"), - OPT_BOOLEAN('f', "force", &force, - "overwrite existing data file"), - OPT_LONG('c', "count", &default_interval, - "event period to sample"), - OPT_STRING('o', "output", &output_name, "file", - "output file name"), - OPT_BOOLEAN('i', "inherit", &inherit, - "child tasks inherit counters"), - OPT_INTEGER('F', "freq", &freq, - "profile at this frequency"), - OPT_INTEGER('m', "mmap-pages", &mmap_pages, - "number of mmap data pages"), - OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), - OPT_END() -}; - -int cmd_record(int argc, const char **argv, const char *prefix) -{ - int counter; - - argc = parse_options(argc, argv, options, record_usage, 0); - if (!argc && target_pid == -1 && !system_wide) - usage_with_options(record_usage, options); - - if (!nr_counters) - nr_counters = 1; - - for (counter = 0; counter < nr_counters; counter++) { - if (attrs[counter].sample_period) - continue; - - attrs[counter].sample_period = default_interval; - } - - return __cmd_record(argc, argv); -} diff --git a/trunk/tools/perf/builtin-report.c b/trunk/tools/perf/builtin-report.c deleted file mode 100644 index 82fa93b4db99..000000000000 --- a/trunk/tools/perf/builtin-report.c +++ /dev/null @@ -1,1316 +0,0 @@ -/* - * builtin-report.c - * - * Builtin report command: Analyze the perf.data input file, - * look up and read DSOs and symbol information and display - * a histogram of results, along various sorting keys. 
- */ -#include "builtin.h" - -#include "util/util.h" - -#include "util/color.h" -#include "util/list.h" -#include "util/cache.h" -#include "util/rbtree.h" -#include "util/symbol.h" -#include "util/string.h" - -#include "perf.h" - -#include "util/parse-options.h" -#include "util/parse-events.h" - -#define SHOW_KERNEL 1 -#define SHOW_USER 2 -#define SHOW_HV 4 - -static char const *input_name = "perf.data"; -static char *vmlinux = NULL; - -static char default_sort_order[] = "comm,dso"; -static char *sort_order = default_sort_order; - -static int input; -static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; - -static int dump_trace = 0; -#define dprintf(x...) do { if (dump_trace) printf(x); } while (0) - -static int verbose; -static int full_paths; - -static unsigned long page_size; -static unsigned long mmap_window = 32; - -struct ip_event { - struct perf_event_header header; - __u64 ip; - __u32 pid, tid; - __u64 period; -}; - -struct mmap_event { - struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; -}; - -struct comm_event { - struct perf_event_header header; - __u32 pid, tid; - char comm[16]; -}; - -struct fork_event { - struct perf_event_header header; - __u32 pid, ppid; -}; - -struct period_event { - struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; -}; - -typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - struct comm_event comm; - struct fork_event fork; - struct period_event period; -} event_t; - -static LIST_HEAD(dsos); -static struct dso *kernel_dso; -static struct dso *vdso; - -static void dsos__add(struct dso *dso) -{ - list_add_tail(&dso->node, &dsos); -} - -static struct dso *dsos__find(const char *name) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - if (strcmp(pos->name, name) == 0) - return pos; - return NULL; -} - -static struct dso *dsos__findnew(const char *name) -{ - 
struct dso *dso = dsos__find(name); - int nr; - - if (dso) - return dso; - - dso = dso__new(name, 0); - if (!dso) - goto out_delete_dso; - - nr = dso__load(dso, NULL, verbose); - if (nr < 0) { - if (verbose) - fprintf(stderr, "Failed to open: %s\n", name); - goto out_delete_dso; - } - if (!nr && verbose) { - fprintf(stderr, - "No symbols found in: %s, maybe install a debug package?\n", - name); - } - - dsos__add(dso); - - return dso; - -out_delete_dso: - dso__delete(dso); - return NULL; -} - -static void dsos__fprintf(FILE *fp) -{ - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) - dso__fprintf(pos, fp); -} - -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) -{ - return dso__find_symbol(kernel_dso, ip); -} - -static int load_kernel(void) -{ - int err; - - kernel_dso = dso__new("[kernel]", 0); - if (!kernel_dso) - return -1; - - err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); - if (err) { - dso__delete(kernel_dso); - kernel_dso = NULL; - } else - dsos__add(kernel_dso); - - vdso = dso__new("[vdso]", 0); - if (!vdso) - return -1; - - vdso->find_symbol = vdso__find_symbol; - - dsos__add(vdso); - - return err; -} - -static char __cwd[PATH_MAX]; -static char *cwd = __cwd; -static int cwdlen; - -static int strcommon(const char *pathname) -{ - int n = 0; - - while (pathname[n] == cwd[n] && n < cwdlen) - ++n; - - return n; -} - -struct map { - struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); - struct dso *dso; -}; - -static __u64 map__map_ip(struct map *map, __u64 ip) -{ - return ip - map->start + map->pgoff; -} - -static __u64 vdso__map_ip(struct map *map, __u64 ip) -{ - return ip; -} - -static inline int is_anon_memory(const char *filename) -{ - return strcmp(filename, "//anon") == 0; -} - -static struct map *map__new(struct mmap_event *event) -{ - struct map *self = malloc(sizeof(*self)); - - if (self != NULL) { - const char *filename = event->filename; - char 
newfilename[PATH_MAX]; - int anon; - - if (cwd) { - int n = strcommon(filename); - - if (n == cwdlen) { - snprintf(newfilename, sizeof(newfilename), - ".%s", filename + n); - filename = newfilename; - } - } - - anon = is_anon_memory(filename); - - if (anon) { - snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid); - filename = newfilename; - } - - self->start = event->start; - self->end = event->start + event->len; - self->pgoff = event->pgoff; - - self->dso = dsos__findnew(filename); - if (self->dso == NULL) - goto out_delete; - - if (self->dso == vdso || anon) - self->map_ip = vdso__map_ip; - else - self->map_ip = map__map_ip; - } - return self; -out_delete: - free(self); - return NULL; -} - -static struct map *map__clone(struct map *self) -{ - struct map *map = malloc(sizeof(*self)); - - if (!map) - return NULL; - - memcpy(map, self, sizeof(*self)); - - return map; -} - -static int map__overlap(struct map *l, struct map *r) -{ - if (l->start > r->start) { - struct map *t = l; - l = r; - r = t; - } - - if (l->end > r->start) - return 1; - - return 0; -} - -static size_t map__fprintf(struct map *self, FILE *fp) -{ - return fprintf(fp, " %Lx-%Lx %Lx %s\n", - self->start, self->end, self->pgoff, self->dso->name); -} - - -struct thread { - struct rb_node rb_node; - struct list_head maps; - pid_t pid; - char *comm; -}; - -static struct thread *thread__new(pid_t pid) -{ - struct thread *self = malloc(sizeof(*self)); - - if (self != NULL) { - self->pid = pid; - self->comm = malloc(32); - if (self->comm) - snprintf(self->comm, 32, ":%d", self->pid); - INIT_LIST_HEAD(&self->maps); - } - - return self; -} - -static int thread__set_comm(struct thread *self, const char *comm) -{ - if (self->comm) - free(self->comm); - self->comm = strdup(comm); - return self->comm ? 
0 : -ENOMEM; -} - -static size_t thread__fprintf(struct thread *self, FILE *fp) -{ - struct map *pos; - size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); - - list_for_each_entry(pos, &self->maps, node) - ret += map__fprintf(pos, fp); - - return ret; -} - - -static struct rb_root threads; -static struct thread *last_match; - -static struct thread *threads__findnew(pid_t pid) -{ - struct rb_node **p = &threads.rb_node; - struct rb_node *parent = NULL; - struct thread *th; - - /* - * Font-end cache - PID lookups come in blocks, - * so most of the time we dont have to look up - * the full rbtree: - */ - if (last_match && last_match->pid == pid) - return last_match; - - while (*p != NULL) { - parent = *p; - th = rb_entry(parent, struct thread, rb_node); - - if (th->pid == pid) { - last_match = th; - return th; - } - - if (pid < th->pid) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - th = thread__new(pid); - if (th != NULL) { - rb_link_node(&th->rb_node, parent, p); - rb_insert_color(&th->rb_node, &threads); - last_match = th; - } - - return th; -} - -static void thread__insert_map(struct thread *self, struct map *map) -{ - struct map *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &self->maps, node) { - if (map__overlap(pos, map)) { - list_del_init(&pos->node); - /* XXX leaks dsos */ - free(pos); - } - } - - list_add_tail(&map->node, &self->maps); -} - -static int thread__fork(struct thread *self, struct thread *parent) -{ - struct map *map; - - if (self->comm) - free(self->comm); - self->comm = strdup(parent->comm); - if (!self->comm) - return -ENOMEM; - - list_for_each_entry(map, &parent->maps, node) { - struct map *new = map__clone(map); - if (!new) - return -ENOMEM; - thread__insert_map(self, new); - } - - return 0; -} - -static struct map *thread__find_map(struct thread *self, __u64 ip) -{ - struct map *pos; - - if (self == NULL) - return NULL; - - list_for_each_entry(pos, &self->maps, node) - if (ip >= pos->start && ip <= pos->end) 
- return pos; - - return NULL; -} - -static size_t threads__fprintf(FILE *fp) -{ - size_t ret = 0; - struct rb_node *nd; - - for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { - struct thread *pos = rb_entry(nd, struct thread, rb_node); - - ret += thread__fprintf(pos, fp); - } - - return ret; -} - -/* - * histogram, sorted on item, collects counts - */ - -static struct rb_root hist; - -struct hist_entry { - struct rb_node rb_node; - - struct thread *thread; - struct map *map; - struct dso *dso; - struct symbol *sym; - __u64 ip; - char level; - - __u64 count; -}; - -/* - * configurable sorting bits - */ - -struct sort_entry { - struct list_head list; - - char *header; - - int64_t (*cmp)(struct hist_entry *, struct hist_entry *); - int64_t (*collapse)(struct hist_entry *, struct hist_entry *); - size_t (*print)(FILE *fp, struct hist_entry *); -}; - -/* --sort pid */ - -static int64_t -sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static size_t -sort__thread_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); -} - -static struct sort_entry sort_thread = { - .header = " Command: Pid", - .cmp = sort__thread_cmp, - .print = sort__thread_print, -}; - -/* --sort comm */ - -static int64_t -sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) -{ - return right->thread->pid - left->thread->pid; -} - -static int64_t -sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) -{ - char *comm_l = left->thread->comm; - char *comm_r = right->thread->comm; - - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } - - return strcmp(comm_l, comm_r); -} - -static size_t -sort__comm_print(FILE *fp, struct hist_entry *self) -{ - return fprintf(fp, "%16s", self->thread->comm); -} - -static struct sort_entry sort_comm = { - .header = " Command", - 
.cmp = sort__comm_cmp, - .collapse = sort__comm_collapse, - .print = sort__comm_print, -}; - -/* --sort dso */ - -static int64_t -sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct dso *dso_l = left->dso; - struct dso *dso_r = right->dso; - - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } - - return strcmp(dso_l->name, dso_r->name); -} - -static size_t -sort__dso_print(FILE *fp, struct hist_entry *self) -{ - if (self->dso) - return fprintf(fp, "%-25s", self->dso->name); - - return fprintf(fp, "%016llx ", (__u64)self->ip); -} - -static struct sort_entry sort_dso = { - .header = "Shared Object ", - .cmp = sort__dso_cmp, - .print = sort__dso_print, -}; - -/* --sort symbol */ - -static int64_t -sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) -{ - __u64 ip_l, ip_r; - - if (left->sym == right->sym) - return 0; - - ip_l = left->sym ? left->sym->start : left->ip; - ip_r = right->sym ? right->sym->start : right->ip; - - return (int64_t)(ip_r - ip_l); -} - -static size_t -sort__sym_print(FILE *fp, struct hist_entry *self) -{ - size_t ret = 0; - - if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); - - if (self->sym) { - ret += fprintf(fp, "[%c] %s", - self->dso == kernel_dso ? 
'k' : '.', self->sym->name); - } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); - } - - return ret; -} - -static struct sort_entry sort_sym = { - .header = "Symbol", - .cmp = sort__sym_cmp, - .print = sort__sym_print, -}; - -static int sort__need_collapse = 0; - -struct sort_dimension { - char *name; - struct sort_entry *entry; - int taken; -}; - -static struct sort_dimension sort_dimensions[] = { - { .name = "pid", .entry = &sort_thread, }, - { .name = "comm", .entry = &sort_comm, }, - { .name = "dso", .entry = &sort_dso, }, - { .name = "symbol", .entry = &sort_sym, }, -}; - -static LIST_HEAD(hist_entry__sort_list); - -static int sort_dimension__add(char *tok) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { - struct sort_dimension *sd = &sort_dimensions[i]; - - if (sd->taken) - continue; - - if (strncasecmp(tok, sd->name, strlen(tok))) - continue; - - if (sd->entry->collapse) - sort__need_collapse = 1; - - list_add_tail(&sd->entry->list, &hist_entry__sort_list); - sd->taken = 1; - - return 0; - } - - return -ESRCH; -} - -static int64_t -hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - cmp = se->cmp(left, right); - if (cmp) - break; - } - - return cmp; -} - -static int64_t -hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) -{ - struct sort_entry *se; - int64_t cmp = 0; - - list_for_each_entry(se, &hist_entry__sort_list, list) { - int64_t (*f)(struct hist_entry *, struct hist_entry *); - - f = se->collapse ?: se->cmp; - - cmp = f(left, right); - if (cmp) - break; - } - - return cmp; -} - -static size_t -hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) -{ - struct sort_entry *se; - size_t ret; - - if (total_samples) { - double percent = self->count * 100.0 / total_samples; - char *color = PERF_COLOR_NORMAL; - - /* - * We color high-overhead entries in red, 
mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (percent >= 5.0) { - color = PERF_COLOR_RED; - } else { - if (percent >= 0.5) - color = PERF_COLOR_GREEN; - } - - ret = color_fprintf(fp, color, " %6.2f%%", - (self->count * 100.0) / total_samples); - } else - ret = fprintf(fp, "%12Ld ", self->count); - - list_for_each_entry(se, &hist_entry__sort_list, list) { - fprintf(fp, " "); - ret += se->print(fp, self); - } - - ret += fprintf(fp, "\n"); - - return ret; -} - -/* - * collect histogram counts - */ - -static int -hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level, __u64 count) -{ - struct rb_node **p = &hist.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *he; - struct hist_entry entry = { - .thread = thread, - .map = map, - .dso = dso, - .sym = sym, - .ip = ip, - .level = level, - .count = count, - }; - int cmp; - - while (*p != NULL) { - parent = *p; - he = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__cmp(&entry, he); - - if (!cmp) { - he->count += count; - return 0; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - he = malloc(sizeof(*he)); - if (!he) - return -ENOMEM; - *he = entry; - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &hist); - - return 0; -} - -static void hist_entry__free(struct hist_entry *he) -{ - free(he); -} - -/* - * collapse the histogram - */ - -static struct rb_root collapse_hists; - -static void collapse__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &collapse_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - int64_t cmp; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - cmp = hist_entry__collapse(iter, he); - - if (!cmp) { - iter->count += he->count; - hist_entry__free(he); - return; - } - - if (cmp < 0) - p = &(*p)->rb_left; - else - p = 
&(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &collapse_hists); -} - -static void collapse__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - - if (!sort__need_collapse) - return; - - next = rb_first(&hist); - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, &hist); - collapse__insert_entry(n); - } -} - -/* - * reverse the map, sort on count. - */ - -static struct rb_root output_hists; - -static void output__insert_entry(struct hist_entry *he) -{ - struct rb_node **p = &output_hists.rb_node; - struct rb_node *parent = NULL; - struct hist_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct hist_entry, rb_node); - - if (he->count > iter->count) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&he->rb_node, parent, p); - rb_insert_color(&he->rb_node, &output_hists); -} - -static void output__resort(void) -{ - struct rb_node *next; - struct hist_entry *n; - struct rb_root *tree = &hist; - - if (sort__need_collapse) - tree = &collapse_hists; - - next = rb_first(tree); - - while (next) { - n = rb_entry(next, struct hist_entry, rb_node); - next = rb_next(&n->rb_node); - - rb_erase(&n->rb_node, tree); - output__insert_entry(n); - } -} - -static size_t output__fprintf(FILE *fp, __u64 total_samples) -{ - struct hist_entry *pos; - struct sort_entry *se; - struct rb_node *nd; - size_t ret = 0; - - fprintf(fp, "\n"); - fprintf(fp, "#\n"); - fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); - fprintf(fp, "#\n"); - - fprintf(fp, "# Overhead"); - list_for_each_entry(se, &hist_entry__sort_list, list) - fprintf(fp, " %s", se->header); - fprintf(fp, "\n"); - - fprintf(fp, "# ........"); - list_for_each_entry(se, &hist_entry__sort_list, list) { - int i; - - fprintf(fp, " "); - for (i = 0; i < strlen(se->header); i++) - fprintf(fp, "."); - } - fprintf(fp, "\n"); - - fprintf(fp, "#\n"); 
- - for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { - pos = rb_entry(nd, struct hist_entry, rb_node); - ret += hist_entry__fprintf(fp, pos, total_samples); - } - - if (!strcmp(sort_order, default_sort_order)) { - fprintf(fp, "#\n"); - fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); - fprintf(fp, "#\n"); - } - fprintf(fp, "\n"); - - return ret; -} - -static void register_idle_thread(void) -{ - struct thread *thread = threads__findnew(0); - - if (thread == NULL || - thread__set_comm(thread, "[idle]")) { - fprintf(stderr, "problem inserting idle task.\n"); - exit(-1); - } -} - -static unsigned long total = 0, - total_mmap = 0, - total_comm = 0, - total_fork = 0, - total_unknown = 0; - -static int -process_overflow_event(event_t *event, unsigned long offset, unsigned long head) -{ - char level; - int show = 0; - struct dso *dso = NULL; - struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; - __u64 period = 1; - struct map *map = NULL; - - if (event->header.type & PERF_SAMPLE_PERIOD) - period = event->ip.period; - - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.misc, - event->ip.pid, - (void *)(long)ip, - (long long)period); - - dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); - - if (thread == NULL) { - fprintf(stderr, "problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } - - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { - show = SHOW_KERNEL; - level = 'k'; - - dso = kernel_dso; - - dprintf(" ...... 
dso: %s\n", dso->name); - - } else if (event->header.misc & PERF_EVENT_MISC_USER) { - - show = SHOW_USER; - level = '.'; - - map = thread__find_map(thread, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dprintf(" ...... dso: %s\n", dso ? dso->name : ""); - - } else { - show = SHOW_HV; - level = 'H'; - dprintf(" ...... dso: [hypervisor]\n"); - } - - if (show & show_mask) { - struct symbol *sym = NULL; - - if (dso) - sym = dso->find_symbol(dso, ip); - - if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); - return -1; - } - } - total += period; - - return 0; -} - -static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->mmap.pid); - struct map *map = map__new(&event->mmap); - - dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->comm.pid); - - dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dprintf("problem 
processing PERF_EVENT_COMM, skipping event.\n"); - return -1; - } - total_comm++; - - return 0; -} - -static int -process_fork_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); - - dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->fork.pid, event->fork.ppid); - - if (!thread || !parent || thread__fork(thread, parent)) { - dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - -static int -process_period_event(event_t *event, unsigned long offset, unsigned long head) -{ - dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->period.time, - event->period.id, - event->period.sample_period); - - return 0; -} - -static int -process_event(event_t *event, unsigned long offset, unsigned long head) -{ - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) - return process_overflow_event(event, offset, head); - - switch (event->header.type) { - case PERF_EVENT_MMAP: - return process_mmap_event(event, offset, head); - - case PERF_EVENT_COMM: - return process_comm_event(event, offset, head); - - case PERF_EVENT_FORK: - return process_fork_event(event, offset, head); - - case PERF_EVENT_PERIOD: - return process_period_event(event, offset, head); - /* - * We dont process them right now but they are fine: - */ - - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: - return 0; - - default: - return -1; - } - - return 0; -} - -static int __cmd_report(void) -{ - int ret, rc = EXIT_FAILURE; - unsigned long offset = 0; - unsigned long head = 0; - struct stat stat; - event_t *event; - uint32_t size; - char *buf; - - register_idle_thread(); - - input = open(input_name, O_RDONLY); - if (input < 0) { - fprintf(stderr, " failed to 
open file: %s", input_name); - if (!strcmp(input_name, "perf.data")) - fprintf(stderr, " (try 'perf record' first)"); - fprintf(stderr, "\n"); - exit(-1); - } - - ret = fstat(input, &stat); - if (ret < 0) { - perror("failed to stat file"); - exit(-1); - } - - if (!stat.st_size) { - fprintf(stderr, "zero-sized file, nothing to do!\n"); - exit(0); - } - - if (load_kernel() < 0) { - perror("failed to load kernel symbols"); - return EXIT_FAILURE; - } - - if (!full_paths) { - if (getcwd(__cwd, sizeof(__cwd)) == NULL) { - perror("failed to get the current directory"); - return EXIT_FAILURE; - } - cwdlen = strlen(cwd); - } else { - cwd = NULL; - cwdlen = 0; - } -remap: - buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); - if (buf == MAP_FAILED) { - perror("failed to mmap file"); - exit(-1); - } - -more: - event = (event_t *)(buf + head); - - size = event->header.size; - if (!size) - size = 8; - - if (head + event->header.size >= page_size * mmap_window) { - unsigned long shift = page_size * (head / page_size); - int ret; - - ret = munmap(buf, page_size * mmap_window); - assert(ret == 0); - - offset += shift; - head -= shift; - goto remap; - } - - size = event->header.size; - - dprintf("%p [%p]: event: %d\n", - (void *)(offset + head), - (void *)(long)event->header.size, - event->header.type); - - if (!size || process_event(event, offset, head) < 0) { - - dprintf("%p [%p]: skipping unknown header type: %d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.type); - - total_unknown++; - - /* - * assume we lost track of the stream, check alignment, and - * increment a single u64 in the hope to catch on again 'soon'. 
- */ - - if (unlikely(head & 7)) - head &= ~7ULL; - - size = 8; - } - - head += size; - - if (offset + head < stat.st_size) - goto more; - - rc = EXIT_SUCCESS; - close(input); - - dprintf(" IP events: %10ld\n", total); - dprintf(" mmap events: %10ld\n", total_mmap); - dprintf(" comm events: %10ld\n", total_comm); - dprintf(" fork events: %10ld\n", total_fork); - dprintf(" unknown events: %10ld\n", total_unknown); - - if (dump_trace) - return 0; - - if (verbose >= 3) - threads__fprintf(stdout); - - if (verbose >= 2) - dsos__fprintf(stdout); - - collapse__resort(); - output__resort(); - output__fprintf(stdout, total); - - return rc; -} - -static const char * const report_usage[] = { - "perf report [] ", - NULL -}; - -static const struct option options[] = { - OPT_STRING('i', "input", &input_name, "file", - "input file name"), - OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, - "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), - OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), - OPT_BOOLEAN('P', "full-paths", &full_paths, - "Don't shorten the pathnames taking into account the cwd"), - OPT_END() -}; - -static void setup_sorting(void) -{ - char *tmp, *tok, *str = strdup(sort_order); - - for (tok = strtok_r(str, ", ", &tmp); - tok; tok = strtok_r(NULL, ", ", &tmp)) { - if (sort_dimension__add(tok) < 0) { - error("Unknown --sort key: `%s'", tok); - usage_with_options(report_usage, options); - } - } - - free(str); -} - -int cmd_report(int argc, const char **argv, const char *prefix) -{ - symbol__init(); - - page_size = getpagesize(); - - argc = parse_options(argc, argv, options, report_usage, 0); - - setup_sorting(); - - /* - * Any (unrecognized) arguments left? 
- */ - if (argc) - usage_with_options(report_usage, options); - - setup_pager(); - - return __cmd_report(); -} diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c deleted file mode 100644 index c43e4a97dc42..000000000000 --- a/trunk/tools/perf/builtin-stat.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * builtin-stat.c - * - * Builtin stat command: Give a precise performance counters summary - * overview about any workload, CPU or specific PID. - * - * Sample output: - - $ perf stat ~/hackbench 10 - Time: 0.104 - - Performance counter stats for '/home/mingo/hackbench': - - 1255.538611 task clock ticks # 10.143 CPU utilization factor - 54011 context switches # 0.043 M/sec - 385 CPU migrations # 0.000 M/sec - 17755 pagefaults # 0.014 M/sec - 3808323185 CPU cycles # 3033.219 M/sec - 1575111190 instructions # 1254.530 M/sec - 17367895 cache references # 13.833 M/sec - 7674421 cache misses # 6.112 M/sec - - Wall-clock time elapsed: 123.786620 msecs - - * - * Copyright (C) 2008, Red Hat Inc, Ingo Molnar - * - * Improvements and fixes by: - * - * Arjan van de Ven - * Yanmin Zhang - * Wu Fengguang - * Mike Galbraith - * Paul Mackerras - * - * Released under the GPL v2. 
(and only v2, not any later version) - */ - -#include "perf.h" -#include "builtin.h" -#include "util/util.h" -#include "util/parse-options.h" -#include "util/parse-events.h" - -#include - -static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { - - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, - -}; - -static int system_wide = 0; -static int inherit = 1; -static int verbose = 0; - -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static int target_pid = -1; -static int nr_cpus = 0; -static unsigned int page_size; - -static int scale = 1; - -static const unsigned int default_count[] = { - 1000000, - 1000000, - 10000, - 10000, - 1000000, - 10000, -}; - -static __u64 event_res[MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_COUNTERS]; - -static __u64 runtime_nsecs; -static __u64 walltime_nsecs; -static __u64 runtime_cycles; - -static void create_perf_stat_counter(int counter) -{ - struct perf_counter_attr *attr = attrs + counter; - - if (scale) - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - - if (system_wide) { - int cpu; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); - } - } - } else { - attr->inherit = inherit; - attr->disabled = 1; - - fd[0][counter] = 
sys_perf_counter_open(attr, 0, -1, -1, 0); - if (fd[0][counter] < 0 && verbose) { - printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); - } - } -} - -/* - * Does the counter have nsecs as a unit? - */ -static inline int nsec_counter(int counter) -{ - if (attrs[counter].type != PERF_TYPE_SOFTWARE) - return 0; - - if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) - return 1; - - if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - return 1; - - return 0; -} - -/* - * Read out the results of a single counter: - */ -static void read_counter(int counter) -{ - __u64 *count, single_count[3]; - ssize_t res; - int cpu, nv; - int scaled; - - count = event_res[counter]; - - count[0] = count[1] = count[2] = 0; - - nv = scale ? 3 : 1; - for (cpu = 0; cpu < nr_cpus; cpu ++) { - if (fd[cpu][counter] < 0) - continue; - - res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); - assert(res == nv * sizeof(__u64)); - - count[0] += single_count[0]; - if (scale) { - count[1] += single_count[1]; - count[2] += single_count[2]; - } - } - - scaled = 0; - if (scale) { - if (count[2] == 0) { - event_scaled[counter] = -1; - count[0] = 0; - return; - } - - if (count[2] < count[1]) { - event_scaled[counter] = 1; - count[0] = (unsigned long long) - ((double)count[0] * count[1] / count[2] + 0.5); - } - } - /* - * Save the full runtime - to allow normalization during printout: - */ - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs = count[0]; - if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles = count[0]; -} - -/* - * Print out the results of a single counter: - */ -static void print_counter(int counter) -{ - __u64 *count; - int scaled; - - count = event_res[counter]; - scaled = event_scaled[counter]; - - if (scaled == -1) { - fprintf(stderr, " %14s %-20s\n", - "", 
event_name(counter)); - return; - } - - if (nsec_counter(counter)) { - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s", - msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - - if (walltime_nsecs) - fprintf(stderr, " # %11.3f CPU utilization factor", - (double)count[0] / (double)walltime_nsecs); - } - } else { - fprintf(stderr, " %14Ld %-20s", - count[0], event_name(counter)); - if (runtime_nsecs) - fprintf(stderr, " # %11.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); - if (runtime_cycles && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - - fprintf(stderr, " # %1.3f per cycle", - (double)count[0] / (double)runtime_cycles); - } - } - if (scaled) - fprintf(stderr, " (scaled from %.2f%%)", - (double) count[2] / count[1] * 100); - fprintf(stderr, "\n"); -} - -static int do_perf_stat(int argc, const char **argv) -{ - unsigned long long t0, t1; - int counter; - int status; - int pid; - int i; - - if (!system_wide) - nr_cpus = 1; - - for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); - - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); - - if ((pid = fork()) < 0) - perror("failed to fork"); - - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); - } - } - - while (wait(&status) >= 0) - ; - - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); - - walltime_nsecs = t1 - t0; - - fflush(stdout); - - fprintf(stderr, "\n"); - fprintf(stderr, " Performance counter stats for \'%s", argv[0]); - - for (i = 1; i < argc; i++) - fprintf(stderr, " %s", argv[i]); - - fprintf(stderr, "\':\n"); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) - read_counter(counter); - - for (counter = 0; counter < nr_counters; counter++) - print_counter(counter); - - 
- fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); - fprintf(stderr, "\n"); - - return 0; -} - -static volatile int signr = -1; - -static void skip_signal(int signo) -{ - signr = signo; -} - -static void sig_atexit(void) -{ - if (signr == -1) - return; - - signal(signr, SIG_DFL); - kill(getpid(), signr); -} - -static const char * const stat_usage[] = { - "perf stat [] ", - NULL -}; - -static const struct option options[] = { - OPT_CALLBACK('e', "event", NULL, "event", - "event selector. use 'perf list' to list available events", - parse_events), - OPT_BOOLEAN('i', "inherit", &inherit, - "child tasks inherit counters"), - OPT_INTEGER('p', "pid", &target_pid, - "stat events on existing pid"), - OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), - OPT_BOOLEAN('S', "scale", &scale, - "scale/normalize counters"), - OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), - OPT_END() -}; - -int cmd_stat(int argc, const char **argv, const char *prefix) -{ - page_size = sysconf(_SC_PAGE_SIZE); - - memcpy(attrs, default_attrs, sizeof(attrs)); - - argc = parse_options(argc, argv, options, stat_usage, 0); - if (!argc) - usage_with_options(stat_usage, options); - - if (!nr_counters) - nr_counters = 8; - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - - /* - * We dont want to block the signals - that would cause - * child tasks to inherit that and Ctrl-C would not work. 
- * What we want is for Ctrl-C to work in the exec()-ed - * task, but being ignored by perf stat itself: - */ - atexit(sig_atexit); - signal(SIGINT, skip_signal); - signal(SIGALRM, skip_signal); - signal(SIGABRT, skip_signal); - - return do_perf_stat(argc, argv); -} diff --git a/trunk/tools/perf/builtin-top.c b/trunk/tools/perf/builtin-top.c deleted file mode 100644 index fe338d3c5d7e..000000000000 --- a/trunk/tools/perf/builtin-top.c +++ /dev/null @@ -1,736 +0,0 @@ -/* - * builtin-top.c - * - * Builtin top command: Display a continuously updated profile of - * any workload, CPU or specific PID. - * - * Copyright (C) 2008, Red Hat Inc, Ingo Molnar - * - * Improvements and fixes by: - * - * Arjan van de Ven - * Yanmin Zhang - * Wu Fengguang - * Mike Galbraith - * Paul Mackerras - * - * Released under the GPL v2. (and only v2, not any later version) - */ -#include "builtin.h" - -#include "perf.h" - -#include "util/symbol.h" -#include "util/color.h" -#include "util/util.h" -#include "util/rbtree.h" -#include "util/parse-options.h" -#include "util/parse-events.h" - -#include -#include - -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -static int fd[MAX_NR_CPUS][MAX_COUNTERS]; - -static int system_wide = 0; - -static int default_interval = 100000; - -static __u64 count_filter = 5; -static int print_entries = 15; - -static int target_pid = -1; -static int profile_cpu = -1; -static int nr_cpus = 0; -static unsigned int realtime_prio = 0; -static int group = 0; -static unsigned int page_size; -static unsigned int mmap_pages = 16; -static int freq = 0; -static int verbose = 0; - -static char *sym_filter; -static unsigned long filter_start; -static unsigned long filter_end; - -static int delay_secs = 2; -static int zero; -static int dump_symtab; - -/* - * Symbols - */ - -static __u64 min_ip; -static __u64 max_ip = -1ll; - -struct sym_entry { - struct rb_node rb_node; - struct 
list_head node; - unsigned long count[MAX_COUNTERS]; - unsigned long snap_count; - double weight; - int skip; -}; - -struct sym_entry *sym_filter_entry; - -struct dso *kernel_dso; - -/* - * Symbols will be added here in record_ip and will get out - * after decayed. - */ -static LIST_HEAD(active_symbols); -static pthread_mutex_t active_symbols_lock = PTHREAD_MUTEX_INITIALIZER; - -/* - * Ordering weight: count-1 * count-2 * ... / count-n - */ -static double sym_weight(const struct sym_entry *sym) -{ - double weight = sym->snap_count; - int counter; - - for (counter = 1; counter < nr_counters-1; counter++) - weight *= sym->count[counter]; - - weight /= (sym->count[counter] + 1); - - return weight; -} - -static long samples; -static long userspace_samples; -static const char CONSOLE_CLEAR[] = ""; - -static void __list_insert_active_sym(struct sym_entry *syme) -{ - list_add(&syme->node, &active_symbols); -} - -static void list_remove_active_sym(struct sym_entry *syme) -{ - pthread_mutex_lock(&active_symbols_lock); - list_del_init(&syme->node); - pthread_mutex_unlock(&active_symbols_lock); -} - -static void rb_insert_active_sym(struct rb_root *tree, struct sym_entry *se) -{ - struct rb_node **p = &tree->rb_node; - struct rb_node *parent = NULL; - struct sym_entry *iter; - - while (*p != NULL) { - parent = *p; - iter = rb_entry(parent, struct sym_entry, rb_node); - - if (se->weight > iter->weight) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&se->rb_node, parent, p); - rb_insert_color(&se->rb_node, tree); -} - -static void print_sym_table(void) -{ - int printed = 0, j; - int counter; - float samples_per_sec = samples/delay_secs; - float ksamples_per_sec = (samples-userspace_samples)/delay_secs; - float sum_ksamples = 0.0; - struct sym_entry *syme, *n; - struct rb_root tmp = RB_ROOT; - struct rb_node *nd; - - samples = userspace_samples = 0; - - /* Sort the active symbols */ - pthread_mutex_lock(&active_symbols_lock); - syme = 
list_entry(active_symbols.next, struct sym_entry, node); - pthread_mutex_unlock(&active_symbols_lock); - - list_for_each_entry_safe_from(syme, n, &active_symbols, node) { - syme->snap_count = syme->count[0]; - if (syme->snap_count != 0) { - syme->weight = sym_weight(syme); - rb_insert_active_sym(&tmp, syme); - sum_ksamples += syme->snap_count; - - for (j = 0; j < nr_counters; j++) - syme->count[j] = zero ? 0 : syme->count[j] * 7 / 8; - } else - list_remove_active_sym(syme); - } - - puts(CONSOLE_CLEAR); - - printf( -"------------------------------------------------------------------------------\n"); - printf( " PerfTop:%8.0f irqs/sec kernel:%4.1f%% [", - samples_per_sec, - 100.0 - (100.0*((samples_per_sec-ksamples_per_sec)/samples_per_sec))); - - if (nr_counters == 1) { - printf("%Ld", attrs[0].sample_period); - if (freq) - printf("Hz "); - else - printf(" "); - } - - for (counter = 0; counter < nr_counters; counter++) { - if (counter) - printf("/"); - - printf("%s", event_name(counter)); - } - - printf( "], "); - - if (target_pid != -1) - printf(" (target_pid: %d", target_pid); - else - printf(" (all"); - - if (profile_cpu != -1) - printf(", cpu: %d)\n", profile_cpu); - else { - if (target_pid != -1) - printf(")\n"); - else - printf(", %d CPUs)\n", nr_cpus); - } - - printf("------------------------------------------------------------------------------\n\n"); - - if (nr_counters == 1) - printf(" samples pcnt"); - else - printf(" weight samples pcnt"); - - printf(" RIP kernel function\n" - " ______ _______ _____ ________________ _______________\n\n" - ); - - for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { - struct sym_entry *syme = rb_entry(nd, struct sym_entry, rb_node); - struct symbol *sym = (struct symbol *)(syme + 1); - char *color = PERF_COLOR_NORMAL; - double pcnt; - - if (++printed > print_entries || syme->snap_count < count_filter) - continue; - - pcnt = 100.0 - (100.0 * ((sum_ksamples - syme->snap_count) / - sum_ksamples)); - - /* - * We color 
high-overhead entries in red, mid-overhead - * entries in green - and keep the low overhead places - * normal: - */ - if (pcnt >= 5.0) { - color = PERF_COLOR_RED; - } else { - if (pcnt >= 0.5) - color = PERF_COLOR_GREEN; - } - - if (nr_counters == 1) - printf("%20.2f - ", syme->weight); - else - printf("%9.1f %10ld - ", syme->weight, syme->snap_count); - - color_fprintf(stdout, color, "%4.1f%%", pcnt); - printf(" - %016llx : %s\n", sym->start, sym->name); - } -} - -static void *display_thread(void *arg) -{ - struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; - int delay_msecs = delay_secs * 1000; - - printf("PerfTop refresh period: %d seconds\n", delay_secs); - - do { - print_sym_table(); - } while (!poll(&stdin_poll, 1, delay_msecs) == 1); - - printf("key pressed - exiting.\n"); - exit(0); - - return NULL; -} - -static int symbol_filter(struct dso *self, struct symbol *sym) -{ - static int filter_match; - struct sym_entry *syme; - const char *name = sym->name; - - if (!strcmp(name, "_text") || - !strcmp(name, "_etext") || - !strcmp(name, "_sinittext") || - !strncmp("init_module", name, 11) || - !strncmp("cleanup_module", name, 14) || - strstr(name, "_text_start") || - strstr(name, "_text_end")) - return 1; - - syme = dso__sym_priv(self, sym); - /* Tag samples to be skipped. 
*/ - if (!strcmp("default_idle", name) || - !strcmp("cpu_idle", name) || - !strcmp("enter_idle", name) || - !strcmp("exit_idle", name) || - !strcmp("mwait_idle", name)) - syme->skip = 1; - - if (filter_match == 1) { - filter_end = sym->start; - filter_match = -1; - if (filter_end - filter_start > 10000) { - fprintf(stderr, - "hm, too large filter symbol <%s> - skipping.\n", - sym_filter); - fprintf(stderr, "symbol filter start: %016lx\n", - filter_start); - fprintf(stderr, " end: %016lx\n", - filter_end); - filter_end = filter_start = 0; - sym_filter = NULL; - sleep(1); - } - } - - if (filter_match == 0 && sym_filter && !strcmp(name, sym_filter)) { - filter_match = 1; - filter_start = sym->start; - } - - - return 0; -} - -static int parse_symbols(void) -{ - struct rb_node *node; - struct symbol *sym; - - kernel_dso = dso__new("[kernel]", sizeof(struct sym_entry)); - if (kernel_dso == NULL) - return -1; - - if (dso__load_kernel(kernel_dso, NULL, symbol_filter, 1) != 0) - goto out_delete_dso; - - node = rb_first(&kernel_dso->syms); - sym = rb_entry(node, struct symbol, rb_node); - min_ip = sym->start; - - node = rb_last(&kernel_dso->syms); - sym = rb_entry(node, struct symbol, rb_node); - max_ip = sym->end; - - if (dump_symtab) - dso__fprintf(kernel_dso, stderr); - - return 0; - -out_delete_dso: - dso__delete(kernel_dso); - kernel_dso = NULL; - return -1; -} - -#define TRACE_COUNT 3 - -/* - * Binary search in the histogram table and record the hit: - */ -static void record_ip(__u64 ip, int counter) -{ - struct symbol *sym = dso__find_symbol(kernel_dso, ip); - - if (sym != NULL) { - struct sym_entry *syme = dso__sym_priv(kernel_dso, sym); - - if (!syme->skip) { - syme->count[counter]++; - pthread_mutex_lock(&active_symbols_lock); - if (list_empty(&syme->node) || !syme->node.next) - __list_insert_active_sym(syme); - pthread_mutex_unlock(&active_symbols_lock); - return; - } - } - - samples--; -} - -static void process_event(__u64 ip, int counter) -{ - samples++; - - if 
(ip < min_ip || ip > max_ip) { - userspace_samples++; - return; - } - - record_ip(ip, counter); -} - -struct mmap_data { - int counter; - void *base; - unsigned int mask; - unsigned int prev; -}; - -static unsigned int mmap_read_head(struct mmap_data *md) -{ - struct perf_counter_mmap_page *pc = md->base; - int head; - - head = pc->data_head; - rmb(); - - return head; -} - -struct timeval last_read, this_read; - -static void mmap_read_counter(struct mmap_data *md) -{ - unsigned int head = mmap_read_head(md); - unsigned int old = md->prev; - unsigned char *data = md->base + page_size; - int diff; - - gettimeofday(&this_read, NULL); - - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the head, we got messed up. - * - * In either case, truncate and restart at head. - */ - diff = head - old; - if (diff > md->mask / 2 || diff < 0) { - struct timeval iv; - unsigned long msecs; - - timersub(&this_read, &last_read, &iv); - msecs = iv.tv_sec*1000 + iv.tv_usec/1000; - - fprintf(stderr, "WARNING: failed to keep up with mmap data." - " Last read %lu msecs ago.\n", msecs); - - /* - * head points to a known good entry, start there. - */ - old = head; - } - - last_read = this_read; - - for (; old != head;) { - struct ip_event { - struct perf_event_header header; - __u64 ip; - __u32 pid, target_pid; - }; - struct mmap_event { - struct perf_event_header header; - __u32 pid, target_pid; - __u64 start; - __u64 len; - __u64 pgoff; - char filename[PATH_MAX]; - }; - - typedef union event_union { - struct perf_event_header header; - struct ip_event ip; - struct mmap_event mmap; - } event_t; - - event_t *event = (event_t *)&data[old & md->mask]; - - event_t event_copy; - - size_t size = event->header.size; - - /* - * Event straddles the mmap boundary -- header should always - * be inside due to u64 alignment of output. 
- */ - if ((old & md->mask) + size != ((old + size) & md->mask)) { - unsigned int offset = old; - unsigned int len = min(sizeof(*event), size), cpy; - void *dst = &event_copy; - - do { - cpy = min(md->mask + 1 - (offset & md->mask), len); - memcpy(dst, &data[offset & md->mask], cpy); - offset += cpy; - dst += cpy; - len -= cpy; - } while (len); - - event = &event_copy; - } - - old += size; - - if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) { - if (event->header.type & PERF_SAMPLE_IP) - process_event(event->ip.ip, md->counter); - } - } - - md->prev = old; -} - -static struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; -static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; - -static void mmap_read(void) -{ - int i, counter; - - for (i = 0; i < nr_cpus; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read_counter(&mmap_array[i][counter]); - } -} - -int nr_poll; -int group_fd; - -static void start_counter(int i, int counter) -{ - struct perf_counter_attr *attr; - unsigned int cpu; - - cpu = profile_cpu; - if (target_pid == -1 && profile_cpu == -1) - cpu = i; - - attr = attrs + counter; - - attr->sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID; - attr->freq = freq; - -try_again: - fd[i][counter] = sys_perf_counter_open(attr, target_pid, cpu, group_fd, 0); - - if (fd[i][counter] < 0) { - int err = errno; - - if (err == EPERM) - die("No permission - are you root?\n"); - /* - * If it's cycles then fall back to hrtimer - * based cpu-clock-tick sw counter, which - * is always available even if no PMU support: - */ - if (attr->type == PERF_TYPE_HARDWARE - && attr->config == PERF_COUNT_HW_CPU_CYCLES) { - - if (verbose) - warning(" ... 
trying to fall back to cpu-clock-ticks\n"); - - attr->type = PERF_TYPE_SOFTWARE; - attr->config = PERF_COUNT_SW_CPU_CLOCK; - goto try_again; - } - printf("\n"); - error("perfcounter syscall returned with %d (%s)\n", - fd[i][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); - exit(-1); - } - assert(fd[i][counter] >= 0); - fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); - - /* - * First counter acts as the group leader: - */ - if (group && group_fd == -1) - group_fd = fd[i][counter]; - - event_array[nr_poll].fd = fd[i][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[i][counter].counter = counter; - mmap_array[i][counter].prev = 0; - mmap_array[i][counter].mask = mmap_pages*page_size - 1; - mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ, MAP_SHARED, fd[i][counter], 0); - if (mmap_array[i][counter].base == MAP_FAILED) - die("failed to mmap with %d (%s)\n", errno, strerror(errno)); -} - -static int __cmd_top(void) -{ - pthread_t thread; - int i, counter; - int ret; - - for (i = 0; i < nr_cpus; i++) { - group_fd = -1; - for (counter = 0; counter < nr_counters; counter++) - start_counter(i, counter); - } - - /* Wait for a minimal set of events before starting the snapshot */ - poll(event_array, nr_poll, 100); - - mmap_read(); - - if (pthread_create(&thread, NULL, display_thread, NULL)) { - printf("Could not create display thread.\n"); - exit(-1); - } - - if (realtime_prio) { - struct sched_param param; - - param.sched_priority = realtime_prio; - if (sched_setscheduler(0, SCHED_FIFO, ¶m)) { - printf("Could not set realtime priority.\n"); - exit(-1); - } - } - - while (1) { - int hits = samples; - - mmap_read(); - - if (hits == samples) - ret = poll(event_array, nr_poll, 100); - } - - return 0; -} - -static const char * const top_usage[] = { - "perf top []", - NULL -}; - -static const struct option options[] = { - OPT_CALLBACK('e', "event", NULL, "event", - "event selector. 
use 'perf list' to list available events", - parse_events), - OPT_INTEGER('c', "count", &default_interval, - "event period to sample"), - OPT_INTEGER('p', "pid", &target_pid, - "profile events on existing pid"), - OPT_BOOLEAN('a', "all-cpus", &system_wide, - "system-wide collection from all CPUs"), - OPT_INTEGER('C', "CPU", &profile_cpu, - "CPU to profile on"), - OPT_INTEGER('m', "mmap-pages", &mmap_pages, - "number of mmap data pages"), - OPT_INTEGER('r', "realtime", &realtime_prio, - "collect data with this RT SCHED_FIFO priority"), - OPT_INTEGER('d', "delay", &delay_secs, - "number of seconds to delay between refreshes"), - OPT_BOOLEAN('D', "dump-symtab", &dump_symtab, - "dump the symbol table used for profiling"), - OPT_INTEGER('f', "count-filter", &count_filter, - "only display functions with more events than this"), - OPT_BOOLEAN('g', "group", &group, - "put the counters into a counter group"), - OPT_STRING('s', "sym-filter", &sym_filter, "pattern", - "only display symbols matchig this pattern"), - OPT_BOOLEAN('z', "zero", &group, - "zero history across updates"), - OPT_INTEGER('F', "freq", &freq, - "profile at this frequency"), - OPT_INTEGER('E', "entries", &print_entries, - "display this many functions"), - OPT_BOOLEAN('v', "verbose", &verbose, - "be more verbose (show counter open errors, etc)"), - OPT_END() -}; - -int cmd_top(int argc, const char **argv, const char *prefix) -{ - int counter; - - page_size = sysconf(_SC_PAGE_SIZE); - - argc = parse_options(argc, argv, options, top_usage, 0); - if (argc) - usage_with_options(top_usage, options); - - if (freq) { - default_interval = freq; - freq = 1; - } - - /* CPU and PID are mutually exclusive */ - if (target_pid != -1 && profile_cpu != -1) { - printf("WARNING: PID switch overriding CPU\n"); - sleep(1); - profile_cpu = -1; - } - - if (!nr_counters) - nr_counters = 1; - - if (delay_secs < 1) - delay_secs = 1; - - parse_symbols(); - - /* - * Fill in the ones not specifically initialized via -c: - */ - for 
(counter = 0; counter < nr_counters; counter++) { - if (attrs[counter].sample_period) - continue; - - attrs[counter].sample_period = default_interval; - } - - nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); - assert(nr_cpus <= MAX_NR_CPUS); - assert(nr_cpus >= 0); - - if (target_pid != -1 || profile_cpu != -1) - nr_cpus = 1; - - return __cmd_top(); -} diff --git a/trunk/tools/perf/builtin.h b/trunk/tools/perf/builtin.h deleted file mode 100644 index 51d168230ee7..000000000000 --- a/trunk/tools/perf/builtin.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef BUILTIN_H -#define BUILTIN_H - -#include "util/util.h" -#include "util/strbuf.h" - -extern const char perf_version_string[]; -extern const char perf_usage_string[]; -extern const char perf_more_info_string[]; - -extern void list_common_cmds_help(void); -extern const char *help_unknown_cmd(const char *cmd); -extern void prune_packed_objects(int); -extern int read_line_with_nul(char *buf, int size, FILE *file); -extern int check_pager_config(const char *cmd); - -extern int cmd_annotate(int argc, const char **argv, const char *prefix); -extern int cmd_help(int argc, const char **argv, const char *prefix); -extern int cmd_record(int argc, const char **argv, const char *prefix); -extern int cmd_report(int argc, const char **argv, const char *prefix); -extern int cmd_stat(int argc, const char **argv, const char *prefix); -extern int cmd_top(int argc, const char **argv, const char *prefix); -extern int cmd_version(int argc, const char **argv, const char *prefix); -extern int cmd_list(int argc, const char **argv, const char *prefix); - -#endif diff --git a/trunk/tools/perf/command-list.txt b/trunk/tools/perf/command-list.txt deleted file mode 100644 index eebce30afbc0..000000000000 --- a/trunk/tools/perf/command-list.txt +++ /dev/null @@ -1,10 +0,0 @@ -# -# List of known perf commands. 
-# command name category [deprecated] [common] -# -perf-annotate mainporcelain common -perf-list mainporcelain common -perf-record mainporcelain common -perf-report mainporcelain common -perf-stat mainporcelain common -perf-top mainporcelain common diff --git a/trunk/tools/perf/design.txt b/trunk/tools/perf/design.txt deleted file mode 100644 index 860e116d979c..000000000000 --- a/trunk/tools/perf/design.txt +++ /dev/null @@ -1,442 +0,0 @@ - -Performance Counters for Linux ------------------------------- - -Performance counters are special hardware registers available on most modern -CPUs. These registers count the number of certain types of hw events: such -as instructions executed, cachemisses suffered, or branches mis-predicted - -without slowing down the kernel or applications. These registers can also -trigger interrupts when a threshold number of events have passed - and can -thus be used to profile the code that runs on that CPU. - -The Linux Performance Counter subsystem provides an abstraction of these -hardware capabilities. It provides per task and per CPU counters, counter -groups, and it provides event capabilities on top of those. It -provides "virtual" 64-bit counters, regardless of the width of the -underlying hardware counters. - -Performance counters are accessed via special file descriptors. -There's one file descriptor per virtual counter used. - -The special file descriptor is opened via the perf_counter_open() -system call: - - int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, - pid_t pid, int cpu, int group_fd, - unsigned long flags); - -The syscall returns the new fd. The fd can be used via the normal -VFS system calls: read() can be used to read the counter, fcntl() -can be used to set the blocking mode, etc. - -Multiple counters can be kept open at a time, and the counters -can be poll()ed. 
- -When creating a new counter fd, 'perf_counter_hw_event' is: - -struct perf_counter_hw_event { - /* - * The MSB of the config word signifies if the rest contains cpu - * specific (raw) counter configuration data, if unset, the next - * 7 bits are an event type and the rest of the bits are the event - * identifier. - */ - __u64 config; - - __u64 irq_period; - __u32 record_type; - __u32 read_format; - - __u64 disabled : 1, /* off by default */ - inherit : 1, /* children inherit it */ - pinned : 1, /* must always be on PMU */ - exclusive : 1, /* only group on PMU */ - exclude_user : 1, /* don't count user */ - exclude_kernel : 1, /* ditto kernel */ - exclude_hv : 1, /* ditto hypervisor */ - exclude_idle : 1, /* don't count when idle */ - mmap : 1, /* include mmap data */ - munmap : 1, /* include munmap data */ - comm : 1, /* include comm data */ - - __reserved_1 : 52; - - __u32 extra_config_len; - __u32 wakeup_events; /* wakeup every n events */ - - __u64 __reserved_2; - __u64 __reserved_3; -}; - -The 'config' field specifies what the counter should count. It -is divided into 3 bit-fields: - -raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000 -type: 7 bits (next most significant) 0x7f00_0000_0000_0000 -event_id: 56 bits (least significant) 0x00ff_ffff_ffff_ffff - -If 'raw_type' is 1, then the counter will count a hardware event -specified by the remaining 63 bits of event_config. The encoding is -machine-specific. 
- -If 'raw_type' is 0, then the 'type' field says what kind of counter -this is, with the following encoding: - -enum perf_event_types { - PERF_TYPE_HARDWARE = 0, - PERF_TYPE_SOFTWARE = 1, - PERF_TYPE_TRACEPOINT = 2, -}; - -A counter of PERF_TYPE_HARDWARE will count the hardware event -specified by 'event_id': - -/* - * Generalized performance counter event types, used by the hw_event.event_id - * parameter of the sys_perf_counter_open() syscall: - */ -enum hw_event_ids { - /* - * Common hardware events, generalized by the kernel: - */ - PERF_COUNT_HW_CPU_CYCLES = 0, - PERF_COUNT_HW_INSTRUCTIONS = 1, - PERF_COUNT_HW_CACHE_REFERENCES = 2, - PERF_COUNT_HW_CACHE_MISSES = 3, - PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, - PERF_COUNT_HW_BRANCH_MISSES = 5, - PERF_COUNT_HW_BUS_CYCLES = 6, -}; - -These are standardized types of events that work relatively uniformly -on all CPUs that implement Performance Counters support under Linux, -although there may be variations (e.g., different CPUs might count -cache references and misses at different levels of the cache hierarchy). -If a CPU is not able to count the selected event, then the system call -will return -EINVAL. - -More hw_event_types are supported as well, but they are CPU-specific -and accessed as raw events. For example, to count "External bus -cycles while bus lock signal asserted" events on Intel Core CPUs, pass -in a 0x4064 event_id value and set hw_event.raw_type to 1. - -A counter of type PERF_TYPE_SOFTWARE will count one of the available -software events, selected by 'event_id': - -/* - * Special "software" counters provided by the kernel, even if the hardware - * does not support performance counters. 
These counters measure various - * physical and sw events of the kernel (and allow the profiling of them as - * well): - */ -enum sw_event_ids { - PERF_COUNT_SW_CPU_CLOCK = 0, - PERF_COUNT_SW_TASK_CLOCK = 1, - PERF_COUNT_SW_PAGE_FAULTS = 2, - PERF_COUNT_SW_CONTEXT_SWITCHES = 3, - PERF_COUNT_SW_CPU_MIGRATIONS = 4, - PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, - PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, -}; - -Counters of the type PERF_TYPE_TRACEPOINT are available when the ftrace event -tracer is available, and event_id values can be obtained from -/debug/tracing/events/*/*/id - - -Counters come in two flavours: counting counters and sampling -counters. A "counting" counter is one that is used for counting the -number of events that occur, and is characterised by having -irq_period = 0. - - -A read() on a counter returns the current value of the counter and possible -additional values as specified by 'read_format', each value is a u64 (8 bytes) -in size. - -/* - * Bits that can be set in hw_event.read_format to request that - * reads on the counter should return the indicated quantities, - * in increasing order of bit value, after the counter value. - */ -enum perf_counter_read_format { - PERF_FORMAT_TOTAL_TIME_ENABLED = 1, - PERF_FORMAT_TOTAL_TIME_RUNNING = 2, -}; - -Using these additional values one can establish the overcommit ratio for a -particular counter allowing one to take the round-robin scheduling effect -into account. - - -A "sampling" counter is one that is set up to generate an interrupt -every N events, where N is given by 'irq_period'. A sampling counter -has irq_period > 0. The record_type controls what data is recorded on each -interrupt: - -/* - * Bits that can be set in hw_event.record_type to request information - * in the overflow packets. 
- */ -enum perf_counter_record_format { - PERF_RECORD_IP = 1U << 0, - PERF_RECORD_TID = 1U << 1, - PERF_RECORD_TIME = 1U << 2, - PERF_RECORD_ADDR = 1U << 3, - PERF_RECORD_GROUP = 1U << 4, - PERF_RECORD_CALLCHAIN = 1U << 5, -}; - -Such (and other) events will be recorded in a ring-buffer, which is -available to user-space using mmap() (see below). - -The 'disabled' bit specifies whether the counter starts out disabled -or enabled. If it is initially disabled, it can be enabled by ioctl -or prctl (see below). - -The 'inherit' bit, if set, specifies that this counter should count -events on descendant tasks as well as the task specified. This only -applies to new descendents, not to any existing descendents at the -time the counter is created (nor to any new descendents of existing -descendents). - -The 'pinned' bit, if set, specifies that the counter should always be -on the CPU if at all possible. It only applies to hardware counters -and only to group leaders. If a pinned counter cannot be put onto the -CPU (e.g. because there are not enough hardware counters or because of -a conflict with some other event), then the counter goes into an -'error' state, where reads return end-of-file (i.e. read() returns 0) -until the counter is subsequently enabled or disabled. - -The 'exclusive' bit, if set, specifies that when this counter's group -is on the CPU, it should be the only group using the CPU's counters. -In future, this will allow sophisticated monitoring programs to supply -extra configuration information via 'extra_config_len' to exploit -advanced features of the CPU's Performance Monitor Unit (PMU) that are -not otherwise accessible and that might disrupt other hardware -counters. - -The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a -way to request that counting of events be restricted to times when the -CPU is in user, kernel and/or hypervisor mode. 
- -The 'mmap' and 'munmap' bits allow recording of PROT_EXEC mmap/munmap -operations, these can be used to relate userspace IP addresses to actual -code, even after the mapping (or even the whole process) is gone, -these events are recorded in the ring-buffer (see below). - -The 'comm' bit allows tracking of process comm data on process creation. -This too is recorded in the ring-buffer (see below). - -The 'pid' parameter to the perf_counter_open() system call allows the -counter to be specific to a task: - - pid == 0: if the pid parameter is zero, the counter is attached to the - current task. - - pid > 0: the counter is attached to a specific task (if the current task - has sufficient privilege to do so) - - pid < 0: all tasks are counted (per cpu counters) - -The 'cpu' parameter allows a counter to be made specific to a CPU: - - cpu >= 0: the counter is restricted to a specific CPU - cpu == -1: the counter counts on all CPUs - -(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) - -A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts -events of that task and 'follows' that task to whatever CPU the task -gets schedule to. Per task counters can be created by any user, for -their own tasks. - -A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts -all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. - -The 'flags' parameter is currently unused and must be zero. - -The 'group_fd' parameter allows counter "groups" to be set up. A -counter group has one counter which is the group "leader". The leader -is created first, with group_fd = -1 in the perf_counter_open call -that creates it. The rest of the group members are created -subsequently, with group_fd giving the fd of the group leader. -(A single counter on its own is created with group_fd = -1 and is -considered to be a group with only 1 member.) 
- -A counter group is scheduled onto the CPU as a unit, that is, it will -only be put onto the CPU if all of the counters in the group can be -put onto the CPU. This means that the values of the member counters -can be meaningfully compared, added, divided (to get ratios), etc., -with each other, since they have counted events for the same set of -executed instructions. - - -Like stated, asynchronous events, like counter overflow or PROT_EXEC mmap -tracking are logged into a ring-buffer. This ring-buffer is created and -accessed through mmap(). - -The mmap size should be 1+2^n pages, where the first page is a meta-data page -(struct perf_counter_mmap_page) that contains various bits of information such -as where the ring-buffer head is. - -/* - * Structure of the page that can be mapped via mmap - */ -struct perf_counter_mmap_page { - __u32 version; /* version number of this structure */ - __u32 compat_version; /* lowest version this is compat with */ - - /* - * Bits needed to read the hw counters in user-space. - * - * u32 seq; - * s64 count; - * - * do { - * seq = pc->lock; - * - * barrier() - * if (pc->index) { - * count = pmc_read(pc->index - 1); - * count += pc->offset; - * } else - * goto regular_read; - * - * barrier(); - * } while (pc->lock != seq); - * - * NOTE: for obvious reason this only works on self-monitoring - * processes. - */ - __u32 lock; /* seqlock for synchronization */ - __u32 index; /* hardware counter identifier */ - __s64 offset; /* add to hardware counter value */ - - /* - * Control data for the mmap() data buffer. - * - * User-space reading this value should issue an rmb(), on SMP capable - * platforms, after reading this value -- see perf_counter_wakeup(). - */ - __u32 data_head; /* head in the data section */ -}; - -NOTE: the hw-counter userspace bits are arch specific and are currently only - implemented on powerpc. 
- -The following 2^n pages are the ring-buffer which contains events of the form: - -#define PERF_EVENT_MISC_KERNEL (1 << 0) -#define PERF_EVENT_MISC_USER (1 << 1) -#define PERF_EVENT_MISC_OVERFLOW (1 << 2) - -struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; -}; - -enum perf_event_type { - - /* - * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: - * - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * u64 addr; - * u64 len; - * u64 pgoff; - * char filename[]; - * }; - */ - PERF_EVENT_MMAP = 1, - PERF_EVENT_MUNMAP = 2, - - /* - * struct { - * struct perf_event_header header; - * - * u32 pid, tid; - * char comm[]; - * }; - */ - PERF_EVENT_COMM = 3, - - /* - * When header.misc & PERF_EVENT_MISC_OVERFLOW the event_type field - * will be PERF_RECORD_* - * - * struct { - * struct perf_event_header header; - * - * { u64 ip; } && PERF_RECORD_IP - * { u32 pid, tid; } && PERF_RECORD_TID - * { u64 time; } && PERF_RECORD_TIME - * { u64 addr; } && PERF_RECORD_ADDR - * - * { u64 nr; - * { u64 event, val; } cnt[nr]; } && PERF_RECORD_GROUP - * - * { u16 nr, - * hv, - * kernel, - * user; - * u64 ips[nr]; } && PERF_RECORD_CALLCHAIN - * }; - */ -}; - -NOTE: PERF_RECORD_CALLCHAIN is arch specific and currently only implemented - on x86. - -Notification of new events is possible through poll()/select()/epoll() and -fcntl() managing signals. - -Normally a notification is generated for every page filled, however one can -additionally set perf_counter_hw_event.wakeup_events to generate one every -so many counter overflow events. - -Future work will include a splice() interface to the ring-buffer. - - -Counters can be enabled and disabled in two ways: via ioctl and via -prctl. When a counter is disabled, it doesn't count or generate -events but does continue to exist and maintain its count value. 
- -An individual counter or counter group can be enabled with - - ioctl(fd, PERF_COUNTER_IOC_ENABLE); - -or disabled with - - ioctl(fd, PERF_COUNTER_IOC_DISABLE); - -Enabling or disabling the leader of a group enables or disables the -whole group; that is, while the group leader is disabled, none of the -counters in the group will count. Enabling or disabling a member of a -group other than the leader only affects that counter - disabling an -non-leader stops that counter from counting but doesn't affect any -other counter. - -Additionally, non-inherited overflow counters can use - - ioctl(fd, PERF_COUNTER_IOC_REFRESH, nr); - -to enable a counter for 'nr' events, after which it gets disabled again. - -A process can enable or disable all the counter groups that are -attached to it, using prctl: - - prctl(PR_TASK_PERF_COUNTERS_ENABLE); - - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - -This applies to all counters on the current process, whether created -by this process or by another, and doesn't affect any counters that -this process has created on other processes. It only enables or -disables the group leaders, not any other members in the groups. - diff --git a/trunk/tools/perf/perf.c b/trunk/tools/perf/perf.c deleted file mode 100644 index 4eb725933703..000000000000 --- a/trunk/tools/perf/perf.c +++ /dev/null @@ -1,428 +0,0 @@ -/* - * perf.c - * - * Performance analysis utility. - * - * This is the main hub from which the sub-commands (perf stat, - * perf top, perf record, perf report, etc.) are started. 
- */ -#include "builtin.h" - -#include "util/exec_cmd.h" -#include "util/cache.h" -#include "util/quote.h" -#include "util/run-command.h" - -const char perf_usage_string[] = - "perf [--version] [--help] COMMAND [ARGS]"; - -const char perf_more_info_string[] = - "See 'perf help COMMAND' for more information on a specific command."; - -static int use_pager = -1; -struct pager_config { - const char *cmd; - int val; -}; - -static int pager_command_config(const char *var, const char *value, void *data) -{ - struct pager_config *c = data; - if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) - c->val = perf_config_bool(var, value); - return 0; -} - -/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ -int check_pager_config(const char *cmd) -{ - struct pager_config c; - c.cmd = cmd; - c.val = -1; - perf_config(pager_command_config, &c); - return c.val; -} - -static void commit_pager_choice(void) { - switch (use_pager) { - case 0: - setenv("PERF_PAGER", "cat", 1); - break; - case 1: - /* setup_pager(); */ - break; - default: - break; - } -} - -static int handle_options(const char*** argv, int* argc, int* envchanged) -{ - int handled = 0; - - while (*argc > 0) { - const char *cmd = (*argv)[0]; - if (cmd[0] != '-') - break; - - /* - * For legacy reasons, the "version" and "help" - * commands can be written with "--" prepended - * to make them look like flags. - */ - if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) - break; - - /* - * Check remaining flags. 
- */ - if (!prefixcmp(cmd, "--exec-path")) { - cmd += 11; - if (*cmd == '=') - perf_set_argv_exec_path(cmd + 1); - else { - puts(perf_exec_path()); - exit(0); - } - } else if (!strcmp(cmd, "--html-path")) { - puts(system_path(PERF_HTML_PATH)); - exit(0); - } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) { - use_pager = 1; - } else if (!strcmp(cmd, "--no-pager")) { - use_pager = 0; - if (envchanged) - *envchanged = 1; - } else if (!strcmp(cmd, "--perf-dir")) { - if (*argc < 2) { - fprintf(stderr, "No directory given for --perf-dir.\n" ); - usage(perf_usage_string); - } - setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1); - if (envchanged) - *envchanged = 1; - (*argv)++; - (*argc)--; - handled++; - } else if (!prefixcmp(cmd, "--perf-dir=")) { - setenv(PERF_DIR_ENVIRONMENT, cmd + 10, 1); - if (envchanged) - *envchanged = 1; - } else if (!strcmp(cmd, "--work-tree")) { - if (*argc < 2) { - fprintf(stderr, "No directory given for --work-tree.\n" ); - usage(perf_usage_string); - } - setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); - if (envchanged) - *envchanged = 1; - (*argv)++; - (*argc)--; - } else if (!prefixcmp(cmd, "--work-tree=")) { - setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + 12, 1); - if (envchanged) - *envchanged = 1; - } else { - fprintf(stderr, "Unknown option: %s\n", cmd); - usage(perf_usage_string); - } - - (*argv)++; - (*argc)--; - handled++; - } - return handled; -} - -static int handle_alias(int *argcp, const char ***argv) -{ - int envchanged = 0, ret = 0, saved_errno = errno; - int count, option_count; - const char** new_argv; - const char *alias_command; - char *alias_string; - - alias_command = (*argv)[0]; - alias_string = alias_lookup(alias_command); - if (alias_string) { - if (alias_string[0] == '!') { - if (*argcp > 1) { - struct strbuf buf; - - strbuf_init(&buf, PATH_MAX); - strbuf_addstr(&buf, alias_string); - sq_quote_argv(&buf, (*argv) + 1, PATH_MAX); - free(alias_string); - alias_string = buf.buf; - } - ret = system(alias_string + 1); - 
if (ret >= 0 && WIFEXITED(ret) && - WEXITSTATUS(ret) != 127) - exit(WEXITSTATUS(ret)); - die("Failed to run '%s' when expanding alias '%s'", - alias_string + 1, alias_command); - } - count = split_cmdline(alias_string, &new_argv); - if (count < 0) - die("Bad alias.%s string", alias_command); - option_count = handle_options(&new_argv, &count, &envchanged); - if (envchanged) - die("alias '%s' changes environment variables\n" - "You can use '!perf' in the alias to do this.", - alias_command); - memmove(new_argv - option_count, new_argv, - count * sizeof(char *)); - new_argv -= option_count; - - if (count < 1) - die("empty alias for %s", alias_command); - - if (!strcmp(alias_command, new_argv[0])) - die("recursive alias: %s", alias_command); - - new_argv = realloc(new_argv, sizeof(char*) * - (count + *argcp + 1)); - /* insert after command name */ - memcpy(new_argv + count, *argv + 1, sizeof(char*) * *argcp); - new_argv[count+*argcp] = NULL; - - *argv = new_argv; - *argcp += count - 1; - - ret = 1; - } - - errno = saved_errno; - - return ret; -} - -const char perf_version_string[] = PERF_VERSION; - -#define RUN_SETUP (1<<0) -#define USE_PAGER (1<<1) -/* - * require working tree to be present -- anything uses this needs - * RUN_SETUP for reading from the configuration file. 
- */ -#define NEED_WORK_TREE (1<<2) - -struct cmd_struct { - const char *cmd; - int (*fn)(int, const char **, const char *); - int option; -}; - -static int run_builtin(struct cmd_struct *p, int argc, const char **argv) -{ - int status; - struct stat st; - const char *prefix; - - prefix = NULL; - if (p->option & RUN_SETUP) - prefix = NULL; /* setup_perf_directory(); */ - - if (use_pager == -1 && p->option & RUN_SETUP) - use_pager = check_pager_config(p->cmd); - if (use_pager == -1 && p->option & USE_PAGER) - use_pager = 1; - commit_pager_choice(); - - if (p->option & NEED_WORK_TREE) - /* setup_work_tree() */; - - status = p->fn(argc, argv, prefix); - if (status) - return status & 0xff; - - /* Somebody closed stdout? */ - if (fstat(fileno(stdout), &st)) - return 0; - /* Ignore write errors for pipes and sockets.. */ - if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) - return 0; - - /* Check for ENOSPC and EIO errors.. */ - if (fflush(stdout)) - die("write failure on standard output: %s", strerror(errno)); - if (ferror(stdout)) - die("unknown write failure on standard output"); - if (fclose(stdout)) - die("close failed on standard output: %s", strerror(errno)); - return 0; -} - -static void handle_internal_command(int argc, const char **argv) -{ - const char *cmd = argv[0]; - static struct cmd_struct commands[] = { - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "stat", cmd_stat, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - }; - int i; - static const char ext[] = STRIP_EXTENSION; - - if (sizeof(ext) > 1) { - i = strlen(argv[0]) - strlen(ext); - if (i > 0 && !strcmp(argv[0] + i, ext)) { - char *argv0 = strdup(argv[0]); - argv[0] = cmd = argv0; - argv0[i] = '\0'; - } - } - - /* Turn "perf cmd --help" into "perf help cmd" */ - if (argc > 1 && !strcmp(argv[1], "--help")) { - argv[1] = argv[0]; - argv[0] = cmd = "help"; - } - - for (i = 0; i < 
ARRAY_SIZE(commands); i++) { - struct cmd_struct *p = commands+i; - if (strcmp(p->cmd, cmd)) - continue; - exit(run_builtin(p, argc, argv)); - } -} - -static void execv_dashed_external(const char **argv) -{ - struct strbuf cmd = STRBUF_INIT; - const char *tmp; - int status; - - strbuf_addf(&cmd, "perf-%s", argv[0]); - - /* - * argv[0] must be the perf command, but the argv array - * belongs to the caller, and may be reused in - * subsequent loop iterations. Save argv[0] and - * restore it on error. - */ - tmp = argv[0]; - argv[0] = cmd.buf; - - /* - * if we fail because the command is not found, it is - * OK to return. Otherwise, we just pass along the status code. - */ - status = run_command_v_opt(argv, 0); - if (status != -ERR_RUN_COMMAND_EXEC) { - if (IS_RUN_COMMAND_ERR(status)) - die("unable to run '%s'", argv[0]); - exit(-status); - } - errno = ENOENT; /* as if we called execvp */ - - argv[0] = tmp; - - strbuf_release(&cmd); -} - -static int run_argv(int *argcp, const char ***argv) -{ - int done_alias = 0; - - while (1) { - /* See if it's an internal command */ - handle_internal_command(*argcp, *argv); - - /* .. then try the external ones */ - execv_dashed_external(*argv); - - /* It could be an alias -- this works around the insanity - * of overriding "perf log" with "perf show" by having - * alias.log = show - */ - if (done_alias || !handle_alias(argcp, argv)) - break; - done_alias = 1; - } - - return done_alias; -} - - -int main(int argc, const char **argv) -{ - const char *cmd; - - cmd = perf_extract_argv0_path(argv[0]); - if (!cmd) - cmd = "perf-help"; - - /* - * "perf-xxxx" is the same as "perf xxxx", but we obviously: - * - * - cannot take flags in between the "perf" and the "xxxx". - * - cannot execute it externally (since it would just do - * the same thing over again) - * - * So we just directly call the internal command handler, and - * die if that one cannot handle it. 
- */ - if (!prefixcmp(cmd, "perf-")) { - cmd += 5; - argv[0] = cmd; - handle_internal_command(argc, argv); - die("cannot handle %s internally", cmd); - } - - /* Look for flags.. */ - argv++; - argc--; - handle_options(&argv, &argc, NULL); - commit_pager_choice(); - if (argc > 0) { - if (!prefixcmp(argv[0], "--")) - argv[0] += 2; - } else { - /* The user didn't specify a command; give them help */ - printf("\n usage: %s\n\n", perf_usage_string); - list_common_cmds_help(); - printf("\n %s\n\n", perf_more_info_string); - exit(1); - } - cmd = argv[0]; - - /* - * We use PATH to find perf commands, but we prepend some higher - * precidence paths: the "--exec-path" option, the PERF_EXEC_PATH - * environment, and the $(perfexecdir) from the Makefile at build - * time. - */ - setup_path(); - - while (1) { - static int done_help = 0; - static int was_alias = 0; - - was_alias = run_argv(&argc, &argv); - if (errno != ENOENT) - break; - - if (was_alias) { - fprintf(stderr, "Expansion of alias '%s' failed; " - "'%s' is not a perf-command\n", - cmd, argv[0]); - exit(1); - } - if (!done_help) { - cmd = argv[0] = help_unknown_cmd(cmd); - done_help = 1; - } else - break; - } - - fprintf(stderr, "Failed to run command '%s': %s\n", - cmd, strerror(errno)); - - return 1; -} diff --git a/trunk/tools/perf/perf.h b/trunk/tools/perf/perf.h deleted file mode 100644 index af0a5046d743..000000000000 --- a/trunk/tools/perf/perf.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef _PERF_PERF_H -#define _PERF_PERF_H - -#if defined(__x86_64__) || defined(__i386__) -#include "../../arch/x86/include/asm/unistd.h" -#define rmb() asm volatile("lfence" ::: "memory") -#define cpu_relax() asm volatile("rep; nop" ::: "memory"); -#endif - -#ifdef __powerpc__ -#include "../../arch/powerpc/include/asm/unistd.h" -#define rmb() asm volatile ("sync" ::: "memory") -#define cpu_relax() asm volatile ("" ::: "memory"); -#endif - -#include -#include -#include -#include - -#include "../../include/linux/perf_counter.h" - -/* - 
* prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all - * counters in the current task. - */ -#define PR_TASK_PERF_COUNTERS_DISABLE 31 -#define PR_TASK_PERF_COUNTERS_ENABLE 32 - -#ifndef NSEC_PER_SEC -# define NSEC_PER_SEC 1000000000ULL -#endif - -static inline unsigned long long rdclock(void) -{ - struct timespec ts; - - clock_gettime(CLOCK_MONOTONIC, &ts); - return ts.tv_sec * 1000000000ULL + ts.tv_nsec; -} - -/* - * Pick up some kernel type conventions: - */ -#define __user -#define asmlinkage - -#define unlikely(x) __builtin_expect(!!(x), 0) -#define min(x, y) ({ \ - typeof(x) _min1 = (x); \ - typeof(y) _min2 = (y); \ - (void) (&_min1 == &_min2); \ - _min1 < _min2 ? _min1 : _min2; }) - -static inline int -sys_perf_counter_open(struct perf_counter_attr *attr_uptr, - pid_t pid, int cpu, int group_fd, - unsigned long flags) -{ - return syscall(__NR_perf_counter_open, attr_uptr, pid, cpu, - group_fd, flags); -} - -#define MAX_COUNTERS 256 -#define MAX_NR_CPUS 256 - -#endif diff --git a/trunk/tools/perf/util/PERF-VERSION-GEN b/trunk/tools/perf/util/PERF-VERSION-GEN deleted file mode 100755 index c561d1538c03..000000000000 --- a/trunk/tools/perf/util/PERF-VERSION-GEN +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/sh - -GVF=PERF-VERSION-FILE -DEF_VER=v0.0.1.PERF - -LF=' -' - -# First see if there is a version file (included in release tarballs), -# then try git-describe, then default. 
-if test -f version -then - VN=$(cat version) || VN="$DEF_VER" -elif test -d .git -o -f .git && - VN=$(git describe --abbrev=4 HEAD 2>/dev/null) && - case "$VN" in - *$LF*) (exit 1) ;; - v[0-9]*) - git update-index -q --refresh - test -z "$(git diff-index --name-only HEAD --)" || - VN="$VN-dirty" ;; - esac -then - VN=$(echo "$VN" | sed -e 's/-/./g'); -else - VN="$DEF_VER" -fi - -VN=$(expr "$VN" : v*'\(.*\)') - -if test -r $GVF -then - VC=$(sed -e 's/^PERF_VERSION = //' <$GVF) -else - VC=unset -fi -test "$VN" = "$VC" || { - echo >&2 "PERF_VERSION = $VN" - echo "PERF_VERSION = $VN" >$GVF -} - - diff --git a/trunk/tools/perf/util/abspath.c b/trunk/tools/perf/util/abspath.c deleted file mode 100644 index 61d33b81fc97..000000000000 --- a/trunk/tools/perf/util/abspath.c +++ /dev/null @@ -1,117 +0,0 @@ -#include "cache.h" - -/* - * Do not use this for inspecting *tracked* content. When path is a - * symlink to a directory, we do not want to say it is a directory when - * dealing with tracked content in the working tree. - */ -static int is_directory(const char *path) -{ - struct stat st; - return (!stat(path, &st) && S_ISDIR(st.st_mode)); -} - -/* We allow "recursive" symbolic links. Only within reason, though. 
*/ -#define MAXDEPTH 5 - -const char *make_absolute_path(const char *path) -{ - static char bufs[2][PATH_MAX + 1], *buf = bufs[0], *next_buf = bufs[1]; - char cwd[1024] = ""; - int buf_index = 1, len; - - int depth = MAXDEPTH; - char *last_elem = NULL; - struct stat st; - - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die ("Too long path: %.*s", 60, path); - - while (depth--) { - if (!is_directory(buf)) { - char *last_slash = strrchr(buf, '/'); - if (last_slash) { - *last_slash = '\0'; - last_elem = xstrdup(last_slash + 1); - } else { - last_elem = xstrdup(buf); - *buf = '\0'; - } - } - - if (*buf) { - if (!*cwd && !getcwd(cwd, sizeof(cwd))) - die ("Could not get current working directory"); - - if (chdir(buf)) - die ("Could not switch to '%s'", buf); - } - if (!getcwd(buf, PATH_MAX)) - die ("Could not get current working directory"); - - if (last_elem) { - int len = strlen(buf); - if (len + strlen(last_elem) + 2 > PATH_MAX) - die ("Too long path name: '%s/%s'", - buf, last_elem); - buf[len] = '/'; - strcpy(buf + len + 1, last_elem); - free(last_elem); - last_elem = NULL; - } - - if (!lstat(buf, &st) && S_ISLNK(st.st_mode)) { - len = readlink(buf, next_buf, PATH_MAX); - if (len < 0) - die ("Invalid symlink: %s", buf); - if (PATH_MAX <= len) - die("symbolic link too long: %s", buf); - next_buf[len] = '\0'; - buf = next_buf; - buf_index = 1 - buf_index; - next_buf = bufs[buf_index]; - } else - break; - } - - if (*cwd && chdir(cwd)) - die ("Could not change back to '%s'", cwd); - - return buf; -} - -static const char *get_pwd_cwd(void) -{ - static char cwd[PATH_MAX + 1]; - char *pwd; - struct stat cwd_stat, pwd_stat; - if (getcwd(cwd, PATH_MAX) == NULL) - return NULL; - pwd = getenv("PWD"); - if (pwd && strcmp(pwd, cwd)) { - stat(cwd, &cwd_stat); - if (!stat(pwd, &pwd_stat) && - pwd_stat.st_dev == cwd_stat.st_dev && - pwd_stat.st_ino == cwd_stat.st_ino) { - strlcpy(cwd, pwd, PATH_MAX); - } - } - return cwd; -} - -const char *make_nonrelative_path(const char *path) 
-{ - static char buf[PATH_MAX + 1]; - - if (is_absolute_path(path)) { - if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } else { - const char *cwd = get_pwd_cwd(); - if (!cwd) - die("Cannot determine the current working directory"); - if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX) - die("Too long path: %.*s", 60, path); - } - return buf; -} diff --git a/trunk/tools/perf/util/alias.c b/trunk/tools/perf/util/alias.c deleted file mode 100644 index 9b3dd2b428df..000000000000 --- a/trunk/tools/perf/util/alias.c +++ /dev/null @@ -1,77 +0,0 @@ -#include "cache.h" - -static const char *alias_key; -static char *alias_val; - -static int alias_lookup_cb(const char *k, const char *v, void *cb) -{ - if (!prefixcmp(k, "alias.") && !strcmp(k+6, alias_key)) { - if (!v) - return config_error_nonbool(k); - alias_val = strdup(v); - return 0; - } - return 0; -} - -char *alias_lookup(const char *alias) -{ - alias_key = alias; - alias_val = NULL; - perf_config(alias_lookup_cb, NULL); - return alias_val; -} - -int split_cmdline(char *cmdline, const char ***argv) -{ - int src, dst, count = 0, size = 16; - char quoted = 0; - - *argv = malloc(sizeof(char*) * size); - - /* split alias_string */ - (*argv)[count++] = cmdline; - for (src = dst = 0; cmdline[src];) { - char c = cmdline[src]; - if (!quoted && isspace(c)) { - cmdline[dst++] = 0; - while (cmdline[++src] - && isspace(cmdline[src])) - ; /* skip */ - if (count >= size) { - size += 16; - *argv = realloc(*argv, sizeof(char*) * size); - } - (*argv)[count++] = cmdline + dst; - } else if (!quoted && (c == '\'' || c == '"')) { - quoted = c; - src++; - } else if (c == quoted) { - quoted = 0; - src++; - } else { - if (c == '\\' && quoted != '\'') { - src++; - c = cmdline[src]; - if (!c) { - free(*argv); - *argv = NULL; - return error("cmdline ends with \\"); - } - } - cmdline[dst++] = c; - src++; - } - } - - cmdline[dst] = 0; - - if (quoted) { - free(*argv); - *argv = NULL; - return 
error("unclosed quote"); - } - - return count; -} - diff --git a/trunk/tools/perf/util/cache.h b/trunk/tools/perf/util/cache.h deleted file mode 100644 index 393d6146d13b..000000000000 --- a/trunk/tools/perf/util/cache.h +++ /dev/null @@ -1,119 +0,0 @@ -#ifndef CACHE_H -#define CACHE_H - -#include "util.h" -#include "strbuf.h" - -#define PERF_DIR_ENVIRONMENT "PERF_DIR" -#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE" -#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" -#define DB_ENVIRONMENT "PERF_OBJECT_DIRECTORY" -#define INDEX_ENVIRONMENT "PERF_INDEX_FILE" -#define GRAFT_ENVIRONMENT "PERF_GRAFT_FILE" -#define TEMPLATE_DIR_ENVIRONMENT "PERF_TEMPLATE_DIR" -#define CONFIG_ENVIRONMENT "PERF_CONFIG" -#define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" -#define CEILING_DIRECTORIES_ENVIRONMENT "PERF_CEILING_DIRECTORIES" -#define PERFATTRIBUTES_FILE ".perfattributes" -#define INFOATTRIBUTES_FILE "info/attributes" -#define ATTRIBUTE_MACRO_PREFIX "[attr]" - -typedef int (*config_fn_t)(const char *, const char *, void *); -extern int perf_default_config(const char *, const char *, void *); -extern int perf_config_from_file(config_fn_t fn, const char *, void *); -extern int perf_config(config_fn_t fn, void *); -extern int perf_parse_ulong(const char *, unsigned long *); -extern int perf_config_int(const char *, const char *); -extern unsigned long perf_config_ulong(const char *, const char *); -extern int perf_config_bool_or_int(const char *, const char *, int *); -extern int perf_config_bool(const char *, const char *); -extern int perf_config_string(const char **, const char *, const char *); -extern int perf_config_set(const char *, const char *); -extern int perf_config_set_multivar(const char *, const char *, const char *, int); -extern int perf_config_rename_section(const char *, const char *); -extern const char *perf_etc_perfconfig(void); -extern int check_repository_format_version(const char *var, const char *value, void *cb); -extern int perf_config_system(void); -extern 
int perf_config_global(void); -extern int config_error_nonbool(const char *); -extern const char *config_exclusive_filename; - -#define MAX_PERFNAME (1000) -extern char perf_default_email[MAX_PERFNAME]; -extern char perf_default_name[MAX_PERFNAME]; -extern int user_ident_explicitly_given; - -extern const char *perf_log_output_encoding; -extern const char *perf_mailmap_file; - -/* IO helper functions */ -extern void maybe_flush_or_die(FILE *, const char *); -extern int copy_fd(int ifd, int ofd); -extern int copy_file(const char *dst, const char *src, int mode); -extern ssize_t read_in_full(int fd, void *buf, size_t count); -extern ssize_t write_in_full(int fd, const void *buf, size_t count); -extern void write_or_die(int fd, const void *buf, size_t count); -extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg); -extern int write_or_whine_pipe(int fd, const void *buf, size_t count, const char *msg); -extern void fsync_or_die(int fd, const char *); - -/* pager.c */ -extern void setup_pager(void); -extern const char *pager_program; -extern int pager_in_use(void); -extern int pager_use_color; - -extern const char *editor_program; -extern const char *excludes_file; - -char *alias_lookup(const char *alias); -int split_cmdline(char *cmdline, const char ***argv); - -#define alloc_nr(x) (((x)+16)*3/2) - -/* - * Realloc the buffer pointed at by variable 'x' so that it can hold - * at least 'nr' entries; the number of entries currently allocated - * is 'alloc', using the standard growing factor alloc_nr() macro. - * - * DO NOT USE any expression with side-effect for 'x' or 'alloc'. 
- */ -#define ALLOC_GROW(x, nr, alloc) \ - do { \ - if ((nr) > alloc) { \ - if (alloc_nr(alloc) < (nr)) \ - alloc = (nr); \ - else \ - alloc = alloc_nr(alloc); \ - x = xrealloc((x), alloc * sizeof(*(x))); \ - } \ - } while(0) - - -static inline int is_absolute_path(const char *path) -{ - return path[0] == '/'; -} - -const char *make_absolute_path(const char *path); -const char *make_nonrelative_path(const char *path); -const char *make_relative_path(const char *abs, const char *base); -int normalize_path_copy(char *dst, const char *src); -int longest_ancestor_length(const char *path, const char *prefix_list); -char *strip_path_suffix(const char *path, const char *suffix); - -extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2))); -/* perf_mkstemp() - create tmp file honoring TMPDIR variable */ -extern int perf_mkstemp(char *path, size_t len, const char *template); - -extern char *mksnpath(char *buf, size_t n, const char *fmt, ...) - __attribute__((format (printf, 3, 4))); -extern char *perf_snpath(char *buf, size_t n, const char *fmt, ...) - __attribute__((format (printf, 3, 4))); -extern char *perf_pathdup(const char *fmt, ...) 
- __attribute__((format (printf, 1, 2))); - -extern size_t strlcpy(char *dest, const char *src, size_t size); - -#endif /* CACHE_H */ diff --git a/trunk/tools/perf/util/color.c b/trunk/tools/perf/util/color.c deleted file mode 100644 index 9a8c20ccc53e..000000000000 --- a/trunk/tools/perf/util/color.c +++ /dev/null @@ -1,241 +0,0 @@ -#include "cache.h" -#include "color.h" - -int perf_use_color_default = -1; - -static int parse_color(const char *name, int len) -{ - static const char * const color_names[] = { - "normal", "black", "red", "green", "yellow", - "blue", "magenta", "cyan", "white" - }; - char *end; - int i; - for (i = 0; i < ARRAY_SIZE(color_names); i++) { - const char *str = color_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return i - 1; - } - i = strtol(name, &end, 10); - if (end - name == len && i >= -1 && i <= 255) - return i; - return -2; -} - -static int parse_attr(const char *name, int len) -{ - static const int attr_values[] = { 1, 2, 4, 5, 7 }; - static const char * const attr_names[] = { - "bold", "dim", "ul", "blink", "reverse" - }; - int i; - for (i = 0; i < ARRAY_SIZE(attr_names); i++) { - const char *str = attr_names[i]; - if (!strncasecmp(name, str, len) && !str[len]) - return attr_values[i]; - } - return -1; -} - -void color_parse(const char *value, const char *var, char *dst) -{ - color_parse_mem(value, strlen(value), var, dst); -} - -void color_parse_mem(const char *value, int value_len, const char *var, - char *dst) -{ - const char *ptr = value; - int len = value_len; - int attr = -1; - int fg = -2; - int bg = -2; - - if (!strncasecmp(value, "reset", len)) { - strcpy(dst, PERF_COLOR_RESET); - return; - } - - /* [fg [bg]] [attr] */ - while (len > 0) { - const char *word = ptr; - int val, wordlen = 0; - - while (len > 0 && !isspace(word[wordlen])) { - wordlen++; - len--; - } - - ptr = word + wordlen; - while (len > 0 && isspace(*ptr)) { - ptr++; - len--; - } - - val = parse_color(word, wordlen); - if (val >= -1) { - if (fg 
== -2) { - fg = val; - continue; - } - if (bg == -2) { - bg = val; - continue; - } - goto bad; - } - val = parse_attr(word, wordlen); - if (val < 0 || attr != -1) - goto bad; - attr = val; - } - - if (attr >= 0 || fg >= 0 || bg >= 0) { - int sep = 0; - - *dst++ = '\033'; - *dst++ = '['; - if (attr >= 0) { - *dst++ = '0' + attr; - sep++; - } - if (fg >= 0) { - if (sep++) - *dst++ = ';'; - if (fg < 8) { - *dst++ = '3'; - *dst++ = '0' + fg; - } else { - dst += sprintf(dst, "38;5;%d", fg); - } - } - if (bg >= 0) { - if (sep++) - *dst++ = ';'; - if (bg < 8) { - *dst++ = '4'; - *dst++ = '0' + bg; - } else { - dst += sprintf(dst, "48;5;%d", bg); - } - } - *dst++ = 'm'; - } - *dst = 0; - return; -bad: - die("bad color value '%.*s' for variable '%s'", value_len, value, var); -} - -int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) -{ - if (value) { - if (!strcasecmp(value, "never")) - return 0; - if (!strcasecmp(value, "always")) - return 1; - if (!strcasecmp(value, "auto")) - goto auto_color; - } - - /* Missing or explicit false to turn off colorization */ - if (!perf_config_bool(var, value)) - return 0; - - /* any normal truth value defaults to 'auto' */ - auto_color: - if (stdout_is_tty < 0) - stdout_is_tty = isatty(1); - if (stdout_is_tty || (pager_in_use() && pager_use_color)) { - char *term = getenv("TERM"); - if (term && strcmp(term, "dumb")) - return 1; - } - return 0; -} - -int perf_color_default_config(const char *var, const char *value, void *cb) -{ - if (!strcmp(var, "color.ui")) { - perf_use_color_default = perf_config_colorbool(var, value, -1); - return 0; - } - - return perf_default_config(var, value, cb); -} - -static int color_vfprintf(FILE *fp, const char *color, const char *fmt, - va_list args, const char *trail) -{ - int r = 0; - - /* - * Auto-detect: - */ - if (perf_use_color_default < 0) { - if (isatty(1) || pager_in_use()) - perf_use_color_default = 1; - else - perf_use_color_default = 0; - } - - if 
(perf_use_color_default && *color) - r += fprintf(fp, "%s", color); - r += vfprintf(fp, fmt, args); - if (perf_use_color_default && *color) - r += fprintf(fp, "%s", PERF_COLOR_RESET); - if (trail) - r += fprintf(fp, "%s", trail); - return r; -} - - - -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) -{ - va_list args; - int r; - - va_start(args, fmt); - r = color_vfprintf(fp, color, fmt, args, NULL); - va_end(args); - return r; -} - -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...) -{ - va_list args; - int r; - va_start(args, fmt); - r = color_vfprintf(fp, color, fmt, args, "\n"); - va_end(args); - return r; -} - -/* - * This function splits the buffer by newlines and colors the lines individually. - * - * Returns 0 on success. - */ -int color_fwrite_lines(FILE *fp, const char *color, - size_t count, const char *buf) -{ - if (!*color) - return fwrite(buf, count, 1, fp) != 1; - while (count) { - char *p = memchr(buf, '\n', count); - if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? 
p - buf : count, 1, fp) != 1 || - fputs(PERF_COLOR_RESET, fp) < 0)) - return -1; - if (!p) - return 0; - if (fputc('\n', fp) < 0) - return -1; - count -= p + 1 - buf; - buf = p + 1; - } - return 0; -} - - diff --git a/trunk/tools/perf/util/color.h b/trunk/tools/perf/util/color.h deleted file mode 100644 index 5abfd379582b..000000000000 --- a/trunk/tools/perf/util/color.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef COLOR_H -#define COLOR_H - -/* "\033[1;38;5;2xx;48;5;2xxm\0" is 23 bytes */ -#define COLOR_MAXLEN 24 - -#define PERF_COLOR_NORMAL "" -#define PERF_COLOR_RESET "\033[m" -#define PERF_COLOR_BOLD "\033[1m" -#define PERF_COLOR_RED "\033[31m" -#define PERF_COLOR_GREEN "\033[32m" -#define PERF_COLOR_YELLOW "\033[33m" -#define PERF_COLOR_BLUE "\033[34m" -#define PERF_COLOR_MAGENTA "\033[35m" -#define PERF_COLOR_CYAN "\033[36m" -#define PERF_COLOR_BG_RED "\033[41m" - -/* - * This variable stores the value of color.ui - */ -extern int perf_use_color_default; - - -/* - * Use this instead of perf_default_config if you need the value of color.ui. 
- */ -int perf_color_default_config(const char *var, const char *value, void *cb); - -int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty); -void color_parse(const char *value, const char *var, char *dst); -void color_parse_mem(const char *value, int len, const char *var, char *dst); -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); -int color_fprintf_ln(FILE *fp, const char *color, const char *fmt, ...); -int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); - -#endif /* COLOR_H */ diff --git a/trunk/tools/perf/util/config.c b/trunk/tools/perf/util/config.c deleted file mode 100644 index 3dd13faa6a27..000000000000 --- a/trunk/tools/perf/util/config.c +++ /dev/null @@ -1,873 +0,0 @@ -/* - * GIT - The information manager from hell - * - * Copyright (C) Linus Torvalds, 2005 - * Copyright (C) Johannes Schindelin, 2005 - * - */ -#include "util.h" -#include "cache.h" -#include "exec_cmd.h" - -#define MAXNAME (256) - -static FILE *config_file; -static const char *config_file_name; -static int config_linenr; -static int config_file_eof; - -const char *config_exclusive_filename = NULL; - -static int get_next_char(void) -{ - int c; - FILE *f; - - c = '\n'; - if ((f = config_file) != NULL) { - c = fgetc(f); - if (c == '\r') { - /* DOS like systems */ - c = fgetc(f); - if (c != '\n') { - ungetc(c, f); - c = '\r'; - } - } - if (c == '\n') - config_linenr++; - if (c == EOF) { - config_file_eof = 1; - c = '\n'; - } - } - return c; -} - -static char *parse_value(void) -{ - static char value[1024]; - int quote = 0, comment = 0, len = 0, space = 0; - - for (;;) { - int c = get_next_char(); - if (len >= sizeof(value) - 1) - return NULL; - if (c == '\n') { - if (quote) - return NULL; - value[len] = 0; - return value; - } - if (comment) - continue; - if (isspace(c) && !quote) { - space = 1; - continue; - } - if (!quote) { - if (c == ';' || c == '#') { - comment = 1; - continue; - } - } - if (space) { - if 
(len) - value[len++] = ' '; - space = 0; - } - if (c == '\\') { - c = get_next_char(); - switch (c) { - case '\n': - continue; - case 't': - c = '\t'; - break; - case 'b': - c = '\b'; - break; - case 'n': - c = '\n'; - break; - /* Some characters escape as themselves */ - case '\\': case '"': - break; - /* Reject unknown escape sequences */ - default: - return NULL; - } - value[len++] = c; - continue; - } - if (c == '"') { - quote = 1-quote; - continue; - } - value[len++] = c; - } -} - -static inline int iskeychar(int c) -{ - return isalnum(c) || c == '-'; -} - -static int get_value(config_fn_t fn, void *data, char *name, unsigned int len) -{ - int c; - char *value; - - /* Get the full name */ - for (;;) { - c = get_next_char(); - if (config_file_eof) - break; - if (!iskeychar(c)) - break; - name[len++] = tolower(c); - if (len >= MAXNAME) - return -1; - } - name[len] = 0; - while (c == ' ' || c == '\t') - c = get_next_char(); - - value = NULL; - if (c != '\n') { - if (c != '=') - return -1; - value = parse_value(); - if (!value) - return -1; - } - return fn(name, value, data); -} - -static int get_extended_base_var(char *name, int baselen, int c) -{ - do { - if (c == '\n') - return -1; - c = get_next_char(); - } while (isspace(c)); - - /* We require the format to be '[base "extension"]' */ - if (c != '"') - return -1; - name[baselen++] = '.'; - - for (;;) { - int c = get_next_char(); - if (c == '\n') - return -1; - if (c == '"') - break; - if (c == '\\') { - c = get_next_char(); - if (c == '\n') - return -1; - } - name[baselen++] = c; - if (baselen > MAXNAME / 2) - return -1; - } - - /* Final ']' */ - if (get_next_char() != ']') - return -1; - return baselen; -} - -static int get_base_var(char *name) -{ - int baselen = 0; - - for (;;) { - int c = get_next_char(); - if (config_file_eof) - return -1; - if (c == ']') - return baselen; - if (isspace(c)) - return get_extended_base_var(name, baselen, c); - if (!iskeychar(c) && c != '.') - return -1; - if (baselen > 
MAXNAME / 2) - return -1; - name[baselen++] = tolower(c); - } -} - -static int perf_parse_file(config_fn_t fn, void *data) -{ - int comment = 0; - int baselen = 0; - static char var[MAXNAME]; - - /* U+FEFF Byte Order Mark in UTF8 */ - static const unsigned char *utf8_bom = (unsigned char *) "\xef\xbb\xbf"; - const unsigned char *bomptr = utf8_bom; - - for (;;) { - int c = get_next_char(); - if (bomptr && *bomptr) { - /* We are at the file beginning; skip UTF8-encoded BOM - * if present. Sane editors won't put this in on their - * own, but e.g. Windows Notepad will do it happily. */ - if ((unsigned char) c == *bomptr) { - bomptr++; - continue; - } else { - /* Do not tolerate partial BOM. */ - if (bomptr != utf8_bom) - break; - /* No BOM at file beginning. Cool. */ - bomptr = NULL; - } - } - if (c == '\n') { - if (config_file_eof) - return 0; - comment = 0; - continue; - } - if (comment || isspace(c)) - continue; - if (c == '#' || c == ';') { - comment = 1; - continue; - } - if (c == '[') { - baselen = get_base_var(var); - if (baselen <= 0) - break; - var[baselen++] = '.'; - var[baselen] = 0; - continue; - } - if (!isalpha(c)) - break; - var[baselen] = tolower(c); - if (get_value(fn, data, var, baselen+1) < 0) - break; - } - die("bad config file line %d in %s", config_linenr, config_file_name); -} - -static int parse_unit_factor(const char *end, unsigned long *val) -{ - if (!*end) - return 1; - else if (!strcasecmp(end, "k")) { - *val *= 1024; - return 1; - } - else if (!strcasecmp(end, "m")) { - *val *= 1024 * 1024; - return 1; - } - else if (!strcasecmp(end, "g")) { - *val *= 1024 * 1024 * 1024; - return 1; - } - return 0; -} - -static int perf_parse_long(const char *value, long *ret) -{ - if (value && *value) { - char *end; - long val = strtol(value, &end, 0); - unsigned long factor = 1; - if (!parse_unit_factor(end, &factor)) - return 0; - *ret = val * factor; - return 1; - } - return 0; -} - -int perf_parse_ulong(const char *value, unsigned long *ret) -{ - if 
(value && *value) { - char *end; - unsigned long val = strtoul(value, &end, 0); - if (!parse_unit_factor(end, &val)) - return 0; - *ret = val; - return 1; - } - return 0; -} - -static void die_bad_config(const char *name) -{ - if (config_file_name) - die("bad config value for '%s' in %s", name, config_file_name); - die("bad config value for '%s'", name); -} - -int perf_config_int(const char *name, const char *value) -{ - long ret = 0; - if (!perf_parse_long(value, &ret)) - die_bad_config(name); - return ret; -} - -unsigned long perf_config_ulong(const char *name, const char *value) -{ - unsigned long ret; - if (!perf_parse_ulong(value, &ret)) - die_bad_config(name); - return ret; -} - -int perf_config_bool_or_int(const char *name, const char *value, int *is_bool) -{ - *is_bool = 1; - if (!value) - return 1; - if (!*value) - return 0; - if (!strcasecmp(value, "true") || !strcasecmp(value, "yes") || !strcasecmp(value, "on")) - return 1; - if (!strcasecmp(value, "false") || !strcasecmp(value, "no") || !strcasecmp(value, "off")) - return 0; - *is_bool = 0; - return perf_config_int(name, value); -} - -int perf_config_bool(const char *name, const char *value) -{ - int discard; - return !!perf_config_bool_or_int(name, value, &discard); -} - -int perf_config_string(const char **dest, const char *var, const char *value) -{ - if (!value) - return config_error_nonbool(var); - *dest = strdup(value); - return 0; -} - -static int perf_default_core_config(const char *var, const char *value) -{ - /* Add other config variables here and to Documentation/config.txt. */ - return 0; -} - -int perf_default_config(const char *var, const char *value, void *dummy) -{ - if (!prefixcmp(var, "core.")) - return perf_default_core_config(var, value); - - /* Add other config variables here and to Documentation/config.txt. 
*/ - return 0; -} - -int perf_config_from_file(config_fn_t fn, const char *filename, void *data) -{ - int ret; - FILE *f = fopen(filename, "r"); - - ret = -1; - if (f) { - config_file = f; - config_file_name = filename; - config_linenr = 1; - config_file_eof = 0; - ret = perf_parse_file(fn, data); - fclose(f); - config_file_name = NULL; - } - return ret; -} - -const char *perf_etc_perfconfig(void) -{ - static const char *system_wide; - if (!system_wide) - system_wide = system_path(ETC_PERFCONFIG); - return system_wide; -} - -static int perf_env_bool(const char *k, int def) -{ - const char *v = getenv(k); - return v ? perf_config_bool(k, v) : def; -} - -int perf_config_system(void) -{ - return !perf_env_bool("PERF_CONFIG_NOSYSTEM", 0); -} - -int perf_config_global(void) -{ - return !perf_env_bool("PERF_CONFIG_NOGLOBAL", 0); -} - -int perf_config(config_fn_t fn, void *data) -{ - int ret = 0, found = 0; - char *repo_config = NULL; - const char *home = NULL; - - /* Setting $PERF_CONFIG makes perf read _only_ the given config file. */ - if (config_exclusive_filename) - return perf_config_from_file(fn, config_exclusive_filename, data); - if (perf_config_system() && !access(perf_etc_perfconfig(), R_OK)) { - ret += perf_config_from_file(fn, perf_etc_perfconfig(), - data); - found += 1; - } - - home = getenv("HOME"); - if (perf_config_global() && home) { - char *user_config = strdup(mkpath("%s/.perfconfig", home)); - if (!access(user_config, R_OK)) { - ret += perf_config_from_file(fn, user_config, data); - found += 1; - } - free(user_config); - } - - repo_config = perf_pathdup("config"); - if (!access(repo_config, R_OK)) { - ret += perf_config_from_file(fn, repo_config, data); - found += 1; - } - free(repo_config); - if (found == 0) - return -1; - return ret; -} - -/* - * Find all the stuff for perf_config_set() below. 
- */ - -#define MAX_MATCHES 512 - -static struct { - int baselen; - char* key; - int do_not_match; - regex_t* value_regex; - int multi_replace; - size_t offset[MAX_MATCHES]; - enum { START, SECTION_SEEN, SECTION_END_SEEN, KEY_SEEN } state; - int seen; -} store; - -static int matches(const char* key, const char* value) -{ - return !strcmp(key, store.key) && - (store.value_regex == NULL || - (store.do_not_match ^ - !regexec(store.value_regex, value, 0, NULL, 0))); -} - -static int store_aux(const char* key, const char* value, void *cb) -{ - const char *ep; - size_t section_len; - - switch (store.state) { - case KEY_SEEN: - if (matches(key, value)) { - if (store.seen == 1 && store.multi_replace == 0) { - warning("%s has multiple values", key); - } else if (store.seen >= MAX_MATCHES) { - error("too many matches for %s", key); - return 1; - } - - store.offset[store.seen] = ftell(config_file); - store.seen++; - } - break; - case SECTION_SEEN: - /* - * What we are looking for is in store.key (both - * section and var), and its section part is baselen - * long. We found key (again, both section and var). - * We would want to know if this key is in the same - * section as what we are looking for. We already - * know we are in the same section as what should - * hold store.key. - */ - ep = strrchr(key, '.'); - section_len = ep - key; - - if ((section_len != store.baselen) || - memcmp(key, store.key, section_len+1)) { - store.state = SECTION_END_SEEN; - break; - } - - /* - * Do not increment matches: this is no match, but we - * just made sure we are in the desired section. 
- */ - store.offset[store.seen] = ftell(config_file); - /* fallthru */ - case SECTION_END_SEEN: - case START: - if (matches(key, value)) { - store.offset[store.seen] = ftell(config_file); - store.state = KEY_SEEN; - store.seen++; - } else { - if (strrchr(key, '.') - key == store.baselen && - !strncmp(key, store.key, store.baselen)) { - store.state = SECTION_SEEN; - store.offset[store.seen] = ftell(config_file); - } - } - } - return 0; -} - -static int store_write_section(int fd, const char* key) -{ - const char *dot; - int i, success; - struct strbuf sb = STRBUF_INIT; - - dot = memchr(key, '.', store.baselen); - if (dot) { - strbuf_addf(&sb, "[%.*s \"", (int)(dot - key), key); - for (i = dot - key + 1; i < store.baselen; i++) { - if (key[i] == '"' || key[i] == '\\') - strbuf_addch(&sb, '\\'); - strbuf_addch(&sb, key[i]); - } - strbuf_addstr(&sb, "\"]\n"); - } else { - strbuf_addf(&sb, "[%.*s]\n", store.baselen, key); - } - - success = write_in_full(fd, sb.buf, sb.len) == sb.len; - strbuf_release(&sb); - - return success; -} - -static int store_write_pair(int fd, const char* key, const char* value) -{ - int i, success; - int length = strlen(key + store.baselen + 1); - const char *quote = ""; - struct strbuf sb = STRBUF_INIT; - - /* - * Check to see if the value needs to be surrounded with a dq pair. - * Note that problematic characters are always backslash-quoted; this - * check is about not losing leading or trailing SP and strings that - * follow beginning-of-comment characters (i.e. ';' and '#') by the - * configuration parser. 
- */ - if (value[0] == ' ') - quote = "\""; - for (i = 0; value[i]; i++) - if (value[i] == ';' || value[i] == '#') - quote = "\""; - if (i && value[i - 1] == ' ') - quote = "\""; - - strbuf_addf(&sb, "\t%.*s = %s", - length, key + store.baselen + 1, quote); - - for (i = 0; value[i]; i++) - switch (value[i]) { - case '\n': - strbuf_addstr(&sb, "\\n"); - break; - case '\t': - strbuf_addstr(&sb, "\\t"); - break; - case '"': - case '\\': - strbuf_addch(&sb, '\\'); - default: - strbuf_addch(&sb, value[i]); - break; - } - strbuf_addf(&sb, "%s\n", quote); - - success = write_in_full(fd, sb.buf, sb.len) == sb.len; - strbuf_release(&sb); - - return success; -} - -static ssize_t find_beginning_of_line(const char* contents, size_t size, - size_t offset_, int* found_bracket) -{ - size_t equal_offset = size, bracket_offset = size; - ssize_t offset; - -contline: - for (offset = offset_-2; offset > 0 - && contents[offset] != '\n'; offset--) - switch (contents[offset]) { - case '=': equal_offset = offset; break; - case ']': bracket_offset = offset; break; - } - if (offset > 0 && contents[offset-1] == '\\') { - offset_ = offset; - goto contline; - } - if (bracket_offset < equal_offset) { - *found_bracket = 1; - offset = bracket_offset+1; - } else - offset++; - - return offset; -} - -int perf_config_set(const char* key, const char* value) -{ - return perf_config_set_multivar(key, value, NULL, 0); -} - -/* - * If value==NULL, unset in (remove from) config, - * if value_regex!=NULL, disregard key/value pairs where value does not match. - * if multi_replace==0, nothing, or only one matching key/value is replaced, - * else all matching key/values (regardless how many) are removed, - * before the new pair is written. - * - * Returns 0 on success. - * - * This function does this: - * - * - it locks the config file by creating ".perf/config.lock" - * - * - it then parses the config using store_aux() as validator to find - * the position on the key/value pair to replace. 
If it is to be unset, - * it must be found exactly once. - * - * - the config file is mmap()ed and the part before the match (if any) is - * written to the lock file, then the changed part and the rest. - * - * - the config file is removed and the lock file rename()d to it. - * - */ -int perf_config_set_multivar(const char* key, const char* value, - const char* value_regex, int multi_replace) -{ - int i, dot; - int fd = -1, in_fd; - int ret = 0; - char* config_filename; - const char* last_dot = strrchr(key, '.'); - - if (config_exclusive_filename) - config_filename = strdup(config_exclusive_filename); - else - config_filename = perf_pathdup("config"); - - /* - * Since "key" actually contains the section name and the real - * key name separated by a dot, we have to know where the dot is. - */ - - if (last_dot == NULL) { - error("key does not contain a section: %s", key); - ret = 2; - goto out_free; - } - store.baselen = last_dot - key; - - store.multi_replace = multi_replace; - - /* - * Validate the key and while at it, lower case it for matching. - */ - store.key = malloc(strlen(key) + 1); - dot = 0; - for (i = 0; key[i]; i++) { - unsigned char c = key[i]; - if (c == '.') - dot = 1; - /* Leave the extended basename untouched.. */ - if (!dot || i > store.baselen) { - if (!iskeychar(c) || (i == store.baselen+1 && !isalpha(c))) { - error("invalid key: %s", key); - free(store.key); - ret = 1; - goto out_free; - } - c = tolower(c); - } else if (c == '\n') { - error("invalid key (newline): %s", key); - free(store.key); - ret = 1; - goto out_free; - } - store.key[i] = c; - } - store.key[i] = 0; - - /* - * If .perf/config does not exist yet, write a minimal version. 
- */ - in_fd = open(config_filename, O_RDONLY); - if ( in_fd < 0 ) { - free(store.key); - - if ( ENOENT != errno ) { - error("opening %s: %s", config_filename, - strerror(errno)); - ret = 3; /* same as "invalid config file" */ - goto out_free; - } - /* if nothing to unset, error out */ - if (value == NULL) { - ret = 5; - goto out_free; - } - - store.key = (char*)key; - if (!store_write_section(fd, key) || - !store_write_pair(fd, key, value)) - goto write_err_out; - } else { - struct stat st; - char* contents; - size_t contents_sz, copy_begin, copy_end; - int i, new_line = 0; - - if (value_regex == NULL) - store.value_regex = NULL; - else { - if (value_regex[0] == '!') { - store.do_not_match = 1; - value_regex++; - } else - store.do_not_match = 0; - - store.value_regex = (regex_t*)malloc(sizeof(regex_t)); - if (regcomp(store.value_regex, value_regex, - REG_EXTENDED)) { - error("invalid pattern: %s", value_regex); - free(store.value_regex); - ret = 6; - goto out_free; - } - } - - store.offset[0] = 0; - store.state = START; - store.seen = 0; - - /* - * After this, store.offset will contain the *end* offset - * of the last match, or remain at 0 if no match was found. - * As a side effect, we make sure to transform only a valid - * existing config file. 
- */ - if (perf_config_from_file(store_aux, config_filename, NULL)) { - error("invalid config file %s", config_filename); - free(store.key); - if (store.value_regex != NULL) { - regfree(store.value_regex); - free(store.value_regex); - } - ret = 3; - goto out_free; - } - - free(store.key); - if (store.value_regex != NULL) { - regfree(store.value_regex); - free(store.value_regex); - } - - /* if nothing to unset, or too many matches, error out */ - if ((store.seen == 0 && value == NULL) || - (store.seen > 1 && multi_replace == 0)) { - ret = 5; - goto out_free; - } - - fstat(in_fd, &st); - contents_sz = xsize_t(st.st_size); - contents = mmap(NULL, contents_sz, PROT_READ, - MAP_PRIVATE, in_fd, 0); - close(in_fd); - - if (store.seen == 0) - store.seen = 1; - - for (i = 0, copy_begin = 0; i < store.seen; i++) { - if (store.offset[i] == 0) { - store.offset[i] = copy_end = contents_sz; - } else if (store.state != KEY_SEEN) { - copy_end = store.offset[i]; - } else - copy_end = find_beginning_of_line( - contents, contents_sz, - store.offset[i]-2, &new_line); - - if (copy_end > 0 && contents[copy_end-1] != '\n') - new_line = 1; - - /* write the first part of the config */ - if (copy_end > copy_begin) { - if (write_in_full(fd, contents + copy_begin, - copy_end - copy_begin) < - copy_end - copy_begin) - goto write_err_out; - if (new_line && - write_in_full(fd, "\n", 1) != 1) - goto write_err_out; - } - copy_begin = store.offset[i]; - } - - /* write the pair (value == NULL means unset) */ - if (value != NULL) { - if (store.state == START) { - if (!store_write_section(fd, key)) - goto write_err_out; - } - if (!store_write_pair(fd, key, value)) - goto write_err_out; - } - - /* write the rest of the config */ - if (copy_begin < contents_sz) - if (write_in_full(fd, contents + copy_begin, - contents_sz - copy_begin) < - contents_sz - copy_begin) - goto write_err_out; - - munmap(contents, contents_sz); - } - - ret = 0; - -out_free: - free(config_filename); - return ret; - 
-write_err_out: - goto out_free; - -} - -/* - * Call this to report error for your variable that should not - * get a boolean value (i.e. "[my] var" means "true"). - */ -int config_error_nonbool(const char *var) -{ - return error("Missing value for '%s'", var); -} diff --git a/trunk/tools/perf/util/ctype.c b/trunk/tools/perf/util/ctype.c deleted file mode 100644 index b90ec004f29c..000000000000 --- a/trunk/tools/perf/util/ctype.c +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Sane locale-independent, ASCII ctype. - * - * No surprises, and works with signed and unsigned chars. - */ -#include "cache.h" - -enum { - S = GIT_SPACE, - A = GIT_ALPHA, - D = GIT_DIGIT, - G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */ - R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | * */ -}; - -unsigned char sane_ctype[256] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0, /* 0.. 15 */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16.. 31 */ - S, 0, 0, 0, R, 0, 0, 0, R, R, G, R, 0, 0, R, 0, /* 32.. 47 */ - D, D, D, D, D, D, D, D, D, D, 0, 0, 0, 0, 0, G, /* 48.. 63 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */ - A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, 0, /* 80.. 95 */ - 0, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */ - A, A, A, A, A, A, A, A, A, A, A, R, R, 0, 0, 0, /* 112..127 */ - /* Nothing in the 128.. range */ -}; diff --git a/trunk/tools/perf/util/environment.c b/trunk/tools/perf/util/environment.c deleted file mode 100644 index 275b0ee345f5..000000000000 --- a/trunk/tools/perf/util/environment.c +++ /dev/null @@ -1,9 +0,0 @@ -/* - * We put all the perf config variables in this same object - * file, so that programs can link against the config parser - * without having to link against all the rest of perf. 
- */ -#include "cache.h" - -const char *pager_program; -int pager_use_color = 1; diff --git a/trunk/tools/perf/util/exec_cmd.c b/trunk/tools/perf/util/exec_cmd.c deleted file mode 100644 index d39292263153..000000000000 --- a/trunk/tools/perf/util/exec_cmd.c +++ /dev/null @@ -1,165 +0,0 @@ -#include "cache.h" -#include "exec_cmd.h" -#include "quote.h" -#define MAX_ARGS 32 - -extern char **environ; -static const char *argv_exec_path; -static const char *argv0_path; - -const char *system_path(const char *path) -{ -#ifdef RUNTIME_PREFIX - static const char *prefix; -#else - static const char *prefix = PREFIX; -#endif - struct strbuf d = STRBUF_INIT; - - if (is_absolute_path(path)) - return path; - -#ifdef RUNTIME_PREFIX - assert(argv0_path); - assert(is_absolute_path(argv0_path)); - - if (!prefix && - !(prefix = strip_path_suffix(argv0_path, PERF_EXEC_PATH)) && - !(prefix = strip_path_suffix(argv0_path, BINDIR)) && - !(prefix = strip_path_suffix(argv0_path, "perf"))) { - prefix = PREFIX; - fprintf(stderr, "RUNTIME_PREFIX requested, " - "but prefix computation failed. " - "Using static fallback '%s'.\n", prefix); - } -#endif - - strbuf_addf(&d, "%s/%s", prefix, path); - path = strbuf_detach(&d, NULL); - return path; -} - -const char *perf_extract_argv0_path(const char *argv0) -{ - const char *slash; - - if (!argv0 || !*argv0) - return NULL; - slash = argv0 + strlen(argv0); - - while (argv0 <= slash && !is_dir_sep(*slash)) - slash--; - - if (slash >= argv0) { - argv0_path = strndup(argv0, slash - argv0); - return slash + 1; - } - - return argv0; -} - -void perf_set_argv_exec_path(const char *exec_path) -{ - argv_exec_path = exec_path; - /* - * Propagate this setting to external programs. - */ - setenv(EXEC_PATH_ENVIRONMENT, exec_path, 1); -} - - -/* Returns the highest-priority, location to look for perf programs. 
*/ -const char *perf_exec_path(void) -{ - const char *env; - - if (argv_exec_path) - return argv_exec_path; - - env = getenv(EXEC_PATH_ENVIRONMENT); - if (env && *env) { - return env; - } - - return system_path(PERF_EXEC_PATH); -} - -static void add_path(struct strbuf *out, const char *path) -{ - if (path && *path) { - if (is_absolute_path(path)) - strbuf_addstr(out, path); - else - strbuf_addstr(out, make_nonrelative_path(path)); - - strbuf_addch(out, PATH_SEP); - } -} - -void setup_path(void) -{ - const char *old_path = getenv("PATH"); - struct strbuf new_path = STRBUF_INIT; - - add_path(&new_path, perf_exec_path()); - add_path(&new_path, argv0_path); - - if (old_path) - strbuf_addstr(&new_path, old_path); - else - strbuf_addstr(&new_path, "/usr/local/bin:/usr/bin:/bin"); - - setenv("PATH", new_path.buf, 1); - - strbuf_release(&new_path); -} - -const char **prepare_perf_cmd(const char **argv) -{ - int argc; - const char **nargv; - - for (argc = 0; argv[argc]; argc++) - ; /* just counting */ - nargv = malloc(sizeof(*nargv) * (argc + 2)); - - nargv[0] = "perf"; - for (argc = 0; argv[argc]; argc++) - nargv[argc + 1] = argv[argc]; - nargv[argc + 1] = NULL; - return nargv; -} - -int execv_perf_cmd(const char **argv) { - const char **nargv = prepare_perf_cmd(argv); - - /* execvp() can only ever return if it fails */ - execvp("perf", (char **)nargv); - - free(nargv); - return -1; -} - - -int execl_perf_cmd(const char *cmd,...) 
-{ - int argc; - const char *argv[MAX_ARGS + 1]; - const char *arg; - va_list param; - - va_start(param, cmd); - argv[0] = cmd; - argc = 1; - while (argc < MAX_ARGS) { - arg = argv[argc++] = va_arg(param, char *); - if (!arg) - break; - } - va_end(param); - if (MAX_ARGS <= argc) - return error("too many args to run %s", cmd); - - argv[argc] = NULL; - return execv_perf_cmd(argv); -} diff --git a/trunk/tools/perf/util/exec_cmd.h b/trunk/tools/perf/util/exec_cmd.h deleted file mode 100644 index effe25eb1545..000000000000 --- a/trunk/tools/perf/util/exec_cmd.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef PERF_EXEC_CMD_H -#define PERF_EXEC_CMD_H - -extern void perf_set_argv_exec_path(const char *exec_path); -extern const char *perf_extract_argv0_path(const char *path); -extern const char *perf_exec_path(void); -extern void setup_path(void); -extern const char **prepare_perf_cmd(const char **argv); -extern int execv_perf_cmd(const char **argv); /* NULL terminated */ -extern int execl_perf_cmd(const char *cmd, ...); -extern const char *system_path(const char *path); - -#endif /* PERF_EXEC_CMD_H */ diff --git a/trunk/tools/perf/util/generate-cmdlist.sh b/trunk/tools/perf/util/generate-cmdlist.sh deleted file mode 100755 index f06f6fd148f8..000000000000 --- a/trunk/tools/perf/util/generate-cmdlist.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -echo "/* Automatically generated by $0 */ -struct cmdname_help -{ - char name[16]; - char help[80]; -}; - -static struct cmdname_help common_cmds[] = {" - -sed -n -e 's/^perf-\([^ ]*\)[ ].* common.*/\1/p' command-list.txt | -sort | -while read cmd -do - sed -n ' - /^NAME/,/perf-'"$cmd"'/H - ${ - x - s/.*perf-'"$cmd"' - \(.*\)/ {"'"$cmd"'", "\1"},/ - p - }' "Documentation/perf-$cmd.txt" -done -echo "};" diff --git a/trunk/tools/perf/util/help.c b/trunk/tools/perf/util/help.c deleted file mode 100644 index 6653f7dd1d78..000000000000 --- a/trunk/tools/perf/util/help.c +++ /dev/null @@ -1,367 +0,0 @@ -#include "cache.h" -#include 
"../builtin.h" -#include "exec_cmd.h" -#include "levenshtein.h" -#include "help.h" - -/* most GUI terminals set COLUMNS (although some don't export it) */ -static int term_columns(void) -{ - char *col_string = getenv("COLUMNS"); - int n_cols; - - if (col_string && (n_cols = atoi(col_string)) > 0) - return n_cols; - -#ifdef TIOCGWINSZ - { - struct winsize ws; - if (!ioctl(1, TIOCGWINSZ, &ws)) { - if (ws.ws_col) - return ws.ws_col; - } - } -#endif - - return 80; -} - -void add_cmdname(struct cmdnames *cmds, const char *name, int len) -{ - struct cmdname *ent = malloc(sizeof(*ent) + len + 1); - - ent->len = len; - memcpy(ent->name, name, len); - ent->name[len] = 0; - - ALLOC_GROW(cmds->names, cmds->cnt + 1, cmds->alloc); - cmds->names[cmds->cnt++] = ent; -} - -static void clean_cmdnames(struct cmdnames *cmds) -{ - int i; - for (i = 0; i < cmds->cnt; ++i) - free(cmds->names[i]); - free(cmds->names); - cmds->cnt = 0; - cmds->alloc = 0; -} - -static int cmdname_compare(const void *a_, const void *b_) -{ - struct cmdname *a = *(struct cmdname **)a_; - struct cmdname *b = *(struct cmdname **)b_; - return strcmp(a->name, b->name); -} - -static void uniq(struct cmdnames *cmds) -{ - int i, j; - - if (!cmds->cnt) - return; - - for (i = j = 1; i < cmds->cnt; i++) - if (strcmp(cmds->names[i]->name, cmds->names[i-1]->name)) - cmds->names[j++] = cmds->names[i]; - - cmds->cnt = j; -} - -void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes) -{ - int ci, cj, ei; - int cmp; - - ci = cj = ei = 0; - while (ci < cmds->cnt && ei < excludes->cnt) { - cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name); - if (cmp < 0) - cmds->names[cj++] = cmds->names[ci++]; - else if (cmp == 0) - ci++, ei++; - else if (cmp > 0) - ei++; - } - - while (ci < cmds->cnt) - cmds->names[cj++] = cmds->names[ci++]; - - cmds->cnt = cj; -} - -static void pretty_print_string_list(struct cmdnames *cmds, int longest) -{ - int cols = 1, rows; - int space = longest + 1; /* min 1 SP between words 
*/ - int max_cols = term_columns() - 1; /* don't print *on* the edge */ - int i, j; - - if (space < max_cols) - cols = max_cols / space; - rows = (cmds->cnt + cols - 1) / cols; - - for (i = 0; i < rows; i++) { - printf(" "); - - for (j = 0; j < cols; j++) { - int n = j * rows + i; - int size = space; - if (n >= cmds->cnt) - break; - if (j == cols-1 || n + rows >= cmds->cnt) - size = 1; - printf("%-*s", size, cmds->names[n]->name); - } - putchar('\n'); - } -} - -static int is_executable(const char *name) -{ - struct stat st; - - if (stat(name, &st) || /* stat, not lstat */ - !S_ISREG(st.st_mode)) - return 0; - -#ifdef __MINGW32__ - /* cannot trust the executable bit, peek into the file instead */ - char buf[3] = { 0 }; - int n; - int fd = open(name, O_RDONLY); - st.st_mode &= ~S_IXUSR; - if (fd >= 0) { - n = read(fd, buf, 2); - if (n == 2) - /* DOS executables start with "MZ" */ - if (!strcmp(buf, "#!") || !strcmp(buf, "MZ")) - st.st_mode |= S_IXUSR; - close(fd); - } -#endif - return st.st_mode & S_IXUSR; -} - -static void list_commands_in_dir(struct cmdnames *cmds, - const char *path, - const char *prefix) -{ - int prefix_len; - DIR *dir = opendir(path); - struct dirent *de; - struct strbuf buf = STRBUF_INIT; - int len; - - if (!dir) - return; - if (!prefix) - prefix = "perf-"; - prefix_len = strlen(prefix); - - strbuf_addf(&buf, "%s/", path); - len = buf.len; - - while ((de = readdir(dir)) != NULL) { - int entlen; - - if (prefixcmp(de->d_name, prefix)) - continue; - - strbuf_setlen(&buf, len); - strbuf_addstr(&buf, de->d_name); - if (!is_executable(buf.buf)) - continue; - - entlen = strlen(de->d_name) - prefix_len; - if (has_extension(de->d_name, ".exe")) - entlen -= 4; - - add_cmdname(cmds, de->d_name + prefix_len, entlen); - } - closedir(dir); - strbuf_release(&buf); -} - -void load_command_list(const char *prefix, - struct cmdnames *main_cmds, - struct cmdnames *other_cmds) -{ - const char *env_path = getenv("PATH"); - const char *exec_path = perf_exec_path(); 
- - if (exec_path) { - list_commands_in_dir(main_cmds, exec_path, prefix); - qsort(main_cmds->names, main_cmds->cnt, - sizeof(*main_cmds->names), cmdname_compare); - uniq(main_cmds); - } - - if (env_path) { - char *paths, *path, *colon; - path = paths = strdup(env_path); - while (1) { - if ((colon = strchr(path, PATH_SEP))) - *colon = 0; - if (!exec_path || strcmp(path, exec_path)) - list_commands_in_dir(other_cmds, path, prefix); - - if (!colon) - break; - path = colon + 1; - } - free(paths); - - qsort(other_cmds->names, other_cmds->cnt, - sizeof(*other_cmds->names), cmdname_compare); - uniq(other_cmds); - } - exclude_cmds(other_cmds, main_cmds); -} - -void list_commands(const char *title, struct cmdnames *main_cmds, - struct cmdnames *other_cmds) -{ - int i, longest = 0; - - for (i = 0; i < main_cmds->cnt; i++) - if (longest < main_cmds->names[i]->len) - longest = main_cmds->names[i]->len; - for (i = 0; i < other_cmds->cnt; i++) - if (longest < other_cmds->names[i]->len) - longest = other_cmds->names[i]->len; - - if (main_cmds->cnt) { - const char *exec_path = perf_exec_path(); - printf("available %s in '%s'\n", title, exec_path); - printf("----------------"); - mput_char('-', strlen(title) + strlen(exec_path)); - putchar('\n'); - pretty_print_string_list(main_cmds, longest); - putchar('\n'); - } - - if (other_cmds->cnt) { - printf("%s available from elsewhere on your $PATH\n", title); - printf("---------------------------------------"); - mput_char('-', strlen(title)); - putchar('\n'); - pretty_print_string_list(other_cmds, longest); - putchar('\n'); - } -} - -int is_in_cmdlist(struct cmdnames *c, const char *s) -{ - int i; - for (i = 0; i < c->cnt; i++) - if (!strcmp(s, c->names[i]->name)) - return 1; - return 0; -} - -static int autocorrect; -static struct cmdnames aliases; - -static int perf_unknown_cmd_config(const char *var, const char *value, void *cb) -{ - if (!strcmp(var, "help.autocorrect")) - autocorrect = perf_config_int(var,value); - /* Also use 
aliases for command lookup */ - if (!prefixcmp(var, "alias.")) - add_cmdname(&aliases, var + 6, strlen(var + 6)); - - return perf_default_config(var, value, cb); -} - -static int levenshtein_compare(const void *p1, const void *p2) -{ - const struct cmdname *const *c1 = p1, *const *c2 = p2; - const char *s1 = (*c1)->name, *s2 = (*c2)->name; - int l1 = (*c1)->len; - int l2 = (*c2)->len; - return l1 != l2 ? l1 - l2 : strcmp(s1, s2); -} - -static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) -{ - int i; - ALLOC_GROW(cmds->names, cmds->cnt + old->cnt, cmds->alloc); - - for (i = 0; i < old->cnt; i++) - cmds->names[cmds->cnt++] = old->names[i]; - free(old->names); - old->cnt = 0; - old->names = NULL; -} - -const char *help_unknown_cmd(const char *cmd) -{ - int i, n = 0, best_similarity = 0; - struct cmdnames main_cmds, other_cmds; - - memset(&main_cmds, 0, sizeof(main_cmds)); - memset(&other_cmds, 0, sizeof(main_cmds)); - memset(&aliases, 0, sizeof(aliases)); - - perf_config(perf_unknown_cmd_config, NULL); - - load_command_list("perf-", &main_cmds, &other_cmds); - - add_cmd_list(&main_cmds, &aliases); - add_cmd_list(&main_cmds, &other_cmds); - qsort(main_cmds.names, main_cmds.cnt, - sizeof(main_cmds.names), cmdname_compare); - uniq(&main_cmds); - - if (main_cmds.cnt) { - /* This reuses cmdname->len for similarity index */ - for (i = 0; i < main_cmds.cnt; ++i) - main_cmds.names[i]->len = - levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); - - qsort(main_cmds.names, main_cmds.cnt, - sizeof(*main_cmds.names), levenshtein_compare); - - best_similarity = main_cmds.names[0]->len; - n = 1; - while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) - ++n; - } - - if (autocorrect && n == 1) { - const char *assumed = main_cmds.names[0]->name; - - main_cmds.names[0] = NULL; - clean_cmdnames(&main_cmds); - fprintf(stderr, "WARNING: You called a Git program named '%s', " - "which does not exist.\n" - "Continuing under the assumption that you meant 
'%s'\n", - cmd, assumed); - if (autocorrect > 0) { - fprintf(stderr, "in %0.1f seconds automatically...\n", - (float)autocorrect/10.0); - poll(NULL, 0, autocorrect * 100); - } - return assumed; - } - - fprintf(stderr, "perf: '%s' is not a perf-command. See 'perf --help'.\n", cmd); - - if (main_cmds.cnt && best_similarity < 6) { - fprintf(stderr, "\nDid you mean %s?\n", - n < 2 ? "this": "one of these"); - - for (i = 0; i < n; i++) - fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); - } - - exit(1); -} - -int cmd_version(int argc, const char **argv, const char *prefix) -{ - printf("perf version %s\n", perf_version_string); - return 0; -} diff --git a/trunk/tools/perf/util/help.h b/trunk/tools/perf/util/help.h deleted file mode 100644 index 56bc15406ffc..000000000000 --- a/trunk/tools/perf/util/help.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef HELP_H -#define HELP_H - -struct cmdnames { - int alloc; - int cnt; - struct cmdname { - size_t len; /* also used for similarity index in help.c */ - char name[FLEX_ARRAY]; - } **names; -}; - -static inline void mput_char(char c, unsigned int num) -{ - while(num--) - putchar(c); -} - -void load_command_list(const char *prefix, - struct cmdnames *main_cmds, - struct cmdnames *other_cmds); -void add_cmdname(struct cmdnames *cmds, const char *name, int len); -/* Here we require that excludes is a sorted list. */ -void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes); -int is_in_cmdlist(struct cmdnames *c, const char *s); -void list_commands(const char *title, struct cmdnames *main_cmds, - struct cmdnames *other_cmds); - -#endif /* HELP_H */ diff --git a/trunk/tools/perf/util/levenshtein.c b/trunk/tools/perf/util/levenshtein.c deleted file mode 100644 index e521d1516df6..000000000000 --- a/trunk/tools/perf/util/levenshtein.c +++ /dev/null @@ -1,84 +0,0 @@ -#include "cache.h" -#include "levenshtein.h" - -/* - * This function implements the Damerau-Levenshtein algorithm to - * calculate a distance between strings. 
- * - * Basically, it says how many letters need to be swapped, substituted, - * deleted from, or added to string1, at least, to get string2. - * - * The idea is to build a distance matrix for the substrings of both - * strings. To avoid a large space complexity, only the last three rows - * are kept in memory (if swaps had the same or higher cost as one deletion - * plus one insertion, only two rows would be needed). - * - * At any stage, "i + 1" denotes the length of the current substring of - * string1 that the distance is calculated for. - * - * row2 holds the current row, row1 the previous row (i.e. for the substring - * of string1 of length "i"), and row0 the row before that. - * - * In other words, at the start of the big loop, row2[j + 1] contains the - * Damerau-Levenshtein distance between the substring of string1 of length - * "i" and the substring of string2 of length "j + 1". - * - * All the big loop does is determine the partial minimum-cost paths. - * - * It does so by calculating the costs of the path ending in characters - * i (in string1) and j (in string2), respectively, given that the last - * operation is a substition, a swap, a deletion, or an insertion. - * - * This implementation allows the costs to be weighted: - * - * - w (as in "sWap") - * - s (as in "Substitution") - * - a (for insertion, AKA "Add") - * - d (as in "Deletion") - * - * Note that this algorithm calculates a distance _iff_ d == a. 
- */ -int levenshtein(const char *string1, const char *string2, - int w, int s, int a, int d) -{ - int len1 = strlen(string1), len2 = strlen(string2); - int *row0 = malloc(sizeof(int) * (len2 + 1)); - int *row1 = malloc(sizeof(int) * (len2 + 1)); - int *row2 = malloc(sizeof(int) * (len2 + 1)); - int i, j; - - for (j = 0; j <= len2; j++) - row1[j] = j * a; - for (i = 0; i < len1; i++) { - int *dummy; - - row2[0] = (i + 1) * d; - for (j = 0; j < len2; j++) { - /* substitution */ - row2[j + 1] = row1[j] + s * (string1[i] != string2[j]); - /* swap */ - if (i > 0 && j > 0 && string1[i - 1] == string2[j] && - string1[i] == string2[j - 1] && - row2[j + 1] > row0[j - 1] + w) - row2[j + 1] = row0[j - 1] + w; - /* deletion */ - if (row2[j + 1] > row1[j + 1] + d) - row2[j + 1] = row1[j + 1] + d; - /* insertion */ - if (row2[j + 1] > row2[j] + a) - row2[j + 1] = row2[j] + a; - } - - dummy = row0; - row0 = row1; - row1 = row2; - row2 = dummy; - } - - i = row1[len2]; - free(row0); - free(row1); - free(row2); - - return i; -} diff --git a/trunk/tools/perf/util/levenshtein.h b/trunk/tools/perf/util/levenshtein.h deleted file mode 100644 index 0173abeef52c..000000000000 --- a/trunk/tools/perf/util/levenshtein.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef LEVENSHTEIN_H -#define LEVENSHTEIN_H - -int levenshtein(const char *string1, const char *string2, - int swap_penalty, int substition_penalty, - int insertion_penalty, int deletion_penalty); - -#endif diff --git a/trunk/tools/perf/util/list.h b/trunk/tools/perf/util/list.h deleted file mode 100644 index e2548e8072cf..000000000000 --- a/trunk/tools/perf/util/list.h +++ /dev/null @@ -1,603 +0,0 @@ -#ifndef _LINUX_LIST_H -#define _LINUX_LIST_H -/* - Copyright (C) Cast of dozens, comes from the Linux kernel - - This program is free software; you can redistribute it and/or modify it - under the terms of version 2 of the GNU General Public License as - published by the Free Software Foundation. 
-*/ - -#include - -/* - * These are non-NULL pointers that will result in page faults - * under normal circumstances, used to verify that nobody uses - * non-initialized list entries. - */ -#define LIST_POISON1 ((void *)0x00100100) -#define LIST_POISON2 ((void *)0x00200200) - -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. - * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -/* - * Simple doubly linked list implementation. - * - * Some of the internal functions ("__xxx") are useful when - * manipulating whole lists rather than single entries, as - * sometimes we already know the next/prev entries and we can - * generate better code by using them directly rather than - * using the generic single-entry routines. - */ - -struct list_head { - struct list_head *next, *prev; -}; - -#define LIST_HEAD_INIT(name) { &(name), &(name) } - -#define LIST_HEAD(name) \ - struct list_head name = LIST_HEAD_INIT(name) - -static inline void INIT_LIST_HEAD(struct list_head *list) -{ - list->next = list; - list->prev = list; -} - -/* - * Insert a new entry between two known consecutive entries. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_add(struct list_head *new, - struct list_head *prev, - struct list_head *next) -{ - next->prev = new; - new->next = next; - new->prev = prev; - prev->next = new; -} - -/** - * list_add - add a new entry - * @new: new entry to be added - * @head: list head to add it after - * - * Insert a new entry after the specified head. - * This is good for implementing stacks. 
- */ -static inline void list_add(struct list_head *new, struct list_head *head) -{ - __list_add(new, head, head->next); -} - -/** - * list_add_tail - add a new entry - * @new: new entry to be added - * @head: list head to add it before - * - * Insert a new entry before the specified head. - * This is useful for implementing queues. - */ -static inline void list_add_tail(struct list_head *new, struct list_head *head) -{ - __list_add(new, head->prev, head); -} - -/* - * Delete a list entry by making the prev/next entries - * point to each other. - * - * This is only for internal list manipulation where we know - * the prev/next entries already! - */ -static inline void __list_del(struct list_head * prev, struct list_head * next) -{ - next->prev = prev; - prev->next = next; -} - -/** - * list_del - deletes entry from list. - * @entry: the element to delete from the list. - * Note: list_empty on entry does not return true after this, the entry is - * in an undefined state. - */ -static inline void list_del(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - entry->next = LIST_POISON1; - entry->prev = LIST_POISON2; -} - -/** - * list_del_range - deletes range of entries from list. - * @beging: first element in the range to delete from the list. - * @beging: first element in the range to delete from the list. - * Note: list_empty on the range of entries does not return true after this, - * the entries is in an undefined state. - */ -static inline void list_del_range(struct list_head *begin, - struct list_head *end) -{ - begin->prev->next = end->next; - end->next->prev = begin->prev; -} - -/** - * list_replace - replace old entry by new one - * @old : the element to be replaced - * @new : the new element to insert - * Note: if 'old' was empty, it will be overwritten. 
- */ -static inline void list_replace(struct list_head *old, - struct list_head *new) -{ - new->next = old->next; - new->next->prev = new; - new->prev = old->prev; - new->prev->next = new; -} - -static inline void list_replace_init(struct list_head *old, - struct list_head *new) -{ - list_replace(old, new); - INIT_LIST_HEAD(old); -} - -/** - * list_del_init - deletes entry from list and reinitialize it. - * @entry: the element to delete from the list. - */ -static inline void list_del_init(struct list_head *entry) -{ - __list_del(entry->prev, entry->next); - INIT_LIST_HEAD(entry); -} - -/** - * list_move - delete from one list and add as another's head - * @list: the entry to move - * @head: the head that will precede our entry - */ -static inline void list_move(struct list_head *list, struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add(list, head); -} - -/** - * list_move_tail - delete from one list and add as another's tail - * @list: the entry to move - * @head: the head that will follow our entry - */ -static inline void list_move_tail(struct list_head *list, - struct list_head *head) -{ - __list_del(list->prev, list->next); - list_add_tail(list, head); -} - -/** - * list_is_last - tests whether @list is the last entry in list @head - * @list: the entry to test - * @head: the head of the list - */ -static inline int list_is_last(const struct list_head *list, - const struct list_head *head) -{ - return list->next == head; -} - -/** - * list_empty - tests whether a list is empty - * @head: the list to test. 
- */ -static inline int list_empty(const struct list_head *head) -{ - return head->next == head; -} - -/** - * list_empty_careful - tests whether a list is empty and not being modified - * @head: the list to test - * - * Description: - * tests whether a list is empty _and_ checks that no other CPU might be - * in the process of modifying either member (next or prev) - * - * NOTE: using list_empty_careful() without synchronization - * can only be safe if the only activity that can happen - * to the list entry is list_del_init(). Eg. it cannot be used - * if another CPU could re-list_add() it. - */ -static inline int list_empty_careful(const struct list_head *head) -{ - struct list_head *next = head->next; - return (next == head) && (next == head->prev); -} - -static inline void __list_splice(struct list_head *list, - struct list_head *head) -{ - struct list_head *first = list->next; - struct list_head *last = list->prev; - struct list_head *at = head->next; - - first->prev = head; - head->next = first; - - last->next = at; - at->prev = last; -} - -/** - * list_splice - join two lists - * @list: the new list to add. - * @head: the place to add it in the first list. - */ -static inline void list_splice(struct list_head *list, struct list_head *head) -{ - if (!list_empty(list)) - __list_splice(list, head); -} - -/** - * list_splice_init - join two lists and reinitialise the emptied list. - * @list: the new list to add. - * @head: the place to add it in the first list. - * - * The list at @list is reinitialised - */ -static inline void list_splice_init(struct list_head *list, - struct list_head *head) -{ - if (!list_empty(list)) { - __list_splice(list, head); - INIT_LIST_HEAD(list); - } -} - -/** - * list_entry - get the struct for this entry - * @ptr: the &struct list_head pointer. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. 
- */ -#define list_entry(ptr, type, member) \ - container_of(ptr, type, member) - -/** - * list_first_entry - get the first element from a list - * @ptr: the list head to take the element from. - * @type: the type of the struct this is embedded in. - * @member: the name of the list_struct within the struct. - * - * Note, that list is expected to be not empty. - */ -#define list_first_entry(ptr, type, member) \ - list_entry((ptr)->next, type, member) - -/** - * list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); \ - pos = pos->next) - -/** - * __list_for_each - iterate over a list - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - * - * This variant differs from list_for_each() in that it's the - * simplest possible list iteration code, no prefetching is done. - * Use this for code that knows the list to be very short (empty - * or 1 entry) most of the time. - */ -#define __list_for_each(pos, head) \ - for (pos = (head)->next; pos != (head); pos = pos->next) - -/** - * list_for_each_prev - iterate over a list backwards - * @pos: the &struct list_head to use as a loop cursor. - * @head: the head for your list. - */ -#define list_for_each_prev(pos, head) \ - for (pos = (head)->prev; pos != (head); \ - pos = pos->prev) - -/** - * list_for_each_safe - iterate over a list safe against removal of list entry - * @pos: the &struct list_head to use as a loop cursor. - * @n: another &struct list_head to use as temporary storage - * @head: the head for your list. - */ -#define list_for_each_safe(pos, n, head) \ - for (pos = (head)->next, n = pos->next; pos != (head); \ - pos = n, n = pos->next) - -/** - * list_for_each_entry - iterate over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. 
- * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry(pos, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_reverse - iterate backwards over list of given type. - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_reverse(pos, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.prev, typeof(*pos), member)) - -/** - * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue - * @pos: the type * to use as a start point - * @head: the head of the list - * @member: the name of the list_struct within the struct. - * - * Prepares a pos entry for use as a start point in list_for_each_entry_continue. - */ -#define list_prepare_entry(pos, head, member) \ - ((pos) ? : list_entry(head, typeof(*pos), member)) - -/** - * list_for_each_entry_continue - continue iteration over list of given type - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Continue to iterate over list of given type, continuing after - * the current position. - */ -#define list_for_each_entry_continue(pos, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_from - iterate over list of given type from the current point - * @pos: the type * to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing from current position. 
- */ -#define list_for_each_entry_from(pos, head, member) \ - for (; &pos->member != (head); \ - pos = list_entry(pos->member.next, typeof(*pos), member)) - -/** - * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - */ -#define list_for_each_entry_safe(pos, n, head, member) \ - for (pos = list_entry((head)->next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_continue - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type, continuing after current point, - * safe against removal of list entry. - */ -#define list_for_each_entry_safe_continue(pos, n, head, member) \ - for (pos = list_entry(pos->member.next, typeof(*pos), member), \ - n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_from - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate over list of given type from current point, safe against - * removal of list entry. 
- */ -#define list_for_each_entry_safe_from(pos, n, head, member) \ - for (n = list_entry(pos->member.next, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.next, typeof(*n), member)) - -/** - * list_for_each_entry_safe_reverse - * @pos: the type * to use as a loop cursor. - * @n: another type * to use as temporary storage - * @head: the head for your list. - * @member: the name of the list_struct within the struct. - * - * Iterate backwards over list of given type, safe against removal - * of list entry. - */ -#define list_for_each_entry_safe_reverse(pos, n, head, member) \ - for (pos = list_entry((head)->prev, typeof(*pos), member), \ - n = list_entry(pos->member.prev, typeof(*pos), member); \ - &pos->member != (head); \ - pos = n, n = list_entry(n->member.prev, typeof(*n), member)) - -/* - * Double linked lists with a single pointer list head. - * Mostly useful for hash tables where the two pointer list head is - * too wasteful. - * You lose the ability to access the tail in O(1). 
- */ - -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -#define HLIST_HEAD_INIT { .first = NULL } -#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } -#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) -static inline void INIT_HLIST_NODE(struct hlist_node *h) -{ - h->next = NULL; - h->pprev = NULL; -} - -static inline int hlist_unhashed(const struct hlist_node *h) -{ - return !h->pprev; -} - -static inline int hlist_empty(const struct hlist_head *h) -{ - return !h->first; -} - -static inline void __hlist_del(struct hlist_node *n) -{ - struct hlist_node *next = n->next; - struct hlist_node **pprev = n->pprev; - *pprev = next; - if (next) - next->pprev = pprev; -} - -static inline void hlist_del(struct hlist_node *n) -{ - __hlist_del(n); - n->next = LIST_POISON1; - n->pprev = LIST_POISON2; -} - -static inline void hlist_del_init(struct hlist_node *n) -{ - if (!hlist_unhashed(n)) { - __hlist_del(n); - INIT_HLIST_NODE(n); - } -} - -static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) -{ - struct hlist_node *first = h->first; - n->next = first; - if (first) - first->pprev = &n->next; - h->first = n; - n->pprev = &h->first; -} - -/* next must be != NULL */ -static inline void hlist_add_before(struct hlist_node *n, - struct hlist_node *next) -{ - n->pprev = next->pprev; - n->next = next; - next->pprev = &n->next; - *(n->pprev) = n; -} - -static inline void hlist_add_after(struct hlist_node *n, - struct hlist_node *next) -{ - next->next = n->next; - n->next = next; - next->pprev = &n->next; - - if(next->next) - next->next->pprev = &next->next; -} - -#define hlist_entry(ptr, type, member) container_of(ptr,type,member) - -#define hlist_for_each(pos, head) \ - for (pos = (head)->first; pos; \ - pos = pos->next) - -#define hlist_for_each_safe(pos, n, head) \ - for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ - pos = n) - -/** - * hlist_for_each_entry - 
iterate over list of given type - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry(tpos, pos, head, member) \ - for (pos = (head)->first; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_continue - iterate over a hlist continuing after current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_continue(tpos, pos, member) \ - for (pos = (pos)->next; \ - pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_from - iterate over a hlist continuing from current point - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @member: the name of the hlist_node within the struct. - */ -#define hlist_for_each_entry_from(tpos, pos, member) \ - for (; pos && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = pos->next) - -/** - * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry - * @tpos: the type * to use as a loop cursor. - * @pos: the &struct hlist_node to use as a loop cursor. - * @n: another &struct hlist_node to use as temporary storage - * @head: the head for your list. - * @member: the name of the hlist_node within the struct. 
- */ -#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ - for (pos = (head)->first; \ - pos && ({ n = pos->next; 1; }) && \ - ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ - pos = n) - -#endif diff --git a/trunk/tools/perf/util/pager.c b/trunk/tools/perf/util/pager.c deleted file mode 100644 index a28bccae5458..000000000000 --- a/trunk/tools/perf/util/pager.c +++ /dev/null @@ -1,99 +0,0 @@ -#include "cache.h" -#include "run-command.h" -#include "sigchain.h" - -/* - * This is split up from the rest of git so that we can do - * something different on Windows. - */ - -static int spawned_pager; - -#ifndef __MINGW32__ -static void pager_preexec(void) -{ - /* - * Work around bug in "less" by not starting it until we - * have real input - */ - fd_set in; - - FD_ZERO(&in); - FD_SET(0, &in); - select(1, &in, NULL, &in, NULL); - - setenv("LESS", "FRSX", 0); -} -#endif - -static const char *pager_argv[] = { "sh", "-c", NULL, NULL }; -static struct child_process pager_process; - -static void wait_for_pager(void) -{ - fflush(stdout); - fflush(stderr); - /* signal EOF to pager */ - close(1); - close(2); - finish_command(&pager_process); -} - -static void wait_for_pager_signal(int signo) -{ - wait_for_pager(); - sigchain_pop(signo); - raise(signo); -} - -void setup_pager(void) -{ - const char *pager = getenv("PERF_PAGER"); - - if (!isatty(1)) - return; - if (!pager) { - if (!pager_program) - perf_config(perf_default_config, NULL); - pager = pager_program; - } - if (!pager) - pager = getenv("PAGER"); - if (!pager) - pager = "less"; - else if (!*pager || !strcmp(pager, "cat")) - return; - - spawned_pager = 1; /* means we are emitting to terminal */ - - /* spawn the pager */ - pager_argv[2] = pager; - pager_process.argv = pager_argv; - pager_process.in = -1; -#ifndef __MINGW32__ - pager_process.preexec_cb = pager_preexec; -#endif - if (start_command(&pager_process)) - return; - - /* original process continues, but writes to the pipe */ - 
dup2(pager_process.in, 1); - if (isatty(2)) - dup2(pager_process.in, 2); - close(pager_process.in); - - /* this makes sure that the parent terminates after the pager */ - sigchain_push_common(wait_for_pager_signal); - atexit(wait_for_pager); -} - -int pager_in_use(void) -{ - const char *env; - - if (spawned_pager) - return 1; - - env = getenv("PERF_PAGER_IN_USE"); - return env ? perf_config_bool("PERF_PAGER_IN_USE", env) : 0; -} diff --git a/trunk/tools/perf/util/parse-events.c b/trunk/tools/perf/util/parse-events.c deleted file mode 100644 index 9d5f1ca50e6f..000000000000 --- a/trunk/tools/perf/util/parse-events.c +++ /dev/null @@ -1,316 +0,0 @@ - -#include "../perf.h" -#include "util.h" -#include "parse-options.h" -#include "parse-events.h" -#include "exec_cmd.h" -#include "string.h" - -extern char *strcasestr(const char *haystack, const char *needle); - -int nr_counters; - -struct perf_counter_attr attrs[MAX_COUNTERS]; - -struct event_symbol { - __u8 type; - __u64 config; - char *symbol; -}; - -#define C(x, y) .type = PERF_TYPE_##x, .config = PERF_COUNT_##y -#define CR(x, y) .type = PERF_TYPE_##x, .config = y - -static struct event_symbol event_symbols[] = { - { C(HARDWARE, HW_CPU_CYCLES), "cpu-cycles", }, - { C(HARDWARE, HW_CPU_CYCLES), "cycles", }, - { C(HARDWARE, HW_INSTRUCTIONS), "instructions", }, - { C(HARDWARE, HW_CACHE_REFERENCES), "cache-references", }, - { C(HARDWARE, HW_CACHE_MISSES), "cache-misses", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branch-instructions", }, - { C(HARDWARE, HW_BRANCH_INSTRUCTIONS),"branches", }, - { C(HARDWARE, HW_BRANCH_MISSES), "branch-misses", }, - { C(HARDWARE, HW_BUS_CYCLES), "bus-cycles", }, - - { C(SOFTWARE, SW_CPU_CLOCK), "cpu-clock", }, - { C(SOFTWARE, SW_TASK_CLOCK), "task-clock", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "page-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS), "faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MIN), "minor-faults", }, - { C(SOFTWARE, SW_PAGE_FAULTS_MAJ), "major-faults", }, - { C(SOFTWARE, 
SW_CONTEXT_SWITCHES), "context-switches", }, - { C(SOFTWARE, SW_CONTEXT_SWITCHES), "cs", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "cpu-migrations", }, - { C(SOFTWARE, SW_CPU_MIGRATIONS), "migrations", }, -}; - -#define __PERF_COUNTER_FIELD(config, name) \ - ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) - -#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) -#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) -#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) -#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) - -static char *hw_event_names[] = { - "cycles", - "instructions", - "cache-references", - "cache-misses", - "branches", - "branch-misses", - "bus-cycles", -}; - -static char *sw_event_names[] = { - "cpu-clock-ticks", - "task-clock-ticks", - "page-faults", - "context-switches", - "CPU-migrations", - "minor-faults", - "major-faults", -}; - -#define MAX_ALIASES 8 - -static char *hw_cache [][MAX_ALIASES] = { - { "L1-data" , "l1-d", "l1d", "l1" }, - { "L1-instruction" , "l1-i", "l1i" }, - { "L2" , "l2" }, - { "Data-TLB" , "dtlb", "d-tlb" }, - { "Instruction-TLB" , "itlb", "i-tlb" }, - { "Branch" , "bpu" , "btb", "bpc" }, -}; - -static char *hw_cache_op [][MAX_ALIASES] = { - { "Load" , "read" }, - { "Store" , "write" }, - { "Prefetch" , "speculative-read", "speculative-load" }, -}; - -static char *hw_cache_result [][MAX_ALIASES] = { - { "Reference" , "ops", "access" }, - { "Miss" }, -}; - -char *event_name(int counter) -{ - __u64 config = attrs[counter].config; - int type = attrs[counter].type; - static char buf[32]; - - if (attrs[counter].type == PERF_TYPE_RAW) { - sprintf(buf, "raw 0x%llx", config); - return buf; - } - - switch (type) { - case PERF_TYPE_HARDWARE: - if (config < PERF_COUNT_HW_MAX) - return hw_event_names[config]; - return "unknown-hardware"; - - case PERF_TYPE_HW_CACHE: { - __u8 cache_type, cache_op, cache_result; - static char name[100]; - - cache_type 
= (config >> 0) & 0xff; - if (cache_type > PERF_COUNT_HW_CACHE_MAX) - return "unknown-ext-hardware-cache-type"; - - cache_op = (config >> 8) & 0xff; - if (cache_op > PERF_COUNT_HW_CACHE_OP_MAX) - return "unknown-ext-hardware-cache-op"; - - cache_result = (config >> 16) & 0xff; - if (cache_result > PERF_COUNT_HW_CACHE_RESULT_MAX) - return "unknown-ext-hardware-cache-result"; - - sprintf(name, "%s-Cache-%s-%ses", - hw_cache[cache_type][0], - hw_cache_op[cache_op][0], - hw_cache_result[cache_result][0]); - - return name; - } - - case PERF_TYPE_SOFTWARE: - if (config < PERF_COUNT_SW_MAX) - return sw_event_names[config]; - return "unknown-software"; - - default: - break; - } - - return "unknown"; -} - -static int parse_aliases(const char *str, char *names[][MAX_ALIASES], int size) -{ - int i, j; - - for (i = 0; i < size; i++) { - for (j = 0; j < MAX_ALIASES; j++) { - if (!names[i][j]) - break; - if (strcasestr(str, names[i][j])) - return i; - } - } - - return -1; -} - -static int parse_generic_hw_symbols(const char *str, struct perf_counter_attr *attr) -{ - int cache_type = -1, cache_op = 0, cache_result = 0; - - cache_type = parse_aliases(str, hw_cache, PERF_COUNT_HW_CACHE_MAX); - /* - * No fallback - if we cannot get a clear cache type - * then bail out: - */ - if (cache_type == -1) - return -EINVAL; - - cache_op = parse_aliases(str, hw_cache_op, PERF_COUNT_HW_CACHE_OP_MAX); - /* - * Fall back to reads: - */ - if (cache_op == -1) - cache_op = PERF_COUNT_HW_CACHE_OP_READ; - - cache_result = parse_aliases(str, hw_cache_result, - PERF_COUNT_HW_CACHE_RESULT_MAX); - /* - * Fall back to accesses: - */ - if (cache_result == -1) - cache_result = PERF_COUNT_HW_CACHE_RESULT_ACCESS; - - attr->config = cache_type | (cache_op << 8) | (cache_result << 16); - attr->type = PERF_TYPE_HW_CACHE; - - return 0; -} - -/* - * Each event can have multiple symbolic names. - * Symbolic names are (almost) exactly matched. 
- */ -static int parse_event_symbols(const char *str, struct perf_counter_attr *attr) -{ - __u64 config, id; - int type; - unsigned int i; - const char *sep, *pstr; - - if (str[0] == 'r' && hex2u64(str + 1, &config) > 0) { - attr->type = PERF_TYPE_RAW; - attr->config = config; - - return 0; - } - - pstr = str; - sep = strchr(pstr, ':'); - if (sep) { - type = atoi(pstr); - pstr = sep + 1; - id = atoi(pstr); - sep = strchr(pstr, ':'); - if (sep) { - pstr = sep + 1; - if (strchr(pstr, 'k')) - attr->exclude_user = 1; - if (strchr(pstr, 'u')) - attr->exclude_kernel = 1; - } - attr->type = type; - attr->config = id; - - return 0; - } - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { - if (!strncmp(str, event_symbols[i].symbol, - strlen(event_symbols[i].symbol))) { - - attr->type = event_symbols[i].type; - attr->config = event_symbols[i].config; - - return 0; - } - } - - return parse_generic_hw_symbols(str, attr); -} - -int parse_events(const struct option *opt, const char *str, int unset) -{ - struct perf_counter_attr attr; - int ret; - - memset(&attr, 0, sizeof(attr)); -again: - if (nr_counters == MAX_COUNTERS) - return -1; - - ret = parse_event_symbols(str, &attr); - if (ret < 0) - return ret; - - attrs[nr_counters] = attr; - nr_counters++; - - str = strstr(str, ","); - if (str) { - str++; - goto again; - } - - return 0; -} - -static const char * const event_type_descriptors[] = { - "", - "Hardware event", - "Software event", - "Tracepoint event", - "Hardware cache event", -}; - -/* - * Print the help text for the event symbols: - */ -void print_events(void) -{ - struct event_symbol *syms = event_symbols; - unsigned int i, type, prev_type = -1; - - fprintf(stderr, "\n"); - fprintf(stderr, "List of pre-defined events (to be used in -e):\n"); - - for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) { - type = syms->type + 1; - if (type > ARRAY_SIZE(event_type_descriptors)) - type = 0; - - if (type != prev_type) - fprintf(stderr, "\n"); - - fprintf(stderr, " %-30s 
[%s]\n", syms->symbol, - event_type_descriptors[type]); - - prev_type = type; - } - - fprintf(stderr, "\n"); - fprintf(stderr, " %-30s [raw hardware event descriptor]\n", - "rNNN"); - fprintf(stderr, "\n"); - - exit(129); -} diff --git a/trunk/tools/perf/util/parse-events.h b/trunk/tools/perf/util/parse-events.h deleted file mode 100644 index e3d552908e60..000000000000 --- a/trunk/tools/perf/util/parse-events.h +++ /dev/null @@ -1,17 +0,0 @@ - -/* - * Parse symbolic events/counts passed in as options: - */ - -extern int nr_counters; - -extern struct perf_counter_attr attrs[MAX_COUNTERS]; - -extern char *event_name(int ctr); - -extern int parse_events(const struct option *opt, const char *str, int unset); - -#define EVENTS_HELP_MAX (128*1024) - -extern void print_events(void); - diff --git a/trunk/tools/perf/util/parse-options.c b/trunk/tools/perf/util/parse-options.c deleted file mode 100644 index b3affb1658d2..000000000000 --- a/trunk/tools/perf/util/parse-options.c +++ /dev/null @@ -1,508 +0,0 @@ -#include "util.h" -#include "parse-options.h" -#include "cache.h" - -#define OPT_SHORT 1 -#define OPT_UNSET 2 - -static int opterror(const struct option *opt, const char *reason, int flags) -{ - if (flags & OPT_SHORT) - return error("switch `%c' %s", opt->short_name, reason); - if (flags & OPT_UNSET) - return error("option `no-%s' %s", opt->long_name, reason); - return error("option `%s' %s", opt->long_name, reason); -} - -static int get_arg(struct parse_opt_ctx_t *p, const struct option *opt, - int flags, const char **arg) -{ - if (p->opt) { - *arg = p->opt; - p->opt = NULL; - } else if (p->argc == 1 && (opt->flags & PARSE_OPT_LASTARG_DEFAULT)) { - *arg = (const char *)opt->defval; - } else if (p->argc > 1) { - p->argc--; - *arg = *++p->argv; - } else - return opterror(opt, "requires a value", flags); - return 0; -} - -static int get_value(struct parse_opt_ctx_t *p, - const struct option *opt, int flags) -{ - const char *s, *arg = NULL; - const int unset = flags & 
OPT_UNSET; - - if (unset && p->opt) - return opterror(opt, "takes no value", flags); - if (unset && (opt->flags & PARSE_OPT_NONEG)) - return opterror(opt, "isn't available", flags); - - if (!(flags & OPT_SHORT) && p->opt) { - switch (opt->type) { - case OPTION_CALLBACK: - if (!(opt->flags & PARSE_OPT_NOARG)) - break; - /* FALLTHROUGH */ - case OPTION_BOOLEAN: - case OPTION_BIT: - case OPTION_SET_INT: - case OPTION_SET_PTR: - return opterror(opt, "takes no value", flags); - default: - break; - } - } - - switch (opt->type) { - case OPTION_BIT: - if (unset) - *(int *)opt->value &= ~opt->defval; - else - *(int *)opt->value |= opt->defval; - return 0; - - case OPTION_BOOLEAN: - *(int *)opt->value = unset ? 0 : *(int *)opt->value + 1; - return 0; - - case OPTION_SET_INT: - *(int *)opt->value = unset ? 0 : opt->defval; - return 0; - - case OPTION_SET_PTR: - *(void **)opt->value = unset ? NULL : (void *)opt->defval; - return 0; - - case OPTION_STRING: - if (unset) - *(const char **)opt->value = NULL; - else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) - *(const char **)opt->value = (const char *)opt->defval; - else - return get_arg(p, opt, flags, (const char **)opt->value); - return 0; - - case OPTION_CALLBACK: - if (unset) - return (*opt->callback)(opt, NULL, 1) ? (-1) : 0; - if (opt->flags & PARSE_OPT_NOARG) - return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) - return (*opt->callback)(opt, NULL, 0) ? (-1) : 0; - if (get_arg(p, opt, flags, &arg)) - return -1; - return (*opt->callback)(opt, arg, 0) ? 
(-1) : 0; - - case OPTION_INTEGER: - if (unset) { - *(int *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(int *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(int *)opt->value = strtol(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - case OPTION_LONG: - if (unset) { - *(long *)opt->value = 0; - return 0; - } - if (opt->flags & PARSE_OPT_OPTARG && !p->opt) { - *(long *)opt->value = opt->defval; - return 0; - } - if (get_arg(p, opt, flags, &arg)) - return -1; - *(long *)opt->value = strtol(arg, (char **)&s, 10); - if (*s) - return opterror(opt, "expects a numerical value", flags); - return 0; - - default: - die("should not happen, someone must be hit on the forehead"); - } -} - -static int parse_short_opt(struct parse_opt_ctx_t *p, const struct option *options) -{ - for (; options->type != OPTION_END; options++) { - if (options->short_name == *p->opt) { - p->opt = p->opt[1] ? p->opt + 1 : NULL; - return get_value(p, options, OPT_SHORT); - } - } - return -2; -} - -static int parse_long_opt(struct parse_opt_ctx_t *p, const char *arg, - const struct option *options) -{ - const char *arg_end = strchr(arg, '='); - const struct option *abbrev_option = NULL, *ambiguous_option = NULL; - int abbrev_flags = 0, ambiguous_flags = 0; - - if (!arg_end) - arg_end = arg + strlen(arg); - - for (; options->type != OPTION_END; options++) { - const char *rest; - int flags = 0; - - if (!options->long_name) - continue; - - rest = skip_prefix(arg, options->long_name); - if (options->type == OPTION_ARGUMENT) { - if (!rest) - continue; - if (*rest == '=') - return opterror(options, "takes no value", flags); - if (*rest) - continue; - p->out[p->cpidx++] = arg - 2; - return 0; - } - if (!rest) { - /* abbreviated? 
*/ - if (!strncmp(options->long_name, arg, arg_end - arg)) { -is_abbreviated: - if (abbrev_option) { - /* - * If this is abbreviated, it is - * ambiguous. So when there is no - * exact match later, we need to - * error out. - */ - ambiguous_option = abbrev_option; - ambiguous_flags = abbrev_flags; - } - if (!(flags & OPT_UNSET) && *arg_end) - p->opt = arg_end + 1; - abbrev_option = options; - abbrev_flags = flags; - continue; - } - /* negated and abbreviated very much? */ - if (!prefixcmp("no-", arg)) { - flags |= OPT_UNSET; - goto is_abbreviated; - } - /* negated? */ - if (strncmp(arg, "no-", 3)) - continue; - flags |= OPT_UNSET; - rest = skip_prefix(arg + 3, options->long_name); - /* abbreviated and negated? */ - if (!rest && !prefixcmp(options->long_name, arg + 3)) - goto is_abbreviated; - if (!rest) - continue; - } - if (*rest) { - if (*rest != '=') - continue; - p->opt = rest + 1; - } - return get_value(p, options, flags); - } - - if (ambiguous_option) - return error("Ambiguous option: %s " - "(could be --%s%s or --%s%s)", - arg, - (ambiguous_flags & OPT_UNSET) ? "no-" : "", - ambiguous_option->long_name, - (abbrev_flags & OPT_UNSET) ? 
"no-" : "", - abbrev_option->long_name); - if (abbrev_option) - return get_value(p, abbrev_option, abbrev_flags); - return -2; -} - -static void check_typos(const char *arg, const struct option *options) -{ - if (strlen(arg) < 3) - return; - - if (!prefixcmp(arg, "no-")) { - error ("did you mean `--%s` (with two dashes ?)", arg); - exit(129); - } - - for (; options->type != OPTION_END; options++) { - if (!options->long_name) - continue; - if (!prefixcmp(options->long_name, arg)) { - error ("did you mean `--%s` (with two dashes ?)", arg); - exit(129); - } - } -} - -void parse_options_start(struct parse_opt_ctx_t *ctx, - int argc, const char **argv, int flags) -{ - memset(ctx, 0, sizeof(*ctx)); - ctx->argc = argc - 1; - ctx->argv = argv + 1; - ctx->out = argv; - ctx->cpidx = ((flags & PARSE_OPT_KEEP_ARGV0) != 0); - ctx->flags = flags; - if ((flags & PARSE_OPT_KEEP_UNKNOWN) && - (flags & PARSE_OPT_STOP_AT_NON_OPTION)) - die("STOP_AT_NON_OPTION and KEEP_UNKNOWN don't go together"); -} - -static int usage_with_options_internal(const char * const *, - const struct option *, int); - -int parse_options_step(struct parse_opt_ctx_t *ctx, - const struct option *options, - const char * const usagestr[]) -{ - int internal_help = !(ctx->flags & PARSE_OPT_NO_INTERNAL_HELP); - - /* we must reset ->opt, unknown short option leave it dangling */ - ctx->opt = NULL; - - for (; ctx->argc; ctx->argc--, ctx->argv++) { - const char *arg = ctx->argv[0]; - - if (*arg != '-' || !arg[1]) { - if (ctx->flags & PARSE_OPT_STOP_AT_NON_OPTION) - break; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - continue; - } - - if (arg[1] != '-') { - ctx->opt = arg + 1; - if (internal_help && *ctx->opt == 'h') - return parse_options_usage(usagestr, options); - switch (parse_short_opt(ctx, options)) { - case -1: - return parse_options_usage(usagestr, options); - case -2: - goto unknown; - } - if (ctx->opt) - check_typos(arg + 1, options); - while (ctx->opt) { - if (internal_help && *ctx->opt == 'h') - return 
parse_options_usage(usagestr, options); - switch (parse_short_opt(ctx, options)) { - case -1: - return parse_options_usage(usagestr, options); - case -2: - /* fake a short option thing to hide the fact that we may have - * started to parse aggregated stuff - * - * This is leaky, too bad. - */ - ctx->argv[0] = strdup(ctx->opt - 1); - *(char *)ctx->argv[0] = '-'; - goto unknown; - } - } - continue; - } - - if (!arg[2]) { /* "--" */ - if (!(ctx->flags & PARSE_OPT_KEEP_DASHDASH)) { - ctx->argc--; - ctx->argv++; - } - break; - } - - if (internal_help && !strcmp(arg + 2, "help-all")) - return usage_with_options_internal(usagestr, options, 1); - if (internal_help && !strcmp(arg + 2, "help")) - return parse_options_usage(usagestr, options); - switch (parse_long_opt(ctx, arg + 2, options)) { - case -1: - return parse_options_usage(usagestr, options); - case -2: - goto unknown; - } - continue; -unknown: - if (!(ctx->flags & PARSE_OPT_KEEP_UNKNOWN)) - return PARSE_OPT_UNKNOWN; - ctx->out[ctx->cpidx++] = ctx->argv[0]; - ctx->opt = NULL; - } - return PARSE_OPT_DONE; -} - -int parse_options_end(struct parse_opt_ctx_t *ctx) -{ - memmove(ctx->out + ctx->cpidx, ctx->argv, ctx->argc * sizeof(*ctx->out)); - ctx->out[ctx->cpidx + ctx->argc] = NULL; - return ctx->cpidx + ctx->argc; -} - -int parse_options(int argc, const char **argv, const struct option *options, - const char * const usagestr[], int flags) -{ - struct parse_opt_ctx_t ctx; - - parse_options_start(&ctx, argc, argv, flags); - switch (parse_options_step(&ctx, options, usagestr)) { - case PARSE_OPT_HELP: - exit(129); - case PARSE_OPT_DONE: - break; - default: /* PARSE_OPT_UNKNOWN */ - if (ctx.argv[0][1] == '-') { - error("unknown option `%s'", ctx.argv[0] + 2); - } else { - error("unknown switch `%c'", *ctx.opt); - } - usage_with_options(usagestr, options); - } - - return parse_options_end(&ctx); -} - -#define USAGE_OPTS_WIDTH 24 -#define USAGE_GAP 2 - -int usage_with_options_internal(const char * const *usagestr, - const 
struct option *opts, int full) -{ - if (!usagestr) - return PARSE_OPT_HELP; - - fprintf(stderr, "\n usage: %s\n", *usagestr++); - while (*usagestr && **usagestr) - fprintf(stderr, " or: %s\n", *usagestr++); - while (*usagestr) { - fprintf(stderr, "%s%s\n", - **usagestr ? " " : "", - *usagestr); - usagestr++; - } - - if (opts->type != OPTION_GROUP) - fputc('\n', stderr); - - for (; opts->type != OPTION_END; opts++) { - size_t pos; - int pad; - - if (opts->type == OPTION_GROUP) { - fputc('\n', stderr); - if (*opts->help) - fprintf(stderr, "%s\n", opts->help); - continue; - } - if (!full && (opts->flags & PARSE_OPT_HIDDEN)) - continue; - - pos = fprintf(stderr, " "); - if (opts->short_name) - pos += fprintf(stderr, "-%c", opts->short_name); - if (opts->long_name && opts->short_name) - pos += fprintf(stderr, ", "); - if (opts->long_name) - pos += fprintf(stderr, "--%s", opts->long_name); - - switch (opts->type) { - case OPTION_ARGUMENT: - break; - case OPTION_INTEGER: - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=]"); - else - pos += fprintf(stderr, "[]"); - else - pos += fprintf(stderr, " "); - break; - case OPTION_CALLBACK: - if (opts->flags & PARSE_OPT_NOARG) - break; - /* FALLTHROUGH */ - case OPTION_STRING: - if (opts->argh) { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=<%s>]", opts->argh); - else - pos += fprintf(stderr, "[<%s>]", opts->argh); - else - pos += fprintf(stderr, " <%s>", opts->argh); - } else { - if (opts->flags & PARSE_OPT_OPTARG) - if (opts->long_name) - pos += fprintf(stderr, "[=...]"); - else - pos += fprintf(stderr, "[...]"); - else - pos += fprintf(stderr, " ..."); - } - break; - default: /* OPTION_{BIT,BOOLEAN,SET_INT,SET_PTR} */ - break; - } - - if (pos <= USAGE_OPTS_WIDTH) - pad = USAGE_OPTS_WIDTH - pos; - else { - fputc('\n', stderr); - pad = USAGE_OPTS_WIDTH; - } - fprintf(stderr, "%*s%s\n", pad + USAGE_GAP, "", opts->help); - } - fputc('\n', stderr); - 
- return PARSE_OPT_HELP; -} - -void usage_with_options(const char * const *usagestr, - const struct option *opts) -{ - usage_with_options_internal(usagestr, opts, 0); - exit(129); -} - -int parse_options_usage(const char * const *usagestr, - const struct option *opts) -{ - return usage_with_options_internal(usagestr, opts, 0); -} - - -int parse_opt_verbosity_cb(const struct option *opt, const char *arg, - int unset) -{ - int *target = opt->value; - - if (unset) - /* --no-quiet, --no-verbose */ - *target = 0; - else if (opt->short_name == 'v') { - if (*target >= 0) - (*target)++; - else - *target = 1; - } else { - if (*target <= 0) - (*target)--; - else - *target = -1; - } - return 0; -} diff --git a/trunk/tools/perf/util/parse-options.h b/trunk/tools/perf/util/parse-options.h deleted file mode 100644 index a1039a6ce0eb..000000000000 --- a/trunk/tools/perf/util/parse-options.h +++ /dev/null @@ -1,174 +0,0 @@ -#ifndef PARSE_OPTIONS_H -#define PARSE_OPTIONS_H - -enum parse_opt_type { - /* special types */ - OPTION_END, - OPTION_ARGUMENT, - OPTION_GROUP, - /* options with no arguments */ - OPTION_BIT, - OPTION_BOOLEAN, /* _INCR would have been a better name */ - OPTION_SET_INT, - OPTION_SET_PTR, - /* options with arguments (usually) */ - OPTION_STRING, - OPTION_INTEGER, - OPTION_LONG, - OPTION_CALLBACK, -}; - -enum parse_opt_flags { - PARSE_OPT_KEEP_DASHDASH = 1, - PARSE_OPT_STOP_AT_NON_OPTION = 2, - PARSE_OPT_KEEP_ARGV0 = 4, - PARSE_OPT_KEEP_UNKNOWN = 8, - PARSE_OPT_NO_INTERNAL_HELP = 16, -}; - -enum parse_opt_option_flags { - PARSE_OPT_OPTARG = 1, - PARSE_OPT_NOARG = 2, - PARSE_OPT_NONEG = 4, - PARSE_OPT_HIDDEN = 8, - PARSE_OPT_LASTARG_DEFAULT = 16, -}; - -struct option; -typedef int parse_opt_cb(const struct option *, const char *arg, int unset); - -/* - * `type`:: - * holds the type of the option, you must have an OPTION_END last in your - * array. - * - * `short_name`:: - * the character to use as a short option name, '\0' if none. 
- * - * `long_name`:: - * the long option name, without the leading dashes, NULL if none. - * - * `value`:: - * stores pointers to the values to be filled. - * - * `argh`:: - * token to explain the kind of argument this option wants. Keep it - * homogenous across the repository. - * - * `help`:: - * the short help associated to what the option does. - * Must never be NULL (except for OPTION_END). - * OPTION_GROUP uses this pointer to store the group header. - * - * `flags`:: - * mask of parse_opt_option_flags. - * PARSE_OPT_OPTARG: says that the argument is optionnal (not for BOOLEANs) - * PARSE_OPT_NOARG: says that this option takes no argument, for CALLBACKs - * PARSE_OPT_NONEG: says that this option cannot be negated - * PARSE_OPT_HIDDEN this option is skipped in the default usage, showed in - * the long one. - * - * `callback`:: - * pointer to the callback to use for OPTION_CALLBACK. - * - * `defval`:: - * default value to fill (*->value) with for PARSE_OPT_OPTARG. - * OPTION_{BIT,SET_INT,SET_PTR} store the {mask,integer,pointer} to put in - * the value when met. - * CALLBACKS can use it like they want. 
- */ -struct option { - enum parse_opt_type type; - int short_name; - const char *long_name; - void *value; - const char *argh; - const char *help; - - int flags; - parse_opt_cb *callback; - intptr_t defval; -}; - -#define OPT_END() { OPTION_END } -#define OPT_ARGUMENT(l, h) { OPTION_ARGUMENT, 0, (l), NULL, NULL, (h) } -#define OPT_GROUP(h) { OPTION_GROUP, 0, NULL, NULL, NULL, (h) } -#define OPT_BIT(s, l, v, h, b) { OPTION_BIT, (s), (l), (v), NULL, (h), 0, NULL, (b) } -#define OPT_BOOLEAN(s, l, v, h) { OPTION_BOOLEAN, (s), (l), (v), NULL, (h) } -#define OPT_SET_INT(s, l, v, h, i) { OPTION_SET_INT, (s), (l), (v), NULL, (h), 0, NULL, (i) } -#define OPT_SET_PTR(s, l, v, h, p) { OPTION_SET_PTR, (s), (l), (v), NULL, (h), 0, NULL, (p) } -#define OPT_INTEGER(s, l, v, h) { OPTION_INTEGER, (s), (l), (v), NULL, (h) } -#define OPT_LONG(s, l, v, h) { OPTION_LONG, (s), (l), (v), NULL, (h) } -#define OPT_STRING(s, l, v, a, h) { OPTION_STRING, (s), (l), (v), (a), (h) } -#define OPT_DATE(s, l, v, h) \ - { OPTION_CALLBACK, (s), (l), (v), "time",(h), 0, \ - parse_opt_approxidate_cb } -#define OPT_CALLBACK(s, l, v, a, h, f) \ - { OPTION_CALLBACK, (s), (l), (v), (a), (h), 0, (f) } - -/* parse_options() will filter out the processed options and leave the - * non-option argments in argv[]. - * Returns the number of arguments left in argv[]. - */ -extern int parse_options(int argc, const char **argv, - const struct option *options, - const char * const usagestr[], int flags); - -extern NORETURN void usage_with_options(const char * const *usagestr, - const struct option *options); - -/*----- incremantal advanced APIs -----*/ - -enum { - PARSE_OPT_HELP = -1, - PARSE_OPT_DONE, - PARSE_OPT_UNKNOWN, -}; - -/* - * It's okay for the caller to consume argv/argc in the usual way. - * Other fields of that structure are private to parse-options and should not - * be modified in any way. 
- */ -struct parse_opt_ctx_t { - const char **argv; - const char **out; - int argc, cpidx; - const char *opt; - int flags; -}; - -extern int parse_options_usage(const char * const *usagestr, - const struct option *opts); - -extern void parse_options_start(struct parse_opt_ctx_t *ctx, - int argc, const char **argv, int flags); - -extern int parse_options_step(struct parse_opt_ctx_t *ctx, - const struct option *options, - const char * const usagestr[]); - -extern int parse_options_end(struct parse_opt_ctx_t *ctx); - - -/*----- some often used options -----*/ -extern int parse_opt_abbrev_cb(const struct option *, const char *, int); -extern int parse_opt_approxidate_cb(const struct option *, const char *, int); -extern int parse_opt_verbosity_cb(const struct option *, const char *, int); - -#define OPT__VERBOSE(var) OPT_BOOLEAN('v', "verbose", (var), "be verbose") -#define OPT__QUIET(var) OPT_BOOLEAN('q', "quiet", (var), "be quiet") -#define OPT__VERBOSITY(var) \ - { OPTION_CALLBACK, 'v', "verbose", (var), NULL, "be more verbose", \ - PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 }, \ - { OPTION_CALLBACK, 'q', "quiet", (var), NULL, "be more quiet", \ - PARSE_OPT_NOARG, &parse_opt_verbosity_cb, 0 } -#define OPT__DRY_RUN(var) OPT_BOOLEAN('n', "dry-run", (var), "dry run") -#define OPT__ABBREV(var) \ - { OPTION_CALLBACK, 0, "abbrev", (var), "n", \ - "use digits to display SHA-1s", \ - PARSE_OPT_OPTARG, &parse_opt_abbrev_cb, 0 } - -extern const char *parse_options_fix_filename(const char *prefix, const char *file); - -#endif diff --git a/trunk/tools/perf/util/path.c b/trunk/tools/perf/util/path.c deleted file mode 100644 index a501a40dd2cb..000000000000 --- a/trunk/tools/perf/util/path.c +++ /dev/null @@ -1,353 +0,0 @@ -/* - * I'm tired of doing "vsnprintf()" etc just to open a - * file, so here's a "return static buffer with printf" - * interface for paths. - * - * It's obviously not thread-safe. Sue me. 
But it's quite - * useful for doing things like - * - * f = open(mkpath("%s/%s.perf", base, name), O_RDONLY); - * - * which is what it's designed for. - */ -#include "cache.h" - -static char bad_path[] = "/bad-path/"; -/* - * Two hacks: - */ - -static char *get_perf_dir(void) -{ - return "."; -} - -size_t strlcpy(char *dest, const char *src, size_t size) -{ - size_t ret = strlen(src); - - if (size) { - size_t len = (ret >= size) ? size - 1 : ret; - memcpy(dest, src, len); - dest[len] = '\0'; - } - return ret; -} - - -static char *get_pathname(void) -{ - static char pathname_array[4][PATH_MAX]; - static int index; - return pathname_array[3 & ++index]; -} - -static char *cleanup_path(char *path) -{ - /* Clean it up */ - if (!memcmp(path, "./", 2)) { - path += 2; - while (*path == '/') - path++; - } - return path; -} - -char *mksnpath(char *buf, size_t n, const char *fmt, ...) -{ - va_list args; - unsigned len; - - va_start(args, fmt); - len = vsnprintf(buf, n, fmt, args); - va_end(args); - if (len >= n) { - strlcpy(buf, bad_path, n); - return buf; - } - return cleanup_path(buf); -} - -static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args) -{ - const char *perf_dir = get_perf_dir(); - size_t len; - - len = strlen(perf_dir); - if (n < len + 1) - goto bad; - memcpy(buf, perf_dir, len); - if (len && !is_dir_sep(perf_dir[len-1])) - buf[len++] = '/'; - len += vsnprintf(buf + len, n - len, fmt, args); - if (len >= n) - goto bad; - return cleanup_path(buf); -bad: - strlcpy(buf, bad_path, n); - return buf; -} - -char *perf_snpath(char *buf, size_t n, const char *fmt, ...) -{ - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(buf, n, fmt, args); - va_end(args); - return buf; -} - -char *perf_pathdup(const char *fmt, ...) -{ - char path[PATH_MAX]; - va_list args; - va_start(args, fmt); - (void)perf_vsnpath(path, sizeof(path), fmt, args); - va_end(args); - return xstrdup(path); -} - -char *mkpath(const char *fmt, ...) 
-{ - va_list args; - unsigned len; - char *pathname = get_pathname(); - - va_start(args, fmt); - len = vsnprintf(pathname, PATH_MAX, fmt, args); - va_end(args); - if (len >= PATH_MAX) - return bad_path; - return cleanup_path(pathname); -} - -char *perf_path(const char *fmt, ...) -{ - const char *perf_dir = get_perf_dir(); - char *pathname = get_pathname(); - va_list args; - unsigned len; - - len = strlen(perf_dir); - if (len > PATH_MAX-100) - return bad_path; - memcpy(pathname, perf_dir, len); - if (len && perf_dir[len-1] != '/') - pathname[len++] = '/'; - va_start(args, fmt); - len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args); - va_end(args); - if (len >= PATH_MAX) - return bad_path; - return cleanup_path(pathname); -} - - -/* perf_mkstemp() - create tmp file honoring TMPDIR variable */ -int perf_mkstemp(char *path, size_t len, const char *template) -{ - const char *tmp; - size_t n; - - tmp = getenv("TMPDIR"); - if (!tmp) - tmp = "/tmp"; - n = snprintf(path, len, "%s/%s", tmp, template); - if (len <= n) { - errno = ENAMETOOLONG; - return -1; - } - return mkstemp(path); -} - - -const char *make_relative_path(const char *abs, const char *base) -{ - static char buf[PATH_MAX + 1]; - int baselen; - if (!base) - return abs; - baselen = strlen(base); - if (prefixcmp(abs, base)) - return abs; - if (abs[baselen] == '/') - baselen++; - else if (base[baselen - 1] != '/') - return abs; - strcpy(buf, abs + baselen); - return buf; -} - -/* - * It is okay if dst == src, but they should not overlap otherwise. - * - * Performs the following normalizations on src, storing the result in dst: - * - Ensures that components are separated by '/' (Windows only) - * - Squashes sequences of '/'. - * - Removes "." components. - * - Removes ".." components, and the components the precede them. - * Returns failure (non-zero) if a ".." component appears as first path - * component anytime during the normalization. Otherwise, returns success (0). 
- * - * Note that this function is purely textual. It does not follow symlinks, - * verify the existence of the path, or make any system calls. - */ -int normalize_path_copy(char *dst, const char *src) -{ - char *dst0; - - if (has_dos_drive_prefix(src)) { - *dst++ = *src++; - *dst++ = *src++; - } - dst0 = dst; - - if (is_dir_sep(*src)) { - *dst++ = '/'; - while (is_dir_sep(*src)) - src++; - } - - for (;;) { - char c = *src; - - /* - * A path component that begins with . could be - * special: - * (1) "." and ends -- ignore and terminate. - * (2) "./" -- ignore them, eat slash and continue. - * (3) ".." and ends -- strip one and terminate. - * (4) "../" -- strip one, eat slash and continue. - */ - if (c == '.') { - if (!src[1]) { - /* (1) */ - src++; - } else if (is_dir_sep(src[1])) { - /* (2) */ - src += 2; - while (is_dir_sep(*src)) - src++; - continue; - } else if (src[1] == '.') { - if (!src[2]) { - /* (3) */ - src += 2; - goto up_one; - } else if (is_dir_sep(src[2])) { - /* (4) */ - src += 3; - while (is_dir_sep(*src)) - src++; - goto up_one; - } - } - } - - /* copy up to the next '/', and eat all '/' */ - while ((c = *src++) != '\0' && !is_dir_sep(c)) - *dst++ = c; - if (is_dir_sep(c)) { - *dst++ = '/'; - while (is_dir_sep(c)) - c = *src++; - src--; - } else if (!c) - break; - continue; - - up_one: - /* - * dst0..dst is prefix portion, and dst[-1] is '/'; - * go up one level. - */ - dst--; /* go to trailing '/' */ - if (dst <= dst0) - return -1; - /* Windows: dst[-1] cannot be backslash anymore */ - while (dst0 < dst && dst[-1] != '/') - dst--; - } - *dst = '\0'; - return 0; -} - -/* - * path = Canonical absolute path - * prefix_list = Colon-separated list of absolute paths - * - * Determines, for each path in prefix_list, whether the "prefix" really - * is an ancestor directory of path. Returns the length of the longest - * ancestor directory, excluding any trailing slashes, or -1 if no prefix - * is an ancestor. 
(Note that this means 0 is returned if prefix_list is - * "/".) "/foo" is not considered an ancestor of "/foobar". Directories - * are not considered to be their own ancestors. path must be in a - * canonical form: empty components, or "." or ".." components are not - * allowed. prefix_list may be null, which is like "". - */ -int longest_ancestor_length(const char *path, const char *prefix_list) -{ - char buf[PATH_MAX+1]; - const char *ceil, *colon; - int len, max_len = -1; - - if (prefix_list == NULL || !strcmp(path, "/")) - return -1; - - for (colon = ceil = prefix_list; *colon; ceil = colon+1) { - for (colon = ceil; *colon && *colon != PATH_SEP; colon++); - len = colon - ceil; - if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil)) - continue; - strlcpy(buf, ceil, len+1); - if (normalize_path_copy(buf, buf) < 0) - continue; - len = strlen(buf); - if (len > 0 && buf[len-1] == '/') - buf[--len] = '\0'; - - if (!strncmp(path, buf, len) && - path[len] == '/' && - len > max_len) { - max_len = len; - } - } - - return max_len; -} - -/* strip arbitrary amount of directory separators at end of path */ -static inline int chomp_trailing_dir_sep(const char *path, int len) -{ - while (len && is_dir_sep(path[len - 1])) - len--; - return len; -} - -/* - * If path ends with suffix (complete path components), returns the - * part before suffix (sans trailing directory separators). - * Otherwise returns NULL. 
- */ -char *strip_path_suffix(const char *path, const char *suffix) -{ - int path_len = strlen(path), suffix_len = strlen(suffix); - - while (suffix_len) { - if (!path_len) - return NULL; - - if (is_dir_sep(path[path_len - 1])) { - if (!is_dir_sep(suffix[suffix_len - 1])) - return NULL; - path_len = chomp_trailing_dir_sep(path, path_len); - suffix_len = chomp_trailing_dir_sep(suffix, suffix_len); - } - else if (path[--path_len] != suffix[--suffix_len]) - return NULL; - } - - if (path_len && !is_dir_sep(path[path_len - 1])) - return NULL; - return xstrndup(path, chomp_trailing_dir_sep(path, path_len)); -} diff --git a/trunk/tools/perf/util/quote.c b/trunk/tools/perf/util/quote.c deleted file mode 100644 index f18c5212bc92..000000000000 --- a/trunk/tools/perf/util/quote.c +++ /dev/null @@ -1,481 +0,0 @@ -#include "cache.h" -#include "quote.h" - -int quote_path_fully = 1; - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * - * E.g. 
- * original sq_quote result - * name ==> name ==> 'name' - * a b ==> a b ==> 'a b' - * a'b ==> a'\''b ==> 'a'\''b' - * a!b ==> a'\!'b ==> 'a'\!'b' - */ -static inline int need_bs_quote(char c) -{ - return (c == '\'' || c == '!'); -} - -void sq_quote_buf(struct strbuf *dst, const char *src) -{ - char *to_free = NULL; - - if (dst->buf == src) - to_free = strbuf_detach(dst, NULL); - - strbuf_addch(dst, '\''); - while (*src) { - size_t len = strcspn(src, "'!"); - strbuf_add(dst, src, len); - src += len; - while (need_bs_quote(*src)) { - strbuf_addstr(dst, "'\\"); - strbuf_addch(dst, *src++); - strbuf_addch(dst, '\''); - } - } - strbuf_addch(dst, '\''); - free(to_free); -} - -void sq_quote_print(FILE *stream, const char *src) -{ - char c; - - fputc('\'', stream); - while ((c = *src++)) { - if (need_bs_quote(c)) { - fputs("'\\", stream); - fputc(c, stream); - fputc('\'', stream); - } else { - fputc(c, stream); - } - } - fputc('\'', stream); -} - -void sq_quote_argv(struct strbuf *dst, const char** argv, size_t maxlen) -{ - int i; - - /* Copy into destination buffer. 
*/ - strbuf_grow(dst, 255); - for (i = 0; argv[i]; ++i) { - strbuf_addch(dst, ' '); - sq_quote_buf(dst, argv[i]); - if (maxlen && dst->len > maxlen) - die("Too many or long arguments"); - } -} - -char *sq_dequote_step(char *arg, char **next) -{ - char *dst = arg; - char *src = arg; - char c; - - if (*src != '\'') - return NULL; - for (;;) { - c = *++src; - if (!c) - return NULL; - if (c != '\'') { - *dst++ = c; - continue; - } - /* We stepped out of sq */ - switch (*++src) { - case '\0': - *dst = 0; - if (next) - *next = NULL; - return arg; - case '\\': - c = *++src; - if (need_bs_quote(c) && *++src == '\'') { - *dst++ = c; - continue; - } - /* Fallthrough */ - default: - if (!next || !isspace(*src)) - return NULL; - do { - c = *++src; - } while (isspace(c)); - *dst = 0; - *next = src; - return arg; - } - } -} - -char *sq_dequote(char *arg) -{ - return sq_dequote_step(arg, NULL); -} - -int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc) -{ - char *next = arg; - - if (!*arg) - return 0; - do { - char *dequoted = sq_dequote_step(next, &next); - if (!dequoted) - return -1; - ALLOC_GROW(*argv, *nr + 1, *alloc); - (*argv)[(*nr)++] = dequoted; - } while (next); - - return 0; -} - -/* 1 means: quote as octal - * 0 means: quote as octal if (quote_path_fully) - * -1 means: never quote - * c: quote as "\\c" - */ -#define X8(x) x, x, x, x, x, x, x, x -#define X16(x) X8(x), X8(x) -static signed char const sq_lookup[256] = { - /* 0 1 2 3 4 5 6 7 */ - /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 'a', - /* 0x08 */ 'b', 't', 'n', 'v', 'f', 'r', 1, 1, - /* 0x10 */ X16(1), - /* 0x20 */ -1, -1, '"', -1, -1, -1, -1, -1, - /* 0x28 */ X16(-1), X16(-1), X16(-1), - /* 0x58 */ -1, -1, -1, -1,'\\', -1, -1, -1, - /* 0x60 */ X16(-1), X8(-1), - /* 0x78 */ -1, -1, -1, -1, -1, -1, -1, 1, - /* 0x80 */ /* set to 0 */ -}; - -static inline int sq_must_quote(char c) -{ - return sq_lookup[(unsigned char)c] + quote_path_fully > 0; -} - -/* returns the longest prefix not needing a quote up to 
maxlen if positive. - This stops at the first \0 because it's marked as a character needing an - escape */ -static size_t next_quote_pos(const char *s, ssize_t maxlen) -{ - size_t len; - if (maxlen < 0) { - for (len = 0; !sq_must_quote(s[len]); len++); - } else { - for (len = 0; len < maxlen && !sq_must_quote(s[len]); len++); - } - return len; -} - -/* - * C-style name quoting. - * - * (1) if sb and fp are both NULL, inspect the input name and counts the - * number of bytes that are needed to hold c_style quoted version of name, - * counting the double quotes around it but not terminating NUL, and - * returns it. - * However, if name does not need c_style quoting, it returns 0. - * - * (2) if sb or fp are not NULL, it emits the c_style quoted version - * of name, enclosed with double quotes if asked and needed only. - * Return value is the same as in (1). - */ -static size_t quote_c_style_counted(const char *name, ssize_t maxlen, - struct strbuf *sb, FILE *fp, int no_dq) -{ -#undef EMIT -#define EMIT(c) \ - do { \ - if (sb) strbuf_addch(sb, (c)); \ - if (fp) fputc((c), fp); \ - count++; \ - } while (0) -#define EMITBUF(s, l) \ - do { \ - int __ret; \ - if (sb) strbuf_add(sb, (s), (l)); \ - if (fp) __ret = fwrite((s), (l), 1, fp); \ - count += (l); \ - } while (0) - - size_t len, count = 0; - const char *p = name; - - for (;;) { - int ch; - - len = next_quote_pos(p, maxlen); - if (len == maxlen || !p[len]) - break; - - if (!no_dq && p == name) - EMIT('"'); - - EMITBUF(p, len); - EMIT('\\'); - p += len; - ch = (unsigned char)*p++; - if (sq_lookup[ch] >= ' ') { - EMIT(sq_lookup[ch]); - } else { - EMIT(((ch >> 6) & 03) + '0'); - EMIT(((ch >> 3) & 07) + '0'); - EMIT(((ch >> 0) & 07) + '0'); - } - } - - EMITBUF(p, len); - if (p == name) /* no ending quote needed */ - return 0; - - if (!no_dq) - EMIT('"'); - return count; -} - -size_t quote_c_style(const char *name, struct strbuf *sb, FILE *fp, int nodq) -{ - return quote_c_style_counted(name, -1, sb, fp, nodq); -} - 
-void quote_two_c_style(struct strbuf *sb, const char *prefix, const char *path, int nodq) -{ - if (quote_c_style(prefix, NULL, NULL, 0) || - quote_c_style(path, NULL, NULL, 0)) { - if (!nodq) - strbuf_addch(sb, '"'); - quote_c_style(prefix, sb, NULL, 1); - quote_c_style(path, sb, NULL, 1); - if (!nodq) - strbuf_addch(sb, '"'); - } else { - strbuf_addstr(sb, prefix); - strbuf_addstr(sb, path); - } -} - -void write_name_quoted(const char *name, FILE *fp, int terminator) -{ - if (terminator) { - quote_c_style(name, NULL, fp, 0); - } else { - fputs(name, fp); - } - fputc(terminator, fp); -} - -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *fp, int terminator) -{ - int needquote = 0; - - if (terminator) { - needquote = next_quote_pos(pfx, pfxlen) < pfxlen - || name[next_quote_pos(name, -1)]; - } - if (needquote) { - fputc('"', fp); - quote_c_style_counted(pfx, pfxlen, NULL, fp, 1); - quote_c_style(name, NULL, fp, 1); - fputc('"', fp); - } else { - int ret; - - ret = fwrite(pfx, pfxlen, 1, fp); - fputs(name, fp); - } - fputc(terminator, fp); -} - -/* quote path as relative to the given prefix */ -char *quote_path_relative(const char *in, int len, - struct strbuf *out, const char *prefix) -{ - int needquote; - - if (len < 0) - len = strlen(in); - - /* "../" prefix itself does not need quoting, but "in" might. 
*/ - needquote = next_quote_pos(in, len) < len; - strbuf_setlen(out, 0); - strbuf_grow(out, len); - - if (needquote) - strbuf_addch(out, '"'); - if (prefix) { - int off = 0; - while (prefix[off] && off < len && prefix[off] == in[off]) - if (prefix[off] == '/') { - prefix += off + 1; - in += off + 1; - len -= off + 1; - off = 0; - } else - off++; - - for (; *prefix; prefix++) - if (*prefix == '/') - strbuf_addstr(out, "../"); - } - - quote_c_style_counted (in, len, out, NULL, 1); - - if (needquote) - strbuf_addch(out, '"'); - if (!out->len) - strbuf_addstr(out, "./"); - - return out->buf; -} - -/* - * C-style name unquoting. - * - * Quoted should point at the opening double quote. - * + Returns 0 if it was able to unquote the string properly, and appends the - * result in the strbuf `sb'. - * + Returns -1 in case of error, and doesn't touch the strbuf. Though note - * that this function will allocate memory in the strbuf, so calling - * strbuf_release is mandatory whichever result unquote_c_style returns. - * - * Updates endp pointer to point at one past the ending double quote if given. 
- */ -int unquote_c_style(struct strbuf *sb, const char *quoted, const char **endp) -{ - size_t oldlen = sb->len, len; - int ch, ac; - - if (*quoted++ != '"') - return -1; - - for (;;) { - len = strcspn(quoted, "\"\\"); - strbuf_add(sb, quoted, len); - quoted += len; - - switch (*quoted++) { - case '"': - if (endp) - *endp = quoted; - return 0; - case '\\': - break; - default: - goto error; - } - - switch ((ch = *quoted++)) { - case 'a': ch = '\a'; break; - case 'b': ch = '\b'; break; - case 'f': ch = '\f'; break; - case 'n': ch = '\n'; break; - case 'r': ch = '\r'; break; - case 't': ch = '\t'; break; - case 'v': ch = '\v'; break; - - case '\\': case '"': - break; /* verbatim */ - - /* octal values with first digit over 4 overflow */ - case '0': case '1': case '2': case '3': - ac = ((ch - '0') << 6); - if ((ch = *quoted++) < '0' || '7' < ch) - goto error; - ac |= ((ch - '0') << 3); - if ((ch = *quoted++) < '0' || '7' < ch) - goto error; - ac |= (ch - '0'); - ch = ac; - break; - default: - goto error; - } - strbuf_addch(sb, ch); - } - - error: - strbuf_setlen(sb, oldlen); - return -1; -} - -/* quoting as a string literal for other languages */ - -void perl_quote_print(FILE *stream, const char *src) -{ - const char sq = '\''; - const char bq = '\\'; - char c; - - fputc(sq, stream); - while ((c = *src++)) { - if (c == sq || c == bq) - fputc(bq, stream); - fputc(c, stream); - } - fputc(sq, stream); -} - -void python_quote_print(FILE *stream, const char *src) -{ - const char sq = '\''; - const char bq = '\\'; - const char nl = '\n'; - char c; - - fputc(sq, stream); - while ((c = *src++)) { - if (c == nl) { - fputc(bq, stream); - fputc('n', stream); - continue; - } - if (c == sq || c == bq) - fputc(bq, stream); - fputc(c, stream); - } - fputc(sq, stream); -} - -void tcl_quote_print(FILE *stream, const char *src) -{ - char c; - - fputc('"', stream); - while ((c = *src++)) { - switch (c) { - case '[': case ']': - case '{': case '}': - case '$': case '\\': case '"': - 
fputc('\\', stream); - default: - fputc(c, stream); - break; - case '\f': - fputs("\\f", stream); - break; - case '\r': - fputs("\\r", stream); - break; - case '\n': - fputs("\\n", stream); - break; - case '\t': - fputs("\\t", stream); - break; - case '\v': - fputs("\\v", stream); - break; - } - } - fputc('"', stream); -} diff --git a/trunk/tools/perf/util/quote.h b/trunk/tools/perf/util/quote.h deleted file mode 100644 index 5dfad89816db..000000000000 --- a/trunk/tools/perf/util/quote.h +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef QUOTE_H -#define QUOTE_H - -#include -#include - -/* Help to copy the thing properly quoted for the shell safety. - * any single quote is replaced with '\'', any exclamation point - * is replaced with '\!', and the whole thing is enclosed in a - * single quote pair. - * - * For example, if you are passing the result to system() as an - * argument: - * - * sprintf(cmd, "foobar %s %s", sq_quote(arg0), sq_quote(arg1)) - * - * would be appropriate. If the system() is going to call ssh to - * run the command on the other side: - * - * sprintf(cmd, "git-diff-tree %s %s", sq_quote(arg0), sq_quote(arg1)); - * sprintf(rcmd, "ssh %s %s", sq_util/quote.host), sq_quote(cmd)); - * - * Note that the above examples leak memory! Remember to free result from - * sq_quote() in a real application. - * - * sq_quote_buf() writes to an existing buffer of specified size; it - * will return the number of characters that would have been written - * excluding the final null regardless of the buffer size. - */ - -extern void sq_quote_print(FILE *stream, const char *src); - -extern void sq_quote_buf(struct strbuf *, const char *src); -extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen); - -/* This unwraps what sq_quote() produces in place, but returns - * NULL if the input does not look like what sq_quote would have - * produced. 
- */ -extern char *sq_dequote(char *); - -/* - * Same as the above, but can be used to unwrap many arguments in the - * same string separated by space. "next" is changed to point to the - * next argument that should be passed as first parameter. When there - * is no more argument to be dequoted, "next" is updated to point to NULL. - */ -extern char *sq_dequote_step(char *arg, char **next); -extern int sq_dequote_to_argv(char *arg, const char ***argv, int *nr, int *alloc); - -extern int unquote_c_style(struct strbuf *, const char *quoted, const char **endp); -extern size_t quote_c_style(const char *name, struct strbuf *, FILE *, int no_dq); -extern void quote_two_c_style(struct strbuf *, const char *, const char *, int); - -extern void write_name_quoted(const char *name, FILE *, int terminator); -extern void write_name_quotedpfx(const char *pfx, size_t pfxlen, - const char *name, FILE *, int terminator); - -/* quote path as relative to the given prefix */ -char *quote_path_relative(const char *in, int len, - struct strbuf *out, const char *prefix); - -/* quoting as a string literal for other languages */ -extern void perl_quote_print(FILE *stream, const char *src); -extern void python_quote_print(FILE *stream, const char *src); -extern void tcl_quote_print(FILE *stream, const char *src); - -#endif diff --git a/trunk/tools/perf/util/rbtree.c b/trunk/tools/perf/util/rbtree.c deleted file mode 100644 index b15ba9c7cb3f..000000000000 --- a/trunk/tools/perf/util/rbtree.c +++ /dev/null @@ -1,383 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - (C) 2002 David Woodhouse - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/lib/rbtree.c -*/ - -#include "rbtree.h" - -static void __rb_rotate_left(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *right = node->rb_right; - struct rb_node *parent = rb_parent(node); - - if ((node->rb_right = right->rb_left)) - rb_set_parent(right->rb_left, node); - right->rb_left = node; - - rb_set_parent(right, parent); - - if (parent) - { - if (node == parent->rb_left) - parent->rb_left = right; - else - parent->rb_right = right; - } - else - root->rb_node = right; - rb_set_parent(node, right); -} - -static void __rb_rotate_right(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *left = node->rb_left; - struct rb_node *parent = rb_parent(node); - - if ((node->rb_left = left->rb_right)) - rb_set_parent(left->rb_right, node); - left->rb_right = node; - - rb_set_parent(left, parent); - - if (parent) - { - if (node == parent->rb_right) - parent->rb_right = left; - else - parent->rb_left = left; - } - else - root->rb_node = left; - rb_set_parent(node, left); -} - -void rb_insert_color(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *parent, *gparent; - - while ((parent = rb_parent(node)) && rb_is_red(parent)) - { - gparent = rb_parent(parent); - - if (parent == gparent->rb_left) - { - { - register struct rb_node *uncle = gparent->rb_right; - if (uncle && rb_is_red(uncle)) - { - rb_set_black(uncle); - rb_set_black(parent); - rb_set_red(gparent); - node = gparent; - continue; - } - } - - if (parent->rb_right == node) - { - register struct rb_node *tmp; - 
__rb_rotate_left(parent, root); - tmp = parent; - parent = node; - node = tmp; - } - - rb_set_black(parent); - rb_set_red(gparent); - __rb_rotate_right(gparent, root); - } else { - { - register struct rb_node *uncle = gparent->rb_left; - if (uncle && rb_is_red(uncle)) - { - rb_set_black(uncle); - rb_set_black(parent); - rb_set_red(gparent); - node = gparent; - continue; - } - } - - if (parent->rb_left == node) - { - register struct rb_node *tmp; - __rb_rotate_right(parent, root); - tmp = parent; - parent = node; - node = tmp; - } - - rb_set_black(parent); - rb_set_red(gparent); - __rb_rotate_left(gparent, root); - } - } - - rb_set_black(root->rb_node); -} - -static void __rb_erase_color(struct rb_node *node, struct rb_node *parent, - struct rb_root *root) -{ - struct rb_node *other; - - while ((!node || rb_is_black(node)) && node != root->rb_node) - { - if (parent->rb_left == node) - { - other = parent->rb_right; - if (rb_is_red(other)) - { - rb_set_black(other); - rb_set_red(parent); - __rb_rotate_left(parent, root); - other = parent->rb_right; - } - if ((!other->rb_left || rb_is_black(other->rb_left)) && - (!other->rb_right || rb_is_black(other->rb_right))) - { - rb_set_red(other); - node = parent; - parent = rb_parent(node); - } - else - { - if (!other->rb_right || rb_is_black(other->rb_right)) - { - rb_set_black(other->rb_left); - rb_set_red(other); - __rb_rotate_right(other, root); - other = parent->rb_right; - } - rb_set_color(other, rb_color(parent)); - rb_set_black(parent); - rb_set_black(other->rb_right); - __rb_rotate_left(parent, root); - node = root->rb_node; - break; - } - } - else - { - other = parent->rb_left; - if (rb_is_red(other)) - { - rb_set_black(other); - rb_set_red(parent); - __rb_rotate_right(parent, root); - other = parent->rb_left; - } - if ((!other->rb_left || rb_is_black(other->rb_left)) && - (!other->rb_right || rb_is_black(other->rb_right))) - { - rb_set_red(other); - node = parent; - parent = rb_parent(node); - } - else - { - if 
(!other->rb_left || rb_is_black(other->rb_left)) - { - rb_set_black(other->rb_right); - rb_set_red(other); - __rb_rotate_left(other, root); - other = parent->rb_left; - } - rb_set_color(other, rb_color(parent)); - rb_set_black(parent); - rb_set_black(other->rb_left); - __rb_rotate_right(parent, root); - node = root->rb_node; - break; - } - } - } - if (node) - rb_set_black(node); -} - -void rb_erase(struct rb_node *node, struct rb_root *root) -{ - struct rb_node *child, *parent; - int color; - - if (!node->rb_left) - child = node->rb_right; - else if (!node->rb_right) - child = node->rb_left; - else - { - struct rb_node *old = node, *left; - - node = node->rb_right; - while ((left = node->rb_left) != NULL) - node = left; - child = node->rb_right; - parent = rb_parent(node); - color = rb_color(node); - - if (child) - rb_set_parent(child, parent); - if (parent == old) { - parent->rb_right = child; - parent = node; - } else - parent->rb_left = child; - - node->rb_parent_color = old->rb_parent_color; - node->rb_right = old->rb_right; - node->rb_left = old->rb_left; - - if (rb_parent(old)) - { - if (rb_parent(old)->rb_left == old) - rb_parent(old)->rb_left = node; - else - rb_parent(old)->rb_right = node; - } else - root->rb_node = node; - - rb_set_parent(old->rb_left, node); - if (old->rb_right) - rb_set_parent(old->rb_right, node); - goto color; - } - - parent = rb_parent(node); - color = rb_color(node); - - if (child) - rb_set_parent(child, parent); - if (parent) - { - if (parent->rb_left == node) - parent->rb_left = child; - else - parent->rb_right = child; - } - else - root->rb_node = child; - - color: - if (color == RB_BLACK) - __rb_erase_color(child, parent, root); -} - -/* - * This function returns the first node (in sort order) of the tree. 
- */ -struct rb_node *rb_first(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_left) - n = n->rb_left; - return n; -} - -struct rb_node *rb_last(const struct rb_root *root) -{ - struct rb_node *n; - - n = root->rb_node; - if (!n) - return NULL; - while (n->rb_right) - n = n->rb_right; - return n; -} - -struct rb_node *rb_next(const struct rb_node *node) -{ - struct rb_node *parent; - - if (rb_parent(node) == node) - return NULL; - - /* If we have a right-hand child, go down and then left as far - as we can. */ - if (node->rb_right) { - node = node->rb_right; - while (node->rb_left) - node=node->rb_left; - return (struct rb_node *)node; - } - - /* No right-hand children. Everything down and left is - smaller than us, so any 'next' node must be in the general - direction of our parent. Go up the tree; any time the - ancestor is a right-hand child of its parent, keep going - up. First time it's a left-hand child of its parent, said - parent is our 'next' node. */ - while ((parent = rb_parent(node)) && node == parent->rb_right) - node = parent; - - return parent; -} - -struct rb_node *rb_prev(const struct rb_node *node) -{ - struct rb_node *parent; - - if (rb_parent(node) == node) - return NULL; - - /* If we have a left-hand child, go down and then right as far - as we can. */ - if (node->rb_left) { - node = node->rb_left; - while (node->rb_right) - node=node->rb_right; - return (struct rb_node *)node; - } - - /* No left-hand children. 
Go up till we find an ancestor which - is a right-hand child of its parent */ - while ((parent = rb_parent(node)) && node == parent->rb_left) - node = parent; - - return parent; -} - -void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root) -{ - struct rb_node *parent = rb_parent(victim); - - /* Set the surrounding nodes to point to the replacement */ - if (parent) { - if (victim == parent->rb_left) - parent->rb_left = new; - else - parent->rb_right = new; - } else { - root->rb_node = new; - } - if (victim->rb_left) - rb_set_parent(victim->rb_left, new); - if (victim->rb_right) - rb_set_parent(victim->rb_right, new); - - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; -} diff --git a/trunk/tools/perf/util/rbtree.h b/trunk/tools/perf/util/rbtree.h deleted file mode 100644 index 6bdc488a47fb..000000000000 --- a/trunk/tools/perf/util/rbtree.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/include/linux/rbtree.h - - To use rbtrees you'll have to implement your own insert and search cores. - This will avoid us to use callbacks and to drop drammatically performances. - I know it's not the cleaner way, but in C (not in C++) to get - performances and genericity... 
- - Some example of insert and search follows here. The search is a plain - normal search over an ordered tree. The insert instead must be implemented - int two steps: as first thing the code must insert the element in - order as a red leaf in the tree, then the support library function - rb_insert_color() must be called. Such function will do the - not trivial work to rebalance the rbtree if necessary. - ------------------------------------------------------------------------ -static inline struct page * rb_search_page_cache(struct inode * inode, - unsigned long offset) -{ - struct rb_node * n = inode->i_rb_page_cache.rb_node; - struct page * page; - - while (n) - { - page = rb_entry(n, struct page, rb_page_cache); - - if (offset < page->offset) - n = n->rb_left; - else if (offset > page->offset) - n = n->rb_right; - else - return page; - } - return NULL; -} - -static inline struct page * __rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct rb_node ** p = &inode->i_rb_page_cache.rb_node; - struct rb_node * parent = NULL; - struct page * page; - - while (*p) - { - parent = *p; - page = rb_entry(parent, struct page, rb_page_cache); - - if (offset < page->offset) - p = &(*p)->rb_left; - else if (offset > page->offset) - p = &(*p)->rb_right; - else - return page; - } - - rb_link_node(node, parent, p); - - return NULL; -} - -static inline struct page * rb_insert_page_cache(struct inode * inode, - unsigned long offset, - struct rb_node * node) -{ - struct page * ret; - if ((ret = __rb_insert_page_cache(inode, offset, node))) - goto out; - rb_insert_color(node, &inode->i_rb_page_cache); - out: - return ret; -} ------------------------------------------------------------------------ -*/ - -#ifndef _LINUX_RBTREE_H -#define _LINUX_RBTREE_H - -#include - -/** - * container_of - cast a member of a structure out to the containing structure - * @ptr: the pointer to the member. 
- * @type: the type of the container struct this is embedded in. - * @member: the name of the member within the struct. - * - */ -#define container_of(ptr, type, member) ({ \ - const typeof( ((type *)0)->member ) *__mptr = (ptr); \ - (type *)( (char *)__mptr - offsetof(type,member) );}) - -struct rb_node -{ - unsigned long rb_parent_color; -#define RB_RED 0 -#define RB_BLACK 1 - struct rb_node *rb_right; - struct rb_node *rb_left; -} __attribute__((aligned(sizeof(long)))); - /* The alignment might seem pointless, but allegedly CRIS needs it */ - -struct rb_root -{ - struct rb_node *rb_node; -}; - - -#define rb_parent(r) ((struct rb_node *)((r)->rb_parent_color & ~3)) -#define rb_color(r) ((r)->rb_parent_color & 1) -#define rb_is_red(r) (!rb_color(r)) -#define rb_is_black(r) rb_color(r) -#define rb_set_red(r) do { (r)->rb_parent_color &= ~1; } while (0) -#define rb_set_black(r) do { (r)->rb_parent_color |= 1; } while (0) - -static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) -{ - rb->rb_parent_color = (rb->rb_parent_color & 3) | (unsigned long)p; -} -static inline void rb_set_color(struct rb_node *rb, int color) -{ - rb->rb_parent_color = (rb->rb_parent_color & ~1) | color; -} - -#define RB_ROOT (struct rb_root) { NULL, } -#define rb_entry(ptr, type, member) container_of(ptr, type, member) - -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) -#define RB_EMPTY_NODE(node) (rb_parent(node) == node) -#define RB_CLEAR_NODE(node) (rb_set_parent(node, node)) - -extern void rb_insert_color(struct rb_node *, struct rb_root *); -extern void rb_erase(struct rb_node *, struct rb_root *); - -/* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(const struct rb_node *); -extern struct rb_node *rb_prev(const struct rb_node *); -extern struct rb_node *rb_first(const struct rb_root *); -extern struct rb_node *rb_last(const struct rb_root *); - -/* Fast replacement of a single node without remove/rebalance/add/rebalance */ 
-extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root); - -static inline void rb_link_node(struct rb_node * node, struct rb_node * parent, - struct rb_node ** rb_link) -{ - node->rb_parent_color = (unsigned long )parent; - node->rb_left = node->rb_right = NULL; - - *rb_link = node; -} - -#endif /* _LINUX_RBTREE_H */ diff --git a/trunk/tools/perf/util/run-command.c b/trunk/tools/perf/util/run-command.c deleted file mode 100644 index b2f5e854f40a..000000000000 --- a/trunk/tools/perf/util/run-command.c +++ /dev/null @@ -1,395 +0,0 @@ -#include "cache.h" -#include "run-command.h" -#include "exec_cmd.h" - -static inline void close_pair(int fd[2]) -{ - close(fd[0]); - close(fd[1]); -} - -static inline void dup_devnull(int to) -{ - int fd = open("/dev/null", O_RDWR); - dup2(fd, to); - close(fd); -} - -int start_command(struct child_process *cmd) -{ - int need_in, need_out, need_err; - int fdin[2], fdout[2], fderr[2]; - - /* - * In case of errors we must keep the promise to close FDs - * that have been passed in via ->in and ->out. 
- */ - - need_in = !cmd->no_stdin && cmd->in < 0; - if (need_in) { - if (pipe(fdin) < 0) { - if (cmd->out > 0) - close(cmd->out); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->in = fdin[1]; - } - - need_out = !cmd->no_stdout - && !cmd->stdout_to_stderr - && cmd->out < 0; - if (need_out) { - if (pipe(fdout) < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->out = fdout[0]; - } - - need_err = !cmd->no_stderr && cmd->err < 0; - if (need_err) { - if (pipe(fderr) < 0) { - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - return -ERR_RUN_COMMAND_PIPE; - } - cmd->err = fderr[0]; - } - -#ifndef __MINGW32__ - fflush(NULL); - cmd->pid = fork(); - if (!cmd->pid) { - if (cmd->no_stdin) - dup_devnull(0); - else if (need_in) { - dup2(fdin[0], 0); - close_pair(fdin); - } else if (cmd->in) { - dup2(cmd->in, 0); - close(cmd->in); - } - - if (cmd->no_stderr) - dup_devnull(2); - else if (need_err) { - dup2(fderr[1], 2); - close_pair(fderr); - } - - if (cmd->no_stdout) - dup_devnull(1); - else if (cmd->stdout_to_stderr) - dup2(2, 1); - else if (need_out) { - dup2(fdout[1], 1); - close_pair(fdout); - } else if (cmd->out > 1) { - dup2(cmd->out, 1); - close(cmd->out); - } - - if (cmd->dir && chdir(cmd->dir)) - die("exec %s: cd to %s failed (%s)", cmd->argv[0], - cmd->dir, strerror(errno)); - if (cmd->env) { - for (; *cmd->env; cmd->env++) { - if (strchr(*cmd->env, '=')) - putenv((char*)*cmd->env); - else - unsetenv(*cmd->env); - } - } - if (cmd->preexec_cb) - cmd->preexec_cb(); - if (cmd->perf_cmd) { - execv_perf_cmd(cmd->argv); - } else { - execvp(cmd->argv[0], (char *const*) cmd->argv); - } - exit(127); - } -#else - int s0 = -1, s1 = -1, s2 = -1; /* backups of stdin, stdout, stderr */ - const char **sargv = cmd->argv; - char **env = environ; - - if (cmd->no_stdin) { - s0 = dup(0); - dup_devnull(0); - } else if (need_in) 
{ - s0 = dup(0); - dup2(fdin[0], 0); - } else if (cmd->in) { - s0 = dup(0); - dup2(cmd->in, 0); - } - - if (cmd->no_stderr) { - s2 = dup(2); - dup_devnull(2); - } else if (need_err) { - s2 = dup(2); - dup2(fderr[1], 2); - } - - if (cmd->no_stdout) { - s1 = dup(1); - dup_devnull(1); - } else if (cmd->stdout_to_stderr) { - s1 = dup(1); - dup2(2, 1); - } else if (need_out) { - s1 = dup(1); - dup2(fdout[1], 1); - } else if (cmd->out > 1) { - s1 = dup(1); - dup2(cmd->out, 1); - } - - if (cmd->dir) - die("chdir in start_command() not implemented"); - if (cmd->env) { - env = copy_environ(); - for (; *cmd->env; cmd->env++) - env = env_setenv(env, *cmd->env); - } - - if (cmd->perf_cmd) { - cmd->argv = prepare_perf_cmd(cmd->argv); - } - - cmd->pid = mingw_spawnvpe(cmd->argv[0], cmd->argv, env); - - if (cmd->env) - free_environ(env); - if (cmd->perf_cmd) - free(cmd->argv); - - cmd->argv = sargv; - if (s0 >= 0) - dup2(s0, 0), close(s0); - if (s1 >= 0) - dup2(s1, 1), close(s1); - if (s2 >= 0) - dup2(s2, 2), close(s2); -#endif - - if (cmd->pid < 0) { - int err = errno; - if (need_in) - close_pair(fdin); - else if (cmd->in) - close(cmd->in); - if (need_out) - close_pair(fdout); - else if (cmd->out) - close(cmd->out); - if (need_err) - close_pair(fderr); - return err == ENOENT ? 
- -ERR_RUN_COMMAND_EXEC : - -ERR_RUN_COMMAND_FORK; - } - - if (need_in) - close(fdin[0]); - else if (cmd->in) - close(cmd->in); - - if (need_out) - close(fdout[1]); - else if (cmd->out) - close(cmd->out); - - if (need_err) - close(fderr[1]); - - return 0; -} - -static int wait_or_whine(pid_t pid) -{ - for (;;) { - int status, code; - pid_t waiting = waitpid(pid, &status, 0); - - if (waiting < 0) { - if (errno == EINTR) - continue; - error("waitpid failed (%s)", strerror(errno)); - return -ERR_RUN_COMMAND_WAITPID; - } - if (waiting != pid) - return -ERR_RUN_COMMAND_WAITPID_WRONG_PID; - if (WIFSIGNALED(status)) - return -ERR_RUN_COMMAND_WAITPID_SIGNAL; - - if (!WIFEXITED(status)) - return -ERR_RUN_COMMAND_WAITPID_NOEXIT; - code = WEXITSTATUS(status); - switch (code) { - case 127: - return -ERR_RUN_COMMAND_EXEC; - case 0: - return 0; - default: - return -code; - } - } -} - -int finish_command(struct child_process *cmd) -{ - return wait_or_whine(cmd->pid); -} - -int run_command(struct child_process *cmd) -{ - int code = start_command(cmd); - if (code) - return code; - return finish_command(cmd); -} - -static void prepare_run_command_v_opt(struct child_process *cmd, - const char **argv, - int opt) -{ - memset(cmd, 0, sizeof(*cmd)); - cmd->argv = argv; - cmd->no_stdin = opt & RUN_COMMAND_NO_STDIN ? 1 : 0; - cmd->perf_cmd = opt & RUN_PERF_CMD ? 1 : 0; - cmd->stdout_to_stderr = opt & RUN_COMMAND_STDOUT_TO_STDERR ? 
1 : 0; -} - -int run_command_v_opt(const char **argv, int opt) -{ - struct child_process cmd; - prepare_run_command_v_opt(&cmd, argv, opt); - return run_command(&cmd); -} - -int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env) -{ - struct child_process cmd; - prepare_run_command_v_opt(&cmd, argv, opt); - cmd.dir = dir; - cmd.env = env; - return run_command(&cmd); -} - -#ifdef __MINGW32__ -static __stdcall unsigned run_thread(void *data) -{ - struct async *async = data; - return async->proc(async->fd_for_proc, async->data); -} -#endif - -int start_async(struct async *async) -{ - int pipe_out[2]; - - if (pipe(pipe_out) < 0) - return error("cannot create pipe: %s", strerror(errno)); - async->out = pipe_out[0]; - -#ifndef __MINGW32__ - /* Flush stdio before fork() to avoid cloning buffers */ - fflush(NULL); - - async->pid = fork(); - if (async->pid < 0) { - error("fork (async) failed: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } - if (!async->pid) { - close(pipe_out[0]); - exit(!!async->proc(pipe_out[1], async->data)); - } - close(pipe_out[1]); -#else - async->fd_for_proc = pipe_out[1]; - async->tid = (HANDLE) _beginthreadex(NULL, 0, run_thread, async, 0, NULL); - if (!async->tid) { - error("cannot create thread: %s", strerror(errno)); - close_pair(pipe_out); - return -1; - } -#endif - return 0; -} - -int finish_async(struct async *async) -{ -#ifndef __MINGW32__ - int ret = 0; - - if (wait_or_whine(async->pid)) - ret = error("waitpid (async) failed"); -#else - DWORD ret = 0; - if (WaitForSingleObject(async->tid, INFINITE) != WAIT_OBJECT_0) - ret = error("waiting for thread failed: %lu", GetLastError()); - else if (!GetExitCodeThread(async->tid, &ret)) - ret = error("cannot get thread exit code: %lu", GetLastError()); - CloseHandle(async->tid); -#endif - return ret; -} - -int run_hook(const char *index_file, const char *name, ...) 
-{ - struct child_process hook; - const char **argv = NULL, *env[2]; - char index[PATH_MAX]; - va_list args; - int ret; - size_t i = 0, alloc = 0; - - if (access(perf_path("hooks/%s", name), X_OK) < 0) - return 0; - - va_start(args, name); - ALLOC_GROW(argv, i + 1, alloc); - argv[i++] = perf_path("hooks/%s", name); - while (argv[i-1]) { - ALLOC_GROW(argv, i + 1, alloc); - argv[i++] = va_arg(args, const char *); - } - va_end(args); - - memset(&hook, 0, sizeof(hook)); - hook.argv = argv; - hook.no_stdin = 1; - hook.stdout_to_stderr = 1; - if (index_file) { - snprintf(index, sizeof(index), "PERF_INDEX_FILE=%s", index_file); - env[0] = index; - env[1] = NULL; - hook.env = env; - } - - ret = start_command(&hook); - free(argv); - if (ret) { - warning("Could not spawn %s", argv[0]); - return ret; - } - ret = finish_command(&hook); - if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) - warning("%s exited due to uncaught signal", argv[0]); - - return ret; -} diff --git a/trunk/tools/perf/util/run-command.h b/trunk/tools/perf/util/run-command.h deleted file mode 100644 index 328289f23669..000000000000 --- a/trunk/tools/perf/util/run-command.h +++ /dev/null @@ -1,93 +0,0 @@ -#ifndef RUN_COMMAND_H -#define RUN_COMMAND_H - -enum { - ERR_RUN_COMMAND_FORK = 10000, - ERR_RUN_COMMAND_EXEC, - ERR_RUN_COMMAND_PIPE, - ERR_RUN_COMMAND_WAITPID, - ERR_RUN_COMMAND_WAITPID_WRONG_PID, - ERR_RUN_COMMAND_WAITPID_SIGNAL, - ERR_RUN_COMMAND_WAITPID_NOEXIT, -}; -#define IS_RUN_COMMAND_ERR(x) (-(x) >= ERR_RUN_COMMAND_FORK) - -struct child_process { - const char **argv; - pid_t pid; - /* - * Using .in, .out, .err: - * - Specify 0 for no redirections (child inherits stdin, stdout, - * stderr from parent). 
- * - Specify -1 to have a pipe allocated as follows: - * .in: returns the writable pipe end; parent writes to it, - * the readable pipe end becomes child's stdin - * .out, .err: returns the readable pipe end; parent reads from - * it, the writable pipe end becomes child's stdout/stderr - * The caller of start_command() must close the returned FDs - * after it has completed reading from/writing to it! - * - Specify > 0 to set a channel to a particular FD as follows: - * .in: a readable FD, becomes child's stdin - * .out: a writable FD, becomes child's stdout/stderr - * .err > 0 not supported - * The specified FD is closed by start_command(), even in case - * of errors! - */ - int in; - int out; - int err; - const char *dir; - const char *const *env; - unsigned no_stdin:1; - unsigned no_stdout:1; - unsigned no_stderr:1; - unsigned perf_cmd:1; /* if this is to be perf sub-command */ - unsigned stdout_to_stderr:1; - void (*preexec_cb)(void); -}; - -int start_command(struct child_process *); -int finish_command(struct child_process *); -int run_command(struct child_process *); - -extern int run_hook(const char *index_file, const char *name, ...); - -#define RUN_COMMAND_NO_STDIN 1 -#define RUN_PERF_CMD 2 /*If this is to be perf sub-command */ -#define RUN_COMMAND_STDOUT_TO_STDERR 4 -int run_command_v_opt(const char **argv, int opt); - -/* - * env (the environment) is to be formatted like environ: "VAR=VALUE". - * To unset an environment variable use just "VAR". - */ -int run_command_v_opt_cd_env(const char **argv, int opt, const char *dir, const char *const *env); - -/* - * The purpose of the following functions is to feed a pipe by running - * a function asynchronously and providing output that the caller reads. - * - * It is expected that no synchronization and mutual exclusion between - * the caller and the feed function is necessary so that the function - * can run in a thread without interfering with the caller. 
- */ -struct async { - /* - * proc writes to fd and closes it; - * returns 0 on success, non-zero on failure - */ - int (*proc)(int fd, void *data); - void *data; - int out; /* caller reads from here and closes it */ -#ifndef __MINGW32__ - pid_t pid; -#else - HANDLE tid; - int fd_for_proc; -#endif -}; - -int start_async(struct async *async); -int finish_async(struct async *async); - -#endif diff --git a/trunk/tools/perf/util/sigchain.c b/trunk/tools/perf/util/sigchain.c deleted file mode 100644 index 1118b99e57d3..000000000000 --- a/trunk/tools/perf/util/sigchain.c +++ /dev/null @@ -1,52 +0,0 @@ -#include "sigchain.h" -#include "cache.h" - -#define SIGCHAIN_MAX_SIGNALS 32 - -struct sigchain_signal { - sigchain_fun *old; - int n; - int alloc; -}; -static struct sigchain_signal signals[SIGCHAIN_MAX_SIGNALS]; - -static void check_signum(int sig) -{ - if (sig < 1 || sig >= SIGCHAIN_MAX_SIGNALS) - die("BUG: signal out of range: %d", sig); -} - -int sigchain_push(int sig, sigchain_fun f) -{ - struct sigchain_signal *s = signals + sig; - check_signum(sig); - - ALLOC_GROW(s->old, s->n + 1, s->alloc); - s->old[s->n] = signal(sig, f); - if (s->old[s->n] == SIG_ERR) - return -1; - s->n++; - return 0; -} - -int sigchain_pop(int sig) -{ - struct sigchain_signal *s = signals + sig; - check_signum(sig); - if (s->n < 1) - return 0; - - if (signal(sig, s->old[s->n - 1]) == SIG_ERR) - return -1; - s->n--; - return 0; -} - -void sigchain_push_common(sigchain_fun f) -{ - sigchain_push(SIGINT, f); - sigchain_push(SIGHUP, f); - sigchain_push(SIGTERM, f); - sigchain_push(SIGQUIT, f); - sigchain_push(SIGPIPE, f); -} diff --git a/trunk/tools/perf/util/sigchain.h b/trunk/tools/perf/util/sigchain.h deleted file mode 100644 index 618083bce0c6..000000000000 --- a/trunk/tools/perf/util/sigchain.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef SIGCHAIN_H -#define SIGCHAIN_H - -typedef void (*sigchain_fun)(int); - -int sigchain_push(int sig, sigchain_fun f); -int sigchain_pop(int sig); - -void 
sigchain_push_common(sigchain_fun f); - -#endif /* SIGCHAIN_H */ diff --git a/trunk/tools/perf/util/strbuf.c b/trunk/tools/perf/util/strbuf.c deleted file mode 100644 index eaba09306802..000000000000 --- a/trunk/tools/perf/util/strbuf.c +++ /dev/null @@ -1,359 +0,0 @@ -#include "cache.h" - -int prefixcmp(const char *str, const char *prefix) -{ - for (; ; str++, prefix++) - if (!*prefix) - return 0; - else if (*str != *prefix) - return (unsigned char)*prefix - (unsigned char)*str; -} - -/* - * Used as the default ->buf value, so that people can always assume - * buf is non NULL and ->buf is NUL terminated even for a freshly - * initialized strbuf. - */ -char strbuf_slopbuf[1]; - -void strbuf_init(struct strbuf *sb, size_t hint) -{ - sb->alloc = sb->len = 0; - sb->buf = strbuf_slopbuf; - if (hint) - strbuf_grow(sb, hint); -} - -void strbuf_release(struct strbuf *sb) -{ - if (sb->alloc) { - free(sb->buf); - strbuf_init(sb, 0); - } -} - -char *strbuf_detach(struct strbuf *sb, size_t *sz) -{ - char *res = sb->alloc ? 
sb->buf : NULL; - if (sz) - *sz = sb->len; - strbuf_init(sb, 0); - return res; -} - -void strbuf_attach(struct strbuf *sb, void *buf, size_t len, size_t alloc) -{ - strbuf_release(sb); - sb->buf = buf; - sb->len = len; - sb->alloc = alloc; - strbuf_grow(sb, 0); - sb->buf[sb->len] = '\0'; -} - -void strbuf_grow(struct strbuf *sb, size_t extra) -{ - if (sb->len + extra + 1 <= sb->len) - die("you want to use way too much memory"); - if (!sb->alloc) - sb->buf = NULL; - ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc); -} - -void strbuf_trim(struct strbuf *sb) -{ - char *b = sb->buf; - while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) - sb->len--; - while (sb->len > 0 && isspace(*b)) { - b++; - sb->len--; - } - memmove(sb->buf, b, sb->len); - sb->buf[sb->len] = '\0'; -} -void strbuf_rtrim(struct strbuf *sb) -{ - while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1])) - sb->len--; - sb->buf[sb->len] = '\0'; -} - -void strbuf_ltrim(struct strbuf *sb) -{ - char *b = sb->buf; - while (sb->len > 0 && isspace(*b)) { - b++; - sb->len--; - } - memmove(sb->buf, b, sb->len); - sb->buf[sb->len] = '\0'; -} - -void strbuf_tolower(struct strbuf *sb) -{ - int i; - for (i = 0; i < sb->len; i++) - sb->buf[i] = tolower(sb->buf[i]); -} - -struct strbuf **strbuf_split(const struct strbuf *sb, int delim) -{ - int alloc = 2, pos = 0; - char *n, *p; - struct strbuf **ret; - struct strbuf *t; - - ret = calloc(alloc, sizeof(struct strbuf *)); - p = n = sb->buf; - while (n < sb->buf + sb->len) { - int len; - n = memchr(n, delim, sb->len - (n - sb->buf)); - if (pos + 1 >= alloc) { - alloc = alloc * 2; - ret = realloc(ret, sizeof(struct strbuf *) * alloc); - } - if (!n) - n = sb->buf + sb->len - 1; - len = n - p + 1; - t = malloc(sizeof(struct strbuf)); - strbuf_init(t, len); - strbuf_add(t, p, len); - ret[pos] = t; - ret[++pos] = NULL; - p = ++n; - } - return ret; -} - -void strbuf_list_free(struct strbuf **sbs) -{ - struct strbuf **s = sbs; - - while (*s) { - 
strbuf_release(*s); - free(*s++); - } - free(sbs); -} - -int strbuf_cmp(const struct strbuf *a, const struct strbuf *b) -{ - int len = a->len < b->len ? a->len: b->len; - int cmp = memcmp(a->buf, b->buf, len); - if (cmp) - return cmp; - return a->len < b->len ? -1: a->len != b->len; -} - -void strbuf_splice(struct strbuf *sb, size_t pos, size_t len, - const void *data, size_t dlen) -{ - if (pos + len < pos) - die("you want to use way too much memory"); - if (pos > sb->len) - die("`pos' is too far after the end of the buffer"); - if (pos + len > sb->len) - die("`pos + len' is too far after the end of the buffer"); - - if (dlen >= len) - strbuf_grow(sb, dlen - len); - memmove(sb->buf + pos + dlen, - sb->buf + pos + len, - sb->len - pos - len); - memcpy(sb->buf + pos, data, dlen); - strbuf_setlen(sb, sb->len + dlen - len); -} - -void strbuf_insert(struct strbuf *sb, size_t pos, const void *data, size_t len) -{ - strbuf_splice(sb, pos, 0, data, len); -} - -void strbuf_remove(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_splice(sb, pos, len, NULL, 0); -} - -void strbuf_add(struct strbuf *sb, const void *data, size_t len) -{ - strbuf_grow(sb, len); - memcpy(sb->buf + sb->len, data, len); - strbuf_setlen(sb, sb->len + len); -} - -void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) -{ - strbuf_grow(sb, len); - memcpy(sb->buf + sb->len, sb->buf + pos, len); - strbuf_setlen(sb, sb->len + len); -} - -void strbuf_addf(struct strbuf *sb, const char *fmt, ...) 
-{ - int len; - va_list ap; - - if (!strbuf_avail(sb)) - strbuf_grow(sb, 64); - va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); - if (len < 0) - die("your vsnprintf is broken"); - if (len > strbuf_avail(sb)) { - strbuf_grow(sb, len); - va_start(ap, fmt); - len = vsnprintf(sb->buf + sb->len, sb->alloc - sb->len, fmt, ap); - va_end(ap); - if (len > strbuf_avail(sb)) { - die("this should not happen, your snprintf is broken"); - } - } - strbuf_setlen(sb, sb->len + len); -} - -void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, - void *context) -{ - for (;;) { - const char *percent; - size_t consumed; - - percent = strchrnul(format, '%'); - strbuf_add(sb, format, percent - format); - if (!*percent) - break; - format = percent + 1; - - consumed = fn(sb, format, context); - if (consumed) - format += consumed; - else - strbuf_addch(sb, '%'); - } -} - -size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, - void *context) -{ - struct strbuf_expand_dict_entry *e = context; - size_t len; - - for (; e->placeholder && (len = strlen(e->placeholder)); e++) { - if (!strncmp(placeholder, e->placeholder, len)) { - if (e->value) - strbuf_addstr(sb, e->value); - return len; - } - } - return 0; -} - -size_t strbuf_fread(struct strbuf *sb, size_t size, FILE *f) -{ - size_t res; - size_t oldalloc = sb->alloc; - - strbuf_grow(sb, size); - res = fread(sb->buf + sb->len, 1, size, f); - if (res > 0) - strbuf_setlen(sb, sb->len + res); - else if (res < 0 && oldalloc == 0) - strbuf_release(sb); - return res; -} - -ssize_t strbuf_read(struct strbuf *sb, int fd, size_t hint) -{ - size_t oldlen = sb->len; - size_t oldalloc = sb->alloc; - - strbuf_grow(sb, hint ? 
hint : 8192); - for (;;) { - ssize_t cnt; - - cnt = read(fd, sb->buf + sb->len, sb->alloc - sb->len - 1); - if (cnt < 0) { - if (oldalloc == 0) - strbuf_release(sb); - else - strbuf_setlen(sb, oldlen); - return -1; - } - if (!cnt) - break; - sb->len += cnt; - strbuf_grow(sb, 8192); - } - - sb->buf[sb->len] = '\0'; - return sb->len - oldlen; -} - -#define STRBUF_MAXLINK (2*PATH_MAX) - -int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint) -{ - size_t oldalloc = sb->alloc; - - if (hint < 32) - hint = 32; - - while (hint < STRBUF_MAXLINK) { - int len; - - strbuf_grow(sb, hint); - len = readlink(path, sb->buf, hint); - if (len < 0) { - if (errno != ERANGE) - break; - } else if (len < hint) { - strbuf_setlen(sb, len); - return 0; - } - - /* .. the buffer was too small - try again */ - hint *= 2; - } - if (oldalloc == 0) - strbuf_release(sb); - return -1; -} - -int strbuf_getline(struct strbuf *sb, FILE *fp, int term) -{ - int ch; - - strbuf_grow(sb, 0); - if (feof(fp)) - return EOF; - - strbuf_reset(sb); - while ((ch = fgetc(fp)) != EOF) { - if (ch == term) - break; - strbuf_grow(sb, 1); - sb->buf[sb->len++] = ch; - } - if (ch == EOF && sb->len == 0) - return EOF; - - sb->buf[sb->len] = '\0'; - return 0; -} - -int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint) -{ - int fd, len; - - fd = open(path, O_RDONLY); - if (fd < 0) - return -1; - len = strbuf_read(sb, fd, hint); - close(fd); - if (len < 0) - return -1; - - return len; -} diff --git a/trunk/tools/perf/util/strbuf.h b/trunk/tools/perf/util/strbuf.h deleted file mode 100644 index 9ee908a3ec5d..000000000000 --- a/trunk/tools/perf/util/strbuf.h +++ /dev/null @@ -1,137 +0,0 @@ -#ifndef STRBUF_H -#define STRBUF_H - -/* - * Strbuf's can be use in many ways: as a byte array, or to store arbitrary - * long, overflow safe strings. - * - * Strbufs has some invariants that are very important to keep in mind: - * - * 1. 
the ->buf member is always malloc-ed, hence strbuf's can be used to - * build complex strings/buffers whose final size isn't easily known. - * - * It is NOT legal to copy the ->buf pointer away. - * `strbuf_detach' is the operation that detachs a buffer from its shell - * while keeping the shell valid wrt its invariants. - * - * 2. the ->buf member is a byte array that has at least ->len + 1 bytes - * allocated. The extra byte is used to store a '\0', allowing the ->buf - * member to be a valid C-string. Every strbuf function ensure this - * invariant is preserved. - * - * Note that it is OK to "play" with the buffer directly if you work it - * that way: - * - * strbuf_grow(sb, SOME_SIZE); - * ... Here, the memory array starting at sb->buf, and of length - * ... strbuf_avail(sb) is all yours, and you are sure that - * ... strbuf_avail(sb) is at least SOME_SIZE. - * strbuf_setlen(sb, sb->len + SOME_OTHER_SIZE); - * - * Of course, SOME_OTHER_SIZE must be smaller or equal to strbuf_avail(sb). - * - * Doing so is safe, though if it has to be done in many places, adding the - * missing API to the strbuf module is the way to go. - * - * XXX: do _not_ assume that the area that is yours is of size ->alloc - 1 - * even if it's true in the current implementation. Alloc is somehow a - * "private" member that should not be messed with. 
- */ - -#include - -extern char strbuf_slopbuf[]; -struct strbuf { - size_t alloc; - size_t len; - char *buf; -}; - -#define STRBUF_INIT { 0, 0, strbuf_slopbuf } - -/*----- strbuf life cycle -----*/ -extern void strbuf_init(struct strbuf *, size_t); -extern void strbuf_release(struct strbuf *); -extern char *strbuf_detach(struct strbuf *, size_t *); -extern void strbuf_attach(struct strbuf *, void *, size_t, size_t); -static inline void strbuf_swap(struct strbuf *a, struct strbuf *b) { - struct strbuf tmp = *a; - *a = *b; - *b = tmp; -} - -/*----- strbuf size related -----*/ -static inline size_t strbuf_avail(const struct strbuf *sb) { - return sb->alloc ? sb->alloc - sb->len - 1 : 0; -} - -extern void strbuf_grow(struct strbuf *, size_t); - -static inline void strbuf_setlen(struct strbuf *sb, size_t len) { - if (!sb->alloc) - strbuf_grow(sb, 0); - assert(len < sb->alloc); - sb->len = len; - sb->buf[len] = '\0'; -} -#define strbuf_reset(sb) strbuf_setlen(sb, 0) - -/*----- content related -----*/ -extern void strbuf_trim(struct strbuf *); -extern void strbuf_rtrim(struct strbuf *); -extern void strbuf_ltrim(struct strbuf *); -extern int strbuf_cmp(const struct strbuf *, const struct strbuf *); -extern void strbuf_tolower(struct strbuf *); - -extern struct strbuf **strbuf_split(const struct strbuf *, int delim); -extern void strbuf_list_free(struct strbuf **); - -/*----- add data in your buffer -----*/ -static inline void strbuf_addch(struct strbuf *sb, int c) { - strbuf_grow(sb, 1); - sb->buf[sb->len++] = c; - sb->buf[sb->len] = '\0'; -} - -extern void strbuf_insert(struct strbuf *, size_t pos, const void *, size_t); -extern void strbuf_remove(struct strbuf *, size_t pos, size_t len); - -/* splice pos..pos+len with given data */ -extern void strbuf_splice(struct strbuf *, size_t pos, size_t len, - const void *, size_t); - -extern void strbuf_add(struct strbuf *, const void *, size_t); -static inline void strbuf_addstr(struct strbuf *sb, const char *s) { - 
strbuf_add(sb, s, strlen(s)); -} -static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) { - strbuf_add(sb, sb2->buf, sb2->len); -} -extern void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len); - -typedef size_t (*expand_fn_t) (struct strbuf *sb, const char *placeholder, void *context); -extern void strbuf_expand(struct strbuf *sb, const char *format, expand_fn_t fn, void *context); -struct strbuf_expand_dict_entry { - const char *placeholder; - const char *value; -}; -extern size_t strbuf_expand_dict_cb(struct strbuf *sb, const char *placeholder, void *context); - -__attribute__((format(printf,2,3))) -extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...); - -extern size_t strbuf_fread(struct strbuf *, size_t, FILE *); -/* XXX: if read fails, any partial read is undone */ -extern ssize_t strbuf_read(struct strbuf *, int fd, size_t hint); -extern int strbuf_read_file(struct strbuf *sb, const char *path, size_t hint); -extern int strbuf_readlink(struct strbuf *sb, const char *path, size_t hint); - -extern int strbuf_getline(struct strbuf *, FILE *, int); - -extern void stripspace(struct strbuf *buf, int skip_comments); -extern int launch_editor(const char *path, struct strbuf *buffer, const char *const *env); - -extern int strbuf_branchname(struct strbuf *sb, const char *name); -extern int strbuf_check_branch_ref(struct strbuf *sb, const char *name); - -#endif /* STRBUF_H */ diff --git a/trunk/tools/perf/util/string.c b/trunk/tools/perf/util/string.c deleted file mode 100644 index ec33c0c7f4e2..000000000000 --- a/trunk/tools/perf/util/string.c +++ /dev/null @@ -1,34 +0,0 @@ -#include "string.h" - -static int hex(char ch) -{ - if ((ch >= '0') && (ch <= '9')) - return ch - '0'; - if ((ch >= 'a') && (ch <= 'f')) - return ch - 'a' + 10; - if ((ch >= 'A') && (ch <= 'F')) - return ch - 'A' + 10; - return -1; -} - -/* - * While we find nice hex chars, build a long_val. - * Return number of chars processed. 
- */ -int hex2u64(const char *ptr, __u64 *long_val) -{ - const char *p = ptr; - *long_val = 0; - - while (*p) { - const int hex_val = hex(*p); - - if (hex_val < 0) - break; - - *long_val = (*long_val << 4) | hex_val; - p++; - } - - return p - ptr; -} diff --git a/trunk/tools/perf/util/string.h b/trunk/tools/perf/util/string.h deleted file mode 100644 index 72812c1c9a7a..000000000000 --- a/trunk/tools/perf/util/string.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _PERF_STRING_H_ -#define _PERF_STRING_H_ - -#include - -int hex2u64(const char *ptr, __u64 *val); - -#endif diff --git a/trunk/tools/perf/util/symbol.c b/trunk/tools/perf/util/symbol.c deleted file mode 100644 index 49a55f813712..000000000000 --- a/trunk/tools/perf/util/symbol.c +++ /dev/null @@ -1,641 +0,0 @@ -#include "util.h" -#include "../perf.h" -#include "string.h" -#include "symbol.h" - -#include -#include -#include - -const char *sym_hist_filter; - -static struct symbol *symbol__new(__u64 start, __u64 len, - const char *name, unsigned int priv_size, - __u64 obj_start, int verbose) -{ - size_t namelen = strlen(name) + 1; - struct symbol *self = calloc(1, priv_size + sizeof(*self) + namelen); - - if (!self) - return NULL; - - if (verbose >= 2) - printf("new symbol: %016Lx [%08lx]: %s, hist: %p, obj_start: %p\n", - (__u64)start, (unsigned long)len, name, self->hist, (void *)(unsigned long)obj_start); - - self->obj_start= obj_start; - self->hist = NULL; - self->hist_sum = 0; - - if (sym_hist_filter && !strcmp(name, sym_hist_filter)) - self->hist = calloc(sizeof(__u64), len); - - if (priv_size) { - memset(self, 0, priv_size); - self = ((void *)self) + priv_size; - } - self->start = start; - self->end = start + len - 1; - memcpy(self->name, name, namelen); - - return self; -} - -static void symbol__delete(struct symbol *self, unsigned int priv_size) -{ - free(((void *)self) - priv_size); -} - -static size_t symbol__fprintf(struct symbol *self, FILE *fp) -{ - return fprintf(fp, " %llx-%llx %s\n", - self->start, 
self->end, self->name); -} - -struct dso *dso__new(const char *name, unsigned int sym_priv_size) -{ - struct dso *self = malloc(sizeof(*self) + strlen(name) + 1); - - if (self != NULL) { - strcpy(self->name, name); - self->syms = RB_ROOT; - self->sym_priv_size = sym_priv_size; - self->find_symbol = dso__find_symbol; - } - - return self; -} - -static void dso__delete_symbols(struct dso *self) -{ - struct symbol *pos; - struct rb_node *next = rb_first(&self->syms); - - while (next) { - pos = rb_entry(next, struct symbol, rb_node); - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, &self->syms); - symbol__delete(pos, self->sym_priv_size); - } -} - -void dso__delete(struct dso *self) -{ - dso__delete_symbols(self); - free(self); -} - -static void dso__insert_symbol(struct dso *self, struct symbol *sym) -{ - struct rb_node **p = &self->syms.rb_node; - struct rb_node *parent = NULL; - const __u64 ip = sym->start; - struct symbol *s; - - while (*p != NULL) { - parent = *p; - s = rb_entry(parent, struct symbol, rb_node); - if (ip < s->start) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - rb_link_node(&sym->rb_node, parent, p); - rb_insert_color(&sym->rb_node, &self->syms); -} - -struct symbol *dso__find_symbol(struct dso *self, __u64 ip) -{ - struct rb_node *n; - - if (self == NULL) - return NULL; - - n = self->syms.rb_node; - - while (n) { - struct symbol *s = rb_entry(n, struct symbol, rb_node); - - if (ip < s->start) - n = n->rb_left; - else if (ip > s->end) - n = n->rb_right; - else - return s; - } - - return NULL; -} - -size_t dso__fprintf(struct dso *self, FILE *fp) -{ - size_t ret = fprintf(fp, "dso: %s\n", self->name); - - struct rb_node *nd; - for (nd = rb_first(&self->syms); nd; nd = rb_next(nd)) { - struct symbol *pos = rb_entry(nd, struct symbol, rb_node); - ret += symbol__fprintf(pos, fp); - } - - return ret; -} - -static int dso__load_kallsyms(struct dso *self, symbol_filter_t filter, int verbose) -{ - struct rb_node *nd, *prevnd; - char 
*line = NULL; - size_t n; - FILE *file = fopen("/proc/kallsyms", "r"); - - if (file == NULL) - goto out_failure; - - while (!feof(file)) { - __u64 start; - struct symbol *sym; - int line_len, len; - char symbol_type; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - goto out_failure; - - line[--line_len] = '\0'; /* \n */ - - len = hex2u64(line, &start); - - len++; - if (len + 2 >= line_len) - continue; - - symbol_type = toupper(line[len]); - /* - * We're interested only in code ('T'ext) - */ - if (symbol_type != 'T' && symbol_type != 'W') - continue; - /* - * Well fix up the end later, when we have all sorted. - */ - sym = symbol__new(start, 0xdead, line + len + 2, - self->sym_priv_size, 0, verbose); - - if (sym == NULL) - goto out_delete_line; - - if (filter && filter(self, sym)) - symbol__delete(sym, self->sym_priv_size); - else - dso__insert_symbol(self, sym); - } - - /* - * Now that we have all sorted out, just set the ->end of all - * symbols - */ - prevnd = rb_first(&self->syms); - - if (prevnd == NULL) - goto out_delete_line; - - for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { - struct symbol *prev = rb_entry(prevnd, struct symbol, rb_node), - *curr = rb_entry(nd, struct symbol, rb_node); - - prev->end = curr->start - 1; - prevnd = nd; - } - - free(line); - fclose(file); - - return 0; - -out_delete_line: - free(line); -out_failure: - return -1; -} - -static int dso__load_perf_map(struct dso *self, symbol_filter_t filter, int verbose) -{ - char *line = NULL; - size_t n; - FILE *file; - int nr_syms = 0; - - file = fopen(self->name, "r"); - if (file == NULL) - goto out_failure; - - while (!feof(file)) { - __u64 start, size; - struct symbol *sym; - int line_len, len; - - line_len = getline(&line, &n, file); - if (line_len < 0) - break; - - if (!line) - goto out_failure; - - line[--line_len] = '\0'; /* \n */ - - len = hex2u64(line, &start); - - len++; - if (len + 2 >= line_len) - continue; - - len += hex2u64(line + len, 
&size); - - len++; - if (len + 2 >= line_len) - continue; - - sym = symbol__new(start, size, line + len, - self->sym_priv_size, start, verbose); - - if (sym == NULL) - goto out_delete_line; - - if (filter && filter(self, sym)) - symbol__delete(sym, self->sym_priv_size); - else { - dso__insert_symbol(self, sym); - nr_syms++; - } - } - - free(line); - fclose(file); - - return nr_syms; - -out_delete_line: - free(line); -out_failure: - return -1; -} - -/** - * elf_symtab__for_each_symbol - iterate thru all the symbols - * - * @self: struct elf_symtab instance to iterate - * @index: uint32_t index - * @sym: GElf_Sym iterator - */ -#define elf_symtab__for_each_symbol(syms, nr_syms, index, sym) \ - for (index = 0, gelf_getsym(syms, index, &sym);\ - index < nr_syms; \ - index++, gelf_getsym(syms, index, &sym)) - -static inline uint8_t elf_sym__type(const GElf_Sym *sym) -{ - return GELF_ST_TYPE(sym->st_info); -} - -static inline int elf_sym__is_function(const GElf_Sym *sym) -{ - return elf_sym__type(sym) == STT_FUNC && - sym->st_name != 0 && - sym->st_shndx != SHN_UNDEF && - sym->st_size != 0; -} - -static inline const char *elf_sym__name(const GElf_Sym *sym, - const Elf_Data *symstrs) -{ - return symstrs->d_buf + sym->st_name; -} - -static Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep, - GElf_Shdr *shp, const char *name, - size_t *index) -{ - Elf_Scn *sec = NULL; - size_t cnt = 1; - - while ((sec = elf_nextscn(elf, sec)) != NULL) { - char *str; - - gelf_getshdr(sec, shp); - str = elf_strptr(elf, ep->e_shstrndx, shp->sh_name); - if (!strcmp(name, str)) { - if (index) - *index = cnt; - break; - } - ++cnt; - } - - return sec; -} - -#define elf_section__for_each_rel(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrel(reldata, 0, &pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrel(reldata, idx, &pos_mem)) - -#define elf_section__for_each_rela(reldata, pos, pos_mem, idx, nr_entries) \ - for (idx = 0, pos = gelf_getrela(reldata, 0, 
&pos_mem); \ - idx < nr_entries; \ - ++idx, pos = gelf_getrela(reldata, idx, &pos_mem)) - -static int dso__synthesize_plt_symbols(struct dso *self, Elf *elf, - GElf_Ehdr *ehdr, Elf_Scn *scn_dynsym, - GElf_Shdr *shdr_dynsym, - size_t dynsym_idx, int verbose) -{ - uint32_t nr_rel_entries, idx; - GElf_Sym sym; - __u64 plt_offset; - GElf_Shdr shdr_plt; - struct symbol *f; - GElf_Shdr shdr_rel_plt; - Elf_Data *reldata, *syms, *symstrs; - Elf_Scn *scn_plt_rel, *scn_symstrs; - char sympltname[1024]; - int nr = 0, symidx; - - scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, - ".rela.plt", NULL); - if (scn_plt_rel == NULL) { - scn_plt_rel = elf_section_by_name(elf, ehdr, &shdr_rel_plt, - ".rel.plt", NULL); - if (scn_plt_rel == NULL) - return 0; - } - - if (shdr_rel_plt.sh_link != dynsym_idx) - return 0; - - if (elf_section_by_name(elf, ehdr, &shdr_plt, ".plt", NULL) == NULL) - return 0; - - /* - * Fetch the relocation section to find the indexes to the GOT - * and the symbols in the .dynsym they refer to. 
- */ - reldata = elf_getdata(scn_plt_rel, NULL); - if (reldata == NULL) - return -1; - - syms = elf_getdata(scn_dynsym, NULL); - if (syms == NULL) - return -1; - - scn_symstrs = elf_getscn(elf, shdr_dynsym->sh_link); - if (scn_symstrs == NULL) - return -1; - - symstrs = elf_getdata(scn_symstrs, NULL); - if (symstrs == NULL) - return -1; - - nr_rel_entries = shdr_rel_plt.sh_size / shdr_rel_plt.sh_entsize; - plt_offset = shdr_plt.sh_offset; - - if (shdr_rel_plt.sh_type == SHT_RELA) { - GElf_Rela pos_mem, *pos; - - elf_section__for_each_rela(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, verbose); - if (!f) - return -1; - - dso__insert_symbol(self, f); - ++nr; - } - } else if (shdr_rel_plt.sh_type == SHT_REL) { - GElf_Rel pos_mem, *pos; - elf_section__for_each_rel(reldata, pos, pos_mem, idx, - nr_rel_entries) { - symidx = GELF_R_SYM(pos->r_info); - plt_offset += shdr_plt.sh_entsize; - gelf_getsym(syms, symidx, &sym); - snprintf(sympltname, sizeof(sympltname), - "%s@plt", elf_sym__name(&sym, symstrs)); - - f = symbol__new(plt_offset, shdr_plt.sh_entsize, - sympltname, self->sym_priv_size, 0, verbose); - if (!f) - return -1; - - dso__insert_symbol(self, f); - ++nr; - } - } else { - /* - * TODO: There are still one more shdr_rel_plt.sh_type - * I have to investigate, but probably should be ignored. 
- */ - } - - return nr; -} - -static int dso__load_sym(struct dso *self, int fd, const char *name, - symbol_filter_t filter, int verbose) -{ - Elf_Data *symstrs; - uint32_t nr_syms; - int err = -1; - uint32_t index; - GElf_Ehdr ehdr; - GElf_Shdr shdr; - Elf_Data *syms; - GElf_Sym sym; - Elf_Scn *sec, *sec_dynsym; - Elf *elf; - size_t dynsym_idx; - int nr = 0; - - elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); - if (elf == NULL) { - if (verbose) - fprintf(stderr, "%s: cannot read %s ELF file.\n", - __func__, name); - goto out_close; - } - - if (gelf_getehdr(elf, &ehdr) == NULL) { - if (verbose) - fprintf(stderr, "%s: cannot get elf header.\n", __func__); - goto out_elf_end; - } - - /* - * We need to check if we have a .dynsym, so that we can handle the - * .plt, synthesizing its symbols, that aren't on the symtabs (be it - * .dynsym or .symtab) - */ - sec_dynsym = elf_section_by_name(elf, &ehdr, &shdr, - ".dynsym", &dynsym_idx); - if (sec_dynsym != NULL) { - nr = dso__synthesize_plt_symbols(self, elf, &ehdr, - sec_dynsym, &shdr, - dynsym_idx, verbose); - if (nr < 0) - goto out_elf_end; - } - - /* - * But if we have a full .symtab (that is a superset of .dynsym) we - * should add the symbols not in the .dynsyn - */ - sec = elf_section_by_name(elf, &ehdr, &shdr, ".symtab", NULL); - if (sec == NULL) { - if (sec_dynsym == NULL) - goto out_elf_end; - - sec = sec_dynsym; - gelf_getshdr(sec, &shdr); - } - - syms = elf_getdata(sec, NULL); - if (syms == NULL) - goto out_elf_end; - - sec = elf_getscn(elf, shdr.sh_link); - if (sec == NULL) - goto out_elf_end; - - symstrs = elf_getdata(sec, NULL); - if (symstrs == NULL) - goto out_elf_end; - - nr_syms = shdr.sh_size / shdr.sh_entsize; - - memset(&sym, 0, sizeof(sym)); - - elf_symtab__for_each_symbol(syms, nr_syms, index, sym) { - struct symbol *f; - __u64 obj_start; - - if (!elf_sym__is_function(&sym)) - continue; - - sec = elf_getscn(elf, sym.st_shndx); - if (!sec) - goto out_elf_end; - - gelf_getshdr(sec, &shdr); - obj_start = 
sym.st_value; - - sym.st_value -= shdr.sh_addr - shdr.sh_offset; - - f = symbol__new(sym.st_value, sym.st_size, - elf_sym__name(&sym, symstrs), - self->sym_priv_size, obj_start, verbose); - if (!f) - goto out_elf_end; - - if (filter && filter(self, f)) - symbol__delete(f, self->sym_priv_size); - else { - dso__insert_symbol(self, f); - nr++; - } - } - - err = nr; -out_elf_end: - elf_end(elf); -out_close: - return err; -} - -int dso__load(struct dso *self, symbol_filter_t filter, int verbose) -{ - int size = strlen(self->name) + sizeof("/usr/lib/debug%s.debug"); - char *name = malloc(size); - int variant = 0; - int ret = -1; - int fd; - - if (!name) - return -1; - - if (strncmp(self->name, "/tmp/perf-", 10) == 0) - return dso__load_perf_map(self, filter, verbose); - -more: - do { - switch (variant) { - case 0: /* Fedora */ - snprintf(name, size, "/usr/lib/debug%s.debug", self->name); - break; - case 1: /* Ubuntu */ - snprintf(name, size, "/usr/lib/debug%s", self->name); - break; - case 2: /* Sane people */ - snprintf(name, size, "%s", self->name); - break; - - default: - goto out; - } - variant++; - - fd = open(name, O_RDONLY); - } while (fd < 0); - - ret = dso__load_sym(self, fd, name, filter, verbose); - close(fd); - - /* - * Some people seem to have debuginfo files _WITHOUT_ debug info!?!? 
- */ - if (!ret) - goto more; - -out: - free(name); - return ret; -} - -static int dso__load_vmlinux(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose) -{ - int err, fd = open(vmlinux, O_RDONLY); - - if (fd < 0) - return -1; - - err = dso__load_sym(self, fd, vmlinux, filter, verbose); - close(fd); - - return err; -} - -int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose) -{ - int err = -1; - - if (vmlinux) - err = dso__load_vmlinux(self, vmlinux, filter, verbose); - - if (err) - err = dso__load_kallsyms(self, filter, verbose); - - return err; -} - -void symbol__init(void) -{ - elf_version(EV_CURRENT); -} diff --git a/trunk/tools/perf/util/symbol.h b/trunk/tools/perf/util/symbol.h deleted file mode 100644 index 0d1292bd8270..000000000000 --- a/trunk/tools/perf/util/symbol.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _PERF_SYMBOL_ -#define _PERF_SYMBOL_ 1 - -#include -#include "list.h" -#include "rbtree.h" - -struct symbol { - struct rb_node rb_node; - __u64 start; - __u64 end; - __u64 obj_start; - __u64 hist_sum; - __u64 *hist; - char name[0]; -}; - -struct dso { - struct list_head node; - struct rb_root syms; - unsigned int sym_priv_size; - struct symbol *(*find_symbol)(struct dso *, __u64 ip); - char name[0]; -}; - -const char *sym_hist_filter; - -typedef int (*symbol_filter_t)(struct dso *self, struct symbol *sym); - -struct dso *dso__new(const char *name, unsigned int sym_priv_size); -void dso__delete(struct dso *self); - -static inline void *dso__sym_priv(struct dso *self, struct symbol *sym) -{ - return ((void *)sym) - self->sym_priv_size; -} - -struct symbol *dso__find_symbol(struct dso *self, __u64 ip); - -int dso__load_kernel(struct dso *self, const char *vmlinux, - symbol_filter_t filter, int verbose); -int dso__load(struct dso *self, symbol_filter_t filter, int verbose); - -size_t dso__fprintf(struct dso *self, FILE *fp); - -void symbol__init(void); -#endif /* _PERF_SYMBOL_ */ diff 
--git a/trunk/tools/perf/util/usage.c b/trunk/tools/perf/util/usage.c deleted file mode 100644 index e16bf9a707e8..000000000000 --- a/trunk/tools/perf/util/usage.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * GIT - The information manager from hell - * - * Copyright (C) Linus Torvalds, 2005 - */ -#include "util.h" - -static void report(const char *prefix, const char *err, va_list params) -{ - char msg[1024]; - vsnprintf(msg, sizeof(msg), err, params); - fprintf(stderr, " %s%s\n", prefix, msg); -} - -static NORETURN void usage_builtin(const char *err) -{ - fprintf(stderr, "\n Usage: %s\n", err); - exit(129); -} - -static NORETURN void die_builtin(const char *err, va_list params) -{ - report(" Fatal: ", err, params); - exit(128); -} - -static void error_builtin(const char *err, va_list params) -{ - report(" Error: ", err, params); -} - -static void warn_builtin(const char *warn, va_list params) -{ - report(" Warning: ", warn, params); -} - -/* If we are in a dlopen()ed .so write to a global variable would segfault - * (ugh), so keep things static. */ -static void (*usage_routine)(const char *err) NORETURN = usage_builtin; -static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin; -static void (*error_routine)(const char *err, va_list params) = error_builtin; -static void (*warn_routine)(const char *err, va_list params) = warn_builtin; - -void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN) -{ - die_routine = routine; -} - -void usage(const char *err) -{ - usage_routine(err); -} - -void die(const char *err, ...) -{ - va_list params; - - va_start(params, err); - die_routine(err, params); - va_end(params); -} - -int error(const char *err, ...) -{ - va_list params; - - va_start(params, err); - error_routine(err, params); - va_end(params); - return -1; -} - -void warning(const char *warn, ...) 
-{ - va_list params; - - va_start(params, warn); - warn_routine(warn, params); - va_end(params); -} diff --git a/trunk/tools/perf/util/util.h b/trunk/tools/perf/util/util.h deleted file mode 100644 index 76590a16c271..000000000000 --- a/trunk/tools/perf/util/util.h +++ /dev/null @@ -1,410 +0,0 @@ -#ifndef GIT_COMPAT_UTIL_H -#define GIT_COMPAT_UTIL_H - -#define _FILE_OFFSET_BITS 64 - -#ifndef FLEX_ARRAY -/* - * See if our compiler is known to support flexible array members. - */ -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) -# define FLEX_ARRAY /* empty */ -#elif defined(__GNUC__) -# if (__GNUC__ >= 3) -# define FLEX_ARRAY /* empty */ -# else -# define FLEX_ARRAY 0 /* older GNU extension */ -# endif -#endif - -/* - * Otherwise, default to safer but a bit wasteful traditional style - */ -#ifndef FLEX_ARRAY -# define FLEX_ARRAY 1 -#endif -#endif - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0])) - -#ifdef __GNUC__ -#define TYPEOF(x) (__typeof__(x)) -#else -#define TYPEOF(x) -#endif - -#define MSB(x, bits) ((x) & TYPEOF(x)(~0ULL << (sizeof(x) * 8 - (bits)))) -#define HAS_MULTI_BITS(i) ((i) & ((i) - 1)) /* checks if an integer has more than 1 bit set */ - -/* Approximation of the length of the decimal representation of this type. 
*/ -#define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) - -#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) -#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ -#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ -#endif -#define _ALL_SOURCE 1 -#define _GNU_SOURCE 1 -#define _BSD_SOURCE 1 - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifndef __MINGW32__ -#include -#include -#include -#include -#ifndef NO_SYS_SELECT_H -#include -#endif -#include -#include -#include -#include -#include -#include -#if defined(__CYGWIN__) -#undef _XOPEN_SOURCE -#include -#define _XOPEN_SOURCE 600 -#include "compat/cygwin.h" -#else -#undef _ALL_SOURCE /* AIX 5.3L defines a struct list with _ALL_SOURCE. */ -#include -#define _ALL_SOURCE 1 -#endif -#else /* __MINGW32__ */ -/* pull in Windows compatibility stuff */ -#include "compat/mingw.h" -#endif /* __MINGW32__ */ - -#ifndef NO_ICONV -#include -#endif - -#ifndef NO_OPENSSL -#include -#include -#endif - -/* On most systems would have given us this, but - * not on some systems (e.g. GNU/Hurd). 
- */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif - -#ifndef PRIuMAX -#define PRIuMAX "llu" -#endif - -#ifndef PRIu32 -#define PRIu32 "u" -#endif - -#ifndef PRIx32 -#define PRIx32 "x" -#endif - -#ifndef PATH_SEP -#define PATH_SEP ':' -#endif - -#ifndef STRIP_EXTENSION -#define STRIP_EXTENSION "" -#endif - -#ifndef has_dos_drive_prefix -#define has_dos_drive_prefix(path) 0 -#endif - -#ifndef is_dir_sep -#define is_dir_sep(c) ((c) == '/') -#endif - -#ifdef __GNUC__ -#define NORETURN __attribute__((__noreturn__)) -#else -#define NORETURN -#ifndef __attribute__ -#define __attribute__(x) -#endif -#endif - -/* General helper functions */ -extern void usage(const char *err) NORETURN; -extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2))); -extern int error(const char *err, ...) __attribute__((format (printf, 1, 2))); -extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2))); - -extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN); - -extern int prefixcmp(const char *str, const char *prefix); -extern time_t tm_to_time_t(const struct tm *tm); - -static inline const char *skip_prefix(const char *str, const char *prefix) -{ - size_t len = strlen(prefix); - return strncmp(str, prefix, len) ? 
NULL : str + len; -} - -#if defined(NO_MMAP) || defined(USE_WIN32_MMAP) - -#ifndef PROT_READ -#define PROT_READ 1 -#define PROT_WRITE 2 -#define MAP_PRIVATE 1 -#define MAP_FAILED ((void*)-1) -#endif - -#define mmap git_mmap -#define munmap git_munmap -extern void *git_mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -extern int git_munmap(void *start, size_t length); - -#else /* NO_MMAP || USE_WIN32_MMAP */ - -#include - -#endif /* NO_MMAP || USE_WIN32_MMAP */ - -#ifdef NO_MMAP - -/* This value must be multiple of (pagesize * 2) */ -#define DEFAULT_PACKED_GIT_WINDOW_SIZE (1 * 1024 * 1024) - -#else /* NO_MMAP */ - -/* This value must be multiple of (pagesize * 2) */ -#define DEFAULT_PACKED_GIT_WINDOW_SIZE \ - (sizeof(void*) >= 8 \ - ? 1 * 1024 * 1024 * 1024 \ - : 32 * 1024 * 1024) - -#endif /* NO_MMAP */ - -#ifdef NO_ST_BLOCKS_IN_STRUCT_STAT -#define on_disk_bytes(st) ((st).st_size) -#else -#define on_disk_bytes(st) ((st).st_blocks * 512) -#endif - -#define DEFAULT_PACKED_GIT_LIMIT \ - ((1024L * 1024L) * (sizeof(void*) >= 8 ? 8192 : 256)) - -#ifdef NO_PREAD -#define pread git_pread -extern ssize_t git_pread(int fd, void *buf, size_t count, off_t offset); -#endif -/* - * Forward decl that will remind us if its twin in cache.h changes. - * This function is used in compat/pread.c. But we can't include - * cache.h there. 
- */ -extern ssize_t read_in_full(int fd, void *buf, size_t count); - -#ifdef NO_SETENV -#define setenv gitsetenv -extern int gitsetenv(const char *, const char *, int); -#endif - -#ifdef NO_MKDTEMP -#define mkdtemp gitmkdtemp -extern char *gitmkdtemp(char *); -#endif - -#ifdef NO_UNSETENV -#define unsetenv gitunsetenv -extern void gitunsetenv(const char *); -#endif - -#ifdef NO_STRCASESTR -#define strcasestr gitstrcasestr -extern char *gitstrcasestr(const char *haystack, const char *needle); -#endif - -#ifdef NO_STRLCPY -#define strlcpy gitstrlcpy -extern size_t gitstrlcpy(char *, const char *, size_t); -#endif - -#ifdef NO_STRTOUMAX -#define strtoumax gitstrtoumax -extern uintmax_t gitstrtoumax(const char *, char **, int); -#endif - -#ifdef NO_HSTRERROR -#define hstrerror githstrerror -extern const char *githstrerror(int herror); -#endif - -#ifdef NO_MEMMEM -#define memmem gitmemmem -void *gitmemmem(const void *haystack, size_t haystacklen, - const void *needle, size_t needlelen); -#endif - -#ifdef FREAD_READS_DIRECTORIES -#ifdef fopen -#undef fopen -#endif -#define fopen(a,b) git_fopen(a,b) -extern FILE *git_fopen(const char*, const char*); -#endif - -#ifdef SNPRINTF_RETURNS_BOGUS -#define snprintf git_snprintf -extern int git_snprintf(char *str, size_t maxsize, - const char *format, ...); -#define vsnprintf git_vsnprintf -extern int git_vsnprintf(char *str, size_t maxsize, - const char *format, va_list ap); -#endif - -#ifdef __GLIBC_PREREQ -#if __GLIBC_PREREQ(2, 1) -#define HAVE_STRCHRNUL -#endif -#endif - -#ifndef HAVE_STRCHRNUL -#define strchrnul gitstrchrnul -static inline char *gitstrchrnul(const char *s, int c) -{ - while (*s && *s != c) - s++; - return (char *)s; -} -#endif - -/* - * Wrappers: - */ -extern char *xstrdup(const char *str); -extern void *xmalloc(size_t size); -extern void *xmemdupz(const void *data, size_t len); -extern char *xstrndup(const char *str, size_t len); -extern void *xrealloc(void *ptr, size_t size); -extern void *xcalloc(size_t 
nmemb, size_t size); -extern void *xmmap(void *start, size_t length, int prot, int flags, int fd, off_t offset); -extern ssize_t xread(int fd, void *buf, size_t len); -extern ssize_t xwrite(int fd, const void *buf, size_t len); -extern int xdup(int fd); -extern FILE *xfdopen(int fd, const char *mode); -extern int xmkstemp(char *template); - -static inline size_t xsize_t(off_t len) -{ - return (size_t)len; -} - -static inline int has_extension(const char *filename, const char *ext) -{ - size_t len = strlen(filename); - size_t extlen = strlen(ext); - return len > extlen && !memcmp(filename + len - extlen, ext, extlen); -} - -/* Sane ctype - no locale, and works with signed chars */ -#undef isascii -#undef isspace -#undef isdigit -#undef isalpha -#undef isalnum -#undef tolower -#undef toupper -extern unsigned char sane_ctype[256]; -#define GIT_SPACE 0x01 -#define GIT_DIGIT 0x02 -#define GIT_ALPHA 0x04 -#define GIT_GLOB_SPECIAL 0x08 -#define GIT_REGEX_SPECIAL 0x10 -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0) -#define isascii(x) (((x) & ~0x7f) == 0) -#define isspace(x) sane_istest(x,GIT_SPACE) -#define isdigit(x) sane_istest(x,GIT_DIGIT) -#define isalpha(x) sane_istest(x,GIT_ALPHA) -#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT) -#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL) -#define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL) -#define tolower(x) sane_case((unsigned char)(x), 0x20) -#define toupper(x) sane_case((unsigned char)(x), 0) - -static inline int sane_case(int x, int high) -{ - if (sane_istest(x, GIT_ALPHA)) - x = (x & ~0x20) | high; - return x; -} - -static inline int strtoul_ui(char const *s, int base, unsigned int *result) -{ - unsigned long ul; - char *p; - - errno = 0; - ul = strtoul(s, &p, base); - if (errno || *p || p == s || (unsigned int) ul != ul) - return -1; - *result = ul; - return 0; -} - -static inline int strtol_i(char const *s, int base, int *result) -{ - long ul; 
- char *p; - - errno = 0; - ul = strtol(s, &p, base); - if (errno || *p || p == s || (int) ul != ul) - return -1; - *result = ul; - return 0; -} - -#ifdef INTERNAL_QSORT -void git_qsort(void *base, size_t nmemb, size_t size, - int(*compar)(const void *, const void *)); -#define qsort git_qsort -#endif - -#ifndef DIR_HAS_BSD_GROUP_SEMANTICS -# define FORCE_DIR_SET_GID S_ISGID -#else -# define FORCE_DIR_SET_GID 0 -#endif - -#ifdef NO_NSEC -#undef USE_NSEC -#define ST_CTIME_NSEC(st) 0 -#define ST_MTIME_NSEC(st) 0 -#else -#ifdef USE_ST_TIMESPEC -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctimespec.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtimespec.tv_nsec)) -#else -#define ST_CTIME_NSEC(st) ((unsigned int)((st).st_ctim.tv_nsec)) -#define ST_MTIME_NSEC(st) ((unsigned int)((st).st_mtim.tv_nsec)) -#endif -#endif - -#endif diff --git a/trunk/tools/perf/util/wrapper.c b/trunk/tools/perf/util/wrapper.c deleted file mode 100644 index 6350d65f6d9e..000000000000 --- a/trunk/tools/perf/util/wrapper.c +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Various trivial helper wrappers around standard functions - */ -#include "cache.h" - -/* - * There's no pack memory to release - but stay close to the Git - * version so wrap this away: - */ -static inline void release_pack_memory(size_t size, int flag) -{ -} - -char *xstrdup(const char *str) -{ - char *ret = strdup(str); - if (!ret) { - release_pack_memory(strlen(str) + 1, -1); - ret = strdup(str); - if (!ret) - die("Out of memory, strdup failed"); - } - return ret; -} - -void *xmalloc(size_t size) -{ - void *ret = malloc(size); - if (!ret && !size) - ret = malloc(1); - if (!ret) { - release_pack_memory(size, -1); - ret = malloc(size); - if (!ret && !size) - ret = malloc(1); - if (!ret) - die("Out of memory, malloc failed"); - } -#ifdef XMALLOC_POISON - memset(ret, 0xA5, size); -#endif - return ret; -} - -/* - * xmemdupz() allocates (len + 1) bytes of memory, duplicates "len" bytes of - * "data" to the allocated 
memory, zero terminates the allocated memory, - * and returns a pointer to the allocated memory. If the allocation fails, - * the program dies. - */ -void *xmemdupz(const void *data, size_t len) -{ - char *p = xmalloc(len + 1); - memcpy(p, data, len); - p[len] = '\0'; - return p; -} - -char *xstrndup(const char *str, size_t len) -{ - char *p = memchr(str, '\0', len); - return xmemdupz(str, p ? p - str : len); -} - -void *xrealloc(void *ptr, size_t size) -{ - void *ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) { - release_pack_memory(size, -1); - ret = realloc(ptr, size); - if (!ret && !size) - ret = realloc(ptr, 1); - if (!ret) - die("Out of memory, realloc failed"); - } - return ret; -} - -void *xcalloc(size_t nmemb, size_t size) -{ - void *ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) { - release_pack_memory(nmemb * size, -1); - ret = calloc(nmemb, size); - if (!ret && (!nmemb || !size)) - ret = calloc(1, 1); - if (!ret) - die("Out of memory, calloc failed"); - } - return ret; -} - -void *xmmap(void *start, size_t length, - int prot, int flags, int fd, off_t offset) -{ - void *ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) { - if (!length) - return NULL; - release_pack_memory(length, fd); - ret = mmap(start, length, prot, flags, fd, offset); - if (ret == MAP_FAILED) - die("Out of memory? mmap failed: %s", strerror(errno)); - } - return ret; -} - -/* - * xread() is the same a read(), but it automatically restarts read() - * operations with a recoverable error (EAGAIN and EINTR). xread() - * DOES NOT GUARANTEE that "len" bytes is read even if the data is available. 
- */ -ssize_t xread(int fd, void *buf, size_t len) -{ - ssize_t nr; - while (1) { - nr = read(fd, buf, len); - if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) - continue; - return nr; - } -} - -/* - * xwrite() is the same a write(), but it automatically restarts write() - * operations with a recoverable error (EAGAIN and EINTR). xwrite() DOES NOT - * GUARANTEE that "len" bytes is written even if the operation is successful. - */ -ssize_t xwrite(int fd, const void *buf, size_t len) -{ - ssize_t nr; - while (1) { - nr = write(fd, buf, len); - if ((nr < 0) && (errno == EAGAIN || errno == EINTR)) - continue; - return nr; - } -} - -ssize_t read_in_full(int fd, void *buf, size_t count) -{ - char *p = buf; - ssize_t total = 0; - - while (count > 0) { - ssize_t loaded = xread(fd, p, count); - if (loaded <= 0) - return total ? total : loaded; - count -= loaded; - p += loaded; - total += loaded; - } - - return total; -} - -ssize_t write_in_full(int fd, const void *buf, size_t count) -{ - const char *p = buf; - ssize_t total = 0; - - while (count > 0) { - ssize_t written = xwrite(fd, p, count); - if (written < 0) - return -1; - if (!written) { - errno = ENOSPC; - return -1; - } - count -= written; - p += written; - total += written; - } - - return total; -} - -int xdup(int fd) -{ - int ret = dup(fd); - if (ret < 0) - die("dup failed: %s", strerror(errno)); - return ret; -} - -FILE *xfdopen(int fd, const char *mode) -{ - FILE *stream = fdopen(fd, mode); - if (stream == NULL) - die("Out of memory? fdopen failed: %s", strerror(errno)); - return stream; -} - -int xmkstemp(char *template) -{ - int fd; - - fd = mkstemp(template); - if (fd < 0) - die("Unable to create temporary file: %s", strerror(errno)); - return fd; -}