Commit 78f13e9
perf_counter: allow for data addresses to be recorded
Paul suggested we allow for data addresses to be recorded along with
the traditional IPs, as POWER can provide these.

For now, only the software pagefault events provide data addresses,
but in the future POWER might provide them for some hardware events
as well.

x86 doesn't seem capable of providing this at the moment.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <20090408130409.394816925@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Peter Zijlstra authored and Ingo Molnar committed Apr 8, 2009
1 parent 4d85545 commit 78f13e9
Showing 6 changed files with 49 additions and 31 deletions.
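
The change is ABI-visible: a counter opts into the new field by setting the new PERF_RECORD_ADDR bit in its record_type before the counter is opened. A minimal sketch of that usage follows; the hw_event structure below is a simplified stand-in for struct perf_counter_hw_event, and its field names are assumptions for illustration, not copied from the kernel header.

/*
 * Sketch: opting into data-address samples. The PERF_RECORD_* bits
 * match the enum added by this commit; the struct is a stand-in.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PERF_RECORD_IP   (1U << 0)
#define PERF_RECORD_TID  (1U << 1)
#define PERF_RECORD_TIME (1U << 2)
#define PERF_RECORD_ADDR (1U << 3)	/* new in this commit */

struct hw_event_sketch {
	uint64_t config;	/* e.g. the page-fault software event */
	uint64_t irq_period;	/* sample once every N events */
	uint64_t record_type;	/* PERF_RECORD_* bits for each sample */
};

int main(void)
{
	struct hw_event_sketch ev;

	memset(&ev, 0, sizeof(ev));
	ev.irq_period = 1000;
	/*
	 * Ask for the IP as before, plus the new data address. Only the
	 * software pagefault events fill the address in so far; other
	 * events record addr == 0.
	 */
	ev.record_type = PERF_RECORD_IP | PERF_RECORD_ADDR;
	printf("record_type = %#llx\n", (unsigned long long)ev.record_type);
	return 0;
}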
arch/powerpc/kernel/perf_counter.c: 1 addition & 1 deletion

@@ -749,7 +749,7 @@ static void record_and_restart(struct perf_counter *counter, long val,
 	 * Finally record data if requested.
 	 */
 	if (record)
-		perf_counter_overflow(counter, 1, regs);
+		perf_counter_overflow(counter, 1, regs, 0);
 }
 
 /*
arch/powerpc/mm/fault.c: 5 additions & 3 deletions

@@ -171,7 +171,7 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		die("Weird page fault", regs, SIGSEGV);
 	}
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space. All other faults represent errors in the
@@ -312,7 +312,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	}
 	if (ret & VM_FAULT_MAJOR) {
 		current->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 #ifdef CONFIG_PPC_SMLPAR
 		if (firmware_has_feature(FW_FEATURE_CMO)) {
 			preempt_disable();
@@ -322,7 +323,8 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 #endif
 	} else {
 		current->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
 	}
 	up_read(&mm->mmap_sem);
 	return 0;
arch/x86/kernel/cpu/perf_counter.c: 1 addition & 1 deletion

@@ -800,7 +800,7 @@ static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi)
 			continue;
 
 		perf_save_and_restart(counter);
-		if (perf_counter_overflow(counter, nmi, regs))
+		if (perf_counter_overflow(counter, nmi, regs, 0))
 			__pmc_generic_disable(counter, &counter->hw, bit);
 	}
 
arch/x86/mm/fault.c: 5 additions & 3 deletions

@@ -1045,7 +1045,7 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 	if (unlikely(error_code & PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
-	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs);
+	perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs, address);
 
 	/*
 	 * If we're in an interrupt, have no user context or are running
@@ -1142,10 +1142,12 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code)
 
 	if (fault & VM_FAULT_MAJOR) {
 		tsk->maj_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0,
+				     regs, address);
 	} else {
 		tsk->min_flt++;
-		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs);
+		perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0,
+				     regs, address);
 	}
 
 	check_v8086_mode(regs, address, tsk);
include/linux/perf_counter.h: 8 additions & 6 deletions

@@ -101,8 +101,9 @@ enum perf_counter_record_format {
 	PERF_RECORD_IP		= 1U << 0,
 	PERF_RECORD_TID		= 1U << 1,
 	PERF_RECORD_TIME	= 1U << 2,
-	PERF_RECORD_GROUP	= 1U << 3,
-	PERF_RECORD_CALLCHAIN	= 1U << 4,
+	PERF_RECORD_ADDR	= 1U << 3,
+	PERF_RECORD_GROUP	= 1U << 4,
+	PERF_RECORD_CALLCHAIN	= 1U << 5,
 };
 
 /*
@@ -251,6 +252,7 @@ enum perf_event_type {
 	 *	{ u64			ip;	  } && PERF_RECORD_IP
 	 *	{ u32			pid, tid; } && PERF_RECORD_TID
 	 *	{ u64			time;     } && PERF_RECORD_TIME
+	 *	{ u64			addr;     } && PERF_RECORD_ADDR
 	 *
 	 *	{ u64			nr;
 	 *	  { u64 event, val; }	cnt[nr];  } && PERF_RECORD_GROUP
@@ -537,7 +539,7 @@ extern int hw_perf_group_sched_in(struct perf_counter *group_leader,
 extern void perf_counter_update_userpage(struct perf_counter *counter);
 
 extern int perf_counter_overflow(struct perf_counter *counter,
-				 int nmi, struct pt_regs *regs);
+				 int nmi, struct pt_regs *regs, u64 addr);
 /*
  * Return 1 for a software counter, 0 for a hardware counter
 */
@@ -547,7 +549,7 @@ static inline int is_software_counter(struct perf_counter *counter)
 		perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE;
 }
 
-extern void perf_swcounter_event(u32, u64, int, struct pt_regs *);
+extern void perf_swcounter_event(u32, u64, int, struct pt_regs *, u64);
 
 extern void perf_counter_mmap(unsigned long addr, unsigned long len,
 			      unsigned long pgoff, struct file *file);
@@ -584,8 +586,8 @@ static inline int perf_counter_task_disable(void) { return -EINVAL; }
 static inline int perf_counter_task_enable(void)  { return -EINVAL; }
 
 static inline void
-perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)	{ }
-
+perf_swcounter_event(u32 event, u64 nr, int nmi,
+		     struct pt_regs *regs, u64 addr)	{ }
 
 static inline void
 perf_counter_mmap(unsigned long addr, unsigned long len,
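
The layout comment in the hunk above fixes the order of optional sample fields: ip, then pid/tid, then time, then the new addr. A consumer has to walk them in exactly that order. A minimal decoding sketch follows, assuming a simplified (u32 type, u32 size) header for illustration; the era's real struct perf_event_header may differ.

/*
 * Sketch: decoding one overflow record per the documented field order.
 * The header layout here is an assumption, not the kernel's.
 */
#include <stdint.h>
#include <stdio.h>

#define PERF_RECORD_IP   (1U << 0)
#define PERF_RECORD_TID  (1U << 1)
#define PERF_RECORD_TIME (1U << 2)
#define PERF_RECORD_ADDR (1U << 3)

struct header_sketch {
	uint32_t type;		/* which PERF_RECORD_* fields follow */
	uint32_t size;		/* total record size in bytes */
};

/* Walk the optional fields in documented order: ip, tid, time, addr. */
static void decode(const struct header_sketch *hdr)
{
	const uint64_t *p = (const uint64_t *)(hdr + 1);

	if (hdr->type & PERF_RECORD_IP)
		printf("ip   %#llx\n", (unsigned long long)*p++);
	if (hdr->type & PERF_RECORD_TID) {
		const uint32_t *t = (const uint32_t *)p;
		printf("pid  %u tid %u\n", t[0], t[1]);
		p++;	/* pid and tid share one u64 slot */
	}
	if (hdr->type & PERF_RECORD_TIME)
		printf("time %llu\n", (unsigned long long)*p++);
	if (hdr->type & PERF_RECORD_ADDR)
		printf("addr %#llx\n", (unsigned long long)*p++);
}

int main(void)
{
	/* A fake record carrying just an ip and a data address. */
	struct {
		struct header_sketch hdr;
		uint64_t ip, addr;
	} rec = {
		{ PERF_RECORD_IP | PERF_RECORD_ADDR, sizeof(rec) },
		0xc000000000123456ull, 0x7f0000101000ull,
	};

	decode(&rec.hdr);
	return 0;
}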
kernel/perf_counter.c: 29 additions & 17 deletions

@@ -800,7 +800,7 @@ void perf_counter_task_sched_out(struct task_struct *task, int cpu)
 	update_context_time(ctx);
 
 	regs = task_pt_regs(task);
-	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs);
+	perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
 	__perf_counter_sched_out(ctx, cpuctx);
 
 	cpuctx->task_ctx = NULL;
@@ -1810,7 +1810,7 @@ static void perf_output_end(struct perf_output_handle *handle)
 }
 
 static void perf_counter_output(struct perf_counter *counter,
-				int nmi, struct pt_regs *regs)
+				int nmi, struct pt_regs *regs, u64 addr)
 {
 	int ret;
 	u64 record_type = counter->hw_event.record_type;
@@ -1860,6 +1860,11 @@ static void perf_counter_output(struct perf_counter *counter,
 		header.size += sizeof(u64);
 	}
 
+	if (record_type & PERF_RECORD_ADDR) {
+		header.type |= PERF_RECORD_ADDR;
+		header.size += sizeof(u64);
+	}
+
 	if (record_type & PERF_RECORD_GROUP) {
 		header.type |= PERF_RECORD_GROUP;
 		header.size += sizeof(u64) +
@@ -1892,6 +1897,9 @@ static void perf_counter_output(struct perf_counter *counter,
 	if (record_type & PERF_RECORD_TIME)
 		perf_output_put(&handle, time);
 
+	if (record_type & PERF_RECORD_ADDR)
+		perf_output_put(&handle, addr);
+
 	if (record_type & PERF_RECORD_GROUP) {
 		struct perf_counter *leader, *sub;
 		u64 nr = counter->nr_siblings;
@@ -2158,7 +2166,7 @@ void perf_counter_munmap(unsigned long addr, unsigned long len,
  */
 
 int perf_counter_overflow(struct perf_counter *counter,
-			  int nmi, struct pt_regs *regs)
+			  int nmi, struct pt_regs *regs, u64 addr)
 {
 	int events = atomic_read(&counter->event_limit);
 	int ret = 0;
@@ -2175,7 +2183,7 @@ int perf_counter_overflow(struct perf_counter *counter,
 		perf_counter_disable(counter);
 	}
 
-	perf_counter_output(counter, nmi, regs);
+	perf_counter_output(counter, nmi, regs, addr);
 	return ret;
 }
 
@@ -2240,7 +2248,7 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 	regs = task_pt_regs(current);
 
 	if (regs) {
-		if (perf_counter_overflow(counter, 0, regs))
+		if (perf_counter_overflow(counter, 0, regs, 0))
 			ret = HRTIMER_NORESTART;
 	}
 
@@ -2250,11 +2258,11 @@ static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer)
 }
 
 static void perf_swcounter_overflow(struct perf_counter *counter,
-				    int nmi, struct pt_regs *regs)
+				    int nmi, struct pt_regs *regs, u64 addr)
 {
 	perf_swcounter_update(counter);
 	perf_swcounter_set_period(counter);
-	if (perf_counter_overflow(counter, nmi, regs))
+	if (perf_counter_overflow(counter, nmi, regs, addr))
 		/* soft-disable the counter */
 		;
 
@@ -2286,16 +2294,17 @@ static int perf_swcounter_match(struct perf_counter *counter,
 }
 
 static void perf_swcounter_add(struct perf_counter *counter, u64 nr,
-			       int nmi, struct pt_regs *regs)
+			       int nmi, struct pt_regs *regs, u64 addr)
 {
 	int neg = atomic64_add_negative(nr, &counter->hw.count);
 	if (counter->hw.irq_period && !neg)
-		perf_swcounter_overflow(counter, nmi, regs);
+		perf_swcounter_overflow(counter, nmi, regs, addr);
 }
 
 static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 				     enum perf_event_types type, u32 event,
-				     u64 nr, int nmi, struct pt_regs *regs)
+				     u64 nr, int nmi, struct pt_regs *regs,
+				     u64 addr)
 {
 	struct perf_counter *counter;
 
@@ -2305,7 +2314,7 @@ static void perf_swcounter_ctx_event(struct perf_counter_context *ctx,
 	rcu_read_lock();
 	list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) {
 		if (perf_swcounter_match(counter, type, event, regs))
-			perf_swcounter_add(counter, nr, nmi, regs);
+			perf_swcounter_add(counter, nr, nmi, regs, addr);
 	}
 	rcu_read_unlock();
 }
@@ -2325,7 +2334,8 @@ static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx)
 }
 
 static void __perf_swcounter_event(enum perf_event_types type, u32 event,
-				   u64 nr, int nmi, struct pt_regs *regs)
+				   u64 nr, int nmi, struct pt_regs *regs,
+				   u64 addr)
 {
 	struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context);
 	int *recursion = perf_swcounter_recursion_context(cpuctx);
@@ -2336,10 +2346,11 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 	(*recursion)++;
 	barrier();
 
-	perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs);
+	perf_swcounter_ctx_event(&cpuctx->ctx, type, event,
+				 nr, nmi, regs, addr);
 	if (cpuctx->task_ctx) {
 		perf_swcounter_ctx_event(cpuctx->task_ctx, type, event,
-					 nr, nmi, regs);
+					 nr, nmi, regs, addr);
 	}
 
 	barrier();
@@ -2349,9 +2360,10 @@ static void __perf_swcounter_event(enum perf_event_types type, u32 event,
 	put_cpu_var(perf_cpu_context);
 }
 
-void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs)
+void
+perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 {
-	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs);
+	__perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs, addr);
 }
 
 static void perf_swcounter_read(struct perf_counter *counter)
@@ -2548,7 +2560,7 @@ void perf_tpcounter_event(int event_id)
 	if (!regs)
 		regs = task_pt_regs(current);
 
-	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs);
+	__perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs, 0);
 }
 
 extern int ftrace_profile_enable(int);
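
Note the pattern in perf_counter_output() above: a first pass ORs each requested PERF_RECORD_* bit into header.type and adds sizeof(u64) to header.size, and a second pass perf_output_put()s the fields in the same order, which is what keeps header.size consistent with the bytes emitted. The new PERF_RECORD_ADDR branch slots into both passes. A standalone sketch of the same size-then-emit pattern follows; the buffer handling and names are illustrative, not the kernel's ring-buffer code.

/*
 * Sketch of the size-then-emit pattern: pass 1 computes the expected
 * size per bit, pass 2 writes the fields in exactly the same order.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PERF_RECORD_IP   (1U << 0)
#define PERF_RECORD_TIME (1U << 2)
#define PERF_RECORD_ADDR (1U << 3)

static size_t expected_size(uint32_t type)
{
	size_t size = 0;	/* payload only; header omitted for brevity */

	if (type & PERF_RECORD_IP)   size += sizeof(uint64_t);
	if (type & PERF_RECORD_TIME) size += sizeof(uint64_t);
	if (type & PERF_RECORD_ADDR) size += sizeof(uint64_t);
	return size;
}

static size_t emit(uint8_t *buf, uint32_t type,
		   uint64_t ip, uint64_t time, uint64_t addr)
{
	uint8_t *p = buf;

	/* Same bits, same order as expected_size(). */
	if (type & PERF_RECORD_IP)   { memcpy(p, &ip,   8); p += 8; }
	if (type & PERF_RECORD_TIME) { memcpy(p, &time, 8); p += 8; }
	if (type & PERF_RECORD_ADDR) { memcpy(p, &addr, 8); p += 8; }
	return (size_t)(p - buf);
}

int main(void)
{
	uint8_t buf[64];
	uint32_t type = PERF_RECORD_IP | PERF_RECORD_ADDR;
	size_t n = emit(buf, type, 0xc000000000123456ull, 0, 0x7f0000101000ull);

	printf("wrote %zu bytes, expected %zu\n", n, expected_size(type));
	return n != expected_size(type);
}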
