Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Ingo Molnar committed Mar 12, 2012
2 parents f9b4eeb + 24bff2d commit bea95c1
Showing 33 changed files with 2,017 additions and 243 deletions.
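
For context: the feature being merged lets a sampling event request the CPU's taken-branch records (the LBR on x86) with every sample; the tool-side switch mentioned in the merge reason is 'perf record -b'. The sketch below is purely illustrative and not part of this commit: it shows how user space might request branch sampling directly through perf_event_open(), assuming the PERF_SAMPLE_BRANCH_STACK sample flag and the perf_event_attr.branch_sample_type field that this series adds to <linux/perf_event.h>.

/*
 * Illustrative only -- not part of this commit.  Assumes the
 * PERF_SAMPLE_BRANCH_* ABI introduced by this series is present
 * in <linux/perf_event.h>.
 */
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.exclude_kernel = 1;
	/* ask for the taken-branch stack with every sample */
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	/* all taken branches, user level only (matches exclude_kernel above) */
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_USER;

	fd = syscall(__NR_perf_event_open, &attr, 0 /* this task */,
		     -1 /* any cpu */, -1 /* no group */, 0);
	if (fd < 0) {
		/* e.g. EOPNOTSUPP on PMUs without branch sampling support */
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}

On the PMUs updated below to explicitly reject branch stacks (alpha, ARM, MIPS, PowerPC, SH, SPARC and AMD x86), such an open would be expected to fail with EOPNOTSUPP.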
4 changes: 4 additions & 0 deletions arch/alpha/kernel/perf_event.c
@@ -685,6 +685,10 @@ static int alpha_pmu_event_init(struct perf_event *event)
{
int err;

/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
4 changes: 4 additions & 0 deletions arch/arm/kernel/perf_event.c
@@ -539,6 +539,10 @@ static int armpmu_event_init(struct perf_event *event)
int err = 0;
atomic_t *active_events = &armpmu->active_events;

/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

if (armpmu->map_event(event) == -ENOENT)
return -ENOENT;

4 changes: 4 additions & 0 deletions arch/mips/kernel/perf_event_mipsxx.c
@@ -606,6 +606,10 @@ static int mipspmu_event_init(struct perf_event *event)
{
int err = 0;

/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HARDWARE:
4 changes: 4 additions & 0 deletions arch/powerpc/kernel/perf_event.c
@@ -1084,6 +1084,10 @@ static int power_pmu_event_init(struct perf_event *event)
if (!ppmu)
return -ENOENT;

/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

switch (event->attr.type) {
case PERF_TYPE_HARDWARE:
ev = event->attr.config;
4 changes: 4 additions & 0 deletions arch/sh/kernel/perf_event.c
@@ -310,6 +310,10 @@ static int sh_pmu_event_init(struct perf_event *event)
{
int err;

/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

switch (event->attr.type) {
case PERF_TYPE_RAW:
case PERF_TYPE_HW_CACHE:
4 changes: 4 additions & 0 deletions arch/sparc/kernel/perf_event.c
@@ -1105,6 +1105,10 @@ static int sparc_pmu_event_init(struct perf_event *event)
if (atomic_read(&nmi_active) < 0)
return -ENODEV;

/* does not support taken branch sampling */
if (has_branch_stack(event))
return -EOPNOTSUPP;

switch (attr->type) {
case PERF_TYPE_HARDWARE:
if (attr->config >= sparc_pmu->max_events)
7 changes: 7 additions & 0 deletions arch/x86/include/asm/msr-index.h
@@ -56,6 +56,13 @@
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7

#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
#define MSR_LBR_CORE_TO 0x00000060

#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
85 changes: 78 additions & 7 deletions arch/x86/kernel/cpu/perf_event.c
@@ -353,6 +353,36 @@ int x86_setup_perfctr(struct perf_event *event)
return 0;
}

/*
* check that branch_sample_type is compatible with
* settings needed for precise_ip > 1 which implies
* using the LBR to capture ALL taken branches at the
* priv levels of the measurement
*/
static inline int precise_br_compat(struct perf_event *event)
{
u64 m = event->attr.branch_sample_type;
u64 b = 0;

/* must capture all branches */
if (!(m & PERF_SAMPLE_BRANCH_ANY))
return 0;

m &= PERF_SAMPLE_BRANCH_KERNEL | PERF_SAMPLE_BRANCH_USER;

if (!event->attr.exclude_user)
b |= PERF_SAMPLE_BRANCH_USER;

if (!event->attr.exclude_kernel)
b |= PERF_SAMPLE_BRANCH_KERNEL;

/*
* ignore PERF_SAMPLE_BRANCH_HV, not supported on x86
*/

return m == b;
}

int x86_pmu_hw_config(struct perf_event *event)
{
if (event->attr.precise_ip) {
@@ -369,6 +399,36 @@ int x86_pmu_hw_config(struct perf_event *event)

if (event->attr.precise_ip > precise)
return -EOPNOTSUPP;
/*
* check that PEBS LBR correction does not conflict with
* whatever the user is asking with attr->branch_sample_type
*/
if (event->attr.precise_ip > 1) {
u64 *br_type = &event->attr.branch_sample_type;

if (has_branch_stack(event)) {
if (!precise_br_compat(event))
return -EOPNOTSUPP;

/* branch_sample_type is compatible */

} else {
/*
* user did not specify branch_sample_type
*
* For PEBS fixups, we capture all
* the branches at the priv level of the
* event.
*/
*br_type = PERF_SAMPLE_BRANCH_ANY;

if (!event->attr.exclude_user)
*br_type |= PERF_SAMPLE_BRANCH_USER;

if (!event->attr.exclude_kernel)
*br_type |= PERF_SAMPLE_BRANCH_KERNEL;
}
}
}

/*
@@ -426,6 +486,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;

/* mark not used */
event->hw.extra_reg.idx = EXTRA_REG_NONE;
event->hw.branch_reg.idx = EXTRA_REG_NONE;

return x86_pmu.hw_config(event);
}

@@ -1607,25 +1671,32 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};

static void x86_pmu_flush_branch_stack(void)
{
if (x86_pmu.flush_branch_stack)
x86_pmu.flush_branch_stack();
}

static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,
.pmu_enable = x86_pmu_enable,
.pmu_disable = x86_pmu_disable,

.attr_groups = x86_pmu_attr_groups,

.event_init = x86_pmu_event_init,

.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,
.add = x86_pmu_add,
.del = x86_pmu_del,
.start = x86_pmu_start,
.stop = x86_pmu_stop,
.read = x86_pmu_read,

.start_txn = x86_pmu_start_txn,
.cancel_txn = x86_pmu_cancel_txn,
.commit_txn = x86_pmu_commit_txn,

.event_idx = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
};

void perf_update_user_clock(struct perf_event_mmap_page *userpg, u64 now)
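
As an aside (not part of the commit), the precise_ip/branch_sample_type compatibility rule added to x86_pmu_hw_config() above can be illustrated with a small standalone re-implementation; the BR_* constants below are stand-ins for the PERF_SAMPLE_BRANCH_* flags.

/* Standalone illustration only -- not kernel code.  Mirrors the
 * precise_br_compat() logic above: for precise_ip > 1, the requested
 * branch_sample_type must cover ALL taken branches at exactly the
 * privilege levels the event itself measures. */
#include <assert.h>
#include <stdint.h>

#define BR_USER   (1ULL << 0)	/* stand-in for PERF_SAMPLE_BRANCH_USER   */
#define BR_KERNEL (1ULL << 1)	/* stand-in for PERF_SAMPLE_BRANCH_KERNEL */
#define BR_ANY    (1ULL << 3)	/* stand-in for PERF_SAMPLE_BRANCH_ANY    */

static int br_compat(uint64_t type, int exclude_user, int exclude_kernel)
{
	uint64_t b = 0;

	if (!(type & BR_ANY))		/* must capture all branches */
		return 0;

	type &= BR_USER | BR_KERNEL;

	if (!exclude_user)
		b |= BR_USER;
	if (!exclude_kernel)
		b |= BR_KERNEL;

	return type == b;
}

int main(void)
{
	/* user-only event asking for all user branches: compatible */
	assert(br_compat(BR_ANY | BR_USER, 0, 1));
	/* same event also asking for kernel branches: rejected (-EOPNOTSUPP) */
	assert(!br_compat(BR_ANY | BR_USER | BR_KERNEL, 0, 1));
	/* filter without the "any branch" bit (e.g. calls only): rejected */
	assert(!br_compat(BR_USER, 0, 1));
	return 0;
}

When the user gives no branch_sample_type at all, the hunk above shows the kernel filling in PERF_SAMPLE_BRANCH_ANY plus the event's own privilege levels so the PEBS fixup can still walk the LBR.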
19 changes: 19 additions & 0 deletions arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {

EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */

EXTRA_REG_MAX /* number of entries needed */
};
@@ -130,6 +131,8 @@ struct cpu_hw_events {
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
u64 br_sel;

/*
* Intel host/guest exclude bits
@@ -344,6 +347,7 @@ struct x86_pmu {
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
void (*flush_branch_stack)(void);

/*
* Intel Arch Perfmon v2+
@@ -365,6 +369,8 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
u64 lbr_sel_mask; /* LBR_SELECT valid bits */
const int *lbr_sel_map; /* lbr_select mappings */

/*
* Extra registers for events
@@ -478,6 +484,15 @@ extern struct event_constraint emptyconstraint;

extern struct event_constraint unconstrained;

static inline bool kernel_ip(unsigned long ip)
{
#ifdef CONFIG_X86_32
return ip > PAGE_OFFSET;
#else
return (long)ip < 0;
#endif
}

#ifdef CONFIG_CPU_SUP_AMD

int amd_pmu_init(void);
@@ -558,6 +573,10 @@ void intel_pmu_lbr_init_nhm(void);

void intel_pmu_lbr_init_atom(void);

void intel_pmu_lbr_init_snb(void);

int intel_pmu_setup_lbr_filter(struct perf_event *event);

int p4_pmu_init(void);

int p6_pmu_init(void);
3 changes: 3 additions & 0 deletions arch/x86/kernel/cpu/perf_event_amd.c
@@ -139,6 +139,9 @@ static int amd_pmu_hw_config(struct perf_event *event)
if (ret)
return ret;

if (has_branch_stack(event))
return -EOPNOTSUPP;

if (event->attr.exclude_host && event->attr.exclude_guest)
/*
* When HO == GO == 1 the hardware treats that as GO == HO == 0