Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 191050
b: refs/heads/master
c: caff2be
h: refs/heads/master
v: v3
  • Loading branch information
Peter Zijlstra authored and Ingo Molnar committed Mar 10, 2010
1 parent ea55265 commit 4d3432e
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 1 deletion.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 69fef0d2e2c2c049ef4207a52e78b50d527bd85a
refs/heads/master: caff2befffe899e63df5cc760b7ed01cfd902685
18 changes: 18 additions & 0 deletions trunk/arch/x86/kernel/cpu/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

#define MAX_LBR_ENTRIES 16

struct cpu_hw_events {
/*
* Generic x86 PMC bits
Expand All @@ -69,6 +71,14 @@ struct cpu_hw_events {
struct debug_store *ds;
u64 pebs_enabled;

/*
* Intel LBR bits
*/
int lbr_users;
void *lbr_context;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];

/*
* AMD specific bits
*/
Expand Down Expand Up @@ -159,6 +169,13 @@ struct x86_pmu {
int pebs_record_size;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;

/*
* Intel LBR
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */
int lbr_format; /* hardware format */
};

static struct x86_pmu x86_pmu __read_mostly;
Expand Down Expand Up @@ -1237,6 +1254,7 @@ int hw_perf_group_sched_in(struct perf_event *leader,

#include "perf_event_amd.c"
#include "perf_event_p6.c"
#include "perf_event_intel_lbr.c"
#include "perf_event_intel_ds.c"
#include "perf_event_intel.c"

Expand Down
13 changes: 13 additions & 0 deletions trunk/arch/x86/kernel/cpu/perf_event_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
intel_pmu_disable_bts();

intel_pmu_pebs_disable_all();
intel_pmu_lbr_disable_all();
}

static void intel_pmu_enable_all(void)
Expand All @@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
}

intel_pmu_pebs_enable_all();
intel_pmu_lbr_enable_all();
}

static inline u64 intel_pmu_get_status(void)
Expand Down Expand Up @@ -674,6 +676,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
inc_irq_stat(apic_perf_irqs);
ack = status;

intel_pmu_lbr_read();

/*
* PEBS overflow sets bit 62 in the global status register
*/
Expand Down Expand Up @@ -848,6 +852,8 @@ static __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

intel_pmu_lbr_init_core();

x86_pmu.event_constraints = intel_core2_event_constraints;
pr_cont("Core2 events, ");
break;
Expand All @@ -857,13 +863,18 @@ static __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_nehalem_event_constraints;
pr_cont("Nehalem/Corei7 events, ");
break;

case 28: /* Atom */
memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

intel_pmu_lbr_init_atom();

x86_pmu.event_constraints = intel_gen_event_constraints;
pr_cont("Atom events, ");
break;
Expand All @@ -873,6 +884,8 @@ static __init int intel_pmu_init(void)
memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
sizeof(hw_cache_event_ids));

intel_pmu_lbr_init_nhm();

x86_pmu.event_constraints = intel_westmere_event_constraints;
pr_cont("Westmere events, ");
break;
Expand Down
228 changes: 228 additions & 0 deletions trunk/arch/x86/kernel/cpu/perf_event_intel_lbr.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
#ifdef CONFIG_CPU_SUP_INTEL

/*
 * LBR record layouts. x86_pmu.lbr_format is compared against these to
 * pick the matching reset/read routine.
 */
enum {
	/* from/to packed as two 32-bit halves of a single MSR */
	LBR_FORMAT_32		= 0x00,
	/* separate 64-bit from/to MSRs, linear addresses */
	LBR_FORMAT_LIP		= 0x01,
	/* separate 64-bit from/to MSRs, effective addresses */
	LBR_FORMAT_EIP		= 0x02,
	/* as EIP, with a mispredict flag in bit 63 of the from MSR */
	LBR_FORMAT_EIP_FLAGS	= 0x03,
};

/*
 * DEBUGCTL MSR bits used to control branch recording.
 *
 * Only LBR implementations that provide FREEZE_LBRS_ON_PMI are supported;
 * without it, getting a reliable stack becomes near impossible.
 */

#define X86_DEBUGCTL_LBR			0x001	/* bit 0 */
#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI		0x800	/* bit 11 */

/*
 * Set the LBR and FREEZE_LBRS_ON_PMI bits in the DEBUGCTL MSR with a
 * read-modify-write, leaving every other bit as it was.
 */
static void __intel_pmu_lbr_enable(void)
{
	u64 ctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, ctl);
	ctl |= X86_DEBUGCTL_LBR;
	ctl |= X86_DEBUGCTL_FREEZE_LBRS_ON_PMI;
	wrmsrl(MSR_IA32_DEBUGCTLMSR, ctl);
}

/*
 * Clear the LBR and FREEZE_LBRS_ON_PMI bits in the DEBUGCTL MSR with a
 * read-modify-write, leaving every other bit as it was.
 */
static void __intel_pmu_lbr_disable(void)
{
	const u64 lbr_bits = X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI;
	u64 ctl;

	rdmsrl(MSR_IA32_DEBUGCTLMSR, ctl);
	ctl &= ~lbr_bits;
	wrmsrl(MSR_IA32_DEBUGCTLMSR, ctl);
}

/*
 * Zero all x86_pmu.lbr_nr branch record MSRs for the 32-bit format.
 * Only the from MSR range is written.
 */
static void intel_pmu_lbr_reset_32(void)
{
	int slot = 0;

	while (slot < x86_pmu.lbr_nr) {
		wrmsrl(x86_pmu.lbr_from + slot, 0);
		slot++;
	}
}

/*
 * Zero all x86_pmu.lbr_nr branch records for the 64-bit formats, which
 * keep the source and destination in separate from/to MSR ranges.
 */
static void intel_pmu_lbr_reset_64(void)
{
	int slot = 0;

	while (slot < x86_pmu.lbr_nr) {
		wrmsrl(x86_pmu.lbr_from + slot, 0);
		wrmsrl(x86_pmu.lbr_to + slot, 0);
		slot++;
	}
}

/*
 * Clear the hardware LBR stack, using the reset routine that matches
 * x86_pmu.lbr_format. Every format other than LBR_FORMAT_32 takes the
 * 64-bit path.
 */
static void intel_pmu_lbr_reset(void)
{
	switch (x86_pmu.lbr_format) {
	case LBR_FORMAT_32:
		intel_pmu_lbr_reset_32();
		break;
	default:
		intel_pmu_lbr_reset_64();
		break;
	}
}

/*
 * Register @event as an LBR user on this CPU.
 *
 * Does nothing when the PMU has no LBR entries (x86_pmu.lbr_nr == 0).
 * Warns if called while cpuc->enabled is set. This only bumps the user
 * count; the hardware bits are set by intel_pmu_lbr_enable_all().
 */
static void intel_pmu_lbr_enable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
	int flush = 0;

	if (!x86_pmu.lbr_nr)
		return;

	WARN_ON(cpuc->enabled);

	/*
	 * The LBR stack must be wiped for the first LBR user, and also
	 * when a per-task event arrives from a different context than the
	 * one recorded, so that no branch data leaks between contexts.
	 * event->ctx is only looked at once there already is a user.
	 */
	if (!cpuc->lbr_users)
		flush = 1;
	else if (event->ctx->task && cpuc->lbr_context != event->ctx)
		flush = 1;

	if (flush) {
		intel_pmu_lbr_reset();
		cpuc->lbr_context = event->ctx;
	}

	cpuc->lbr_users++;
}

/*
 * Drop one LBR user on this CPU.
 *
 * Does nothing when the PMU has no LBR entries. A user count going
 * negative is a BUG; being called while cpuc->enabled is set triggers a
 * warning. @event is not used here.
 */
static void intel_pmu_lbr_disable(struct perf_event *event)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (x86_pmu.lbr_nr) {
		cpuc->lbr_users -= 1;

		BUG_ON(cpuc->lbr_users < 0);
		WARN_ON(cpuc->enabled);
	}
}

/*
 * Switch branch recording on in hardware, but only when this CPU has at
 * least one LBR user.
 */
static void intel_pmu_lbr_enable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	__intel_pmu_lbr_enable();
}

/*
 * Switch branch recording off in hardware, but only when this CPU has at
 * least one LBR user.
 */
static void intel_pmu_lbr_disable_all(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	__intel_pmu_lbr_disable();
}

/*
 * Return the current value of the LBR top-of-stack MSR, whose address is
 * held in x86_pmu.lbr_tos.
 */
static inline u64 intel_pmu_lbr_tos(void)
{
	u64 top;

	rdmsrl(x86_pmu.lbr_tos, top);
	return top;
}

/*
 * Read the hardware LBR stack into cpuc->lbr_entries for the 32-bit
 * format, where one MSR per entry holds both the branch source and
 * destination as two 32-bit halves.
 *
 * lbr_entries[0] receives the record at the hardware top-of-stack index
 * and each following entry comes from the slot before it, wrapping
 * around the ring of x86_pmu.lbr_nr slots. The wrap is done with a mask,
 * so lbr_nr has to be a power of two. This format has no flags, so
 * flags is always stored as 0. cpuc->lbr_stack.nr is set to the number
 * of entries read.
 */
static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	/*
	 * The slot is derived from (tos - i), so tos itself must stay
	 * fixed: decrementing it as well would step back two slots per
	 * iteration and skip every other record.
	 */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		union {
			struct {
				u32 from;
				u32 to;
			};
			u64     lbr;
		} msr_lastbranch;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);

		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
		cpuc->lbr_entries[i].flags = 0;
	}
	cpuc->lbr_stack.nr = i;
}

/* In LBR_FORMAT_EIP_FLAGS, bit 63 of the from MSR carries the mispredict flag */
#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)

/*
 * Read the hardware LBR stack into cpuc->lbr_entries for the 64-bit
 * formats, which keep source and destination in separate from/to MSRs.
 *
 * Due to lack of segmentation in Linux the effective address (offset)
 * is the same as the linear address, allowing us to merge the LIP and EIP
 * LBR formats.
 *
 * lbr_entries[0] receives the record at the hardware top-of-stack index
 * and each following entry comes from the slot before it, wrapping
 * around the ring of x86_pmu.lbr_nr slots. The wrap is done with a mask,
 * so lbr_nr has to be a power of two. cpuc->lbr_stack.nr is set to the
 * number of entries read.
 */
static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
	unsigned long mask = x86_pmu.lbr_nr - 1;
	u64 tos = intel_pmu_lbr_tos();
	int i;

	/*
	 * The slot is derived from (tos - i), so tos itself must stay
	 * fixed: decrementing it as well would step back two slots per
	 * iteration and skip every other record.
	 */
	for (i = 0; i < x86_pmu.lbr_nr; i++) {
		unsigned long lbr_idx = (tos - i) & mask;
		u64 from, to, flags = 0;

		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);

		if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
			flags = !!(from & LBR_FROM_FLAG_MISPRED);
			/*
			 * Strip the flag by sign-extending bit 62 into
			 * bit 63. The left shift is done on the unsigned
			 * value so no negative signed value is shifted.
			 */
			from = (u64)(((s64)(from << 1)) >> 1);
		}

		cpuc->lbr_entries[i].from  = from;
		cpuc->lbr_entries[i].to    = to;
		cpuc->lbr_entries[i].flags = flags;
	}
	cpuc->lbr_stack.nr = i;
}

/*
 * Snapshot the hardware LBR stack into this CPU's cpu_hw_events, using
 * the reader that matches x86_pmu.lbr_format. Returns without touching
 * anything when this CPU has no LBR users.
 */
static void intel_pmu_lbr_read(void)
{
	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);

	if (!cpuc->lbr_users)
		return;

	/* Every format other than LBR_FORMAT_32 uses the 64-bit reader */
	if (x86_pmu.lbr_format != LBR_FORMAT_32) {
		intel_pmu_lbr_read_64(cpuc);
		return;
	}

	intel_pmu_lbr_read_32(cpuc);
}

/*
 * Return the low five bits of the IA32_PERF_CAPABILITIES MSR, which the
 * callers store as x86_pmu.lbr_format.
 */
static int intel_pmu_lbr_format(void)
{
	u64 caps;

	rdmsrl(MSR_IA32_PERF_CAPABILITIES, caps);
	caps &= 0x1f;

	return caps;
}

/*
 * Fill in the x86_pmu LBR parameters used for the Core2 case of
 * intel_pmu_init(): a 4 entry stack plus the top-of-stack, from and to
 * MSR base addresses. The record format is read from the hardware.
 */
static void intel_pmu_lbr_init_core(void)
{
	int format = intel_pmu_lbr_format();

	x86_pmu.lbr_tos    = 0x01c9;
	x86_pmu.lbr_from   = 0x40;
	x86_pmu.lbr_to     = 0x60;
	x86_pmu.lbr_nr     = 4;
	x86_pmu.lbr_format = format;
}

/*
 * Fill in the x86_pmu LBR parameters used for the Nehalem and Westmere
 * cases of intel_pmu_init(): a 16 entry stack plus the top-of-stack,
 * from and to MSR base addresses. The record format is read from the
 * hardware.
 */
static void intel_pmu_lbr_init_nhm(void)
{
	int format = intel_pmu_lbr_format();

	x86_pmu.lbr_tos    = 0x01c9;
	x86_pmu.lbr_from   = 0x680;
	x86_pmu.lbr_to     = 0x6c0;
	x86_pmu.lbr_nr     = 16;
	x86_pmu.lbr_format = format;
}

/*
 * Fill in the x86_pmu LBR parameters used for the Atom case of
 * intel_pmu_init(): an 8 entry stack plus the top-of-stack, from and to
 * MSR base addresses. The record format is read from the hardware.
 */
static void intel_pmu_lbr_init_atom(void)
{
	int format = intel_pmu_lbr_format();

	x86_pmu.lbr_tos    = 0x01c9;
	x86_pmu.lbr_from   = 0x40;
	x86_pmu.lbr_to     = 0x60;
	x86_pmu.lbr_nr     = 8;
	x86_pmu.lbr_format = format;
}

#endif /* CONFIG_CPU_SUP_INTEL */
11 changes: 11 additions & 0 deletions trunk/include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,17 @@ struct perf_raw_record {
void *data;
};

/*
 * One recorded branch.
 *
 * As filled in by the Intel LBR readers in this change: from and to are
 * the branch source and destination addresses, and flags is 1 when the
 * hardware marked the branch as mispredicted, 0 otherwise (always 0 for
 * formats that carry no flag).
 */
struct perf_branch_entry {
__u64 from;
__u64 to;
__u64 flags;
};

/*
 * A stack of recorded branches: a count followed by the entries.
 *
 * nr is the number of valid entries; the Intel LBR readers set it to the
 * number of records they read. entries is a zero-length trailing array,
 * so the storage must directly follow this header.
 * NOTE(review): cpu_hw_events places lbr_entries[] right after its
 * lbr_stack member, presumably to serve as that storage -- confirm.
 */
struct perf_branch_stack {
__u64 nr;
struct perf_branch_entry entries[0];
};

struct task_struct;

/**
Expand Down

0 comments on commit 4d3432e

Please sign in to comment.