perf/x86: Add Intel LBR sharing logic
The Intel LBR on some recent processors is capable
of filtering branches by type. The filter is configurable
via the LBR_SELECT MSR register.
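
For orientation, programming the filter boils down to writing a
bitmask into that MSR; a minimal sketch, assuming the kernel's
MSR_LBR_SELECT definition and wrmsrl() helper (the function name
here is illustrative, not part of this patch):

	/*
	 * Sketch: apply a branch-type filter. Bits outside the
	 * CPU-model-specific valid mask (x86_pmu.lbr_sel_mask,
	 * added below) are reserved and must stay zero.
	 */
	static void __intel_pmu_lbr_config(u64 lbr_select)
	{
		wrmsrl(MSR_LBR_SELECT, lbr_select & x86_pmu.lbr_sel_mask);
	}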

There are limitations on how this register can be used.

On Nehalem/Westmere, the LBR_SELECT is shared by the two HT threads
when HT is on. It is private to each core when HT is off.

On SandyBridge, the LBR_SELECT register is private to each thread
when HT is on. It is private to each core when HT is off.

The kernel must manage the sharing of LBR_SELECT. It allows
multiple users on the same logical CPU to use LBR_SELECT as
long as they program it with the same value. Across sibling
CPUs (HT threads), the same restriction applies on NHM/WSM.
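
Concretely, the rule is a compare-and-join test on the per-core
accounting slot; a condensed sketch of the check (simplified from
__intel_shared_reg_get_constraints() in the diff below, with era
pointing at the EXTRA_REG_LBR slot and era->lock held):

	/* free slot, or an identical LBR_SELECT value: share it */
	if (!atomic_read(&era->ref) || era->config == reg->config) {
		atomic_inc(&era->ref);	/* join as co-owner */
		era->config = reg->config;
		reg->alloc = 1;		/* remember for the put path */
	}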

This patch implements this sharing logic by leveraging the
mechanism put in place for managing the offcore_response
shared MSR.
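
That mechanism tracks each shared MSR through a per-core array of
struct er_account slots (already defined in perf_event.h); this
patch claims one more slot, EXTRA_REG_LBR, in the same array:

	/* Existing per-core accounting slot for one shared extra MSR. */
	struct er_account {
		raw_spinlock_t	lock;	/* serializes MSR programming */
		u64		config;	/* value currently programmed */
		u64		reg;	/* MSR address */
		atomic_t	ref;	/* number of events sharing it */
	};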

We modify __intel_shared_reg_get_constraints() to return NULL
instead of "unconstrained", so that x86_get_event_constraints()
still gets called; the LBR may be associated with events that are
themselves counter-constrained.
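
For context (this call chain itself is unchanged by the patch), a
NULL return from the shared-regs path now falls through to the
regular constraint table instead of short-circuiting:

	/* Sketch of the tail of intel_get_event_constraints() */
	c = intel_shared_regs_constraints(cpuc, event);
	if (c)
		return c;

	return x86_get_event_constraints(cpuc, event);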

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1328826068-11713-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Stephane Eranian authored and Ingo Molnar committed Mar 5, 2012
1 parent 225ce53 commit b36817e
Showing 3 changed files with 52 additions and 26 deletions.
4 changes: 4 additions & 0 deletions arch/x86/kernel/cpu/perf_event.c
@@ -426,6 +426,10 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	/* mark unused */
 	event->hw.extra_reg.idx = EXTRA_REG_NONE;
 
+	/* mark not used */
+	event->hw.extra_reg.idx = EXTRA_REG_NONE;
+	event->hw.branch_reg.idx = EXTRA_REG_NONE;
+
 	return x86_pmu.hw_config(event);
 }

4 changes: 4 additions & 0 deletions arch/x86/kernel/cpu/perf_event.h
@@ -33,6 +33,7 @@ enum extra_reg_type {
 
 	EXTRA_REG_RSP_0 = 0,	/* offcore_response_0 */
 	EXTRA_REG_RSP_1 = 1,	/* offcore_response_1 */
+	EXTRA_REG_LBR   = 2,	/* lbr_select */
 
 	EXTRA_REG_MAX		/* number of entries needed */
 };
@@ -130,6 +131,7 @@ struct cpu_hw_events {
 	void				*lbr_context;
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+	struct er_account		*lbr_sel;
 
 	/*
 	 * Intel host/guest exclude bits
@@ -342,6 +344,8 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 	int		lbr_nr;			   /* hardware stack size */
+	u64		lbr_sel_mask;		   /* LBR_SELECT valid bits */
+	const int	*lbr_sel_map;		   /* lbr_select mappings */
 
 	/*
 	 * Extra registers for events
70 changes: 44 additions & 26 deletions arch/x86/kernel/cpu/perf_event_intel.c
@@ -1123,17 +1123,17 @@ static bool intel_try_alt_er(struct perf_event *event, int orig_idx)
  */
 static struct event_constraint *
 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
-				   struct perf_event *event)
+				   struct perf_event *event,
+				   struct hw_perf_event_extra *reg)
 {
 	struct event_constraint *c = &emptyconstraint;
-	struct hw_perf_event_extra *reg = &event->hw.extra_reg;
 	struct er_account *era;
 	unsigned long flags;
 	int orig_idx = reg->idx;
 
 	/* already allocated shared msr */
 	if (reg->alloc)
-		return &unconstrained;
+		return NULL; /* call x86_get_event_constraint() */
 
 again:
 	era = &cpuc->shared_regs->regs[reg->idx];
@@ -1156,14 +1156,10 @@ __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
 		reg->alloc = 1;
 
 		/*
-		 * All events using extra_reg are unconstrained.
-		 * Avoids calling x86_get_event_constraints()
-		 *
-		 * Must revisit if extra_reg controlling events
-		 * ever have constraints. Worst case we go through
-		 * the regular event constraint table.
+		 * need to call x86_get_event_constraint()
+		 * to check if associated event has constraints
 		 */
-		c = &unconstrained;
+		c = NULL;
 	} else if (intel_try_alt_er(event, orig_idx)) {
 		raw_spin_unlock_irqrestore(&era->lock, flags);
 		goto again;
@@ -1200,11 +1196,23 @@ static struct event_constraint *
 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
 			      struct perf_event *event)
 {
-	struct event_constraint *c = NULL;
-
-	if (event->hw.extra_reg.idx != EXTRA_REG_NONE)
-		c = __intel_shared_reg_get_constraints(cpuc, event);
-
+	struct event_constraint *c = NULL, *d;
+	struct hw_perf_event_extra *xreg, *breg;
+
+	xreg = &event->hw.extra_reg;
+	if (xreg->idx != EXTRA_REG_NONE) {
+		c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
+		if (c == &emptyconstraint)
+			return c;
+	}
+	breg = &event->hw.branch_reg;
+	if (breg->idx != EXTRA_REG_NONE) {
+		d = __intel_shared_reg_get_constraints(cpuc, event, breg);
+		if (d == &emptyconstraint) {
+			__intel_shared_reg_put_constraints(cpuc, xreg);
+			c = d;
+		}
+	}
 	return c;
 }

@@ -1252,6 +1260,10 @@ intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
 	reg = &event->hw.extra_reg;
 	if (reg->idx != EXTRA_REG_NONE)
 		__intel_shared_reg_put_constraints(cpuc, reg);
+
+	reg = &event->hw.branch_reg;
+	if (reg->idx != EXTRA_REG_NONE)
+		__intel_shared_reg_put_constraints(cpuc, reg);
 }
 
 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
@@ -1431,7 +1443,7 @@ static int intel_pmu_cpu_prepare(int cpu)
 {
 	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
 
-	if (!x86_pmu.extra_regs)
+	if (!(x86_pmu.extra_regs || x86_pmu.lbr_sel_map))
 		return NOTIFY_OK;
 
 	cpuc->shared_regs = allocate_shared_regs(cpu);
@@ -1453,22 +1465,28 @@ static void intel_pmu_cpu_starting(int cpu)
 	 */
 	intel_pmu_lbr_reset();
 
-	if (!cpuc->shared_regs || (x86_pmu.er_flags & ERF_NO_HT_SHARING))
+	cpuc->lbr_sel = NULL;
+
+	if (!cpuc->shared_regs)
 		return;
 
-	for_each_cpu(i, topology_thread_cpumask(cpu)) {
-		struct intel_shared_regs *pc;
+	if (!(x86_pmu.er_flags & ERF_NO_HT_SHARING)) {
+		for_each_cpu(i, topology_thread_cpumask(cpu)) {
+			struct intel_shared_regs *pc;
 
-		pc = per_cpu(cpu_hw_events, i).shared_regs;
-		if (pc && pc->core_id == core_id) {
-			cpuc->kfree_on_online = cpuc->shared_regs;
-			cpuc->shared_regs = pc;
-			break;
+			pc = per_cpu(cpu_hw_events, i).shared_regs;
+			if (pc && pc->core_id == core_id) {
+				cpuc->kfree_on_online = cpuc->shared_regs;
+				cpuc->shared_regs = pc;
+				break;
+			}
 		}
+		cpuc->shared_regs->core_id = core_id;
+		cpuc->shared_regs->refcnt++;
 	}
 
-	cpuc->shared_regs->core_id = core_id;
-	cpuc->shared_regs->refcnt++;
+	if (x86_pmu.lbr_sel_map)
+		cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
 }
 
 static void intel_pmu_cpu_dying(int cpu)
