Skip to content

Commit

Permalink
perf_events: Update Intel extra regs shared constraints management
Browse files Browse the repository at this point in the history
This patch improves the code managing the extra shared registers
used for offcore_response events on Intel Nehalem/Westmere. The
idea is to use static allocation instead of dynamic allocation.
This simplifies greatly the get and put constraint routines for
those events.

The patch also renames per_core to shared_regs because the same
data structure gets used whether or not HT is on. When HT is
off, those events still need to coordination because they use
a extra MSR that has to be shared within an event group.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20110606145703.GA7258@quad
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Stephane Eranian authored and Ingo Molnar committed Jul 1, 2011
1 parent a7ac67e commit efc9f05
Show file tree
Hide file tree
Showing 3 changed files with 200 additions and 152 deletions.
78 changes: 61 additions & 17 deletions arch/x86/kernel/cpu/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,29 @@ do { \
} while (0)
#endif

/*
* | NHM/WSM | SNB |
* register -------------------------------
* | HT | no HT | HT | no HT |
*-----------------------------------------
* offcore | core | core | cpu | core |
* lbr_sel | core | core | cpu | core |
* ld_lat | cpu | core | cpu | core |
*-----------------------------------------
*
* Given that there is a small number of shared regs,
* we can pre-allocate their slot in the per-cpu
* per-core reg tables.
*/
enum extra_reg_type {
EXTRA_REG_NONE = -1, /* not used */

EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */

EXTRA_REG_MAX /* number of entries needed */
};

/*
* best effort, GUP based copy_from_user() that assumes IRQ or NMI context
*/
Expand Down Expand Up @@ -132,11 +155,10 @@ struct cpu_hw_events {
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];

/*
* Intel percore register state.
* Coordinate shared resources between HT threads.
* manage shared (per-core, per-cpu) registers
* used on Intel NHM/WSM/SNB
*/
int percore_used; /* Used by this CPU? */
struct intel_percore *per_core;
struct intel_shared_regs *shared_regs;

/*
* AMD specific bits
Expand Down Expand Up @@ -186,27 +208,46 @@ struct cpu_hw_events {
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->weight; (e)++)

/*
* Per register state.
*/
struct er_account {
raw_spinlock_t lock; /* per-core: protect structure */
u64 config; /* extra MSR config */
u64 reg; /* extra MSR number */
atomic_t ref; /* reference count */
};

/*
* Extra registers for specific events.
*
* Some events need large masks and require external MSRs.
* Define a mapping to these extra registers.
* Those extra MSRs end up being shared for all events on
* a PMU and sometimes between PMU of sibling HT threads.
* In either case, the kernel needs to handle conflicting
* accesses to those extra, shared, regs. The data structure
* to manage those registers is stored in cpu_hw_event.
*/
struct extra_reg {
unsigned int event;
unsigned int msr;
u64 config_mask;
u64 valid_mask;
int idx; /* per_xxx->regs[] reg index */
};

#define EVENT_EXTRA_REG(e, ms, m, vm) { \
#define EVENT_EXTRA_REG(e, ms, m, vm, i) { \
.event = (e), \
.msr = (ms), \
.config_mask = (m), \
.valid_mask = (vm), \
.idx = EXTRA_REG_##i \
}
#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)

#define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx)

#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0)

union perf_capabilities {
struct {
Expand Down Expand Up @@ -253,7 +294,6 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;

Expand Down Expand Up @@ -400,10 +440,10 @@ static inline unsigned int x86_pmu_event_addr(int index)
*/
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
struct hw_perf_event_extra *reg;
struct extra_reg *er;

event->hw.extra_reg = 0;
event->hw.extra_config = 0;
reg = &event->hw.extra_reg;

if (!x86_pmu.extra_regs)
return 0;
Expand All @@ -413,8 +453,10 @@ static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
continue;
if (event->attr.config1 & ~er->valid_mask)
return -EINVAL;
event->hw.extra_reg = er->msr;
event->hw.extra_config = event->attr.config1;

reg->idx = er->idx;
reg->config = event->attr.config1;
reg->reg = er->msr;
break;
}
return 0;
Expand Down Expand Up @@ -713,6 +755,9 @@ static int __x86_pmu_event_init(struct perf_event *event)
event->hw.last_cpu = -1;
event->hw.last_tag = ~0ULL;

/* mark unused */
event->hw.extra_reg.idx = EXTRA_REG_NONE;

return x86_pmu.hw_config(event);
}

Expand Down Expand Up @@ -754,8 +799,8 @@ static void x86_pmu_disable(struct pmu *pmu)
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
u64 enable_mask)
{
if (hwc->extra_reg)
wrmsrl(hwc->extra_reg, hwc->extra_config);
if (hwc->extra_reg.reg)
wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config);
wrmsrl(hwc->config_base, hwc->config | enable_mask);
}

Expand Down Expand Up @@ -1692,7 +1737,6 @@ static int validate_group(struct perf_event *event)
fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
if (!fake_cpuc)
goto out;

/*
* the event is not yet connected with its
* siblings therefore we must first collect
Expand Down
Loading

0 comments on commit efc9f05

Please sign in to comment.