Skip to content

Commit

Permalink
perf: Add support for supplementary event registers
Browse files Browse the repository at this point in the history
Change logs against Andi's original version:

- Extends perf_event_attr:config to config{,1,2} (Peter Zijlstra)
- Fixed a major event scheduling issue. An event that has already done
  ref++ once must not do ref++ again without calling put_constraint()
  in between. (Stephane Eranian)
- Use thread_cpumask for percore allocation. (Lin Ming)
- Use MSR names in the extra reg lists. (Lin Ming)
- Remove redundant "c = NULL" in intel_percore_constraints
- Fix comment of perf_event_attr::config1

Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.

Unfortunately this event requires programming a mask in a separate
register. And worse this separate register is per core, not per
CPU thread.

This patch:

- Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
  The extra parameters are passed by user space in the
  perf_event_attr::config1 field.

- Adds support to the Intel perf_event core to schedule per
  core resources. This adds fairly generic infrastructure that
  can also be used for other per core resources.
  The basic code is patterned after the similar AMD northbridge
  constraints code.

Thanks to Stephane Eranian who pointed out some problems
in the original version and suggested improvements.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1299119690-13991-2-git-send-email-ming.m.lin@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
  • Loading branch information
Andi Kleen authored and Ingo Molnar committed Mar 4, 2011
1 parent 17e3162 commit a7e3ed1
Show file tree
Hide file tree
Showing 4 changed files with 276 additions and 2 deletions.
3 changes: 3 additions & 0 deletions arch/x86/include/asm/msr-index.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
#define MSR_IA32_MCG_STATUS 0x0000017a
#define MSR_IA32_MCG_CTL 0x0000017b

/* Extra MSRs that carry the mask for the OFFCORE_RESPONSE event. */
#define MSR_OFFCORE_RSP_0 0x000001a6
#define MSR_OFFCORE_RSP_1 0x000001a7

#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
Expand Down
64 changes: 64 additions & 0 deletions arch/x86/kernel/cpu/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ struct amd_nb {
struct event_constraint event_constraints[X86_PMC_IDX_MAX];
};

/* Forward declaration: struct cpu_hw_events only stores a pointer to it. */
struct intel_percore;

#define MAX_LBR_ENTRIES 16

struct cpu_hw_events {
Expand Down Expand Up @@ -127,6 +129,13 @@ struct cpu_hw_events {
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];

/*
* Intel percore register state.
* Coordinate shared resources between HT threads.
*/
int percore_used; /* Used by this CPU? */
struct intel_percore *per_core;

/*
* AMD specific bits
*/
Expand Down Expand Up @@ -177,6 +186,28 @@ struct cpu_hw_events {
/*
 * Walk the event_constraint array starting at c, using e as the cursor.
 * The walk ends at the first entry whose weight is zero; that entry is
 * the terminator and is not visited.
 */
#define for_each_event_constraint(e, c) \
for ((e) = (c); (e)->weight; (e)++)

/*
 * Extra registers for specific events.
 * Some events need large masks and require external MSRs.
 * Define a mapping to these extra registers.
 *
 * Each table entry maps one event to one extra MSR:
 * @event:       event code, compared against (config & @config_mask)
 * @msr:         extra MSR recorded for a matching event; a value of 0
 *               marks the end of a table (see EVENT_EXTRA_END)
 * @config_mask: bits of the event config used for the @event comparison
 * @valid_mask:  bits that may be set in perf_event_attr::config1; any
 *               other bit makes x86_pmu_extra_regs() return -EINVAL
 */
struct extra_reg {
unsigned int event;
unsigned int msr;
u64 config_mask;
u64 valid_mask;
};

/*
 * Build one struct extra_reg table entry.
 * e: event code, ms: extra MSR, m: config mask, vm: valid config1 bits.
 */
#define EVENT_EXTRA_REG(e, ms, m, vm) { \
.event = (e), \
.msr = (ms), \
.config_mask = (m), \
.valid_mask = (vm), \
}
/*
 * Same as EVENT_EXTRA_REG(), with the config mask fixed to
 * ARCH_PERFMON_EVENTSEL_EVENT.
 */
#define INTEL_EVENT_EXTRA_REG(event, msr, vm) \
EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm)
/*
 * All-zero entry that terminates an extra_reg table;
 * x86_pmu_extra_regs() stops at the first entry whose msr is 0.
 */
#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0)

union perf_capabilities {
struct {
u64 lbr_format : 6;
Expand Down Expand Up @@ -221,6 +252,7 @@ struct x86_pmu {
void (*put_event_constraints)(struct cpu_hw_events *cpuc,
struct perf_event *event);
struct event_constraint *event_constraints;
struct event_constraint *percore_constraints;
void (*quirks)(void);
int perfctr_second_write;

Expand Down Expand Up @@ -249,6 +281,11 @@ struct x86_pmu {
*/
unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */
int lbr_nr; /* hardware stack size */

/*
* Extra registers for events
*/
struct extra_reg *extra_regs;
};

static struct x86_pmu x86_pmu __read_mostly;
Expand Down Expand Up @@ -341,6 +378,31 @@ static inline unsigned int x86_pmu_event_addr(int index)
return x86_pmu.perfctr + x86_pmu_addr_offset(index);
}

/*
 * Look up the extra register (if any) that goes with an event and
 * record it in event->hw.
 *
 * @config: event configuration matched against the extra_reg table
 * @event:  perf event whose hw.extra_reg / hw.extra_config are filled in
 *
 * hw.extra_reg and hw.extra_config are always reset to 0 first. When a
 * table entry matches, attr.config1 is checked against the entry's
 * valid_mask and then stored as the extra config.
 *
 * Returns 0 on success (including "no extra register needed"), or
 * -EINVAL if attr.config1 has bits outside the matching valid_mask.
 */
static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
{
	struct extra_reg *entry = x86_pmu.extra_regs;

	event->hw.extra_reg = 0;
	event->hw.extra_config = 0;

	/* This PMU has no extra register table at all. */
	if (!entry)
		return 0;

	/* Advance to the first matching entry or to the msr == 0 terminator. */
	while (entry->msr && entry->event != (config & entry->config_mask))
		entry++;

	/* Reached the terminator: the event uses no extra register. */
	if (!entry->msr)
		return 0;

	/* Reject config1 bits that the matching entry does not allow. */
	if (event->attr.config1 & ~entry->valid_mask)
		return -EINVAL;

	event->hw.extra_reg = entry->msr;
	event->hw.extra_config = event->attr.config1;

	return 0;
}

static atomic_t active_events;
static DEFINE_MUTEX(pmc_reserve_mutex);

Expand Down Expand Up @@ -665,6 +727,8 @@ static void x86_pmu_disable(struct pmu *pmu)
/*
 * Write the event's control register with enable_mask OR'd into the
 * stored config. If the event has an extra register (hwc->extra_reg is
 * non-zero), its extra_config value is written first.
 */
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
					  u64 enable_mask)
{
	u64 ctrl = hwc->config | enable_mask;

	/* The extra MSR is programmed before the control register write. */
	if (hwc->extra_reg != 0)
		wrmsrl(hwc->extra_reg, hwc->extra_config);

	wrmsrl(hwc->config_base, ctrl);
}

Expand Down
Loading

0 comments on commit a7e3ed1

Please sign in to comment.