Skip to content

Commit

Permalink
perf/x86/intel/pt: Prevent redundant WRMSRs
Browse files Browse the repository at this point in the history
With recent optimizations to AUX and PT buffer management code (high order
AUX allocations, opportunistic Single Range Output), it is far more likely
now that the output MSRs won't need reprogramming on every sched-in.

To avoid needless WRMSRs of those registers, cache their values and only
write them when needed.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Link: https://lkml.kernel.org/r/20191105082701.78442-3-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Alexander Shishkin authored and Ingo Molnar committed Nov 13, 2019
1 parent 6706384 commit 295c52e
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 12 deletions.
25 changes: 16 additions & 9 deletions arch/x86/events/intel/pt.c
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ static inline phys_addr_t topa_pfn(struct topa *topa)

static void pt_config_buffer(struct pt_buffer *buf)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
u64 reg, mask;
void *base;

Expand All @@ -617,11 +618,17 @@ static void pt_config_buffer(struct pt_buffer *buf)
mask = (u64)buf->cur_idx;
}

wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(base));
reg = virt_to_phys(base);
if (pt->output_base != reg) {
pt->output_base = reg;
wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
}

reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);

wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
if (pt->output_mask != reg) {
pt->output_mask = reg;
wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
}
}

/**
Expand Down Expand Up @@ -930,21 +937,21 @@ static void pt_handle_status(struct pt *pt)
*/
static void pt_read_offset(struct pt_buffer *buf)
{
	struct pt *pt = this_cpu_ptr(&pt_ctx);
	struct topa_page *tp;

	if (!buf->single) {
		/*
		 * Read the MSR directly into the per-cpu cache so that the
		 * cached value matches the hardware and pt_config_buffer()
		 * can compare against it to skip a redundant WRMSR.
		 */
		rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
		tp = phys_to_virt(pt->output_base);
		buf->cur = &tp->topa;
	}

	/* Same for the mask register: one read, kept in the cache. */
	rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
	/* offset within current output region */
	buf->output_off = pt->output_mask >> 32;
	/* index of current output region within this table */
	if (!buf->single)
		buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
}

static struct topa_entry *
Expand Down
10 changes: 7 additions & 3 deletions arch/x86/events/intel/pt.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,16 +113,20 @@ struct pt_filters {

/**
 * struct pt - per-cpu pt context
 * @handle:		perf output handle
 * @filters:		last configured filters
 * @handle_nmi:		do handle PT PMI on this cpu, there's an active event
 * @vmx_on:		1 if VMX is ON on this cpu
 * @output_base:	cached RTIT_OUTPUT_BASE MSR value
 * @output_mask:	cached RTIT_OUTPUT_MASK MSR value
 *
 * The two cached MSR values let pt_config_buffer() skip a WRMSR when the
 * value to be programmed equals the cached one; pt_read_offset() refreshes
 * them from the hardware registers.
 */
struct pt {
struct perf_output_handle handle;
struct pt_filters filters;
int handle_nmi;
int vmx_on;
u64 output_base;
u64 output_mask;
};

#endif /* __INTEL_PT_H__ */

0 comments on commit 295c52e

Please sign in to comment.