Merge tag 'perf_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Borislav Petkov:

 - Add support for accessing the general-purpose counters on Alder Lake
   via MMIO

 - Add new LBR format v7 support, which is v5 modulo TSX

 - Fix counter enumeration on Alder Lake hybrids

 - Overhaul how context time updates are done and get rid of
   perf_event::shadow_ctx_time (a perf_event_read_local() usage sketch
   follows this list)

 - The usual set of fixes: event mask correction, supported event
   types reporting, etc.
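
A minimal in-kernel usage sketch (not taken from this pull) of
perf_event_read_local(), whose enabled/running times the context-time
rework and the "perf: Fix perf_event_read_local() time" fix below are
about; 'event' is an assumed, already-created task-local or CPU-local
event, as in the BPF read helpers:

    u64 value, enabled, running;
    int err;

    /* NMI-safe read of a local event, including its scaling times. */
    err = perf_event_read_local(event, &value, &enabled, &running);
    if (!err)
        pr_debug("count %llu enabled %llu running %llu\n",
                 value, enabled, running);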

* tag 'perf_urgent_for_v5.17_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/perf: Avoid warning for Arch LBR without XSAVE
  perf/x86/intel/uncore: Add IMC uncore support for ADL
  perf/x86/intel/lbr: Add static_branch for LBR INFO flags
  perf/x86/intel/lbr: Support LBR format V7
  perf/x86/rapl: fix AMD event handling
  perf/x86/intel/uncore: Fix CAS_COUNT_WRITE issue for ICX
  perf/x86/intel: Add a quirk for the calculation of the number of counters on Alder Lake
  perf: Fix perf_event_read_local() time
Linus Torvalds committed Jan 23, 2022
2 parents e783362 + 8c16dc0 commit 0f9e042
Showing 12 changed files with 501 additions and 189 deletions.
15 changes: 15 additions & 0 deletions arch/x86/events/intel/core.c
@@ -6236,6 +6236,19 @@ __init int intel_pmu_init(void)
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}

/*
 * Quirk: On some Alder Lake machines, when all E-cores are disabled in
 * the BIOS, CPUID leaf 0xA enumerates all counters of the P-cores.
 * However, X86_FEATURE_HYBRID_CPU is still set. The code above would
 * then mistakenly add extra counters for the P-cores. Correct the
 * number of counters here.
 */
if ((pmu->num_counters > 8) || (pmu->num_counters_fixed > 4)) {
pmu->num_counters = x86_pmu.num_counters;
pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
}

pmu->max_pebs_events = min_t(unsigned, MAX_PEBS_EVENTS, pmu->num_counters);
pmu->unconstrained = (struct event_constraint)
__EVENT_CONSTRAINT(0, (1ULL << pmu->num_counters) - 1,
@@ -6340,6 +6353,8 @@ __init int intel_pmu_init(void)
}

if (x86_pmu.lbr_nr) {
intel_pmu_lbr_init();

pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);

/* only support branch_stack snapshot for perfmon >= v2 */
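
For context on the counter quirk above, here is a hedged user-space
sketch (not part of the patch) of the CPUID leaf 0xA enumeration that
the quirk second-guesses on Alder Lake parts with all E-cores disabled:
EAX[15:8] reports the general-purpose counter count and EDX[4:0] the
fixed counter count that intel_pmu_init() compares against.

    #include <cpuid.h>
    #include <stdio.h>

    int main(void)
    {
        unsigned int eax, ebx, ecx, edx;

        /* Leaf 0xA: architectural performance monitoring. */
        if (!__get_cpuid_count(0xa, 0, &eax, &ebx, &ecx, &edx))
            return 1;

        printf("perfmon v%u: %u GP counters, %u fixed counters\n",
               eax & 0xff,        /* EAX[7:0]: version ID */
               (eax >> 8) & 0xff, /* EAX[15:8]: GP counters */
               edx & 0x1f);       /* EDX[4:0]: fixed counters */
        return 0;
    }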
168 changes: 101 additions & 67 deletions arch/x86/events/intel/lbr.c
@@ -8,14 +8,6 @@

#include "../perf_event.h"

static const enum {
LBR_EIP_FLAGS = 1,
LBR_TSX = 2,
} lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
[LBR_FORMAT_EIP_FLAGS] = LBR_EIP_FLAGS,
[LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
};

/*
* Intel LBR_SELECT bits
* Intel Vol3a, April 2011, Section 16.7 Table 16-10
@@ -243,7 +235,7 @@ void intel_pmu_lbr_reset_64(void)
for (i = 0; i < x86_pmu.lbr_nr; i++) {
wrmsrl(x86_pmu.lbr_from + i, 0);
wrmsrl(x86_pmu.lbr_to + i, 0);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
if (x86_pmu.lbr_has_info)
wrmsrl(x86_pmu.lbr_info + i, 0);
}
}
@@ -305,11 +297,10 @@ enum {
*/
static inline bool lbr_from_signext_quirk_needed(void)
{
int lbr_format = x86_pmu.intel_cap.lbr_format;
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);

return !tsx_support && (lbr_desc[lbr_format] & LBR_TSX);
return !tsx_support && x86_pmu.lbr_has_tsx;
}

static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
@@ -427,12 +418,12 @@ rdlbr_all(struct lbr_entry *lbr, unsigned int idx, bool need_info)

void intel_pmu_lbr_restore(void *ctx)
{
bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
int i;
unsigned lbr_idx, mask;
bool need_info = x86_pmu.lbr_has_info;
u64 tos = task_ctx->tos;
unsigned lbr_idx, mask;
int i;

mask = x86_pmu.lbr_nr - 1;
for (i = 0; i < task_ctx->valid_lbrs; i++) {
@@ -444,7 +435,7 @@ void intel_pmu_lbr_restore(void *ctx)
lbr_idx = (tos - i) & mask;
wrlbr_from(lbr_idx, 0);
wrlbr_to(lbr_idx, 0);
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO)
if (need_info)
wrlbr_info(lbr_idx, 0);
}

@@ -519,9 +510,9 @@ static void __intel_pmu_lbr_restore(void *ctx)

void intel_pmu_lbr_save(void *ctx)
{
bool need_info = x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO;
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct x86_perf_task_context *task_ctx = ctx;
bool need_info = x86_pmu.lbr_has_info;
unsigned lbr_idx, mask;
u64 tos;
int i;
@@ -816,7 +807,6 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
{
bool need_info = false, call_stack = false;
unsigned long mask = x86_pmu.lbr_nr - 1;
int lbr_format = x86_pmu.intel_cap.lbr_format;
u64 tos = intel_pmu_lbr_tos();
int i;
int out = 0;
@@ -831,9 +821,7 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
for (i = 0; i < num; i++) {
unsigned long lbr_idx = (tos - i) & mask;
u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
int skip = 0;
u16 cycles = 0;
int lbr_flags = lbr_desc[lbr_format];

from = rdlbr_from(lbr_idx, NULL);
to = rdlbr_to(lbr_idx, NULL);
@@ -845,37 +833,39 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
if (call_stack && !from)
break;

if (lbr_format == LBR_FORMAT_INFO && need_info) {
u64 info;

info = rdlbr_info(lbr_idx, NULL);
mis = !!(info & LBR_INFO_MISPRED);
pred = !mis;
in_tx = !!(info & LBR_INFO_IN_TX);
abort = !!(info & LBR_INFO_ABORT);
cycles = (info & LBR_INFO_CYCLES);
}

if (lbr_format == LBR_FORMAT_TIME) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
skip = 1;
cycles = ((to >> 48) & LBR_INFO_CYCLES);

to = (u64)((((s64)to) << 16) >> 16);
}

if (lbr_flags & LBR_EIP_FLAGS) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
skip = 1;
}
if (lbr_flags & LBR_TSX) {
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
abort = !!(from & LBR_FROM_FLAG_ABORT);
skip = 3;
if (x86_pmu.lbr_has_info) {
if (need_info) {
u64 info;

info = rdlbr_info(lbr_idx, NULL);
mis = !!(info & LBR_INFO_MISPRED);
pred = !mis;
cycles = (info & LBR_INFO_CYCLES);
if (x86_pmu.lbr_has_tsx) {
in_tx = !!(info & LBR_INFO_IN_TX);
abort = !!(info & LBR_INFO_ABORT);
}
}
} else {
int skip = 0;

if (x86_pmu.lbr_from_flags) {
mis = !!(from & LBR_FROM_FLAG_MISPRED);
pred = !mis;
skip = 1;
}
if (x86_pmu.lbr_has_tsx) {
in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
abort = !!(from & LBR_FROM_FLAG_ABORT);
skip = 3;
}
from = (u64)((((s64)from) << skip) >> skip);

if (x86_pmu.lbr_to_cycles) {
cycles = ((to >> 48) & LBR_INFO_CYCLES);
to = (u64)((((s64)to) << 16) >> 16);
}
}
from = (u64)((((s64)from) << skip) >> skip);

/*
* Some CPUs report duplicated abort records,
@@ -903,37 +893,40 @@ void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
cpuc->lbr_stack.hw_idx = tos;
}

static DEFINE_STATIC_KEY_FALSE(x86_lbr_mispred);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_cycles);
static DEFINE_STATIC_KEY_FALSE(x86_lbr_type);

static __always_inline int get_lbr_br_type(u64 info)
{
if (!static_cpu_has(X86_FEATURE_ARCH_LBR) || !x86_pmu.lbr_br_type)
return 0;
int type = 0;

return (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;
if (static_branch_likely(&x86_lbr_type))
type = (info & LBR_INFO_BR_TYPE) >> LBR_INFO_BR_TYPE_OFFSET;

return type;
}

static __always_inline bool get_lbr_mispred(u64 info)
{
if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
return 0;
bool mispred = 0;

return !!(info & LBR_INFO_MISPRED);
}
if (static_branch_likely(&x86_lbr_mispred))
mispred = !!(info & LBR_INFO_MISPRED);

static __always_inline bool get_lbr_predicted(u64 info)
{
if (static_cpu_has(X86_FEATURE_ARCH_LBR) && !x86_pmu.lbr_mispred)
return 0;

return !(info & LBR_INFO_MISPRED);
return mispred;
}

static __always_inline u16 get_lbr_cycles(u64 info)
{
u16 cycles = info & LBR_INFO_CYCLES;

if (static_cpu_has(X86_FEATURE_ARCH_LBR) &&
!(x86_pmu.lbr_timed_lbr && info & LBR_INFO_CYC_CNT_VALID))
return 0;
(!static_branch_likely(&x86_lbr_cycles) ||
!(info & LBR_INFO_CYC_CNT_VALID)))
cycles = 0;

return info & LBR_INFO_CYCLES;
return cycles;
}

static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
@@ -961,7 +954,7 @@ static void intel_pmu_store_lbr(struct cpu_hw_events *cpuc,
e->from = from;
e->to = to;
e->mispred = get_lbr_mispred(info);
e->predicted = get_lbr_predicted(info);
e->predicted = !e->mispred;
e->in_tx = !!(info & LBR_INFO_IN_TX);
e->abort = !!(info & LBR_INFO_ABORT);
e->cycles = get_lbr_cycles(info);
@@ -1120,7 +1113,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)

if ((br_type & PERF_SAMPLE_BRANCH_NO_CYCLES) &&
(br_type & PERF_SAMPLE_BRANCH_NO_FLAGS) &&
(x86_pmu.intel_cap.lbr_format == LBR_FORMAT_INFO))
x86_pmu.lbr_has_info)
reg->config |= LBR_NO_INFO;

return 0;
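
The LBR_NO_INFO optimization above is driven from user space by the
branch_sample_type flags; a hedged sketch (helper name hypothetical) of
an attr setup that lets an LBR-INFO machine skip the extra MSR read per
branch record:

    #include <linux/perf_event.h>
    #include <string.h>

    /* Request branch stacks but waive flags and cycle counts. */
    static void branch_attr_init(struct perf_event_attr *attr)
    {
        memset(attr, 0, sizeof(*attr));
        attr->size = sizeof(*attr);
        attr->type = PERF_TYPE_HARDWARE;
        attr->config = PERF_COUNT_HW_CPU_CYCLES;
        attr->sample_type = PERF_SAMPLE_BRANCH_STACK;
        attr->branch_sample_type = PERF_SAMPLE_BRANCH_ANY |
                                   PERF_SAMPLE_BRANCH_NO_FLAGS |
                                   PERF_SAMPLE_BRANCH_NO_CYCLES;
    }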
@@ -1706,6 +1699,38 @@ void intel_pmu_lbr_init_knl(void)
x86_pmu.intel_cap.lbr_format = LBR_FORMAT_EIP_FLAGS;
}

void intel_pmu_lbr_init(void)
{
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
fallthrough;
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;

case LBR_FORMAT_INFO:
x86_pmu.lbr_has_tsx = 1;
fallthrough;
case LBR_FORMAT_INFO2:
x86_pmu.lbr_has_info = 1;
break;

case LBR_FORMAT_TIME:
x86_pmu.lbr_from_flags = 1;
x86_pmu.lbr_to_cycles = 1;
break;
}

if (x86_pmu.lbr_has_info) {
/*
* Only used in combination with baseline PEBS.
*/
static_branch_enable(&x86_lbr_mispred);
static_branch_enable(&x86_lbr_cycles);
}
}

/*
* LBR state size is variable based on the max number of registers.
* This calculates the expected state size, which should match
@@ -1726,6 +1751,9 @@ static bool is_arch_lbr_xsave_available(void)
* Check the LBR state with the corresponding software structure.
* Disable LBR XSAVES support if the size doesn't match.
*/
if (xfeature_size(XFEATURE_LBR) == 0)
return false;

if (WARN_ON(xfeature_size(XFEATURE_LBR) != get_lbr_state_size()))
return false;

@@ -1765,6 +1793,12 @@ void __init intel_pmu_arch_lbr_init(void)
x86_pmu.lbr_br_type = ecx.split.lbr_br_type;
x86_pmu.lbr_nr = lbr_nr;

if (x86_pmu.lbr_mispred)
static_branch_enable(&x86_lbr_mispred);
if (x86_pmu.lbr_timed_lbr)
static_branch_enable(&x86_lbr_cycles);
if (x86_pmu.lbr_br_type)
static_branch_enable(&x86_lbr_type);

arch_lbr_xsave = is_arch_lbr_xsave_available();
if (arch_lbr_xsave) {
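
The static_branch conversion in this file follows the standard
jump-label pattern; a hedged, self-contained sketch (key and function
names are illustrative, not from the patch) of how a key set once at
init turns per-record capability checks into patched jumps:

    #include <linux/jump_label.h>

    static DEFINE_STATIC_KEY_FALSE(example_lbr_cap);

    static void example_init(bool hw_has_cap)
    {
        /* Flip the key once, when the PMU capability is known. */
        if (hw_has_cap)
            static_branch_enable(&example_lbr_cap);
    }

    static u64 example_extract(u64 info)
    {
        u64 val = 0;

        /* Compiles to a patched NOP/jump; no flag load per record. */
        if (static_branch_likely(&example_lbr_cap))
            val = info & 0xffff;    /* hypothetical field */

        return val;
    }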
2 changes: 1 addition & 1 deletion arch/x86/events/intel/uncore.c
@@ -1762,7 +1762,7 @@ static const struct intel_uncore_init_fun rkl_uncore_init __initconst = {

static const struct intel_uncore_init_fun adl_uncore_init __initconst = {
.cpu_init = adl_uncore_cpu_init,
.mmio_init = tgl_uncore_mmio_init,
.mmio_init = adl_uncore_mmio_init,
};

static const struct intel_uncore_init_fun icx_uncore_init __initconst = {
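
The adl_uncore_init entry above is one row of a per-platform dispatch
table; a hedged sketch (function name hypothetical, shape simplified
from intel_uncore_init()) of how the matched entry's callbacks are
invoked at boot:

    /* Each platform entry fills in only the PMU domains it has. */
    static void example_uncore_bringup(const struct intel_uncore_init_fun *init)
    {
        if (init->cpu_init)
            init->cpu_init();   /* MSR-based uncore PMUs */
        if (init->pci_init && init->pci_init())
            pr_warn("uncore: PCI init failed\n");
        if (init->mmio_init)
            init->mmio_init();  /* MMIO PMUs, e.g. the new ADL IMC */
    }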
3 changes: 2 additions & 1 deletion arch/x86/events/intel/uncore.h
@@ -584,10 +584,11 @@ void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
void icl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
void tgl_uncore_cpu_init(void);
void adl_uncore_cpu_init(void);
void tgl_uncore_mmio_init(void);
void tgl_l_uncore_mmio_init(void);
void adl_uncore_mmio_init(void);
int snb_pci2phy_map_init(int devid);

/* uncore_snbep.c */
4 changes: 2 additions & 2 deletions arch/x86/events/intel/uncore_discovery.c
@@ -494,8 +494,8 @@ void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box)
writel(0, box->io_addr);
}

static void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
struct perf_event *event)
void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;

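
The body of the newly exported function is truncated above; as a hedged
sketch of the generic pattern (not a claim about the elided lines), an
MMIO enable_event callback of this kind writes the event's control
value into the box's mapped register file:

    static void example_mmio_enable_event(struct intel_uncore_box *box,
                                          struct perf_event *event)
    {
        struct hw_perf_event *hwc = &event->hw;

        if (!box->io_addr)  /* box may have failed to map */
            return;

        writel(hwc->config, box->io_addr + hwc->config_base);
    }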
2 changes: 2 additions & 0 deletions arch/x86/events/intel/uncore_discovery.h
@@ -139,6 +139,8 @@ void intel_generic_uncore_mmio_disable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_enable_box(struct intel_uncore_box *box);
void intel_generic_uncore_mmio_disable_event(struct intel_uncore_box *box,
struct perf_event *event);
void intel_generic_uncore_mmio_enable_event(struct intel_uncore_box *box,
struct perf_event *event);

void intel_generic_uncore_pci_init_box(struct intel_uncore_box *box);
void intel_generic_uncore_pci_disable_box(struct intel_uncore_box *box);
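
With intel_generic_uncore_mmio_enable_event() made visible here, a new
uncore type can reuse the generic MMIO callbacks wholesale; a hedged
sketch (the ops-struct name is illustrative, and ADL's real table may
differ, e.g. with its own init_box):

    static struct intel_uncore_ops example_adl_mmio_ops = {
        .init_box      = intel_generic_uncore_mmio_init_box,
        .disable_box   = intel_generic_uncore_mmio_disable_box,
        .enable_box    = intel_generic_uncore_mmio_enable_box,
        .disable_event = intel_generic_uncore_mmio_disable_event,
        .enable_event  = intel_generic_uncore_mmio_enable_event,
        .read_counter  = uncore_mmio_read_counter,
    };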
(diff for the remaining 6 changed files not shown)