Merge tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf events updates from Ingo Molnar:

 - Thoroughly rewrite the data structures that implement perf task
   context handling, with the goal of fixing various quirks and
   misfeatures, both in already-merged and in upcoming proposed code.

   The old design used per-task and per-CPU perf_event_contexts:

         task_struct::perf_events_ctxp[] <-> perf_event_context <-> perf_cpu_context
              ^                                 |    ^     |           ^
              `---------------------------------'    |     `--> pmu ---'
                                                     v           ^
                                                perf_event ------'

   In the new design these are replaced with a single task context and
   a single CPU context, plus intermediate data structures (a simplified
   C sketch follows the list below):

         task_struct::perf_event_ctxp -> perf_event_context <- perf_cpu_context
              ^                           |   ^ ^
              `---------------------------'   | |
                                              | |    perf_cpu_pmu_context <--.
                                              | `----.    ^                  |
                                              |      |    |                  |
                                              |      v    v                  |
                                              | ,--> perf_event_pmu_context  |
                                              | |                            |
                                              | |                            |
                                              v v                            |
                                         perf_event ---> pmu ----------------'

   [ See commit bd27568 for more details. ]

   This rewrite was developed by Peter Zijlstra and Ravi Bangoria.

 - Optimize perf_tp_event()

 - Update the Intel uncore PMU driver, extending it with UPI topology
   discovery on various hardware models.

 - Misc fixes & cleanups
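
For orientation, here is a purely illustrative, heavily simplified C sketch of how the
new objects reference each other; the real definitions live in include/linux/perf_event.h
and are considerably larger:

      /* Illustrative sketch only -- simplified from include/linux/perf_event.h */
      struct list_head { struct list_head *next, *prev; };
      struct pmu;

      struct perf_event_context {
              /* one entry per PMU that has events in this context */
              struct list_head                pmu_ctx_list;
              /* locking, refcount, generation counters, event lists ... */
      };

      struct perf_event_pmu_context {
              struct pmu                      *pmu;   /* the PMU this slice serves          */
              struct perf_event_context       *ctx;   /* back-pointer to the owning context */
              /* per-PMU pinned/flexible/active event lists, time keeping ... */
      };

      struct perf_cpu_pmu_context {
              struct perf_event_pmu_context   epc;    /* CPU-side per-PMU state             */
              /* rotation hrtimer, multiplexing interval ... */
      };

      struct perf_cpu_context {
              struct perf_event_context       ctx;        /* the single per-CPU context     */
              struct perf_event_context       *task_ctx;  /* context of the current task    */
      };

      /* task_struct now carries a single pointer instead of an array:
       *         struct perf_event_context *perf_event_ctxp;
       */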

* tag 'perf-core-2022-12-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (25 commits)
  perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box()
  perf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map()
  perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox()
  perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology()
  perf/x86/intel/uncore: Make set_mapping() procedure void
  perf/x86/intel/uncore: Update sysfs-devices-mapping file
  perf/x86/intel/uncore: Enable UPI topology discovery for Sapphire Rapids
  perf/x86/intel/uncore: Enable UPI topology discovery for Icelake Server
  perf/x86/intel/uncore: Get UPI NodeID and GroupID
  perf/x86/intel/uncore: Enable UPI topology discovery for Skylake Server
  perf/x86/intel/uncore: Generalize get_topology() for SKX PMUs
  perf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D
  perf/x86/intel/uncore: Clear attr_update properly
  perf/x86/intel/uncore: Introduce UPI topology type
  perf/x86/intel/uncore: Generalize IIO topology support
  perf/core: Don't allow grouping events from different hw pmus
  perf/amd/ibs: Make IBS a core pmu
  perf: Fix function pointer case
  perf/x86/amd: Remove the repeated declaration
  perf: Fix possible memleak in pmu_dev_alloc()
  ...
Linus Torvalds committed Dec 12, 2022
2 parents 617fe4f + 17b8d84 commit add7695
Showing 21 changed files with 1,765 additions and 1,230 deletions.
30 changes: 29 additions & 1 deletion Documentation/ABI/testing/sysfs-devices-mapping
@@ -1,6 +1,6 @@
What: /sys/devices/uncore_iio_x/dieX
Date: February 2020
Contact: Roman Sudarikov <roman.sudarikov@linux.intel.com>
Contact: Alexander Antonov <alexander.antonov@linux.intel.com>
Description:
Each IIO stack (PCIe root port) has its own IIO PMON block, so
each dieX file (where X is die number) holds "Segment:Root Bus"
@@ -32,3 +32,31 @@ Description:
IIO PMU 0 on die 1 belongs to PCI RP on bus 0x40, domain 0x0000
IIO PMU 0 on die 2 belongs to PCI RP on bus 0x80, domain 0x0000
IIO PMU 0 on die 3 belongs to PCI RP on bus 0xc0, domain 0x0000

What: /sys/devices/uncore_upi_x/dieX
Date: March 2022
Contact: Alexander Antonov <alexander.antonov@linux.intel.com>
Description:
Each /sys/devices/uncore_upi_X/dieY file holds "upi_Z,die_W"
value that means UPI link number X on die Y is connected to UPI
link Z on die W and this link between sockets can be monitored
by UPI PMON block.
For example, 4-die Sapphire Rapids platform has the following
UPI 0 topology::

# tail /sys/devices/uncore_upi_0/die*
==> /sys/devices/uncore_upi_0/die0 <==
upi_1,die_1
==> /sys/devices/uncore_upi_0/die1 <==
upi_0,die_3
==> /sys/devices/uncore_upi_0/die2 <==
upi_1,die_3
==> /sys/devices/uncore_upi_0/die3 <==
upi_0,die_1

Which means::

UPI link 0 on die 0 is connected to UPI link 1 on die 1
UPI link 0 on die 1 is connected to UPI link 0 on die 3
UPI link 0 on die 2 is connected to UPI link 1 on die 3
UPI link 0 on die 3 is connected to UPI link 0 on die 1
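
The documentation above fully specifies the path and the "upi_Z,die_W" format, so as a
rough, hypothetical illustration (not part of this patch), a userspace reader for the
UPI 0 topology could look like this:

    /* Sketch only: print which UPI link/die each uncore_upi_0/dieX file points at. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int die, peer_link, peer_die;
            char path[64], buf[32];

            for (die = 0; die < 4; die++) {          /* 4-die example from the doc above */
                    FILE *f;

                    snprintf(path, sizeof(path),
                             "/sys/devices/uncore_upi_0/die%u", die);
                    f = fopen(path, "r");
                    if (!f)
                            continue;                /* no such die on this system */

                    if (fgets(buf, sizeof(buf), f) &&
                        sscanf(buf, "upi_%u,die_%u", &peer_link, &peer_die) == 2)
                            printf("UPI link 0 on die %u is connected to UPI link %u on die %u\n",
                                   die, peer_link, peer_die);
                    fclose(f);
            }
            return 0;
    }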
18 changes: 11 additions & 7 deletions arch/arm64/kernel/perf_event.c
@@ -806,10 +806,14 @@ static void armv8pmu_disable_event(struct perf_event *event)

static void armv8pmu_start(struct arm_pmu *cpu_pmu)
{
struct perf_event_context *task_ctx =
this_cpu_ptr(cpu_pmu->pmu.pmu_cpu_context)->task_ctx;
struct perf_event_context *ctx;
int nr_user = 0;

if (sysctl_perf_user_access && task_ctx && task_ctx->nr_user)
ctx = perf_cpu_task_ctx();
if (ctx)
nr_user = ctx->nr_user;

if (sysctl_perf_user_access && nr_user)
armv8pmu_enable_user_access(cpu_pmu);
else
armv8pmu_disable_user_access();
@@ -1019,10 +1023,10 @@ static int armv8pmu_set_event_filter(struct hw_perf_event *event,
return 0;
}

static int armv8pmu_filter_match(struct perf_event *event)
static bool armv8pmu_filter(struct pmu *pmu, int cpu)
{
unsigned long evtype = event->hw.config_base & ARMV8_PMU_EVTYPE_EVENT;
return evtype != ARMV8_PMUV3_PERFCTR_CHAIN;
struct arm_pmu *armpmu = to_arm_pmu(pmu);
return !cpumask_test_cpu(smp_processor_id(), &armpmu->supported_cpus);
}

static void armv8pmu_reset(void *info)
@@ -1254,7 +1258,7 @@ static int armv8_pmu_init(struct arm_pmu *cpu_pmu, char *name,
cpu_pmu->stop = armv8pmu_stop;
cpu_pmu->reset = armv8pmu_reset;
cpu_pmu->set_event_filter = armv8pmu_set_event_filter;
cpu_pmu->filter_match = armv8pmu_filter_match;
cpu_pmu->filter = armv8pmu_filter;

cpu_pmu->pmu.event_idx = armv8pmu_user_event_idx;

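Worth noting in this file (and in the x86 equivalents later in this merge): the per-event
->filter_match() hook, where a non-zero return meant the event may be scheduled, is replaced
by a per-PMU ->filter(pmu, cpu) hook, where returning true tells the core to skip that PMU's
events on the given CPU. A hypothetical driver callback following the same pattern as
armv8pmu_filter() above (struct my_pmu and to_my_pmu() are invented names):

    /* Hypothetical example; kernel context assumed, names are illustrative. */
    static bool my_pmu_filter(struct pmu *pmu, int cpu)
    {
            struct my_pmu *mp = to_my_pmu(pmu);   /* driver's container_of() wrapper */

            /* true = skip this PMU's events on this CPU */
            return !cpumask_test_cpu(cpu, &mp->supported_cpus);
    }

    /* registered via:  cpu_pmu->filter = my_pmu_filter;  (was ->filter_match) */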
8 changes: 4 additions & 4 deletions arch/powerpc/perf/core-book3s.c
@@ -132,7 +132,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)

static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in) {}
static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@@ -424,7 +424,7 @@ static void power_pmu_bhrb_enable(struct perf_event *event)
cpuhw->bhrb_context = event->ctx;
}
cpuhw->bhrb_users++;
perf_sched_cb_inc(event->ctx->pmu);
perf_sched_cb_inc(event->pmu);
}

static void power_pmu_bhrb_disable(struct perf_event *event)
@@ -436,7 +436,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)

WARN_ON_ONCE(!cpuhw->bhrb_users);
cpuhw->bhrb_users--;
perf_sched_cb_dec(event->ctx->pmu);
perf_sched_cb_dec(event->pmu);

if (!cpuhw->disabled && !cpuhw->bhrb_users) {
/* BHRB cannot be turned off when other
@@ -451,7 +451,7 @@ static void power_pmu_bhrb_disable(struct perf_event *event)
/* Called from ctxsw to prevent one process's branch entries to
* mingle with the other process's entries during context switch.
*/
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static void power_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
if (!ppmu->bhrb_nr)
return;
2 changes: 1 addition & 1 deletion arch/s390/kernel/perf_pai_crypto.c
@@ -377,7 +377,7 @@ static int paicrypt_push_sample(void)
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event CRYPTO_ALL is allowed.
*/
static void paicrypt_sched_task(struct perf_event_context *ctx, bool sched_in)
static void paicrypt_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
* results on schedule_out and if page was dirty, clear values.
2 changes: 1 addition & 1 deletion arch/s390/kernel/perf_pai_ext.c
@@ -466,7 +466,7 @@ static int paiext_push_sample(void)
/* Called on schedule-in and schedule-out. No access to event structure,
* but for sampling only event NNPA_ALL is allowed.
*/
static void paiext_sched_task(struct perf_event_context *ctx, bool sched_in)
static void paiext_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
/* We started with a clean page on event installation. So read out
* results on schedule_out and if page was dirty, clear values.
2 changes: 1 addition & 1 deletion arch/x86/events/amd/brs.c
@@ -384,7 +384,7 @@ static void amd_brs_poison_buffer(void)
* On ctxswin, sched_in = true, called after the PMU has started
* On ctxswout, sched_in = false, called before the PMU is stopped
*/
void amd_pmu_brs_sched_task(struct perf_event_context *ctx, bool sched_in)
void amd_pmu_brs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

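The same mechanical change runs through every sched_task() implementation in this merge:
the callback now receives the per-PMU perf_event_pmu_context slice rather than the whole
perf_event_context. A minimal, hypothetical callback under the new signature (kernel
context assumed; the body is placeholder comments based on the amd/brs.c comment above):

    /* Hypothetical; mirrors the drivers above.  pmu_ctx is this PMU's slice  */
    /* of the scheduled task's context, not the whole perf_event_context.     */
    static void my_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
    {
            if (sched_in) {
                    /* called after the PMU has started: restore per-task state,
                     * e.g. reset branch-record buffers so samples do not leak
                     * in from the previous task.
                     */
            } else {
                    /* called before the PMU is stopped: drain or save state. */
            }
    }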
4 changes: 2 additions & 2 deletions arch/x86/events/amd/ibs.c
@@ -631,7 +631,7 @@ static const struct attribute_group *op_attr_update[] = {

static struct perf_ibs perf_ibs_fetch = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
.task_ctx_nr = perf_hw_context,

.event_init = perf_ibs_init,
.add = perf_ibs_add,
@@ -655,7 +655,7 @@ static struct perf_ibs perf_ibs_fetch = {

static struct perf_ibs perf_ibs_op = {
.pmu = {
.task_ctx_nr = perf_invalid_context,
.task_ctx_nr = perf_hw_context,

.event_init = perf_ibs_init,
.add = perf_ibs_add,
6 changes: 3 additions & 3 deletions arch/x86/events/amd/lbr.c
@@ -352,7 +352,7 @@ void amd_pmu_lbr_add(struct perf_event *event)
cpuc->br_sel = reg->reg;
}

perf_sched_cb_inc(event->ctx->pmu);
perf_sched_cb_inc(event->pmu);

if (!cpuc->lbr_users++ && !event->total_time_running)
amd_pmu_lbr_reset();
@@ -370,10 +370,10 @@ void amd_pmu_lbr_del(struct perf_event *event)

cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
perf_sched_cb_dec(event->pmu);
}

void amd_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
void amd_pmu_lbr_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

48 changes: 14 additions & 34 deletions arch/x86/events/core.c
@@ -90,6 +90,8 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs, *x86_pmu.drain_pebs);
DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);

DEFINE_STATIC_CALL_NULL(x86_pmu_filter, *x86_pmu.filter);

/*
* This one is magic, it will get called even when PMU init fails (because
* there is no PMU), in which case it should simply return NULL.
@@ -2031,6 +2033,7 @@ static void x86_pmu_static_call_update(void)
static_call_update(x86_pmu_pebs_aliases, x86_pmu.pebs_aliases);

static_call_update(x86_pmu_guest_get_msrs, x86_pmu.guest_get_msrs);
static_call_update(x86_pmu_filter, x86_pmu.filter);
}

static void _x86_pmu_read(struct perf_event *event)
@@ -2052,23 +2055,6 @@ void x86_pmu_show_pmu_cap(int num_counters, int num_counters_fixed,
pr_info("... event mask: %016Lx\n", intel_ctrl);
}

/*
* The generic code is not hybrid friendly. The hybrid_pmu->pmu
* of the first registered PMU is unconditionally assigned to
* each possible cpuctx->ctx.pmu.
* Update the correct hybrid PMU to the cpuctx->ctx.pmu.
*/
void x86_pmu_update_cpu_context(struct pmu *pmu, int cpu)
{
struct perf_cpu_context *cpuctx;

if (!pmu->pmu_cpu_context)
return;

cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
cpuctx->ctx.pmu = pmu;
}

static int __init init_hw_perf_events(void)
{
struct x86_pmu_quirk *quirk;
@@ -2175,13 +2161,9 @@ static int __init init_hw_perf_events(void)
if (err)
goto out2;
} else {
u8 cpu_type = get_this_hybrid_cpu_type();
struct x86_hybrid_pmu *hybrid_pmu;
int i, j;

if (!cpu_type && x86_pmu.get_hybrid_cpu_type)
cpu_type = x86_pmu.get_hybrid_cpu_type();

for (i = 0; i < x86_pmu.num_hybrid_pmus; i++) {
hybrid_pmu = &x86_pmu.hybrid_pmu[i];

@@ -2195,9 +2177,6 @@ static int __init init_hw_perf_events(void)
(hybrid_pmu->cpu_type == hybrid_big) ? PERF_TYPE_RAW : -1);
if (err)
break;

if (cpu_type == hybrid_pmu->cpu_type)
x86_pmu_update_cpu_context(&hybrid_pmu->pmu, raw_smp_processor_id());
}

if (i < x86_pmu.num_hybrid_pmus) {
@@ -2646,15 +2625,15 @@ static const struct attribute_group *x86_pmu_attr_groups[] = {
NULL,
};

static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
static void x86_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
static_call_cond(x86_pmu_sched_task)(ctx, sched_in);
static_call_cond(x86_pmu_sched_task)(pmu_ctx, sched_in);
}

static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
static void x86_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc)
{
static_call_cond(x86_pmu_swap_task_ctx)(prev, next);
static_call_cond(x86_pmu_swap_task_ctx)(prev_epc, next_epc);
}

void perf_check_microcode(void)
@@ -2689,12 +2668,13 @@ static int x86_pmu_aux_output_match(struct perf_event *event)
return 0;
}

static int x86_pmu_filter_match(struct perf_event *event)
static bool x86_pmu_filter(struct pmu *pmu, int cpu)
{
if (x86_pmu.filter_match)
return x86_pmu.filter_match(event);
bool ret = false;

return 1;
static_call_cond(x86_pmu_filter)(pmu, cpu, &ret);

return ret;
}

static struct pmu pmu = {
@@ -2725,7 +2705,7 @@ static struct pmu pmu = {

.aux_output_match = x86_pmu_aux_output_match,

.filter_match = x86_pmu_filter_match,
.filter = x86_pmu_filter,
};

void arch_perf_update_userpage(struct perf_event *event,
23 changes: 10 additions & 13 deletions arch/x86/events/intel/core.c
@@ -4536,8 +4536,6 @@ static bool init_hybrid_pmu(int cpu)
cpumask_set_cpu(cpu, &pmu->supported_cpus);
cpuc->pmu = &pmu->pmu;

x86_pmu_update_cpu_context(&pmu->pmu, cpu);

return true;
}

@@ -4671,17 +4669,17 @@ static void intel_pmu_cpu_dead(int cpu)
cpumask_clear_cpu(cpu, &hybrid_pmu(cpuc->pmu)->supported_cpus);
}

static void intel_pmu_sched_task(struct perf_event_context *ctx,
static void intel_pmu_sched_task(struct perf_event_pmu_context *pmu_ctx,
bool sched_in)
{
intel_pmu_pebs_sched_task(ctx, sched_in);
intel_pmu_lbr_sched_task(ctx, sched_in);
intel_pmu_pebs_sched_task(pmu_ctx, sched_in);
intel_pmu_lbr_sched_task(pmu_ctx, sched_in);
}

static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
struct perf_event_context *next)
static void intel_pmu_swap_task_ctx(struct perf_event_pmu_context *prev_epc,
struct perf_event_pmu_context *next_epc)
{
intel_pmu_lbr_swap_task_ctx(prev, next);
intel_pmu_lbr_swap_task_ctx(prev_epc, next_epc);
}

static int intel_pmu_check_period(struct perf_event *event, u64 value)
@@ -4705,12 +4703,11 @@ static int intel_pmu_aux_output_match(struct perf_event *event)
return is_intel_pt_event(event);
}

static int intel_pmu_filter_match(struct perf_event *event)
static void intel_pmu_filter(struct pmu *pmu, int cpu, bool *ret)
{
struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
unsigned int cpu = smp_processor_id();
struct x86_hybrid_pmu *hpmu = hybrid_pmu(pmu);

return cpumask_test_cpu(cpu, &pmu->supported_cpus);
*ret = !cpumask_test_cpu(cpu, &hpmu->supported_cpus);
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
@@ -6413,7 +6410,7 @@ __init int intel_pmu_init(void)
static_call_update(intel_pmu_set_topdown_event_period,
&adl_set_topdown_event_period);

x86_pmu.filter_match = intel_pmu_filter_match;
x86_pmu.filter = intel_pmu_filter;
x86_pmu.get_event_constraints = adl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
x86_pmu.limit_period = spr_limit_period;
4 changes: 2 additions & 2 deletions arch/x86/events/intel/ds.c
@@ -1069,7 +1069,7 @@ static inline bool pebs_needs_sched_cb(struct cpu_hw_events *cpuc)
return cpuc->n_pebs && (cpuc->n_pebs == cpuc->n_large_pebs);
}

void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
void intel_pmu_pebs_sched_task(struct perf_event_pmu_context *pmu_ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);

@@ -1177,7 +1177,7 @@ static void
pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct perf_event *event, bool add)
{
struct pmu *pmu = event->ctx->pmu;
struct pmu *pmu = event->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but