Commit 4eabf53

perf/core: Detach 'struct perf_cpu_pmu_context' and 'struct pmu' lifetimes

In preparation for being able to unregister a PMU with existing events,
it becomes important to detach struct perf_cpu_pmu_context lifetimes
from that of struct pmu.
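
As a minimal sketch of the resulting shape (condensed from the hunks
below; error handling elided), the pmu changes from carrying the
per-CPU contexts inline to carrying a percpu array of pointers, so
each CPU's context gets a lifetime of its own:

	/* Before: one percpu block; free_percpu() on the pmu side
	 * destroys every perf_cpu_pmu_context with it. */
	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);

	/* After: a percpu array of pointers; each cpc is allocated
	 * separately and freed only when nothing references it. */
	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context *);
	for_each_possible_cpu(cpu) {
		struct perf_cpu_pmu_context *cpc =
			kmalloc_node(sizeof(*cpc), GFP_KERNEL | __GFP_ZERO,
				     cpu_to_node(cpu));
		*per_cpu_ptr(pmu->cpu_pmu_context, cpu) = cpc;
	}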

Notably struct perf_cpu_pmu_context embeds a struct perf_event_pmu_context
that can stay referenced until the last event goes.
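
In rough terms (a condensed sketch of the refcounting below, not the
verbatim kernel code), the embedded perf_event_pmu_context now starts
life with two references, and the containing perf_cpu_pmu_context is
only RCU-freed once both owners have dropped theirs:

	/* find_get_pmu_context(): one reference for the context plus
	 * one extra for the pmu; see perf_pmu_free(). */
	atomic_set(&epc->refcount, 2);

	/* put_pmu_ctx(), once the last reference is gone: the whole
	 * containing cpc is freed after an RCU grace period. */
	if (epc->embedded)
		call_rcu(&epc->rcu_head, free_cpc_rcu);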

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20241104135518.760214287@infradead.org
Peter Zijlstra authored and Ingo Molnar committed Mar 4, 2025
1 parent 0983593 commit 4eabf53
Showing 2 changed files with 49 additions and 11 deletions.
include/linux/perf_event.h (4 changes: 2 additions & 2 deletions)
@@ -343,7 +343,7 @@ struct pmu {
 	 */
 	unsigned int			scope;
 
-	struct perf_cpu_pmu_context __percpu *cpu_pmu_context;
+	struct perf_cpu_pmu_context __percpu **cpu_pmu_context;
 	atomic_t			exclusive_cnt; /* < 0: cpu; > 0: tsk */
 	int				task_ctx_nr;
 	int				hrtimer_interval_ms;
@@ -922,7 +922,7 @@ struct perf_event_pmu_context {
 	struct list_head		pinned_active;
 	struct list_head		flexible_active;
 
-	/* Used to avoid freeing per-cpu perf_event_pmu_context */
+	/* Used to identify the per-cpu perf_event_pmu_context */
 	unsigned int			embedded : 1;
 
 	unsigned int			nr_events;
kernel/events/core.c (56 changes: 47 additions & 9 deletions)
@@ -1219,7 +1219,7 @@ static int perf_mux_hrtimer_restart_ipi(void *arg)
 
 static __always_inline struct perf_cpu_pmu_context *this_cpc(struct pmu *pmu)
 {
-	return this_cpu_ptr(pmu->cpu_pmu_context);
+	return *this_cpu_ptr(pmu->cpu_pmu_context);
 }
 
 void perf_pmu_disable(struct pmu *pmu)
@@ -5007,11 +5007,14 @@ find_get_pmu_context(struct pmu *pmu, struct perf_event_context *ctx,
 		 */
 		struct perf_cpu_pmu_context *cpc;
 
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
+		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, event->cpu);
 		epc = &cpc->epc;
 		raw_spin_lock_irq(&ctx->lock);
 		if (!epc->ctx) {
-			atomic_set(&epc->refcount, 1);
+			/*
+			 * One extra reference for the pmu; see perf_pmu_free().
+			 */
+			atomic_set(&epc->refcount, 2);
 			epc->embedded = 1;
 			list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
 			epc->ctx = ctx;
@@ -5087,6 +5090,15 @@ static void get_pmu_ctx(struct perf_event_pmu_context *epc)
 	WARN_ON_ONCE(!atomic_inc_not_zero(&epc->refcount));
 }
 
+static void free_cpc_rcu(struct rcu_head *head)
+{
+	struct perf_cpu_pmu_context *cpc =
+		container_of(head, typeof(*cpc), epc.rcu_head);
+
+	kfree(cpc->epc.task_ctx_data);
+	kfree(cpc);
+}
+
 static void free_epc_rcu(struct rcu_head *head)
 {
 	struct perf_event_pmu_context *epc = container_of(head, typeof(*epc), rcu_head);
@@ -5121,8 +5133,10 @@ static void put_pmu_ctx(struct perf_event_pmu_context *epc)
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
 
-	if (epc->embedded)
+	if (epc->embedded) {
+		call_rcu(&epc->rcu_head, free_cpc_rcu);
 		return;
+	}
 
 	call_rcu(&epc->rcu_head, free_epc_rcu);
 }
@@ -11752,7 +11766,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_pmu_context *cpc;
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+		cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
 		cpc->hrtimer_interval = ns_to_ktime(NSEC_PER_MSEC * timer);
 
 		cpu_function_call(cpu, perf_mux_hrtimer_restart_ipi, cpc);
@@ -11925,7 +11939,25 @@ static void perf_pmu_free(struct pmu *pmu)
 		device_del(pmu->dev);
 		put_device(pmu->dev);
 	}
-	free_percpu(pmu->cpu_pmu_context);
+
+	if (pmu->cpu_pmu_context) {
+		int cpu;
+
+		for_each_possible_cpu(cpu) {
+			struct perf_cpu_pmu_context *cpc;
+
+			cpc = *per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+			if (!cpc)
+				continue;
+			if (cpc->epc.embedded) {
+				/* refcount managed */
+				put_pmu_ctx(&cpc->epc);
+				continue;
+			}
+			kfree(cpc);
+		}
+		free_percpu(pmu->cpu_pmu_context);
+	}
 }
 
 DEFINE_FREE(pmu_unregister, struct pmu *, if (_T) perf_pmu_free(_T))
@@ -11964,14 +11996,20 @@ int perf_pmu_register(struct pmu *_pmu, const char *name, int type)
 		return ret;
 	}
 
-	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context);
+	pmu->cpu_pmu_context = alloc_percpu(struct perf_cpu_pmu_context *);
 	if (!pmu->cpu_pmu_context)
 		return -ENOMEM;
 
 	for_each_possible_cpu(cpu) {
-		struct perf_cpu_pmu_context *cpc;
+		struct perf_cpu_pmu_context *cpc =
+			kmalloc_node(sizeof(struct perf_cpu_pmu_context),
+				     GFP_KERNEL | __GFP_ZERO,
+				     cpu_to_node(cpu));
+
+		if (!cpc)
+			return -ENOMEM;
 
-		cpc = per_cpu_ptr(pmu->cpu_pmu_context, cpu);
+		*per_cpu_ptr(pmu->cpu_pmu_context, cpu) = cpc;
 		__perf_init_event_pmu_context(&cpc->epc, pmu);
 		__perf_mux_hrtimer_init(cpc, cpu);
 	}
