perf/tracing/cpuhotplug: Fix locking order
perf, tracing, kprobes and jump_labels have a gazillion ways to create
dependency lock chains. Some of those involve nested invocations of
get_online_cpus().

The conversion of the hotplug locking to a percpu rwsem requires avoiding
such nested calls. sys_perf_event_open() protects most of the syscall logic
against cpu hotplug. This causes nested calls and lock inversions versus
ftrace and kprobes in various interesting ways.

It's impossible to move the hotplug locking to the outer end of all call
chains in the involved facilities, so the hotplug protection in
sys_perf_event_open() needs to be solved differently.

Use the existing 'pmus_lock' mutex to protect a new perf private online cpumask. This
mutex is taken when the mask is updated in the cpu hotplug callbacks and
can be taken in sys_perf_event_open() to protect the swhash setup/teardown
code and when the final judgement about a valid event has to be made.
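
For readers unfamiliar with the pattern, here is a minimal user-space sketch of
the same idea (pthreads and a plain array stand in for the kernel's mutex and
percpu cpumask; all names are illustrative, not perf API):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>

#define NR_CPUS 8

static pthread_mutex_t pmus_lock = PTHREAD_MUTEX_INITIALIZER;
static bool perf_online[NR_CPUS];	/* mirrors the hotplug state */

/* Hotplug callback side: update the private mask under the mutex. */
static void set_cpu_online(int cpu, bool online)
{
	pthread_mutex_lock(&pmus_lock);
	perf_online[cpu] = online;
	pthread_mutex_unlock(&pmus_lock);
}

/* Syscall side: instead of holding the global hotplug lock across the
 * whole syscall, take the private mutex only for the final check. */
static int validate_event_cpu(int cpu)
{
	int err = 0;

	pthread_mutex_lock(&pmus_lock);
	if (!perf_online[cpu])
		err = -ENODEV;
	pthread_mutex_unlock(&pmus_lock);
	return err;
}

int main(void)
{
	set_cpu_online(3, true);
	return validate_event_cpu(3);	/* 0: CPU 3 is "online" */
}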

[ tglx: Produced changelog and fixed the swhash interaction ]

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Sebastian Siewior <bigeasy@linutronix.de>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Link: http://lkml.kernel.org/r/20170524081548.930941109@linutronix.de
Thomas Gleixner committed May 26, 2017
1 parent fdaf0a5 commit a63fbed
Showing 2 changed files with 78 additions and 30 deletions.
2 changes: 2 additions & 0 deletions include/linux/perf_event.h
@@ -801,6 +801,8 @@ struct perf_cpu_context {
 
 	struct list_head		sched_cb_entry;
 	int				sched_cb_usage;
+
+	int				online;
 };
 
 struct perf_output_handle {

106 changes: 76 additions & 30 deletions kernel/events/core.c
@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly;
 static LIST_HEAD(pmus);
 static DEFINE_MUTEX(pmus_lock);
 static struct srcu_struct pmus_srcu;
+static cpumask_var_t perf_online_mask;
 
 /*
  * perf event paranoia level:
@@ -3812,14 +3813,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
 	if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
 		return ERR_PTR(-EACCES);
 
-	/*
-	 * We could be clever and allow to attach a event to an
-	 * offline CPU and activate it when the CPU comes up, but
-	 * that's for later.
-	 */
-	if (!cpu_online(cpu))
-		return ERR_PTR(-ENODEV);
-
 	cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
 	ctx = &cpuctx->ctx;
 	get_ctx(ctx);
@@ -7703,7 +7696,8 @@ static int swevent_hlist_get_cpu(int cpu)
 	int err = 0;
 
 	mutex_lock(&swhash->hlist_mutex);
-	if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
+	if (!swevent_hlist_deref(swhash) &&
+	    cpumask_test_cpu(cpu, perf_online_mask)) {
 		struct swevent_hlist *hlist;
 
 		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
@@ -7724,25 +7718,23 @@ static int swevent_hlist_get(void)
 {
 	int err, cpu, failed_cpu;
 
-	get_online_cpus();
+	mutex_lock(&pmus_lock);
 	for_each_possible_cpu(cpu) {
 		err = swevent_hlist_get_cpu(cpu);
 		if (err) {
 			failed_cpu = cpu;
 			goto fail;
 		}
 	}
-	put_online_cpus();
-
+	mutex_unlock(&pmus_lock);
 	return 0;
 fail:
 	for_each_possible_cpu(cpu) {
 		if (cpu == failed_cpu)
 			break;
 		swevent_hlist_put_cpu(cpu);
 	}
-
-	put_online_cpus();
+	mutex_unlock(&pmus_lock);
 	return err;
 }
 
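As an aside, swevent_hlist_get() is a textbook instance of the
acquire-all-or-roll-back idiom. Stripped of kernel specifics it looks like this
(get_one()/put_one() are hypothetical helpers, not kernel API):

static int get_one(int cpu)  { (void)cpu; return 0; }	/* hypothetical acquire */
static void put_one(int cpu) { (void)cpu; }		/* hypothetical release */

static int get_all(int ncpus)
{
	int err, cpu, failed_cpu;

	for (cpu = 0; cpu < ncpus; cpu++) {
		err = get_one(cpu);
		if (err) {
			failed_cpu = cpu;
			goto fail;
		}
	}
	return 0;
fail:
	/* Release only what was acquired before the failure. */
	for (cpu = 0; cpu < failed_cpu; cpu++)
		put_one(cpu);
	return err;
}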
@@ -8920,7 +8912,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 	pmu->hrtimer_interval_ms = timer;
 
 	/* update all cpuctx for this PMU */
-	get_online_cpus();
+	cpus_read_lock();
 	for_each_online_cpu(cpu) {
 		struct perf_cpu_context *cpuctx;
 		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
@@ -8929,7 +8921,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
 		cpu_function_call(cpu,
 			(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
 	}
-	put_online_cpus();
+	cpus_read_unlock();
 	mutex_unlock(&mux_interval_mutex);
 
 	return count;
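
Note that cpus_read_lock()/cpus_read_unlock() are the percpu-rwsem based
successors of get_online_cpus()/put_online_cpus() introduced by this series. A
hotplug-safe read-side section has this shape (sketch; do_work_on() is a
placeholder for the caller's per-CPU work):

	cpus_read_lock();
	for_each_online_cpu(cpu)
		do_work_on(cpu);	/* CPUs cannot come or go here */
	cpus_read_unlock();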
@@ -9059,6 +9051,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
 		lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
 		lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
 		cpuctx->ctx.pmu = pmu;
+		cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
 
 		__perf_mux_hrtimer_init(cpuctx, cpu);
 	}
@@ -9882,12 +9875,10 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_task;
 	}
 
-	get_online_cpus();
-
 	if (task) {
 		err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
 		if (err)
-			goto err_cpus;
+			goto err_cred;
 
 		/*
 		 * Reuse ptrace permission checks for now.
@@ -10073,6 +10064,23 @@ SYSCALL_DEFINE5(perf_event_open,
 		goto err_locked;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_locked;
+		}
+	}
+
+
 	/*
 	 * Must be under the same ctx::mutex as perf_install_in_context(),
 	 * because we need to serialize with concurrent event creation.
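The added check recovers the enclosing perf_cpu_context from its embedded ctx
member via container_of(). A self-contained user-space illustration of that
macro (the kernel's version adds type checking, omitted here; struct names are
simplified stand-ins):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct event_context { int nr_events; };

struct cpu_context {
	struct event_context ctx;	/* embedded, as in perf_cpu_context */
	int online;
};

int main(void)
{
	struct cpu_context cpuctx = { .online = 1 };
	struct event_context *ctx = &cpuctx.ctx;

	/* Walk back from the member to its containing structure. */
	struct cpu_context *back = container_of(ctx, struct cpu_context, ctx);

	printf("online = %d\n", back->online);	/* prints: online = 1 */
	return 0;
}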
@@ -10162,8 +10170,6 @@ SYSCALL_DEFINE5(perf_event_open,
 		put_task_struct(task);
 	}
 
-	put_online_cpus();
-
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
Expand Down Expand Up @@ -10197,8 +10203,6 @@ SYSCALL_DEFINE5(perf_event_open,
err_cred:
if (task)
mutex_unlock(&task->signal->cred_guard_mutex);
err_cpus:
put_online_cpus();
err_task:
if (task)
put_task_struct(task);
@@ -10253,6 +10257,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 		goto err_unlock;
 	}
 
+	if (!task) {
+		/*
+		 * Check if the @cpu we're creating an event for is online.
+		 *
+		 * We use the perf_cpu_context::ctx::mutex to serialize against
+		 * the hotplug notifiers. See perf_event_{init,exit}_cpu().
+		 */
+		struct perf_cpu_context *cpuctx =
+			container_of(ctx, struct perf_cpu_context, ctx);
+		if (!cpuctx->online) {
+			err = -ENODEV;
+			goto err_unlock;
+		}
+	}
+
 	if (!exclusive_event_installable(event, ctx)) {
 		err = -EBUSY;
 		goto err_unlock;
@@ -10920,6 +10939,8 @@ static void __init perf_event_init_all_cpus(void)
 	struct swevent_htable *swhash;
 	int cpu;
 
+	zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
+
 	for_each_possible_cpu(cpu) {
 		swhash = &per_cpu(swevent_htable, cpu);
 		mutex_init(&swhash->hlist_mutex);
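zalloc_cpumask_var() allocates a zeroed cpumask when CONFIG_CPUMASK_OFFSTACK=y
and degenerates to a plain embedded mask otherwise, which is why it can fail.
The usual calling pattern looks roughly like this (illustrative fragment; the
__init code above simply ignores the return value at early boot):

	static cpumask_var_t my_mask;	/* illustrative, not from this patch */

	if (!zalloc_cpumask_var(&my_mask, GFP_KERNEL))
		return -ENOMEM;
	cpumask_set_cpu(cpu, my_mask);
	/* ... use the mask ... */
	free_cpumask_var(my_mask);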
@@ -10935,7 +10956,7 @@ static void __init perf_event_init_all_cpus(void)
 	}
 }
 
-int perf_event_init_cpu(unsigned int cpu)
+void perf_swevent_init_cpu(unsigned int cpu)
 {
 	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
 
@@ -10948,7 +10969,6 @@ int perf_event_init_cpu(unsigned int cpu)
 		rcu_assign_pointer(swhash->swevent_hlist, hlist);
 	}
 	mutex_unlock(&swhash->hlist_mutex);
-	return 0;
 }
 
 #if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
@@ -10966,26 +10986,52 @@ static void __perf_event_exit_context(void *__info)
 
 static void perf_event_exit_cpu_context(int cpu)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event_context *ctx;
 	struct pmu *pmu;
-	int idx;
 
-	idx = srcu_read_lock(&pmus_srcu);
-	list_for_each_entry_rcu(pmu, &pmus, entry) {
-		ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
+	mutex_lock(&pmus_lock);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
 
 		mutex_lock(&ctx->mutex);
 		smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
+		cpuctx->online = 0;
 		mutex_unlock(&ctx->mutex);
 	}
-	srcu_read_unlock(&pmus_srcu, idx);
+	cpumask_clear_cpu(cpu, perf_online_mask);
+	mutex_unlock(&pmus_lock);
 }
 #else
 
 static void perf_event_exit_cpu_context(int cpu) { }
 
 #endif
 
+int perf_event_init_cpu(unsigned int cpu)
+{
+	struct perf_cpu_context *cpuctx;
+	struct perf_event_context *ctx;
+	struct pmu *pmu;
+
+	perf_swevent_init_cpu(cpu);
+
+	mutex_lock(&pmus_lock);
+	cpumask_set_cpu(cpu, perf_online_mask);
+	list_for_each_entry(pmu, &pmus, entry) {
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+		ctx = &cpuctx->ctx;
+
+		mutex_lock(&ctx->mutex);
+		cpuctx->online = 1;
+		mutex_unlock(&ctx->mutex);
+	}
+	mutex_unlock(&pmus_lock);
+
+	return 0;
+}
+
 int perf_event_exit_cpu(unsigned int cpu)
 {
 	perf_event_exit_cpu_context(cpu);
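
perf_event_init_cpu() and perf_event_exit_cpu() are invoked from the CPU
hotplug state machine; perf itself is wired up through a fixed state
(CPUHP_AP_PERF_ONLINE) in kernel/cpu.c. For comparison, modular code would
register equivalent callbacks dynamically, roughly like this (illustrative
module sketch, not part of this patch):

#include <linux/cpuhotplug.h>
#include <linux/module.h>

static int example_online(unsigned int cpu)
{
	/* bring per-CPU state up, as perf_event_init_cpu() does */
	return 0;
}

static int example_offline(unsigned int cpu)
{
	/* tear per-CPU state down, as perf_event_exit_cpu() does */
	return 0;
}

static enum cpuhp_state hp_state;

static int __init example_init(void)
{
	int ret;

	/* CPUHP_AP_ONLINE_DYN allocates a dynamic slot; the online
	 * callback also runs on every CPU that is already up. */
	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "perf/example:online",
				example_online, example_offline);
	if (ret < 0)
		return ret;
	hp_state = ret;
	return 0;
}

static void __exit example_exit(void)
{
	cpuhp_remove_state(hp_state);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");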
