Skip to content

Commit

Permalink
perf: Fix the perf context switch optimization
Browse files Browse the repository at this point in the history
Currently we only optimize the context switch between two
contexts that have the same parent; this forgoes the
optimization between parent and child context, even though these
contexts could be equivalent too.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Shishkin, Alexander <alexander.shishkin@intel.com>
Link: http://lkml.kernel.org/r/20131007164257.GH3081@twins.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Peter Zijlstra authored and Ingo Molnar committed Oct 29, 2013
1 parent e00b12e commit 5a3126d
Showing 1 changed file with 46 additions and 18 deletions.
64 changes: 46 additions & 18 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,7 @@ static void unclone_ctx(struct perf_event_context *ctx)
put_ctx(ctx->parent_ctx);
ctx->parent_ctx = NULL;
}
ctx->generation++;
}

static u32 perf_event_pid(struct perf_event *event, struct task_struct *p)
Expand Down Expand Up @@ -1136,6 +1137,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;

ctx->generation++;
}

/*
Expand Down Expand Up @@ -1313,6 +1316,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
*/
if (event->state > PERF_EVENT_STATE_OFF)
event->state = PERF_EVENT_STATE_OFF;

ctx->generation++;
}

static void perf_group_detach(struct perf_event *event)
Expand Down Expand Up @@ -2149,22 +2154,38 @@ static void ctx_sched_out(struct perf_event_context *ctx,
}

/*
* Test whether two contexts are equivalent, i.e. whether they
* have both been cloned from the same version of the same context
* and they both have the same number of enabled events.
* If the number of enabled events is the same, then the set
* of enabled events should be the same, because these are both
* inherited contexts, therefore we can't access individual events
* in them directly with an fd; we can only enable/disable all
* events via prctl, or enable/disable all events in a family
* via ioctl, which will have the same effect on both contexts.
* Test whether two contexts are equivalent, i.e. whether they have both been
* cloned from the same version of the same context.
*
* Equivalence is measured using a generation number in the context that is
* incremented on each modification to it; see unclone_ctx(), list_add_event()
* and list_del_event().
*/
static int context_equiv(struct perf_event_context *ctx1,
struct perf_event_context *ctx2)
{
return ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx
&& ctx1->parent_gen == ctx2->parent_gen
&& !ctx1->pin_count && !ctx2->pin_count;
/* Pinning disables the swap optimization */
if (ctx1->pin_count || ctx2->pin_count)
return 0;

/* If ctx1 is the parent of ctx2 */
if (ctx1 == ctx2->parent_ctx && ctx1->generation == ctx2->parent_gen)
return 1;

/* If ctx2 is the parent of ctx1 */
if (ctx1->parent_ctx == ctx2 && ctx1->parent_gen == ctx2->generation)
return 1;

/*
* If ctx1 and ctx2 have the same parent; we flatten the parent
* hierarchy, see perf_event_init_context().
*/
if (ctx1->parent_ctx && ctx1->parent_ctx == ctx2->parent_ctx &&
ctx1->parent_gen == ctx2->parent_gen)
return 1;

/* Unmatched */
return 0;
}

static void __perf_event_sync_stat(struct perf_event *event,
Expand Down Expand Up @@ -2247,7 +2268,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
{
struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
struct perf_event_context *parent, *next_parent;
struct perf_cpu_context *cpuctx;
int do_switch = 1;

Expand All @@ -2259,10 +2280,18 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
return;

rcu_read_lock();
parent = rcu_dereference(ctx->parent_ctx);
next_ctx = next->perf_event_ctxp[ctxn];
if (parent && next_ctx &&
rcu_dereference(next_ctx->parent_ctx) == parent) {
if (!next_ctx)
goto unlock;

parent = rcu_dereference(ctx->parent_ctx);
next_parent = rcu_dereference(next_ctx->parent_ctx);

/* If neither context have a parent context; they cannot be clones. */
if (!parent && !next_parent)
goto unlock;

if (next_parent == ctx || next_ctx == parent || next_parent == parent) {
/*
* Looks like the two contexts are clones, so we might be
* able to optimize the context switch. We lock both
Expand Down Expand Up @@ -2290,6 +2319,7 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
raw_spin_unlock(&next_ctx->lock);
raw_spin_unlock(&ctx->lock);
}
unlock:
rcu_read_unlock();

if (do_switch) {
Expand Down Expand Up @@ -7136,7 +7166,6 @@ SYSCALL_DEFINE5(perf_event_open,
}

perf_install_in_context(ctx, event, event->cpu);
++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);

Expand Down Expand Up @@ -7219,7 +7248,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
WARN_ON_ONCE(ctx->parent_ctx);
mutex_lock(&ctx->mutex);
perf_install_in_context(ctx, event, cpu);
++ctx->generation;
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);

Expand Down

0 comments on commit 5a3126d

Please sign in to comment.