
---
yaml
---
r: 147559
b: refs/heads/master
c: c93f766
h: refs/heads/master
i:
  147557: cdf4c3c
  147555: 35bcd69
  147551: 092c7f3
v: v3
Paul Mackerras authored and Ingo Molnar committed May 28, 2009
1 parent eba4602 commit ba41e52
Showing 3 changed files with 149 additions and 63 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 63299f057fbce47da895e8865cba7e9c3eb01a20
refs/heads/master: c93f7669098eb97c5376e5396e3dfb734c17df4f
5 changes: 3 additions & 2 deletions trunk/include/linux/perf_counter.h
@@ -541,8 +541,9 @@ struct perf_counter_context {
* been cloned (inherited) from a common ancestor.
*/
struct perf_counter_context *parent_ctx;
u32 parent_gen;
u32 generation;
u64 parent_gen;
u64 generation;
struct rcu_head rcu_head;
};

/**
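The header hunk above widens the clone-tracking fields from u32 to u64. The context-switch optimisation added to kernel/perf_counter.c below decides whether two contexts are still clones of one another by comparing generation numbers, and a 32-bit generation could in principle wrap and produce a false match on a long-running system; 64 bits makes that a non-issue. What follows is a minimal userspace sketch of that kind of generation check, not the kernel's context_equiv(); the struct and function names are hypothetical.

/*
 * Illustrative only: two contexts count as interchangeable when they were
 * cloned from the same parent at the same parent generation.
 */
#include <stdint.h>
#include <stdio.h>

struct ctx_sketch {
    struct ctx_sketch *parent;  /* context this one was cloned from, or NULL */
    uint64_t parent_gen;        /* parent's generation at clone time */
    uint64_t generation;        /* bumped whenever counters are added/removed */
};

static int context_equiv_sketch(const struct ctx_sketch *a,
                                const struct ctx_sketch *b)
{
    /* Clones of the same parent, taken at the same parent generation. */
    return a->parent && a->parent == b->parent &&
           a->parent_gen == b->parent_gen;
}

int main(void)
{
    struct ctx_sketch parent = { .generation = 1 };
    struct ctx_sketch c1 = { .parent = &parent, .parent_gen = parent.generation };

    ++parent.generation;    /* counters changed in the parent in between */
    struct ctx_sketch c2 = { .parent = &parent, .parent_gen = parent.generation };

    printf("cloned at different generations: %d\n", context_equiv_sketch(&c1, &c2)); /* 0 */
    c2.parent_gen = c1.parent_gen;
    printf("cloned at the same generation:   %d\n", context_equiv_sketch(&c1, &c2)); /* 1 */
    return 0;
}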
205 changes: 145 additions & 60 deletions trunk/kernel/perf_counter.c
@@ -103,12 +103,22 @@ static void get_ctx(struct perf_counter_context *ctx)
atomic_inc(&ctx->refcount);
}

static void free_ctx(struct rcu_head *head)
{
struct perf_counter_context *ctx;

ctx = container_of(head, struct perf_counter_context, rcu_head);
kfree(ctx);
}

static void put_ctx(struct perf_counter_context *ctx)
{
if (atomic_dec_and_test(&ctx->refcount)) {
if (ctx->parent_ctx)
put_ctx(ctx->parent_ctx);
kfree(ctx);
if (ctx->task)
put_task_struct(ctx->task);
call_rcu(&ctx->rcu_head, free_ctx);
}
}
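put_ctx() above now does three things when the last reference is dropped: it releases the parent context, releases the task_struct the context was attached to, and defers the actual kfree() through call_rcu(), so that lockless readers still dereferencing task->perf_counter_ctxp inside an RCU read-side critical section can never touch freed memory. Below is a minimal userspace sketch of the reference-counting half of that pattern; the names are hypothetical and a plain free() stands in for the RCU-deferred free, so only the ownership structure (a clone holds a counted reference on its parent) is modelled, not the grace-period wait.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx_sketch {
    atomic_int refcount;
    struct ctx_sketch *parent;  /* counted reference, may be NULL */
};

/* Stand-in for call_rcu(): the kernel waits for a grace period first. */
static void deferred_free(struct ctx_sketch *ctx)
{
    free(ctx);
}

static void get_ctx_sketch(struct ctx_sketch *ctx)
{
    atomic_fetch_add(&ctx->refcount, 1);
}

static void put_ctx_sketch(struct ctx_sketch *ctx)
{
    /* fetch_sub returns the old value: 1 means we just dropped the last ref */
    if (atomic_fetch_sub(&ctx->refcount, 1) == 1) {
        if (ctx->parent)
            put_ctx_sketch(ctx->parent);   /* release the clone's parent ref */
        deferred_free(ctx);
    }
}

int main(void)
{
    struct ctx_sketch *parent = calloc(1, sizeof(*parent));
    struct ctx_sketch *child = calloc(1, sizeof(*child));

    atomic_init(&parent->refcount, 1);
    atomic_init(&child->refcount, 1);
    child->parent = parent;
    get_ctx_sketch(parent);     /* the clone pins its parent */

    put_ctx_sketch(child);      /* frees child, drops the parent reference */
    put_ctx_sketch(parent);     /* last reference: frees parent */
    puts("done");
    return 0;
}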

@@ -211,22 +221,6 @@ group_sched_out(struct perf_counter *group_counter,
cpuctx->exclusive = 0;
}

/*
* Mark this context as not being a clone of another.
* Called when counters are added to or removed from this context.
* We also increment our generation number so that anything that
* was cloned from this context before this will not match anything
* cloned from this context after this.
*/
static void unclone_ctx(struct perf_counter_context *ctx)
{
++ctx->generation;
if (!ctx->parent_ctx)
return;
put_ctx(ctx->parent_ctx);
ctx->parent_ctx = NULL;
}

/*
* Cross CPU call to remove a performance counter
*
@@ -281,13 +275,19 @@ static void __perf_counter_remove_from_context(void *info)
*
* CPU counters are removed with a smp call. For task counters we only
* call when the task is on a CPU.
*
* If counter->ctx is a cloned context, callers must make sure that
* every task struct that counter->ctx->task could possibly point to
* remains valid. This is OK when called from perf_release since
* that only calls us on the top-level context, which can't be a clone.
* When called from perf_counter_exit_task, it's OK because the
* context has been detached from its task.
*/
static void perf_counter_remove_from_context(struct perf_counter *counter)
{
struct perf_counter_context *ctx = counter->ctx;
struct task_struct *task = ctx->task;

unclone_ctx(ctx);
if (!task) {
/*
* Per cpu counters are removed via an smp call and
@@ -410,6 +410,16 @@ static void __perf_counter_disable(void *info)

/*
* Disable a counter.
*
* If counter->ctx is a cloned context, callers must make sure that
* every task struct that counter->ctx->task could possibly point to
remains valid. This condition is satisfied when called through
* perf_counter_for_each_child or perf_counter_for_each because they
* hold the top-level counter's child_mutex, so any descendant that
* goes to exit will block in sync_child_counter.
* When called from perf_pending_counter it's OK because counter->ctx
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_counter_task_sched_out for this context.
*/
static void perf_counter_disable(struct perf_counter *counter)
{
@@ -794,6 +804,12 @@ static void __perf_counter_enable(void *info)

/*
* Enable a counter.
*
* If counter->ctx is a cloned context, callers must make sure that
* every task struct that counter->ctx->task could possibly point to
* remains valid. This condition is satisfied when called through
* perf_counter_for_each_child or perf_counter_for_each as described
* for perf_counter_disable.
*/
static void perf_counter_enable(struct perf_counter *counter)
{
@@ -923,7 +939,9 @@ void perf_counter_task_sched_out(struct task_struct *task,
struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
struct perf_counter_context *ctx = task->perf_counter_ctxp;
struct perf_counter_context *next_ctx;
struct perf_counter_context *parent;
struct pt_regs *regs;
int do_switch = 1;

regs = task_pt_regs(task);
perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs, 0);
@@ -932,18 +950,39 @@ void perf_counter_task_sched_out(struct task_struct *task,
return;

update_context_time(ctx);

rcu_read_lock();
parent = rcu_dereference(ctx->parent_ctx);
next_ctx = next->perf_counter_ctxp;
if (next_ctx && context_equiv(ctx, next_ctx)) {
task->perf_counter_ctxp = next_ctx;
next->perf_counter_ctxp = ctx;
ctx->task = next;
next_ctx->task = task;
return;
if (parent && next_ctx &&
rcu_dereference(next_ctx->parent_ctx) == parent) {
/*
* Looks like the two contexts are clones, so we might be
* able to optimize the context switch. We lock both
* contexts and check that they are clones under the
* lock (including re-checking that neither has been
* uncloned in the meantime). It doesn't matter which
* order we take the locks because no other cpu could
* be trying to lock both of these tasks.
*/
spin_lock(&ctx->lock);
spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
if (context_equiv(ctx, next_ctx)) {
task->perf_counter_ctxp = next_ctx;
next->perf_counter_ctxp = ctx;
ctx->task = next;
next_ctx->task = task;
do_switch = 0;
}
spin_unlock(&next_ctx->lock);
spin_unlock(&ctx->lock);
}
rcu_read_unlock();

__perf_counter_sched_out(ctx, cpuctx);

cpuctx->task_ctx = NULL;
if (do_switch) {
__perf_counter_sched_out(ctx, cpuctx);
cpuctx->task_ctx = NULL;
}
}

static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
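The rewritten switch-out path above is optimistic: it samples the parent pointers under rcu_read_lock(), and only if the outgoing and incoming contexts look like clones does it take both context locks, re-check equivalence, and simply swap the two tasks' context pointers instead of scheduling every counter out and back in. Here is a userspace sketch of that 'lock both, re-check, then swap' shape, with hypothetical names; like the kernel comment, it leans on the argument that no other thread tries to lock both of these particular contexts at once, so the order the two locks are taken in does not matter.

#include <pthread.h>
#include <stdio.h>

struct task_sketch;

struct ctx_sketch {
    pthread_mutex_t lock;
    struct ctx_sketch *parent;  /* NULL once the context has been uncloned */
    struct task_sketch *task;
};

struct task_sketch {
    struct ctx_sketch *ctx;
};

static int contexts_equivalent(struct ctx_sketch *a, struct ctx_sketch *b)
{
    return a->parent && a->parent == b->parent;
}

/* Returns 1 if the cheap pointer swap was done, 0 if the caller must fall
 * back to the expensive path (the kernel's __perf_counter_sched_out()). */
static int try_swap_contexts(struct task_sketch *prev, struct task_sketch *next)
{
    struct ctx_sketch *ctx = prev->ctx;
    struct ctx_sketch *next_ctx = next->ctx;
    int swapped = 0;

    if (!ctx || !next_ctx || !contexts_equivalent(ctx, next_ctx))
        return 0;               /* optimistic, lockless check failed */

    /* No other thread locks both of these contexts, so order is arbitrary. */
    pthread_mutex_lock(&ctx->lock);
    pthread_mutex_lock(&next_ctx->lock);
    if (contexts_equivalent(ctx, next_ctx)) {
        /* Still clones: hand each task the other's context. */
        prev->ctx = next_ctx;
        next->ctx = ctx;
        ctx->task = next;
        next_ctx->task = prev;
        swapped = 1;
    }
    pthread_mutex_unlock(&next_ctx->lock);
    pthread_mutex_unlock(&ctx->lock);
    return swapped;
}

int main(void)
{
    struct ctx_sketch parent = { .lock = PTHREAD_MUTEX_INITIALIZER };
    struct ctx_sketch a = { .lock = PTHREAD_MUTEX_INITIALIZER, .parent = &parent };
    struct ctx_sketch b = { .lock = PTHREAD_MUTEX_INITIALIZER, .parent = &parent };
    struct task_sketch t1 = { .ctx = &a }, t2 = { .ctx = &b };

    a.task = &t1;
    b.task = &t2;
    printf("swapped: %d\n", try_swap_contexts(&t1, &t2));
    printf("t1 now runs with t2's old context: %d\n", t1.ctx == &b);
    return 0;
}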
@@ -1215,18 +1254,13 @@ __perf_counter_init_context(struct perf_counter_context *ctx,
ctx->task = task;
}

static void put_context(struct perf_counter_context *ctx)
{
if (ctx->task)
put_task_struct(ctx->task);
}

static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
{
struct perf_cpu_context *cpuctx;
struct perf_counter_context *ctx;
struct perf_counter_context *tctx;
struct perf_counter_context *parent_ctx;
struct task_struct *task;
int err;

/*
* If cpu is not a wildcard then this is a percpu counter:
@@ -1249,6 +1283,7 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)

cpuctx = &per_cpu(perf_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);

return ctx;
}
@@ -1265,45 +1300,86 @@ static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
if (!task)
return ERR_PTR(-ESRCH);

/*
* Can't attach counters to a dying task.
*/
err = -ESRCH;
if (task->flags & PF_EXITING)
goto errout;

/* Reuse ptrace permission checks for now. */
if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
put_task_struct(task);
return ERR_PTR(-EACCES);
err = -EACCES;
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto errout;

retry_lock:
rcu_read_lock();
retry:
ctx = rcu_dereference(task->perf_counter_ctxp);
if (ctx) {
/*
* If this context is a clone of another, it might
* get swapped for another underneath us by
* perf_counter_task_sched_out, though the
* rcu_read_lock() protects us from any context
* getting freed. Lock the context and check if it
* got swapped before we could get the lock, and retry
* if so. If we locked the right context, then it
* can't get swapped on us any more and we can
* unclone it if necessary.
* Once it's not a clone things will be stable.
*/
spin_lock_irq(&ctx->lock);
if (ctx != rcu_dereference(task->perf_counter_ctxp)) {
spin_unlock_irq(&ctx->lock);
goto retry;
}
parent_ctx = ctx->parent_ctx;
if (parent_ctx) {
put_ctx(parent_ctx);
ctx->parent_ctx = NULL; /* no longer a clone */
}
++ctx->generation;
/*
* Get an extra reference before dropping the lock so that
* this context won't get freed if the task exits.
*/
get_ctx(ctx);
spin_unlock_irq(&ctx->lock);
}
rcu_read_unlock();

ctx = task->perf_counter_ctxp;
if (!ctx) {
ctx = kmalloc(sizeof(struct perf_counter_context), GFP_KERNEL);
if (!ctx) {
put_task_struct(task);
return ERR_PTR(-ENOMEM);
}
err = -ENOMEM;
if (!ctx)
goto errout;
__perf_counter_init_context(ctx, task);
/*
* Make sure other cpus see correct values for *ctx
* once task->perf_counter_ctxp is visible to them.
*/
smp_wmb();
tctx = cmpxchg(&task->perf_counter_ctxp, NULL, ctx);
if (tctx) {
get_ctx(ctx);
if (cmpxchg(&task->perf_counter_ctxp, NULL, ctx)) {
/*
* We raced with some other task; use
* the context they set.
*/
kfree(ctx);
ctx = tctx;
goto retry_lock;
}
get_task_struct(task);
}

put_task_struct(task);
return ctx;

errout:
put_task_struct(task);
return ERR_PTR(err);
}

static void free_counter_rcu(struct rcu_head *head)
{
struct perf_counter *counter;

counter = container_of(head, struct perf_counter, rcu_head);
put_ctx(counter->ctx);
kfree(counter);
}
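The tail of find_get_context() above allocates a context only when the task has none, makes its fields visible with smp_wmb(), and then publishes it with a single cmpxchg(); if another thread got there first, the loser frees its allocation and goes back to pick up the winner's context through the locking path. Below is a userspace sketch of that publish-or-back-off step using C11 atomics; the names are hypothetical, and the release ordering on the compare-exchange plays the role the smp_wmb() plays in the kernel.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx_sketch {
    int initialized;
    /* ... */
};

struct task_sketch {
    _Atomic(struct ctx_sketch *) ctx;   /* NULL until first use */
};

static struct ctx_sketch *get_or_create_ctx(struct task_sketch *task)
{
    struct ctx_sketch *ctx = atomic_load(&task->ctx);

    if (ctx)
        return ctx;             /* already published by someone else */

    ctx = calloc(1, sizeof(*ctx));
    if (!ctx)
        return NULL;
    ctx->initialized = 1;

    /*
     * Publish: the initialised fields must become visible no later than
     * the pointer itself.  On failure 'expected' is updated to the value
     * the winner installed; the kernel instead jumps back to its locking
     * path to take a proper reference on the winner's context.
     */
    struct ctx_sketch *expected = NULL;
    if (!atomic_compare_exchange_strong_explicit(&task->ctx, &expected, ctx,
                                                 memory_order_acq_rel,
                                                 memory_order_acquire)) {
        free(ctx);              /* we raced and lost; use the winner's */
        ctx = expected;
    }
    return ctx;
}

int main(void)
{
    struct task_sketch task;
    struct ctx_sketch *c1, *c2;

    atomic_init(&task.ctx, NULL);
    c1 = get_or_create_ctx(&task);
    c2 = get_or_create_ctx(&task);
    printf("both callers see the same context: %d\n", c1 == c2);
    free(c1);
    return 0;
}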

@@ -1324,6 +1400,7 @@ static void free_counter(struct perf_counter *counter)
if (counter->destroy)
counter->destroy(counter);

put_ctx(counter->ctx);
call_rcu(&counter->rcu_head, free_counter_rcu);
}

@@ -1347,7 +1424,6 @@ static int perf_release(struct inode *inode, struct file *file)
put_task_struct(counter->owner);

free_counter(counter);
put_context(ctx);

return 0;
}
@@ -1437,6 +1513,12 @@ static void perf_counter_for_each_sibling(struct perf_counter *counter,
mutex_unlock(&ctx->mutex);
}

/*
* Holding the top-level counter's child_mutex means that any
* descendant process that has inherited this counter will block
* in sync_child_counter if it goes to exit, thus satisfying the
* task existence requirements of perf_counter_enable/disable.
*/
static void perf_counter_for_each_child(struct perf_counter *counter,
void (*func)(struct perf_counter *))
{
@@ -3124,8 +3206,6 @@ perf_counter_alloc(struct perf_counter_hw_event *hw_event,
counter->ctx = ctx;
counter->oncpu = -1;

get_ctx(ctx);

counter->state = PERF_COUNTER_STATE_INACTIVE;
if (hw_event->disabled)
counter->state = PERF_COUNTER_STATE_OFF;
@@ -3290,7 +3370,7 @@ SYSCALL_DEFINE5(perf_counter_open,
kfree(counter);

err_put_context:
put_context(ctx);
put_ctx(ctx);

goto out_fput;
}
@@ -3322,6 +3402,7 @@ inherit_counter(struct perf_counter *parent_counter,
group_leader, GFP_KERNEL);
if (IS_ERR(child_counter))
return child_counter;
get_ctx(child_ctx);

/*
* Make the child state follow the state of the parent counter,
@@ -3439,11 +3520,6 @@ __perf_counter_exit_task(struct task_struct *child,

/*
* When a child task exits, feed back counter values to parent counters.
*
* Note: we may be running in child context, but the PID is not hashed
* anymore so new counters will not be added.
* (XXX not sure that is true when we get called from flush_old_exec.
* -- paulus)
*/
void perf_counter_exit_task(struct task_struct *child)
{
@@ -3458,7 +3534,15 @@ void perf_counter_exit_task(struct task_struct *child)

local_irq_save(flags);
__perf_counter_task_sched_out(child_ctx);

/*
* Take the context lock here so that if find_get_context is
* reading child->perf_counter_ctxp, we wait until it has
* incremented the context's refcount before we do put_ctx below.
*/
spin_lock(&child_ctx->lock);
child->perf_counter_ctxp = NULL;
spin_unlock(&child_ctx->lock);
local_irq_restore(flags);

mutex_lock(&child_ctx->mutex);
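perf_counter_exit_task() above now detaches the context from the task while holding ctx->lock, which pairs with the retry loop in find_get_context(): a concurrent reader either takes the lock first, in which case it has already bumped the refcount by the time the exit path proceeds, or takes it second and notices the pointer has changed, in which case it retries. Here is a userspace sketch of both sides of that handshake, with hypothetical names; note that the kernel additionally relies on rcu_read_lock() to keep the sampled context's memory alive until the reader gets the lock, which this single-threaded demo does not model.

#include <pthread.h>
#include <stdio.h>

struct ctx_sketch {
    pthread_mutex_t lock;
    int refcount;               /* simplified; the kernel uses an atomic */
};

struct task_sketch {
    struct ctx_sketch *ctx;     /* read locklessly, cleared under ctx->lock */
};

/*
 * Reader side (find_get_context-style): sample the pointer, lock what was
 * sampled, then verify the task still points at it; otherwise the context
 * was swapped or detached underneath us and we retry.
 */
static struct ctx_sketch *get_task_ctx(struct task_sketch *task)
{
    struct ctx_sketch *ctx;

retry:
    ctx = task->ctx;            /* kernel: rcu_dereference() under rcu_read_lock() */
    if (!ctx)
        return NULL;
    pthread_mutex_lock(&ctx->lock);
    if (ctx != task->ctx) {
        pthread_mutex_unlock(&ctx->lock);
        goto retry;
    }
    ctx->refcount++;            /* pointer can't change while we hold the lock */
    pthread_mutex_unlock(&ctx->lock);
    return ctx;
}

/* Writer side (exit-path-style): detach the context under the same lock. */
static struct ctx_sketch *detach_task_ctx(struct task_sketch *task)
{
    struct ctx_sketch *ctx = task->ctx;

    if (ctx) {
        pthread_mutex_lock(&ctx->lock);
        task->ctx = NULL;
        pthread_mutex_unlock(&ctx->lock);
    }
    return ctx;                 /* caller would drop the task's reference */
}

int main(void)
{
    struct ctx_sketch ctx = { .lock = PTHREAD_MUTEX_INITIALIZER, .refcount = 1 };
    struct task_sketch task = { .ctx = &ctx };

    struct ctx_sketch *got = get_task_ctx(&task);
    printf("reader got the context, refcount is now %d\n", got ? got->refcount : -1);
    detach_task_ctx(&task);
    printf("after exit-path detach, task->ctx is %s\n", task.ctx ? "still set" : "NULL");
    return 0;
}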
@@ -3513,6 +3597,7 @@ int perf_counter_init_task(struct task_struct *child)

__perf_counter_init_context(child_ctx, child);
child->perf_counter_ctxp = child_ctx;
get_task_struct(child);

/*
* Lock the parent list. No need to lock the child - not PID
