Commit d67413e
Peter Zijlstra authored and Ingo Molnar committed Jun 19, 2013
1 parent b6339d2 commit d67413e
Showing 3 changed files with 160 additions and 73 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
---
refs/heads/master: 26cb63ad11e04047a64309362674bcbbd6a6f246
refs/heads/master: 9bb5d40cd93c9dd4be74834b1dcb1ba03629716b
228 changes: 157 additions & 71 deletions trunk/kernel/events/core.c
@@ -196,9 +196,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
static void update_context_time(struct perf_event_context *ctx);
static u64 perf_event_time(struct perf_event *event);

static void ring_buffer_attach(struct perf_event *event,
struct ring_buffer *rb);

void __weak perf_event_print_debug(void) { }

extern __weak const char *perf_pmu_name(void)
@@ -2917,7 +2914,8 @@ static void free_event_rcu(struct rcu_head *head)
kfree(event);
}

static bool ring_buffer_put(struct ring_buffer *rb);
static void ring_buffer_put(struct ring_buffer *rb);
static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb);

static void free_event(struct perf_event *event)
{
@@ -2942,15 +2940,30 @@ static void free_event(struct perf_event *event)
if (has_branch_stack(event)) {
static_key_slow_dec_deferred(&perf_sched_events);
/* is system-wide event */
if (!(event->attach_state & PERF_ATTACH_TASK))
if (!(event->attach_state & PERF_ATTACH_TASK)) {
atomic_dec(&per_cpu(perf_branch_stack_events,
event->cpu));
}
}
}

if (event->rb) {
ring_buffer_put(event->rb);
event->rb = NULL;
struct ring_buffer *rb;

/*
* Can happen when we close an event with re-directed output.
*
* Since we have a 0 refcount, perf_mmap_close() will skip
* over us; possibly making our ring_buffer_put() the last.
*/
mutex_lock(&event->mmap_mutex);
rb = event->rb;
if (rb) {
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_put(rb); /* could be last */
}
mutex_unlock(&event->mmap_mutex);
}

if (is_cgroup_event(event))
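
The free_event() hunk above matters because an event with a zero refcount is invisible to the new perf_mmap_close() loop: this event's ring_buffer_put() may therefore be the final one. A rough userspace analogue of that teardown ordering, with pthread/stdatomic standing in for the kernel primitives and purely illustrative names:

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct rb  { atomic_int refcount; /* ... buffer pages ... */ };
struct evt { pthread_mutex_t mmap_mutex; struct rb *_Atomic rb; };

static void rb_put(struct rb *rb)
{
    if (atomic_fetch_sub(&rb->refcount, 1) == 1)
        free(rb);                       /* ours was the last reference */
}

static void evt_free(struct evt *e)
{
    /*
     * Clear the published pointer and drop the reference under
     * mmap_mutex, so nobody re-discovers a buffer whose final
     * reference we may be about to drop.
     */
    pthread_mutex_lock(&e->mmap_mutex);
    struct rb *rb = atomic_exchange(&e->rb, NULL);
    if (rb)
        rb_put(rb);                     /* could be last */
    pthread_mutex_unlock(&e->mmap_mutex);
    free(e);
}
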
@@ -3188,30 +3201,13 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
unsigned int events = POLL_HUP;

/*
* Race between perf_event_set_output() and perf_poll(): perf_poll()
* grabs the rb reference but perf_event_set_output() overrides it.
* Here is the timeline for two threads T1, T2:
* t0: T1, rb = rcu_dereference(event->rb)
* t1: T2, old_rb = event->rb
* t2: T2, event->rb = new rb
* t3: T2, ring_buffer_detach(old_rb)
* t4: T1, ring_buffer_attach(rb1)
* t5: T1, poll_wait(event->waitq)
*
* To avoid this problem, we grab mmap_mutex in perf_poll()
* thereby ensuring that the assignment of the new ring buffer
* and the detachment of the old buffer appear atomic to perf_poll()
* Pin the event->rb by taking event->mmap_mutex; otherwise
* perf_event_set_output() can swizzle our rb and make us miss wakeups.
*/
mutex_lock(&event->mmap_mutex);

rcu_read_lock();
rb = rcu_dereference(event->rb);
if (rb) {
ring_buffer_attach(event, rb);
rb = event->rb;
if (rb)
events = atomic_xchg(&rb->poll, 0);
}
rcu_read_unlock();

mutex_unlock(&event->mmap_mutex);

poll_wait(file, &event->waitq, wait);
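
The simplified perf_poll() drops the RCU dance entirely and relies on mmap_mutex alone to pin event->rb, since perf_event_set_output() only swizzles the pointer while holding that same mutex. A minimal self-contained sketch of the pinning pattern (illustrative names, not the kernel API):

#include <pthread.h>
#include <stdatomic.h>

struct prb  { atomic_uint poll; /* pending poll flags */ };
struct pevt { pthread_mutex_t mmap_mutex; struct prb *_Atomic rb; };

static unsigned int pevt_poll(struct pevt *e)
{
    unsigned int events = 0;

    /*
     * The writer swizzles e->rb only under this mutex, so holding
     * it pins the pointer for the duration of the read.
     */
    pthread_mutex_lock(&e->mmap_mutex);
    struct prb *rb = atomic_load(&e->rb);
    if (rb)
        events = atomic_exchange(&rb->poll, 0);
    pthread_mutex_unlock(&e->mmap_mutex);

    return events;
}
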
@@ -3521,16 +3517,12 @@ static void ring_buffer_attach(struct perf_event *event,
return;

spin_lock_irqsave(&rb->event_lock, flags);
if (!list_empty(&event->rb_entry))
goto unlock;

list_add(&event->rb_entry, &rb->event_list);
unlock:
if (list_empty(&event->rb_entry))
list_add(&event->rb_entry, &rb->event_list);
spin_unlock_irqrestore(&rb->event_lock, flags);
}

static void ring_buffer_detach(struct perf_event *event,
struct ring_buffer *rb)
static void ring_buffer_detach(struct perf_event *event, struct ring_buffer *rb)
{
unsigned long flags;

@@ -3549,13 +3541,10 @@ static void ring_buffer_wakeup(struct perf_event *event)

rcu_read_lock();
rb = rcu_dereference(event->rb);
if (!rb)
goto unlock;

list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
wake_up_all(&event->waitq);

unlock:
if (rb) {
list_for_each_entry_rcu(event, &rb->event_list, rb_entry)
wake_up_all(&event->waitq);
}
rcu_read_unlock();
}
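
ring_buffer_attach() is now idempotent: under rb->event_lock it queues the event only if it is not already on the list, and ring_buffer_detach() re-initializes the node so a later attach works again. A minimal sketch of that pattern, assuming a list.h-style self-linked node and a plain mutex in place of the kernel spinlock (names illustrative):

#include <pthread.h>

struct node { struct node *prev, *next; };      /* self-linked == detached */

static void node_init(struct node *n)        { n->prev = n->next = n; }
static int  node_empty(const struct node *n) { return n->next == n; }

static void attach(struct node *head, struct node *n, pthread_mutex_t *lock)
{
    pthread_mutex_lock(lock);
    if (node_empty(n)) {                /* a second attach is a no-op */
        n->next = head->next;
        n->prev = head;
        head->next->prev = n;
        head->next = n;
    }
    pthread_mutex_unlock(lock);
}

static void detach(struct node *n, pthread_mutex_t *lock)
{
    pthread_mutex_lock(lock);
    n->prev->next = n->next;
    n->next->prev = n->prev;
    node_init(n);       /* like list_del_init(): safe to re-attach */
    pthread_mutex_unlock(lock);
}
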

@@ -3582,52 +3571,115 @@ static struct ring_buffer *ring_buffer_get(struct perf_event *event)
return rb;
}

static bool ring_buffer_put(struct ring_buffer *rb)
static void ring_buffer_put(struct ring_buffer *rb)
{
struct perf_event *event, *n;
unsigned long flags;

if (!atomic_dec_and_test(&rb->refcount))
return false;
return;

spin_lock_irqsave(&rb->event_lock, flags);
list_for_each_entry_safe(event, n, &rb->event_list, rb_entry) {
list_del_init(&event->rb_entry);
wake_up_all(&event->waitq);
}
spin_unlock_irqrestore(&rb->event_lock, flags);
WARN_ON_ONCE(!list_empty(&rb->event_list));

call_rcu(&rb->rcu_head, rb_free_rcu);
return true;
}

static void perf_mmap_open(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;

atomic_inc(&event->mmap_count);
atomic_inc(&event->rb->mmap_count);
}

/*
* A buffer can be mmap()ed multiple times; either directly through the same
* event, or through other events by use of perf_event_set_output().
*
* In order to undo the VM accounting done by perf_mmap() we need to destroy
* the buffer here, where we still have a VM context. This means we need
* to detach all events redirecting to us.
*/
static void perf_mmap_close(struct vm_area_struct *vma)
{
struct perf_event *event = vma->vm_file->private_data;

if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
struct ring_buffer *rb = event->rb;
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);
struct ring_buffer *rb = event->rb;
struct user_struct *mmap_user = rb->mmap_user;
int mmap_locked = rb->mmap_locked;
unsigned long size = perf_data_size(rb);

rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
mutex_unlock(&event->mmap_mutex);
atomic_dec(&rb->mmap_count);

if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
return;

/* Detach current event from the buffer. */
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
mutex_unlock(&event->mmap_mutex);

/* If there's still other mmap()s of this buffer, we're done. */
if (atomic_read(&rb->mmap_count)) {
ring_buffer_put(rb); /* can't be last */
return;
}

if (ring_buffer_put(rb)) {
atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
vma->vm_mm->pinned_vm -= mmap_locked;
free_uid(mmap_user);
/*
* No other mmap()s, detach from all other events that might redirect
* into the now unreachable buffer. Somewhat complicated by the
* fact that rb::event_lock otherwise nests inside mmap_mutex.
*/
again:
rcu_read_lock();
list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
if (!atomic_long_inc_not_zero(&event->refcount)) {
/*
* This event is en-route to free_event() which will
* detach it and remove it from the list.
*/
continue;
}
rcu_read_unlock();

mutex_lock(&event->mmap_mutex);
/*
* Check we didn't race with perf_event_set_output() which can
* swizzle the rb from under us while we were waiting to
* acquire mmap_mutex.
*
* If we find a different rb; ignore this event, a next
* iteration will no longer find it on the list. We have to
* still restart the iteration to make sure we're not now
* iterating the wrong list.
*/
if (event->rb == rb) {
rcu_assign_pointer(event->rb, NULL);
ring_buffer_detach(event, rb);
ring_buffer_put(rb); /* can't be last, we still have one */
}
mutex_unlock(&event->mmap_mutex);
put_event(event);

/*
* Restart the iteration; either we're on the wrong list or
* destroyed its integrity by doing a deletion.
*/
goto again;
}
rcu_read_unlock();

/*
* It could be there's still a few 0-ref events on the list; they'll
* get cleaned up by free_event() -- they'll also still have their
* ref on the rb and will free it whenever they are done with it.
*
* Aside from that, this buffer is 'fully' detached and unmapped,
* undo the VM accounting.
*/

atomic_long_sub((size >> PAGE_SHIFT) + 1, &mmap_user->locked_vm);
vma->vm_mm->pinned_vm -= mmap_locked;
free_uid(mmap_user);

ring_buffer_put(rb); /* could be last */
}

static const struct vm_operations_struct perf_mmap_vmops = {
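
The restart loop in perf_mmap_close() is the heart of the patch: because rb->event_lock nests inside mmap_mutex, the walk must leave the list protection before taking each event's mmap_mutex, pin the event with a get-unless-zero, re-check that the rb was not swizzled in the meantime, and restart after every deletion. A compressed userspace rendering of that shape, with a plain mutex standing in for RCU and all names illustrative:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct mrb;
struct mevt {
    struct mevt *next;              /* rb's event list, under list_lock */
    atomic_int refcount;
    pthread_mutex_t mmap_mutex;     /* must be taken OUTSIDE list_lock */
    struct mrb *_Atomic rb;
};

static bool get_unless_zero(atomic_int *r)
{
    int v = atomic_load(r);
    while (v != 0)
        if (atomic_compare_exchange_weak(r, &v, v + 1))
            return true;
    return false;                   /* already on its way to free */
}

static void put(struct mevt *e)
{
    if (atomic_fetch_sub(&e->refcount, 1) == 1)
        free(e);                    /* a real version would unlink first */
}

static void detach_all(struct mrb *rb, struct mevt **head,
                       pthread_mutex_t *list_lock)
{
again:
    pthread_mutex_lock(list_lock);
    for (struct mevt *e = *head; e; e = e->next) {
        if (!get_unless_zero(&e->refcount))
            continue;               /* the 0-ref skip from the patch */

        /*
         * Lock order forbids taking mmap_mutex inside list_lock;
         * the reference keeps *e alive across the gap.
         */
        pthread_mutex_unlock(list_lock);

        pthread_mutex_lock(&e->mmap_mutex);
        if (atomic_load(&e->rb) == rb)      /* not swizzled meanwhile? */
            atomic_store(&e->rb, NULL);     /* plus unlink and rb put */
        pthread_mutex_unlock(&e->mmap_mutex);
        put(e);

        goto again;                 /* list may have changed: restart */
    }
    pthread_mutex_unlock(list_lock);
}
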
@@ -3677,10 +3729,24 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
return -EINVAL;

WARN_ON_ONCE(event->ctx->parent_ctx);
again:
mutex_lock(&event->mmap_mutex);
if (event->rb) {
if (event->rb->nr_pages != nr_pages)
if (event->rb->nr_pages != nr_pages) {
ret = -EINVAL;
goto unlock;
}

if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
/*
* Raced against perf_mmap_close() through
* perf_event_set_output(). Try again, hope for better
* luck.
*/
mutex_unlock(&event->mmap_mutex);
goto again;
}

goto unlock;
}

@@ -3722,12 +3788,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
goto unlock;
}

atomic_set(&rb->mmap_count, 1);
rb->mmap_locked = extra;
rb->mmap_user = get_current_user();

atomic_long_add(user_extra, &user->locked_vm);
vma->vm_mm->pinned_vm += extra;

ring_buffer_attach(event, rb);
rcu_assign_pointer(event->rb, rb);

perf_event_update_userpage(event);
@@ -3737,6 +3805,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
atomic_inc(&event->mmap_count);
mutex_unlock(&event->mmap_mutex);

/*
* Since pinned accounting is per vm we cannot allow fork() to copy our
* vma.
*/
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = &perf_mmap_vmops;
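
The new again: loop in perf_mmap() covers the window in which a concurrent perf_mmap_close() drops rb->mmap_count to zero after this caller found event->rb but before it could join the mapping: the inc-not-zero fails, so the caller unlocks and retries from scratch. Roughly, assuming the toy types below and get_unless_zero() from the earlier sketch:

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct trb  { atomic_int mmap_count; };
struct tevt { pthread_mutex_t mmap_mutex; struct trb *_Atomic rb; };

static bool get_unless_zero(atomic_int *r);     /* as sketched above */

static int toy_mmap(struct tevt *e)
{
    struct trb *rb;

    for (;;) {
        pthread_mutex_lock(&e->mmap_mutex);
        rb = atomic_load(&e->rb);
        if (!rb)
            break;                  /* no buffer yet: set one up below */

        if (get_unless_zero(&rb->mmap_count)) {
            /* Joined the existing mapping. */
            pthread_mutex_unlock(&e->mmap_mutex);
            return 0;
        }
        /*
         * mmap_count already hit zero: the final unmap is tearing
         * this buffer down. Back off and retry; next time around
         * e->rb will be NULL or point at a fresh buffer.
         */
        pthread_mutex_unlock(&e->mmap_mutex);
    }

    /*
     * Still holding mmap_mutex here: allocate rb, set mmap_count
     * to 1, publish it via e->rb (elided), then unlock.
     */
    pthread_mutex_unlock(&e->mmap_mutex);
    return 0;
}
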

@@ -6415,23 +6487,37 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
if (atomic_read(&event->mmap_count))
goto unlock;

old_rb = event->rb;

if (output_event) {
/* get the rb we want to redirect to */
rb = ring_buffer_get(output_event);
if (!rb)
goto unlock;
}

old_rb = event->rb;
rcu_assign_pointer(event->rb, rb);
if (old_rb)
ring_buffer_detach(event, old_rb);

if (rb)
ring_buffer_attach(event, rb);

rcu_assign_pointer(event->rb, rb);

if (old_rb) {
ring_buffer_put(old_rb);
/*
* Since we detached before setting the new rb, so that we
* could attach the new rb, we could have missed a wakeup.
* Provide it now.
*/
wake_up_all(&event->waitq);
}

ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);

if (old_rb)
ring_buffer_put(old_rb);
out:
return ret;
}
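In perf_event_set_output(), the put of the old buffer moves from after mutex_unlock() to inside the locked region, paired with an unconditional wake_up_all(): the event is detached from the old rb before it is attached to the new one, so any wakeup arriving in that window is lost and must be replayed. Sketched with toy stand-ins for the attach/detach and wakeup machinery (stub bodies; names illustrative):

#include <pthread.h>
#include <stdatomic.h>

struct srb;
struct sevt { pthread_mutex_t mmap_mutex; struct srb *_Atomic rb; };

/* Stand-ins for the attach/detach, put, and wakeup sketches above. */
static void s_detach(struct sevt *e, struct srb *rb) { (void)e; (void)rb; }
static void s_attach(struct sevt *e, struct srb *rb) { (void)e; (void)rb; }
static void s_put(struct srb *rb)                    { (void)rb; }
static void s_wake(struct sevt *e)                   { (void)e; }

static void set_output(struct sevt *e, struct srb *new_rb)
{
    pthread_mutex_lock(&e->mmap_mutex);
    struct srb *old = atomic_load(&e->rb);

    if (old)
        s_detach(e, old);   /* from here on, old's wakeups miss us */
    if (new_rb)
        s_attach(e, new_rb);
    atomic_store(&e->rb, new_rb);

    if (old) {
        s_put(old);
        s_wake(e);          /* replay a possibly lost wakeup */
    }
    pthread_mutex_unlock(&e->mmap_mutex);
}
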
3 changes: 2 additions & 1 deletion trunk/kernel/events/internal.h
@@ -31,7 +31,8 @@ struct ring_buffer {
spinlock_t event_lock;
struct list_head event_list;

int mmap_locked;
atomic_t mmap_count;
unsigned long mmap_locked;
struct user_struct *mmap_user;

struct perf_event_mmap_page *user_page;
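
The internal.h hunk records the state that makes the close path possible: mmap_count tracks live mappings independently of rb->refcount (which governs the struct's lifetime), while mmap_locked (widened from int to unsigned long) and mmap_user remember what to unaccount. A small sketch of the unaccounting arithmetic from perf_mmap_close(), assuming 4 KiB pages and toy names:

#include <stdatomic.h>

#define TOY_PAGE_SHIFT 12

struct toy_rb {
    unsigned long mmap_locked;  /* pages charged to vm_mm->pinned_vm */
    atomic_long *locked_vm;     /* stand-in for mmap_user->locked_vm */
};

static void unaccount(struct toy_rb *rb, unsigned long data_size,
                      unsigned long *pinned_vm)
{
    /* The +1 covers the perf_event_mmap_page control page. */
    atomic_fetch_sub(rb->locked_vm, (data_size >> TOY_PAGE_SHIFT) + 1);
    *pinned_vm -= rb->mmap_locked;
}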
