perfcounters: make context switch and migration software counters work again

Jaswinder Singh Rajput reported that commit 23a185c caused the
context switch and migration software counters to always report zero.
With that commit, the software counters only count events that occur
between sched-in and sched-out for a task.  This is necessary for the
counter enable/disable prctls and ioctls to work.  However, the
context switch and migration counts are incremented after sched-out
for one task and before sched-in for the next.  Since the increment
doesn't occur while a task is scheduled in (as far as the software
counters are concerned), it doesn't count towards any counter.
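
For concreteness, the ordering during a context switch is roughly the
following (a simplified sketch of the scheduler path, not the literal
code; perf_counter_task_sched_out/in are the hooks the software
counters rely on):

	perf_counter_task_sched_out(prev, cpu);	/* prev's counters stop counting  */
	prev->nvcsw++;				/* the switch is counted here     */
	context_switch(rq, prev, next);
	perf_counter_task_sched_in(next, cpu);	/* next's counters start counting */

The increment lands in the window where neither task's counters are
considered scheduled in, so it was attributed to nothing.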

Thus the context switch and migration counters need to count events
that occur at any time, provided the counter is enabled, not just
those that occur while the task is scheduled in (from the perf_counter
subsystem's point of view).  The problem, though, is that the software
counter code can't tell the difference between being enabled and being
scheduled in, or between being disabled and being scheduled out,
since we use the same pair of enable/disable entry points for both.
That is, the high-level disable operation simply arranges for the
counter to not be scheduled in any more, and the high-level enable
operation arranges for it to be scheduled in again.
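
In other words, a high-level enable and an ordinary schedule-in both end
up calling the same per-counter hook, so the hook cannot tell which case
it is in.  A rough sketch of the call structure (paraphrased, not the
exact code):

	/*
	 * perf_counter_task_sched_in()          __perf_counter_enable()
	 *             |                                    |
	 *             v                                    v
	 *      counter_sched_in() / group_sched_in()
	 *             |
	 *             v
	 *      counter->hw_ops->enable(counter)   <- one entry point for both
	 */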

One way to solve this would be to have sched_in/out operations in the
hw_perf_counter_ops struct as well as enable/disable.  However, this
patch takes a simpler approach: it adds a 'prev_state' field to the
perf_counter struct that allows a counter's enable method to know
whether the counter was previously disabled or just inactive
(scheduled out), and therefore whether the enable method is being
called as a result of a high-level enable or a schedule-in operation.

This then allows the context switch, migration and page fault counters
to reset their hw.prev_count value in their enable functions only if
they are called as a result of a high-level enable operation.
Although page faults would normally occur only while the counter is
scheduled in, the page fault counter code is changed too, in case
there are ever circumstances where page faults get counted against a
task while its counters are not scheduled in.
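
For reference, the reason hw.prev_count matters is that the software
counters accumulate deltas against it when they are read or scheduled
out; roughly (paraphrasing the existing update helpers, e.g. the
context-switch counter's context_switches_perf_counter_update()):

	static void context_switches_perf_counter_update(struct perf_counter *counter)
	{
		u64 prev, now;
		s64 delta;

		prev = atomic64_read(&counter->hw.prev_count);
		now = get_context_switches(counter);

		atomic64_set(&counter->hw.prev_count, now);
		delta = now - prev;
		atomic64_add(delta, &counter->count);
	}

If the enable function reset prev_count on every schedule-in, any events
that occurred between sched-out and sched-in would be skipped over; with
the prev_state check, the reset happens only on a genuine enable
(prev_state <= PERF_COUNTER_STATE_OFF, i.e. the counter was off rather
than merely scheduled out).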

Reported-by: Jaswinder Singh Rajput <jaswinder@kernel.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Paul Mackerras authored and Ingo Molnar committed Feb 13, 2009
1 parent b1864e9 commit c07c99b
Showing 2 changed files with 16 additions and 6 deletions.
1 change: 1 addition & 0 deletions include/linux/perf_counter.h
@@ -173,6 +173,7 @@ struct perf_counter {
 	const struct hw_perf_counter_ops *hw_ops;
 
 	enum perf_counter_active_state state;
+	enum perf_counter_active_state prev_state;
 	atomic64_t count;
 
 	struct perf_counter_hw_event hw_event;
21 changes: 15 additions & 6 deletions kernel/perf_counter.c
@@ -444,6 +444,7 @@ static void __perf_install_in_context(void *info)
 
 	list_add_counter(counter, ctx);
 	ctx->nr_counters++;
+	counter->prev_state = PERF_COUNTER_STATE_OFF;
 
 	/*
	 * Don't put the counter on if it is disabled or if
@@ -562,6 +563,7 @@ static void __perf_counter_enable(void *info)
 	curr_rq_lock_irq_save(&flags);
 	spin_lock(&ctx->lock);
 
+	counter->prev_state = counter->state;
 	if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
 		goto unlock;
 	counter->state = PERF_COUNTER_STATE_INACTIVE;
@@ -733,13 +735,15 @@ group_sched_in(struct perf_counter *group_counter,
 	if (ret)
 		return ret < 0 ? ret : 0;
 
+	group_counter->prev_state = group_counter->state;
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
 
 	/*
	 * Schedule in siblings as one group (if any):
	 */
 	list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
+		counter->prev_state = counter->state;
 		if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
 			partial_group = counter;
 			goto group_error;
@@ -1398,9 +1402,9 @@ static void task_clock_perf_counter_read(struct perf_counter *counter)
 
 static int task_clock_perf_counter_enable(struct perf_counter *counter)
 {
-	u64 now = task_clock_perf_counter_val(counter, 0);
-
-	atomic64_set(&counter->hw.prev_count, now);
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     task_clock_perf_counter_val(counter, 0));
 
 	return 0;
 }
@@ -1455,7 +1459,8 @@ static void page_faults_perf_counter_read(struct perf_counter *counter)
 
 static int page_faults_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count, get_page_faults(counter));
 	return 0;
 }
 
@@ -1501,7 +1506,9 @@ static void context_switches_perf_counter_read(struct perf_counter *counter)
 
 static int context_switches_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_context_switches(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_context_switches(counter));
 	return 0;
 }
 
@@ -1547,7 +1554,9 @@ static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
 
static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
 {
-	atomic64_set(&counter->hw.prev_count, get_cpu_migrations(counter));
+	if (counter->prev_state <= PERF_COUNTER_STATE_OFF)
+		atomic64_set(&counter->hw.prev_count,
+			     get_cpu_migrations(counter));
 	return 0;
 }
