Skip to content

Commit

Permalink
perf/x86/intel: Use context switch callback to flush LBR stack
Browse files Browse the repository at this point in the history
The previous commit introduced a context switch callback whose function
overlaps with that of the flush branch stack callback, so we can use the
context switch callback to flush the LBR stack.

This patch adds code that uses the context switch callback to
flush the LBR stack when a task is being scheduled in. The callback
is enabled only when there are events that use the LBR hardware. This
patch also removes all of the old flush branch stack code.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: eranian@google.com
Cc: jolsa@redhat.com
Link: http://lkml.kernel.org/r/1415156173-10035-4-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Yan, Zheng authored and Ingo Molnar committed Feb 18, 2015
1 parent ba53250 commit 2a0ad3b
Show file tree
Hide file tree
Showing 6 changed files with 30 additions and 99 deletions.
7 changes: 0 additions & 7 deletions arch/x86/kernel/cpu/perf_event.c
Original file line number Diff line number Diff line change
Expand Up @@ -1920,12 +1920,6 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
x86_pmu.sched_task(ctx, sched_in);
}

/*
 * Forward a branch-stack flush request to the active x86 PMU
 * description, if that description supplies a flush_branch_stack hook.
 * Does nothing when the hook is not set.
 */
static void x86_pmu_flush_branch_stack(void)
{
	if (!x86_pmu.flush_branch_stack)
		return;

	x86_pmu.flush_branch_stack();
}

void perf_check_microcode(void)
{
if (x86_pmu.check_microcode)
Expand Down Expand Up @@ -1955,7 +1949,6 @@ static struct pmu pmu = {
.commit_txn = x86_pmu_commit_txn,

.event_idx = x86_pmu_event_idx,
.flush_branch_stack = x86_pmu_flush_branch_stack,
.sched_task = x86_pmu_sched_task,
};

Expand Down
3 changes: 2 additions & 1 deletion arch/x86/kernel/cpu/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,6 @@ struct x86_pmu {
void (*cpu_dead)(int cpu);

void (*check_microcode)(void);
void (*flush_branch_stack)(void);
void (*sched_task)(struct perf_event_context *ctx,
bool sched_in);

Expand Down Expand Up @@ -733,6 +732,8 @@ void intel_pmu_pebs_disable_all(void);

void intel_ds_init(void);

void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);

void intel_pmu_lbr_reset(void);

void intel_pmu_lbr_enable(struct perf_event *event);
Expand Down
14 changes: 1 addition & 13 deletions arch/x86/kernel/cpu/perf_event_intel.c
Original file line number Diff line number Diff line change
Expand Up @@ -2044,18 +2044,6 @@ static void intel_pmu_cpu_dying(int cpu)
fini_debug_store_on_cpu(cpu);
}

/*
 * Flush the Intel LBR stack.
 *
 * Intel LBR does not tag its entries with the PID of the current
 * task, so the stack has to be flushed on context switch. For now
 * the flush is implemented as a plain reset.
 */
static void intel_pmu_flush_branch_stack(void)
{
	/* x86_pmu.lbr_nr == 0: skip the reset entirely. */
	if (!x86_pmu.lbr_nr)
		return;

	intel_pmu_lbr_reset();
}

PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");

PMU_FORMAT_ATTR(ldlat, "config1:0-15");
Expand Down Expand Up @@ -2107,7 +2095,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.cpu_starting = intel_pmu_cpu_starting,
.cpu_dying = intel_pmu_cpu_dying,
.guest_get_msrs = intel_guest_get_msrs,
.flush_branch_stack = intel_pmu_flush_branch_stack,
.sched_task = intel_pmu_lbr_sched_task,
};

static __init void intel_clovertown_quirk(void)
Expand Down
27 changes: 27 additions & 0 deletions arch/x86/kernel/cpu/perf_event_intel_lbr.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,31 @@ void intel_pmu_lbr_reset(void)
intel_pmu_lbr_reset_64();
}

/*
 * Context switch callback for the Intel LBR code.
 *
 * @ctx:      perf event context handed in by the caller; recorded in
 *            the per-CPU state as the current LBR context on sched-in.
 * @sched_in: true when a task is being scheduled in; the sched-out
 *            direction is a no-op here.
 */
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
	struct cpu_hw_events *hw_events = this_cpu_ptr(&cpu_hw_events);

	/* Only act when x86_pmu.lbr_nr is non-zero and on sched-in. */
	if (!x86_pmu.lbr_nr || !sched_in)
		return;

	/*
	 * When sampling the branch stack system-wide, it may be necessary
	 * to flush the stack on context switch. The branch stack does not
	 * tag its entries with the pid of the current task, so without a
	 * flush a branch entry can no longer be associated with a task.
	 *
	 * The ambiguity is more likely when the branch stack supports
	 * privilege level filtering and the user restricts monitoring to
	 * user level (a potentially useful system-wide measurement): the
	 * risk of ending up with branches from multiple tasks in the
	 * stack is then high.
	 */
	intel_pmu_lbr_reset();
	hw_events->lbr_context = ctx;
}

void intel_pmu_lbr_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
Expand All @@ -195,6 +220,7 @@ void intel_pmu_lbr_enable(struct perf_event *event)
cpuc->br_sel = event->hw.branch_reg.reg;

cpuc->lbr_users++;
perf_sched_cb_inc(event->ctx->pmu);
}

void intel_pmu_lbr_disable(struct perf_event *event)
Expand All @@ -206,6 +232,7 @@ void intel_pmu_lbr_disable(struct perf_event *event)

cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
perf_sched_cb_dec(event->ctx->pmu);

if (cpuc->enabled && !cpuc->lbr_users) {
__intel_pmu_lbr_disable();
Expand Down
1 change: 0 additions & 1 deletion include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,6 @@ struct perf_event_context {
u64 generation;
int pin_count;
int nr_cgroups; /* cgroup evts */
int nr_branch_stack; /* branch_stack evt */
struct rcu_head rcu_head;

struct delayed_work orphans_remove;
Expand Down
77 changes: 0 additions & 77 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@ enum event_type_t {
*/
struct static_key_deferred perf_sched_events __read_mostly;
static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
static DEFINE_PER_CPU(atomic_t, perf_branch_stack_events);
static DEFINE_PER_CPU(int, perf_sched_cb_usages);

static atomic_t nr_mmap_events __read_mostly;
Expand Down Expand Up @@ -1240,9 +1239,6 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
if (is_cgroup_event(event))
ctx->nr_cgroups++;

if (has_branch_stack(event))
ctx->nr_branch_stack++;

list_add_rcu(&event->event_entry, &ctx->event_list);
ctx->nr_events++;
if (event->attr.inherit_stat)
Expand Down Expand Up @@ -1409,9 +1405,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
cpuctx->cgrp = NULL;
}

if (has_branch_stack(event))
ctx->nr_branch_stack--;

ctx->nr_events--;
if (event->attr.inherit_stat)
ctx->nr_stat--;
Expand Down Expand Up @@ -2808,64 +2801,6 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx,
perf_ctx_unlock(cpuctx, ctx);
}

/*
 * When sampling the branch stack in system-wide mode, it may be necessary
 * to flush the stack on context switch. This happens when the branch
 * stack does not tag its entries with the pid of the current task.
 * Otherwise it becomes impossible to associate a branch entry with a
 * task. This ambiguity is more likely to appear when the branch stack
 * supports priv level filtering and the user sets it to monitor only
 * at the user level (which could be a useful measurement in system-wide
 * mode). In that case, the risk is high of having a branch stack with
 * branches from multiple tasks. Flushing may mean dropping the existing
 * entries or stashing them somewhere in the PMU specific code layer.
 *
 * This function provides the context switch callback to the lower code
 * layer. It is invoked ONLY when there is at least one system-wide context
 * with at least one active event using taken branch sampling.
 *
 * @prev: task being switched away from
 * @task: task being switched to
 *
 * For every registered PMU whose per-CPU context counts at least one
 * branch-stack event and which provides a flush_branch_stack hook, the
 * hook is called with the PMU disabled and the context lock held.
 */
static void perf_branch_stack_sched_in(struct task_struct *prev,
struct task_struct *task)
{
struct perf_cpu_context *cpuctx;
struct pmu *pmu;
unsigned long flags;

/* no need to flush branch stack if not changing task */
if (prev == task)
return;

/* interrupts stay off for the whole walk over the PMU list */
local_irq_save(flags);

/* the pmus list is traversed with the _rcu iterator below */
rcu_read_lock();

list_for_each_entry_rcu(pmu, &pmus, entry) {
/* this CPU's context for the PMU being visited */
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);

/*
 * check if the context has at least one
 * event using PERF_SAMPLE_BRANCH_STACK
 * and the PMU actually implements a flush hook
 */
if (cpuctx->ctx.nr_branch_stack > 0
&& pmu->flush_branch_stack) {

perf_ctx_lock(cpuctx, cpuctx->task_ctx);

/* flush is bracketed by PMU disable/enable */
perf_pmu_disable(pmu);

pmu->flush_branch_stack();

perf_pmu_enable(pmu);

perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
}
}

rcu_read_unlock();

local_irq_restore(flags);
}

/*
* Called from scheduler to add the events of the current task
* with interrupts disabled.
Expand Down Expand Up @@ -2898,10 +2833,6 @@ void __perf_event_task_sched_in(struct task_struct *prev,
if (atomic_read(this_cpu_ptr(&perf_cgroup_events)))
perf_cgroup_sched_in(prev, task);

/* check for system-wide branch_stack events */
if (atomic_read(this_cpu_ptr(&perf_branch_stack_events)))
perf_branch_stack_sched_in(prev, task);

if (__this_cpu_read(perf_sched_cb_usages))
perf_pmu_sched_task(prev, task, true);
}
Expand Down Expand Up @@ -3480,10 +3411,6 @@ static void unaccount_event_cpu(struct perf_event *event, int cpu)
if (event->parent)
return;

if (has_branch_stack(event)) {
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_dec(&per_cpu(perf_branch_stack_events, cpu));
}
if (is_cgroup_event(event))
atomic_dec(&per_cpu(perf_cgroup_events, cpu));
}
Expand Down Expand Up @@ -7139,10 +7066,6 @@ static void account_event_cpu(struct perf_event *event, int cpu)
if (event->parent)
return;

if (has_branch_stack(event)) {
if (!(event->attach_state & PERF_ATTACH_TASK))
atomic_inc(&per_cpu(perf_branch_stack_events, cpu));
}
if (is_cgroup_event(event))
atomic_inc(&per_cpu(perf_cgroup_events, cpu));
}
Expand Down

0 comments on commit 2a0ad3b

Please sign in to comment.