Merge branch 'perf/urgent' into perf/core
Conflicts:
	arch/x86/kernel/apic/hw_nmi.c

Merge reason: Resolve conflict, queue up dependent patch.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Ingo Molnar committed Nov 26, 2010
Parents: e4e91ac + ee6dcfa · Commit: 6c869e7
Showing 12 changed files with 144 additions and 46 deletions.
2 changes: 1 addition & 1 deletion arch/x86/Kconfig
@@ -21,7 +21,7 @@ config X86
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select HAVE_IDE
 	select HAVE_OPROFILE
-	select HAVE_PERF_EVENTS if (!M386 && !M486)
+	select HAVE_PERF_EVENTS
 	select HAVE_IRQ_WORK
 	select HAVE_IOREMAP_PROT
 	select HAVE_KPROBES
7 changes: 4 additions & 3 deletions arch/x86/kernel/apic/hw_nmi.c
@@ -17,16 +17,17 @@
 #include <linux/nmi.h>
 #include <linux/module.h>
 
-/* For reliability, we're prepared to waste bits here. */
-static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
-
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 u64 hw_nmi_get_sample_period(void)
 {
 	return (u64)(cpu_khz) * 1000 * 60;
 }
 #endif
 
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
 #ifdef arch_trigger_all_cpu_backtrace
 void arch_trigger_all_cpu_backtrace(void)
 {
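
A quick check of the constant returned above: cpu_khz * 1000 is the cycle rate in Hz, so the value is 60 seconds' worth of cycles, i.e. the hardlockup watchdog's cycle-counter event overflows and raises its NMI roughly once a minute.

/*
 * Worked example, assuming a hypothetical 2 GHz CPU (cpu_khz == 2000000):
 *	(u64)2000000 * 1000 * 60 == 120,000,000,000 cycles
 * which at 2,000,000,000 cycles/s is one counter overflow (one
 * watchdog NMI) every 60 seconds.
 */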
20 changes: 20 additions & 0 deletions arch/x86/kernel/cpu/perf_event.c
@@ -372,6 +372,20 @@ static void release_pmc_hardware(void) {}
 
 #endif
 
+static bool check_hw_exists(void)
+{
+	u64 val, val_new = 0;
+	int ret = 0;
+
+	val = 0xabcdUL;
+	ret |= checking_wrmsrl(x86_pmu.perfctr, val);
+	ret |= rdmsrl_safe(x86_pmu.perfctr, &val_new);
+	if (ret || val != val_new)
+		return false;
+
+	return true;
+}
+
 static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);
 
@@ -1363,6 +1377,12 @@ void __init init_hw_perf_events(void)
 
 	pmu_check_apic();
 
+	/* sanity check that the hardware exists or is emulated */
+	if (!check_hw_exists()) {
+		pr_cont("Broken PMU hardware detected, software events only.\n");
+		return;
+	}
+
 	pr_cont("%s PMU driver.\n", x86_pmu.name);
 
 	if (x86_pmu.quirks)
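
This runtime probe is what lets the Kconfig hunk above drop the `if (!M386 && !M486)` guard: instead of excluding PMU-less CPUs at build time, the kernel now pokes the first counter MSR at boot and falls back to software events when nothing answers. checking_wrmsrl() and rdmsrl_safe() are the fault-tolerant MSR accessors; they return non-zero instead of oopsing when the access faults, as it typically does on old CPUs or under hypervisors that do not emulate the counter MSRs. The same idiom in a generic form, a sketch with hypothetical reg_write()/reg_read() helpers standing in for the MSR accessors:

#include <stdbool.h>

/* Hypothetical fault-tolerant accessors; return non-zero on failure. */
extern int reg_write(unsigned int reg, unsigned long long val);
extern int reg_read(unsigned int reg, unsigned long long *val);

/* Write/read-back presence probe, same shape as check_hw_exists(). */
static bool probe_hw(unsigned int reg)
{
	unsigned long long scratch = 0xabcd, readback = 0;
	int err = 0;

	err |= reg_write(reg, scratch);	/* fails if nothing decodes the register */
	err |= reg_read(reg, &readback);

	/* present only if both accesses worked and the value stuck */
	return !err && readback == scratch;
}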
2 changes: 2 additions & 0 deletions arch/x86/kernel/entry_64.S
@@ -295,6 +295,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 /* save partial stack frame */
+	.pushsection .kprobes.text, "ax"
 ENTRY(save_args)
 	XCPT_FRAME
 	cld
@@ -334,6 +335,7 @@ ENTRY(save_args)
 	ret
 	CFI_ENDPROC
 END(save_args)
+	.popsection
 
 ENTRY(save_rest)
 	PARTIAL_FRAME 1 REST_SKIP+8
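
save_args builds the partial pt_regs frame on interrupt entry, so letting kprobes plant an int3 inside it would re-enter the exception path on a half-built frame. Anything placed in the .kprobes.text section is blacklisted from probing; the .pushsection/.popsection pair puts just this one routine there without disturbing the rest of the file. For C code the same effect comes from the __kprobes annotation, which expands to a section attribute; a hypothetical example:

#include <linux/kprobes.h>

/*
 * Hypothetical illustration: __kprobes expands to
 * __attribute__((__section__(".kprobes.text"))), so this function is
 * off-limits to kprobes just like save_args above.
 */
static int __kprobes fragile_entry_helper(void)
{
	return 0;
}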
4 changes: 4 additions & 0 deletions arch/x86/kernel/hw_breakpoint.c
@@ -433,6 +433,10 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 	dr6_p = (unsigned long *)ERR_PTR(args->err);
 	dr6 = *dr6_p;
 
+	/* If it's a single step, TRAP bits are random */
+	if (dr6 & DR_STEP)
+		return NOTIFY_DONE;
+
 	/* Do an early return if no trap bits are set in DR6 */
 	if ((dr6 & DR_TRAP_BITS) == 0)
 		return NOTIFY_DONE;
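
For reference, the DR6 status bits consulted here (values as in arch/x86/include/asm/debugreg.h): the four TRAP bits identify which hardware breakpoint fired, while DR_STEP is the single-step (BS) flag, and, as the patch's comment notes, the TRAP bits are effectively random when BS is set, hence the early bail-out.

/* DR6 debug-status bits used by hw_breakpoint_handler(): */
#define DR_TRAP0	(0x1)		/* breakpoint 0 hit */
#define DR_TRAP1	(0x2)		/* breakpoint 1 hit */
#define DR_TRAP2	(0x4)		/* breakpoint 2 hit */
#define DR_TRAP3	(0x8)		/* breakpoint 3 hit */
#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
#define DR_STEP		(0x4000)	/* BS: single-step flag */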
4 changes: 4 additions & 0 deletions include/linux/hw_breakpoint.h
@@ -33,6 +33,8 @@ enum bp_type_idx {
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 
+extern int __init init_hw_breakpoint(void);
+
 static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 {
 	memset(attr, 0, sizeof(*attr));
@@ -108,6 +110,8 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp)
 
 #else /* !CONFIG_HAVE_HW_BREAKPOINT */
 
+static inline int __init init_hw_breakpoint(void) { return 0; }
+
 static inline struct perf_event *
 register_user_hw_breakpoint(struct perf_event_attr *attr,
 			    perf_overflow_handler_t triggered,
30 changes: 16 additions & 14 deletions include/linux/perf_event.h
@@ -850,6 +850,7 @@ struct perf_event_context {
 	int				nr_active;
 	int				is_active;
 	int				nr_stat;
+	int				rotate_disable;
 	atomic_t			refcount;
 	struct task_struct		*task;
 
@@ -908,20 +909,6 @@ extern int perf_num_counters(void);
 extern const char *perf_pmu_name(void);
 extern void __perf_event_task_sched_in(struct task_struct *task);
 extern void __perf_event_task_sched_out(struct task_struct *task, struct task_struct *next);
-
-extern atomic_t perf_task_events;
-
-static inline void perf_event_task_sched_in(struct task_struct *task)
-{
-	COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
-}
-
-static inline
-void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
-{
-	COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
-}
-
 extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
@@ -1030,6 +1017,21 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
 		__perf_sw_event(event_id, nr, nmi, regs, addr);
 }
 
+extern atomic_t perf_task_events;
+
+static inline void perf_event_task_sched_in(struct task_struct *task)
+{
+	COND_STMT(&perf_task_events, __perf_event_task_sched_in(task));
+}
+
+static inline
+void perf_event_task_sched_out(struct task_struct *task, struct task_struct *next)
+{
+	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+
+	COND_STMT(&perf_task_events, __perf_event_task_sched_out(task, next));
+}
+
 extern void perf_event_mmap(struct vm_area_struct *vma);
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
 extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
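
Two things happen in this relocation. First, perf_event_task_sched_out() now emits the context-switch software event itself, before the COND_STMT() gate: previously the count lived inside __perf_event_task_sched_out() (see the kernel/perf_event.c hunk below), which COND_STMT() skips entirely whenever no per-task events exist, so PERF_COUNT_SW_CONTEXT_SWITCHES could miss switches. Second, the inlines had to move below perf_sw_event(), because they now call it and an inline can only call what the header has already defined. For reference, COND_STMT()'s generic fallback behaves roughly like this (a sketch, not the verbatim definition; with arch jump-label support the test is patched out of the instruction stream instead):

/* Rough fallback semantics of COND_STMT(), cf. the 2010 jump_label.h: */
#define COND_STMT(key, stmt)					\
do {								\
	if (unlikely(atomic_read(key)))				\
		stmt;						\
} while (0)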
3 changes: 1 addition & 2 deletions kernel/hw_breakpoint.c
@@ -620,7 +620,7 @@ static struct pmu perf_breakpoint = {
 	.read	= hw_breakpoint_pmu_read,
 };
 
-static int __init init_hw_breakpoint(void)
+int __init init_hw_breakpoint(void)
 {
 	unsigned int **task_bp_pinned;
 	int cpu, err_cpu;
@@ -655,6 +655,5 @@ static int __init init_hw_breakpoint(void)
 
 	return -ENOMEM;
 }
-core_initcall(init_hw_breakpoint);
 
 
4 changes: 3 additions & 1 deletion kernel/irq_work.c
@@ -145,7 +145,9 @@ void irq_work_run(void)
 		 * Clear the BUSY bit and return to the free state if
 		 * no-one else claimed it meanwhile.
 		 */
-		cmpxchg(&entry->next, next_flags(NULL, IRQ_WORK_BUSY), NULL);
+		(void)cmpxchg(&entry->next,
+			      next_flags(NULL, IRQ_WORK_BUSY),
+			      NULL);
 	}
 }
 EXPORT_SYMBOL_GPL(irq_work_run);
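
Two notes on this hunk. The (void) cast marks the cmpxchg() result as deliberately unused (if the compare fails, someone re-claimed the entry in the meantime, which is fine) and quiets the "value computed is not used" warning some compilers emit here. The next_flags() helper exists because irq_work keeps its state in the low bits of the ->next pointer rather than in a separate field; roughly, from the kernel/irq_work.c of this era (a sketch):

/* State kept in the low two bits of irq_work::next: */
#define IRQ_WORK_PENDING	1UL	/* claimed and queued */
#define IRQ_WORK_BUSY		2UL	/* callback running */
#define IRQ_WORK_FLAGS		3UL

/* Pack flag bits into a pointer value: */
static inline struct irq_work *next_flags(struct irq_work *entry,
					  unsigned long flags)
{
	return (struct irq_work *)((unsigned long)entry | flags);
}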
93 changes: 77 additions & 16 deletions kernel/perf_event.c
@@ -31,6 +31,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
+#include <linux/hw_breakpoint.h>
 
 #include <asm/irq_regs.h>
 
@@ -1286,8 +1287,6 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
 	int ctxn;
 
-	perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
-
 	for_each_task_context_nr(ctxn)
 		perf_event_context_sched_out(task, ctxn, next);
 }
@@ -1621,8 +1620,12 @@ static void rotate_ctx(struct perf_event_context *ctx)
 {
 	raw_spin_lock(&ctx->lock);
 
-	/* Rotate the first entry last of non-pinned groups */
-	list_rotate_left(&ctx->flexible_groups);
+	/*
+	 * Rotate the first entry last of non-pinned groups. Rotation might be
+	 * disabled by the inheritance code.
+	 */
+	if (!ctx->rotate_disable)
+		list_rotate_left(&ctx->flexible_groups);
 
 	raw_spin_unlock(&ctx->lock);
 }
@@ -2234,11 +2237,6 @@ int perf_event_release_kernel(struct perf_event *event)
 	raw_spin_unlock_irq(&ctx->lock);
 	mutex_unlock(&ctx->mutex);
 
-	mutex_lock(&event->owner->perf_event_mutex);
-	list_del_init(&event->owner_entry);
-	mutex_unlock(&event->owner->perf_event_mutex);
-	put_task_struct(event->owner);
-
 	free_event(event);
 
 	return 0;
@@ -2251,9 +2249,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
 static int perf_release(struct inode *inode, struct file *file)
 {
 	struct perf_event *event = file->private_data;
+	struct task_struct *owner;
 
 	file->private_data = NULL;
 
+	rcu_read_lock();
+	owner = ACCESS_ONCE(event->owner);
+	/*
+	 * Matches the smp_wmb() in perf_event_exit_task(). If we observe
+	 * !owner it means the list deletion is complete and we can indeed
+	 * free this event, otherwise we need to serialize on
+	 * owner->perf_event_mutex.
+	 */
+	smp_read_barrier_depends();
+	if (owner) {
+		/*
+		 * Since delayed_put_task_struct() also drops the last
+		 * task reference we can safely take a new reference
+		 * while holding the rcu_read_lock().
+		 */
+		get_task_struct(owner);
+	}
+	rcu_read_unlock();
+
+	if (owner) {
+		mutex_lock(&owner->perf_event_mutex);
+		/*
+		 * We have to re-check the event->owner field, if it is cleared
+		 * we raced with perf_event_exit_task(), acquiring the mutex
+		 * ensured they're done, and we can proceed with freeing the
+		 * event.
+		 */
+		if (event->owner)
+			list_del_init(&event->owner_entry);
+		mutex_unlock(&owner->perf_event_mutex);
+		put_task_struct(owner);
+	}
+
 	return perf_event_release_kernel(event);
 }
 
@@ -5668,7 +5700,7 @@ SYSCALL_DEFINE5(perf_event_open,
 	mutex_unlock(&ctx->mutex);
 
 	event->owner = current;
-	get_task_struct(current);
+
 	mutex_lock(&current->perf_event_mutex);
 	list_add_tail(&event->owner_entry, &current->perf_event_list);
 	mutex_unlock(&current->perf_event_mutex);
@@ -5736,12 +5768,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
 	++ctx->generation;
 	mutex_unlock(&ctx->mutex);
 
-	event->owner = current;
-	get_task_struct(current);
-	mutex_lock(&current->perf_event_mutex);
-	list_add_tail(&event->owner_entry, &current->perf_event_list);
-	mutex_unlock(&current->perf_event_mutex);
-
 	return event;
 
 err_free:
@@ -5892,8 +5918,24 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
  */
 void perf_event_exit_task(struct task_struct *child)
 {
+	struct perf_event *event, *tmp;
 	int ctxn;
 
+	mutex_lock(&child->perf_event_mutex);
+	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+				 owner_entry) {
+		list_del_init(&event->owner_entry);
+
+		/*
+		 * Ensure the list deletion is visible before we clear
+		 * the owner, closes a race against perf_release() where
+		 * we need to serialize on the owner->perf_event_mutex.
+		 */
+		smp_wmb();
+		event->owner = NULL;
+	}
+	mutex_unlock(&child->perf_event_mutex);
+
 	for_each_task_context_nr(ctxn)
 		perf_event_exit_task_context(child, ctxn);
 }
@@ -6113,6 +6155,7 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	struct perf_event *event;
 	struct task_struct *parent = current;
 	int inherited_all = 1;
+	unsigned long flags;
 	int ret = 0;
 
 	child->perf_event_ctxp[ctxn] = NULL;
@@ -6153,13 +6196,26 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 			break;
 	}
 
+	/*
+	 * We can't hold ctx->lock when iterating the ->flexible_group list due
+	 * to allocations, but we need to prevent rotation because
+	 * rotate_ctx() will change the list from interrupt context.
+	 */
+	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+	parent_ctx->rotate_disable = 1;
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
 	list_for_each_entry(event, &parent_ctx->flexible_groups, group_entry) {
 		ret = inherit_task_group(event, parent, parent_ctx,
 					 child, ctxn, &inherited_all);
 		if (ret)
 			break;
 	}
 
+	raw_spin_lock_irqsave(&parent_ctx->lock, flags);
+	parent_ctx->rotate_disable = 0;
+	raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+
 	child_ctx = child->perf_event_ctxp[ctxn];
 
 	if (child_ctx && inherited_all) {
@@ -6312,11 +6368,16 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
 
 void __init perf_event_init(void)
 {
+	int ret;
+
 	perf_event_init_all_cpus();
 	init_srcu_struct(&pmus_srcu);
 	perf_pmu_register(&perf_swevent);
 	perf_pmu_register(&perf_cpu_clock);
 	perf_pmu_register(&perf_task_clock);
 	perf_tp_register();
 	perf_cpu_notifier(perf_cpu_notify);
+
+	ret = init_hw_breakpoint();
+	WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
 }