Merge branch 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'perf-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  perf: Always build the powerpc perf_arch_fetch_caller_regs version
  perf: Always build the stub perf_arch_fetch_caller_regs version
  perf, probe-finder: Build fix on Debian
  perf/scripts: Tuple was set from long in both branches in python_process_event()
  perf: Fix 'perf sched record' deadlock
  perf, x86: Fix callgraphs of 32-bit processes on 64-bit kernels
  perf, x86: Fix AMD hotplug & constraint initialization
  x86: Move notify_cpu_starting() callback to a later stage
  x86,kgdb: Always initialize the hw breakpoint attribute
  perf: Use hot regs with software sched switch/migrate events
  perf: Correctly align perf event tracing buffer
Linus Torvalds committed Apr 4, 2010
2 parents 0121b0c + 6e03bb5 commit 8ce42c8
Showing 11 changed files with 153 additions and 75 deletions.
2 changes: 0 additions & 2 deletions arch/powerpc/kernel/misc.S
@@ -128,7 +128,6 @@ _GLOBAL(__restore_cpu_power7)
/* place holder */
blr

#ifdef CONFIG_EVENT_TRACING
/*
* Get a minimal set of registers for our caller's nth caller.
* r3 = regs pointer, r5 = n.
@@ -154,4 +153,3 @@ _GLOBAL(perf_arch_fetch_caller_regs)
PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
blr
#endif /* CONFIG_EVENT_TRACING */
54 changes: 44 additions & 10 deletions arch/x86/kernel/cpu/perf_event.c
@@ -28,6 +28,7 @@
#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
#include <asm/compat.h>

static u64 perf_event_mask __read_mostly;

@@ -158,7 +159,7 @@ struct x86_pmu {
struct perf_event *event);
struct event_constraint *event_constraints;

void (*cpu_prepare)(int cpu);
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
void (*cpu_dying)(int cpu);
void (*cpu_dead)(int cpu);
@@ -1333,11 +1334,12 @@ static int __cpuinit
x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
int ret = NOTIFY_OK;

switch (action & ~CPU_TASKS_FROZEN) {
case CPU_UP_PREPARE:
if (x86_pmu.cpu_prepare)
x86_pmu.cpu_prepare(cpu);
ret = x86_pmu.cpu_prepare(cpu);
break;

case CPU_STARTING:
@@ -1350,6 +1352,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
x86_pmu.cpu_dying(cpu);
break;

case CPU_UP_CANCELED:
case CPU_DEAD:
if (x86_pmu.cpu_dead)
x86_pmu.cpu_dead(cpu);
@@ -1359,7 +1362,7 @@ x86_pmu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
break;
}

return NOTIFY_OK;
return ret;
}

static void __init pmu_check_apic(void)
@@ -1628,14 +1631,42 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
return len;
}

static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
#ifdef CONFIG_COMPAT
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
unsigned long bytes;
/* 32-bit process in 64-bit kernel. */
struct stack_frame_ia32 frame;
const void __user *fp;

bytes = copy_from_user_nmi(frame, fp, sizeof(*frame));
if (!test_thread_flag(TIF_IA32))
return 0;

fp = compat_ptr(regs->bp);
while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = 0;
frame.return_address = 0;

bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
if (bytes != sizeof(frame))
break;

if (fp < compat_ptr(regs->sp))
break;

return bytes == sizeof(*frame);
callchain_store(entry, frame.return_address);
fp = compat_ptr(frame.next_frame);
}
return 1;
}
#else
static inline int
perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
return 0;
}
#endif

static void
perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
@@ -1651,11 +1682,16 @@ perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry)
callchain_store(entry, PERF_CONTEXT_USER);
callchain_store(entry, regs->ip);

if (perf_callchain_user32(regs, entry))
return;

while (entry->nr < PERF_MAX_STACK_DEPTH) {
unsigned long bytes;
frame.next_frame = NULL;
frame.return_address = 0;

if (!copy_stack_frame(fp, &frame))
bytes = copy_from_user_nmi(&frame, fp, sizeof(frame));
if (bytes != sizeof(frame))
break;

if ((unsigned long)fp < regs->sp)
@@ -1702,7 +1738,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}

#ifdef CONFIG_EVENT_TRACING
void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
{
regs->ip = ip;
@@ -1714,4 +1749,3 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
regs->cs = __KERNEL_CS;
local_save_flags(regs->flags);
}
#endif
80 changes: 47 additions & 33 deletions arch/x86/kernel/cpu/perf_event_amd.c
@@ -137,6 +137,13 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc)
return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
struct amd_nb *nb = cpuc->amd_nb;

return nb && nb->nb_id != -1;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
@@ -147,7 +154,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
/*
* only care about NB events
*/
if (!(nb && amd_is_nb_event(hwc)))
if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
return;

/*
@@ -214,7 +221,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
/*
* if not NB event or no NB, then no constraints
*/
if (!(nb && amd_is_nb_event(hwc)))
if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
return &unconstrained;

/*
@@ -293,51 +300,55 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
return nb;
}

static void amd_pmu_cpu_online(int cpu)
static int amd_pmu_cpu_prepare(int cpu)
{
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

WARN_ON_ONCE(cpuc->amd_nb);

if (boot_cpu_data.x86_max_cores < 2)
return NOTIFY_OK;

cpuc->amd_nb = amd_alloc_nb(cpu, -1);
if (!cpuc->amd_nb)
return NOTIFY_BAD;

return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
struct cpu_hw_events *cpu1, *cpu2;
struct amd_nb *nb = NULL;
struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
struct amd_nb *nb;
int i, nb_id;

if (boot_cpu_data.x86_max_cores < 2)
return;

/*
* function may be called too early in the
* boot process, in which case nb_id is bogus
*/
nb_id = amd_get_nb_id(cpu);
if (nb_id == BAD_APICID)
return;

cpu1 = &per_cpu(cpu_hw_events, cpu);
cpu1->amd_nb = NULL;
WARN_ON_ONCE(nb_id == BAD_APICID);

raw_spin_lock(&amd_nb_lock);

for_each_online_cpu(i) {
cpu2 = &per_cpu(cpu_hw_events, i);
nb = cpu2->amd_nb;
if (!nb)
nb = per_cpu(cpu_hw_events, i).amd_nb;
if (WARN_ON_ONCE(!nb))
continue;
if (nb->nb_id == nb_id)
goto found;
}

nb = amd_alloc_nb(cpu, nb_id);
if (!nb) {
pr_err("perf_events: failed NB allocation for CPU%d\n", cpu);
raw_spin_unlock(&amd_nb_lock);
return;
if (nb->nb_id == nb_id) {
kfree(cpuc->amd_nb);
cpuc->amd_nb = nb;
break;
}
}
found:
nb->refcnt++;
cpu1->amd_nb = nb;

cpuc->amd_nb->nb_id = nb_id;
cpuc->amd_nb->refcnt++;

raw_spin_unlock(&amd_nb_lock);
}

static void amd_pmu_cpu_offline(int cpu)
static void amd_pmu_cpu_dead(int cpu)
{
struct cpu_hw_events *cpuhw;

@@ -349,8 +360,10 @@ static void amd_pmu_cpu_offline(int cpu)
raw_spin_lock(&amd_nb_lock);

if (cpuhw->amd_nb) {
if (--cpuhw->amd_nb->refcnt == 0)
kfree(cpuhw->amd_nb);
struct amd_nb *nb = cpuhw->amd_nb;

if (nb->nb_id == -1 || --nb->refcnt == 0)
kfree(nb);

cpuhw->amd_nb = NULL;
}
@@ -379,8 +392,9 @@ static __initconst struct x86_pmu amd_pmu = {
.get_event_constraints = amd_get_event_constraints,
.put_event_constraints = amd_put_event_constraints,

.cpu_prepare = amd_pmu_cpu_online,
.cpu_dead = amd_pmu_cpu_offline,
.cpu_prepare = amd_pmu_cpu_prepare,
.cpu_starting = amd_pmu_cpu_starting,
.cpu_dead = amd_pmu_cpu_dead,
};

static __init int amd_pmu_init(void)
5 changes: 5 additions & 0 deletions arch/x86/kernel/dumpstack.h
@@ -30,6 +30,11 @@ struct stack_frame {
unsigned long return_address;
};

struct stack_frame_ia32 {
u32 next_frame;
u32 return_address;
};

static inline unsigned long rewind_frame_pointer(int n)
{
struct stack_frame *frame;
2 changes: 1 addition & 1 deletion arch/x86/kernel/kgdb.c
@@ -618,8 +618,8 @@ int kgdb_arch_init(void)
* portion of kgdb because this operation requires mutexs to
* complete.
*/
hw_breakpoint_init(&attr);
attr.bp_addr = (unsigned long)kgdb_arch_init;
attr.type = PERF_TYPE_BREAKPOINT;
attr.bp_len = HW_BREAKPOINT_LEN_1;
attr.bp_type = HW_BREAKPOINT_W;
attr.disabled = 1;
4 changes: 2 additions & 2 deletions arch/x86/kernel/smpboot.c
@@ -242,8 +242,6 @@ static void __cpuinit smp_callin(void)
end_local_APIC_setup();
map_cpu_to_logical_apicid();

notify_cpu_starting(cpuid);

/*
* Need to setup vector mappings before we enable interrupts.
*/
@@ -264,6 +262,8 @@ static void __cpuinit smp_callin(void)
*/
smp_store_cpu_info(cpuid);

notify_cpu_starting(cpuid);

/*
* Allow the master to continue.
*/
21 changes: 14 additions & 7 deletions include/linux/perf_event.h
@@ -842,13 +842,6 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];

extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);

static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
if (atomic_read(&perf_swevent_enabled[event_id]))
__perf_sw_event(event_id, nr, nmi, regs, addr);
}

extern void
perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);

@@ -887,6 +880,20 @@ static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
return perf_arch_fetch_caller_regs(regs, ip, skip);
}

static inline void
perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
{
if (atomic_read(&perf_swevent_enabled[event_id])) {
struct pt_regs hot_regs;

if (!regs) {
perf_fetch_caller_regs(&hot_regs, 1);
regs = &hot_regs;
}
__perf_sw_event(event_id, nr, nmi, regs, addr);
}
}

extern void __perf_event_mmap(struct vm_area_struct *vma);

static inline void perf_event_mmap(struct vm_area_struct *vma)
