Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo-4.12-20170314' of git://git.kernel.org…
Browse files Browse the repository at this point in the history
…/pub/scm/linux/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

New features:

- Add PERF_RECORD_NAMESPACES so that the kernel can record information
  required to associate samples to namespaces, helping in container
  problem characterization.

  Now 'perf record' has a '--namespaces' option to ask for such info,
  and when present, it can be used, initially, via a new sort order,
  'cgroup_id', allowing histogram entry bucketization by a (device, inode)
  based cgroup identifier (Hari Bathini)

- Add --next option to 'perf sched timehist', showing what is the next
  thread to run (Brendan Gregg)

Fixes:

- Fix segfault with basic block 'cycles' sort dimension (Changbin Du)

- Add c2c to command-list.txt, making it appear in the 'perf help'
  output (Changbin Du)

- Fix zeroing of 'abs_path' variable in the perf hists browser switch
  file code (Changbin Du)

- Hide tips messages when -q/--quiet is given to 'perf report' (Namhyung Kim)

Infrastructure changes:

- Use ref_reloc_sym + offset to setup kretprobes (Naveen Rao)

- Ignore generated files pmu-events/{jevents,pmu-events.c} for git (Changbin Du)

Documentation changes:

- Document +field style argument support for --field option (Changbin Du)

- Clarify 'perf c2c --stats' help message (Namhyung Kim)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Mar 15, 2017
2 parents 84e5b54 + 5f6bee3 commit ffa86c2
Show file tree
Hide file tree
Showing 50 changed files with 799 additions and 74 deletions.
5 changes: 3 additions & 2 deletions Documentation/trace/kprobetrace.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ Overview
--------
These events are similar to tracepoint based events. Instead of Tracepoint,
this is based on kprobes (kprobe and kretprobe). So it can probe wherever
kprobes can probe (this means, all functions body except for __kprobes
functions). Unlike the Tracepoint based event, this can be added and removed
kprobes can probe (this means, all functions except those with
__kprobes/nokprobe_inline annotation and those marked NOKPROBE_SYMBOL).
Unlike the Tracepoint based event, this can be added and removed
dynamically, on the fly.

To enable this feature, build your kernel with CONFIG_KPROBE_EVENTS=y.
Expand Down
2 changes: 2 additions & 0 deletions include/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks

extern void perf_event_exec(void);
extern void perf_event_comm(struct task_struct *tsk, bool exec);
extern void perf_event_namespaces(struct task_struct *tsk);
extern void perf_event_fork(struct task_struct *tsk);

/* Callchains */
Expand Down Expand Up @@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks
static inline void perf_event_mmap(struct vm_area_struct *vma) { }
static inline void perf_event_exec(void) { }
static inline void perf_event_comm(struct task_struct *tsk, bool exec) { }
static inline void perf_event_namespaces(struct task_struct *tsk) { }
static inline void perf_event_fork(struct task_struct *tsk) { }
static inline void perf_event_init(void) { }
static inline int perf_swevent_get_recursion_context(void) { return -1; }
Expand Down
32 changes: 31 additions & 1 deletion include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;

union {
__u32 wakeup_events; /* wakeup every n events */
Expand Down Expand Up @@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};

/*
 * Identifies one namespace as a (device, inode number) pair.
 * A PERF_RECORD_NAMESPACES record carries an array of these, one entry per
 * namespace index defined below.
 */
struct perf_ns_link_info {
/* device number of the namespace's inode */
__u64 dev;
/* inode number of the namespace */
__u64 ino;
};

/*
 * Slot indices into the perf_ns_link_info array of a PERF_RECORD_NAMESPACES
 * record: each namespace type has a fixed position, spelled out explicitly.
 * NR_NAMESPACES is the number of slots.
 */
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,

NR_NAMESPACES, /* number of available namespaces */
};

enum perf_event_type {

/*
Expand Down Expand Up @@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,

/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,

PERF_RECORD_MAX, /* non-ABI */
};

Expand Down
139 changes: 139 additions & 0 deletions kernel/events/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
#include <linux/parser.h>
#include <linux/sched/clock.h>
#include <linux/sched/mm.h>
#include <linux/proc_ns.h>
#include <linux/mount.h>

#include "internal.h"

Expand Down Expand Up @@ -379,6 +381,7 @@ static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);

static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_namespaces_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
static atomic_t nr_freq_events __read_mostly;
static atomic_t nr_switch_events __read_mostly;
Expand Down Expand Up @@ -3991,6 +3994,8 @@ static void unaccount_event(struct perf_event *event)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
if (event->attr.namespaces)
atomic_dec(&nr_namespaces_events);
if (event->attr.task)
atomic_dec(&nr_task_events);
if (event->attr.freq)
Expand Down Expand Up @@ -6491,6 +6496,7 @@ static void perf_event_task(struct task_struct *task,
/*
 * Fork hook: report @task through perf_event_task() and then through
 * perf_event_namespaces(), so that a namespaces record is produced for the
 * task as well (perf_event_namespaces() is a no-op when no event asked for
 * namespaces data).
 *
 * NOTE(review): the meaning of the NULL and 1 arguments is defined by
 * perf_event_task(), which is not visible here -- confirm there.
 */
void perf_event_fork(struct task_struct *task)
{
perf_event_task(task, NULL, 1);
perf_event_namespaces(task);
}

/*
Expand Down Expand Up @@ -6592,6 +6598,132 @@ void perf_event_comm(struct task_struct *task, bool exec)
perf_event_comm_event(&comm_event);
}

/*
* namespaces tracking
*/

/*
 * Working state for emitting one PERF_RECORD_NAMESPACES record.
 *
 * @task:     task whose namespaces are being reported.
 * @event_id: the record body; it is copied as-is into the ring buffer by
 *            perf_event_namespaces_output(), so its layout is the layout of
 *            the record (header, pid, tid, nr_namespaces, link_info[]).
 */
struct perf_namespaces_event {
struct task_struct *task;

struct {
struct perf_event_header header;

/* pid/tid are filled in per receiving event at output time */
u32 pid;
u32 tid;
/* number of entries in link_info[] */
u64 nr_namespaces;
/* one (dev, ino) pair per namespace, indexed by *_NS_INDEX */
struct perf_ns_link_info link_info[NR_NAMESPACES];
} event_id;
};

/*
 * Tell whether @event asked for namespaces records.
 *
 * Returns the value of the event's attr.namespaces bit: non-zero when the
 * event wants PERF_RECORD_NAMESPACES records, zero otherwise.
 */
static int perf_event_namespaces_match(struct perf_event *event)
{
	int wants_namespaces = event->attr.namespaces;

	return wants_namespaces;
}

static void perf_event_namespaces_output(struct perf_event *event,
void *data)
{
struct perf_namespaces_event *namespaces_event = data;
struct perf_output_handle handle;
struct perf_sample_data sample;
int ret;

if (!perf_event_namespaces_match(event))
return;

perf_event_header__init_id(&namespaces_event->event_id.header,
&sample, event);
ret = perf_output_begin(&handle, event,
namespaces_event->event_id.header.size);
if (ret)
return;

namespaces_event->event_id.pid = perf_event_pid(event,
namespaces_event->task);
namespaces_event->event_id.tid = perf_event_tid(event,
namespaces_event->task);

perf_output_put(&handle, namespaces_event->event_id);

perf_event__output_id_sample(event, &handle, &sample);

perf_output_end(&handle);
}

/*
 * Fill @ns_link_info with the (device, inode number) pair that identifies
 * the namespace of @task selected by @ns_ops.
 *
 * The namespace is resolved with ns_get_path().  If that fails,
 * @ns_link_info is left untouched (the caller passes in a zeroed slot).  On
 * success the device number is taken from the inode's superblock, encoded
 * with new_encode_dev(), and the inode number from the inode itself.
 *
 * The path obtained from ns_get_path() holds references, so it is released
 * with path_put() once the values have been copied; without that every
 * emitted namespaces record would leak a reference.
 */
static void perf_fill_ns_link_info(struct perf_ns_link_info *ns_link_info,
				   struct task_struct *task,
				   const struct proc_ns_operations *ns_ops)
{
	struct path ns_path;
	struct inode *ns_inode;
	void *error;

	error = ns_get_path(&ns_path, task, ns_ops);
	if (!error) {
		ns_inode = ns_path.dentry->d_inode;
		ns_link_info->dev = new_encode_dev(ns_inode->i_sb->s_dev);
		ns_link_info->ino = ns_inode->i_ino;
		/* drop the references taken by ns_get_path() */
		path_put(&ns_path);
	}
}

void perf_event_namespaces(struct task_struct *task)
{
struct perf_namespaces_event namespaces_event;
struct perf_ns_link_info *ns_link_info;

if (!atomic_read(&nr_namespaces_events))
return;

namespaces_event = (struct perf_namespaces_event){
.task = task,
.event_id = {
.header = {
.type = PERF_RECORD_NAMESPACES,
.misc = 0,
.size = sizeof(namespaces_event.event_id),
},
/* .pid */
/* .tid */
.nr_namespaces = NR_NAMESPACES,
/* .link_info[NR_NAMESPACES] */
},
};

ns_link_info = namespaces_event.event_id.link_info;

perf_fill_ns_link_info(&ns_link_info[MNT_NS_INDEX],
task, &mntns_operations);

#ifdef CONFIG_USER_NS
perf_fill_ns_link_info(&ns_link_info[USER_NS_INDEX],
task, &userns_operations);
#endif
#ifdef CONFIG_NET_NS
perf_fill_ns_link_info(&ns_link_info[NET_NS_INDEX],
task, &netns_operations);
#endif
#ifdef CONFIG_UTS_NS
perf_fill_ns_link_info(&ns_link_info[UTS_NS_INDEX],
task, &utsns_operations);
#endif
#ifdef CONFIG_IPC_NS
perf_fill_ns_link_info(&ns_link_info[IPC_NS_INDEX],
task, &ipcns_operations);
#endif
#ifdef CONFIG_PID_NS
perf_fill_ns_link_info(&ns_link_info[PID_NS_INDEX],
task, &pidns_operations);
#endif
#ifdef CONFIG_CGROUPS
perf_fill_ns_link_info(&ns_link_info[CGROUP_NS_INDEX],
task, &cgroupns_operations);
#endif

perf_iterate_sb(perf_event_namespaces_output,
&namespaces_event,
NULL);
}

/*
* mmap tracking
*/
Expand Down Expand Up @@ -9146,6 +9278,8 @@ static void account_event(struct perf_event *event)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
if (event->attr.namespaces)
atomic_inc(&nr_namespaces_events);
if (event->attr.task)
atomic_inc(&nr_task_events);
if (event->attr.freq)
Expand Down Expand Up @@ -9691,6 +9825,11 @@ SYSCALL_DEFINE5(perf_event_open,
return -EACCES;
}

if (attr.namespaces) {
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
}

if (attr.freq) {
if (attr.sample_freq > sysctl_perf_event_sample_rate)
return -EINVAL;
Expand Down
2 changes: 2 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -2352,6 +2352,8 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
}
}

perf_event_namespaces(current);

bad_unshare_cleanup_cred:
if (new_cred)
put_cred(new_cred);
Expand Down
5 changes: 3 additions & 2 deletions kernel/kprobes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1740,11 +1740,12 @@ void unregister_kprobes(struct kprobe **kps, int num)
}
EXPORT_SYMBOL_GPL(unregister_kprobes);

int __weak __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
int __weak kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
return NOTIFY_DONE;
}
NOKPROBE_SYMBOL(kprobe_exceptions_notify);

static struct notifier_block kprobe_exceptions_nb = {
.notifier_call = kprobe_exceptions_notify,
Expand Down
3 changes: 3 additions & 0 deletions kernel/nsproxy.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <linux/file.h>
#include <linux/syscalls.h>
#include <linux/cgroup.h>
#include <linux/perf_event.h>

static struct kmem_cache *nsproxy_cachep;

Expand Down Expand Up @@ -262,6 +263,8 @@ SYSCALL_DEFINE2(setns, int, fd, int, nstype)
goto out;
}
switch_task_namespaces(tsk, new_nsproxy);

perf_event_namespaces(tsk);
out:
fput(file);
return err;
Expand Down
32 changes: 31 additions & 1 deletion tools/include/uapi/linux/perf_event.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,8 @@ struct perf_event_attr {
use_clockid : 1, /* use @clockid for time fields */
context_switch : 1, /* context switch data */
write_backward : 1, /* Write ring buffer from end to beginning */
__reserved_1 : 36;
namespaces : 1, /* include namespaces data */
__reserved_1 : 35;

union {
__u32 wakeup_events; /* wakeup every n events */
Expand Down Expand Up @@ -610,6 +611,23 @@ struct perf_event_header {
__u16 size;
};

/*
 * Identifies one namespace as a (device, inode number) pair.
 * A PERF_RECORD_NAMESPACES record carries an array of these, one entry per
 * namespace index defined below.
 */
struct perf_ns_link_info {
/* device number of the namespace's inode */
__u64 dev;
/* inode number of the namespace */
__u64 ino;
};

/*
 * Slot indices into the perf_ns_link_info array of a PERF_RECORD_NAMESPACES
 * record: each namespace type has a fixed position, spelled out explicitly.
 * NR_NAMESPACES is the number of slots.
 */
enum {
NET_NS_INDEX = 0,
UTS_NS_INDEX = 1,
IPC_NS_INDEX = 2,
PID_NS_INDEX = 3,
USER_NS_INDEX = 4,
MNT_NS_INDEX = 5,
CGROUP_NS_INDEX = 6,

NR_NAMESPACES, /* number of available namespaces */
};

enum perf_event_type {

/*
Expand Down Expand Up @@ -862,6 +880,18 @@ enum perf_event_type {
*/
PERF_RECORD_SWITCH_CPU_WIDE = 15,

/*
* struct {
* struct perf_event_header header;
* u32 pid;
* u32 tid;
* u64 nr_namespaces;
* { u64 dev, inode; } [nr_namespaces];
* struct sample_id sample_id;
* };
*/
PERF_RECORD_NAMESPACES = 16,

PERF_RECORD_MAX, /* non-ABI */
};

Expand Down
2 changes: 2 additions & 0 deletions tools/perf/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,5 @@ config.mak.autogen
.config-detected
util/intel-pt-decoder/inat-tables.c
arch/*/include/generated/
pmu-events/pmu-events.c
pmu-events/jevents
3 changes: 3 additions & 0 deletions tools/perf/Documentation/perf-record.txt
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,9 @@ Enable weighted sampling. An additional weight is recorded per sample and can
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.

--namespaces::
Record events of type PERF_RECORD_NAMESPACES.

--transaction::
Record transaction flags for transaction related events.

Expand Down
Loading

0 comments on commit ffa86c2

Please sign in to comment.