Skip to content

Commit

Permalink
tracing: Use pid bitmap instead of a pid array for set_event_pid
Browse files Browse the repository at this point in the history
To make it possible for the children of tasks filtered by set_event_pid to
also be traced on fork (and to stop being traced on exit), convert the pid
array into a pid bitmask. Most of the time pid_max is only 32768, which needs
just a 4k bitmask; that is the same size as the current default list, and
that list could grow if more pids are listed.

This also greatly simplifies the code.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
  • Loading branch information
Steven Rostedt committed Apr 19, 2016
1 parent 9ebc57c commit f4d34a8
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 124 deletions.
5 changes: 2 additions & 3 deletions kernel/trace/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,8 @@ struct trace_options {
};

struct trace_pid_list {
unsigned int nr_pids;
int order;
pid_t *pids;
int pid_max;
unsigned long *pids;
};

/*
Expand Down
221 changes: 100 additions & 121 deletions kernel/trace/trace_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/bsearch.h>
#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/sort.h>
Expand Down Expand Up @@ -471,23 +471,13 @@ static void ftrace_clear_events(struct trace_array *tr)
mutex_unlock(&event_mutex);
}

/*
 * Three-way comparison of two pid_t values passed by address.
 *
 * Returns 0 when *key equals *elt, -1 when *key is smaller and 1 when
 * *key is larger.
 */
static int cmp_pid(const void *key, const void *elt)
{
	const pid_t wanted = *(const pid_t *)key;
	const pid_t entry = *(const pid_t *)elt;

	/* Each relational test yields 0 or 1, so the result is -1, 0 or 1 */
	return (wanted > entry) - (wanted < entry);
}
/* Shouldn't this be in a header? */
extern int pid_max;

static bool
ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
{
pid_t search_pid;
pid_t *pid;
pid_t pid;

/*
* Return false, because if filtered_pids does not exist,
Expand All @@ -496,15 +486,16 @@ ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
if (!filtered_pids)
return false;

search_pid = task->pid;
pid = task->pid;

pid = bsearch(&search_pid, filtered_pids->pids,
filtered_pids->nr_pids, sizeof(pid_t),
cmp_pid);
if (!pid)
/*
* If pid_max changed after filtered_pids was created, we
* by default ignore all pids greater than the previous pid_max.
*/
if (task->pid >= filtered_pids->pid_max)
return true;

return false;
return !test_bit(task->pid, filtered_pids->pids);
}

static void
Expand Down Expand Up @@ -602,7 +593,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
/* Wait till all users are no longer using pid filtering */
synchronize_sched();

free_pages((unsigned long)pid_list->pids, pid_list->order);
vfree(pid_list->pids);
kfree(pid_list);
}

Expand Down Expand Up @@ -946,11 +937,32 @@ static void t_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex);
}

/*
 * Step the pid listing forward to the next pid set in the bitmap.
 *
 * @v carries the previously returned cookie, which is the previous pid
 * plus one, so it can be used directly as the bit offset at which the
 * search resumes. *pos is incremented on every call.
 *
 * Returns the next set pid plus one (so that pid 0 is distinguishable
 * from NULL), or NULL when no further bit is set below pid_max.
 */
static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
	unsigned long next;

	(*pos)++;

	/* The cookie is already one past the previous bit */
	next = find_next_bit(pid_list->pids, pid_list->pid_max,
			     (unsigned long)v);
	if (next >= pid_list->pid_max)
		return NULL;

	/* Hand back pid + 1 so that pid zero can be represented */
	return (void *)(next + 1);
}

static void *p_start(struct seq_file *m, loff_t *pos)
__acquires(RCU)
{
struct trace_pid_list *pid_list;
struct trace_array *tr = m->private;
unsigned long pid;
loff_t l = 0;

/*
* Grab the mutex, to keep calls to p_next() having the same
Expand All @@ -963,10 +975,18 @@ static void *p_start(struct seq_file *m, loff_t *pos)

pid_list = rcu_dereference_sched(tr->filtered_pids);

if (!pid_list || *pos >= pid_list->nr_pids)
if (!pid_list)
return NULL;

pid = find_first_bit(pid_list->pids, pid_list->pid_max);
if (pid >= pid_list->pid_max)
return NULL;

return (void *)&pid_list->pids[*pos];
/* Return pid + 1 so that zero can be the exit value */
for (pid++; pid && l < *pos;
pid = (unsigned long)p_next(m, (void *)pid, &l))
;
return (void *)pid;
}

static void p_stop(struct seq_file *m, void *p)
Expand All @@ -976,25 +996,11 @@ static void p_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex);
}

/*
 * Step the pid listing forward by one array slot.
 *
 * Increments *pos and returns a pointer to the pid stored at the new
 * position, or NULL once the position reaches nr_pids.
 */
static void *
p_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct trace_array *tr = m->private;
	struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
	loff_t idx = ++(*pos);

	return idx < pid_list->nr_pids ? (void *)&pid_list->pids[idx] : NULL;
}

static int p_show(struct seq_file *m, void *v)
{
pid_t *pid = v;
unsigned long pid = (unsigned long)v - 1;

seq_printf(m, "%d\n", *pid);
seq_printf(m, "%lu\n", pid);
return 0;
}

Expand Down Expand Up @@ -1543,11 +1549,6 @@ show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
return r;
}

/*
 * Number of pid_t entries that fit in a buffer of (PAGE_SIZE << order)
 * bytes, where order is taken from @pid_list.
 */
static int max_pids(struct trace_pid_list *pid_list)
{
return (PAGE_SIZE << pid_list->order) / sizeof(pid_t);
}

static void ignore_task_cpu(void *data)
{
struct trace_array *tr = data;
Expand All @@ -1571,15 +1572,15 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
struct seq_file *m = filp->private_data;
struct trace_array *tr = m->private;
struct trace_pid_list *filtered_pids = NULL;
struct trace_pid_list *pid_list = NULL;
struct trace_pid_list *pid_list;
struct trace_event_file *file;
struct trace_parser parser;
unsigned long val;
loff_t this_pos;
ssize_t read = 0;
ssize_t ret = 0;
pid_t pid;
int i;
int nr_pids = 0;

if (!cnt)
return 0;
Expand All @@ -1592,10 +1593,43 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
return -ENOMEM;

mutex_lock(&event_mutex);
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));

/*
* Load as many pids into the array before doing a
* swap from the tr->filtered_pids to the new list.
* Always recreate a new array. The write is an all or nothing
* operation. Always create a new array when adding new pids by
* the user. If the operation fails, then the current list is
* not modified.
*/
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list) {
read = -ENOMEM;
goto out;
}
pid_list->pid_max = READ_ONCE(pid_max);
/* Only truncating will shrink pid_max */
if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
pid_list->pid_max = filtered_pids->pid_max;
pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
if (!pid_list->pids) {
kfree(pid_list);
read = -ENOMEM;
goto out;
}
if (filtered_pids) {
/* copy the current bits to the new max */
pid = find_first_bit(filtered_pids->pids,
filtered_pids->pid_max);
while (pid < filtered_pids->pid_max) {
set_bit(pid, pid_list->pids);
pid = find_next_bit(filtered_pids->pids,
filtered_pids->pid_max,
pid + 1);
nr_pids++;
}
}

while (cnt > 0) {

this_pos = 0;
Expand All @@ -1613,92 +1647,35 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
ret = -EINVAL;
if (kstrtoul(parser.buffer, 0, &val))
break;
if (val > INT_MAX)
if (val >= pid_list->pid_max)
break;

pid = (pid_t)val;

ret = -ENOMEM;
if (!pid_list) {
pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
if (!pid_list)
break;

filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));
if (filtered_pids)
pid_list->order = filtered_pids->order;
else
pid_list->order = 0;

pid_list->pids = (void *)__get_free_pages(GFP_KERNEL,
pid_list->order);
if (!pid_list->pids)
break;

if (filtered_pids) {
pid_list->nr_pids = filtered_pids->nr_pids;
memcpy(pid_list->pids, filtered_pids->pids,
pid_list->nr_pids * sizeof(pid_t));
} else
pid_list->nr_pids = 0;
}

if (pid_list->nr_pids >= max_pids(pid_list)) {
pid_t *pid_page;

pid_page = (void *)__get_free_pages(GFP_KERNEL,
pid_list->order + 1);
if (!pid_page)
break;
memcpy(pid_page, pid_list->pids,
pid_list->nr_pids * sizeof(pid_t));
free_pages((unsigned long)pid_list->pids, pid_list->order);

pid_list->order++;
pid_list->pids = pid_page;
}
set_bit(pid, pid_list->pids);
nr_pids++;

pid_list->pids[pid_list->nr_pids++] = pid;
trace_parser_clear(&parser);
ret = 0;
}
trace_parser_put(&parser);

if (ret < 0) {
if (pid_list)
free_pages((unsigned long)pid_list->pids, pid_list->order);
vfree(pid_list->pids);
kfree(pid_list);
mutex_unlock(&event_mutex);
return ret;
}

if (!pid_list) {
mutex_unlock(&event_mutex);
return ret;
read = ret;
goto out;
}

sort(pid_list->pids, pid_list->nr_pids, sizeof(pid_t), cmp_pid, NULL);

/* Remove duplicates */
for (i = 1; i < pid_list->nr_pids; i++) {
int start = i;

while (i < pid_list->nr_pids &&
pid_list->pids[i - 1] == pid_list->pids[i])
i++;

if (start != i) {
if (i < pid_list->nr_pids) {
memmove(&pid_list->pids[start], &pid_list->pids[i],
(pid_list->nr_pids - i) * sizeof(pid_t));
pid_list->nr_pids -= i - start;
i = start;
} else
pid_list->nr_pids = start;
}
if (!nr_pids) {
/* Cleared the list of pids */
vfree(pid_list->pids);
kfree(pid_list);
read = ret;
if (!filtered_pids)
goto out;
pid_list = NULL;
}

rcu_assign_pointer(tr->filtered_pids, pid_list);

list_for_each_entry(file, &tr->events, list) {
Expand All @@ -1708,7 +1685,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
if (filtered_pids) {
synchronize_sched();

free_pages((unsigned long)filtered_pids->pids, filtered_pids->order);
vfree(filtered_pids->pids);
kfree(filtered_pids);
} else {
/*
Expand Down Expand Up @@ -1745,10 +1722,12 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
*/
on_each_cpu(ignore_task_cpu, tr, 1);

out:
mutex_unlock(&event_mutex);

ret = read;
*ppos += read;
if (read > 0)
*ppos += read;

return ret;
}
Expand Down

0 comments on commit f4d34a8

Please sign in to comment.