perf machine: Use hashtable for machine threads
To process any event, perf first has to find the corresponding thread
in the machine. The machine maintains a single rb tree to store all
threads, protected by a rw lock.

This is not a problem for current perf, which processes events
serially. However, it becomes a scalability bottleneck when events are
processed in parallel, especially on a heavily loaded system with many
threads.

Introduce a hashtable that divides the one big rb tree into many small
rb trees, one per bucket. The bucket index is thread id % hashtable
size, which reduces the lock contention.
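
In outline, the change replaces the single tree with a fixed-size array
of buckets, each holding its own rb tree and rw lock. A condensed
sketch of the scheme, using the names introduced by the patch below (it
assumes the kernel's rbtree/list types and <pthread.h>, and is not a
drop-in replacement for the perf code):

  #define THREADS__TABLE_BITS	8
  #define THREADS__TABLE_SIZE	(1 << THREADS__TABLE_BITS)	/* 256 buckets */

  struct threads {
  	struct rb_root	 entries;	/* rb tree for this bucket only */
  	pthread_rwlock_t lock;		/* serializes only this bucket */
  	unsigned int	 nr;		/* number of threads in this bucket */
  	struct list_head dead;
  	struct thread	 *last_match;	/* per-bucket lookup cache */
  };

  /* Pick the bucket for a thread id; events for different tids hash
   * to different buckets and therefore take different locks. */
  static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
  {
  	return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
  }

Lookups that land in the same bucket still serialize on that bucket's
lock, but they no longer contend with lookups for every other tid.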

Committer notes:

Renamed some variables and function names to reduce semantic confusion:

  'struct threads' pointers: thread -> threads
  threads hashtable index: tid -> hash_bucket
  struct threads *machine__thread() -> machine__threads()
  Cast tid to (unsigned int) to handle -1 in machine__threads() (Kan Liang)
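
The cast matters because tid can be -1: C evaluates -1 % 256 to -1,
which would index out of bounds, while the conversion to unsigned wraps
to the last valid bucket. A minimal standalone check of that arithmetic
(an illustrative demo, not part of the patch):

  #include <stdio.h>

  int main(void)
  {
  	int tid = -1;

  	/* signed: -1 % 256 == -1, an out-of-bounds array index */
  	printf("%d\n", tid % 256);
  	/* unsigned: 4294967295 % 256 == 255, the last bucket */
  	printf("%u\n", (unsigned int)tid % 256);
  	return 0;
  }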

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Lukasz Odzioba <lukasz.odzioba@intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1505096603-215017-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Kan Liang authored and Arnaldo Carvalho de Melo committed Sep 18, 2017
1 parent 56de5b6 commit 91e467b
Showing 4 changed files with 117 additions and 66 deletions.
19 changes: 11 additions & 8 deletions tools/perf/builtin-trace.c
@@ -2730,20 +2730,23 @@ DEFINE_RESORT_RB(threads, (thread__nr_events(a->thread->priv) < thread__nr_event
 
 static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp)
 {
-	DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host);
 	size_t printed = trace__fprintf_threads_header(fp);
 	struct rb_node *nd;
+	int i;
 
-	if (threads == NULL) {
-		fprintf(fp, "%s", "Error sorting output by nr_events!\n");
-		return 0;
-	}
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		DECLARE_RESORT_RB_MACHINE_THREADS(threads, trace->host, i);
 
-	resort_rb__for_each_entry(nd, threads)
-		printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
+		if (threads == NULL) {
+			fprintf(fp, "%s", "Error sorting output by nr_events!\n");
+			return 0;
+		}
 
-	resort_rb__delete(threads);
+		resort_rb__for_each_entry(nd, threads)
+			printed += trace__fprintf_thread(fp, threads_entry->thread, trace);
 
+		resort_rb__delete(threads);
+	}
 	return printed;
 }

136 changes: 85 additions & 51 deletions tools/perf/util/machine.c
@@ -33,18 +33,28 @@ static void dsos__init(struct dsos *dsos)
 	pthread_rwlock_init(&dsos->lock, NULL);
 }
 
+static void machine__threads_init(struct machine *machine)
+{
+	int i;
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		threads->entries = RB_ROOT;
+		pthread_rwlock_init(&threads->lock, NULL);
+		threads->nr = 0;
+		INIT_LIST_HEAD(&threads->dead);
+		threads->last_match = NULL;
+	}
+}
+
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	memset(machine, 0, sizeof(*machine));
 	map_groups__init(&machine->kmaps, machine);
 	RB_CLEAR_NODE(&machine->rb_node);
 	dsos__init(&machine->dsos);
 
-	machine->threads = RB_ROOT;
-	pthread_rwlock_init(&machine->threads_lock, NULL);
-	machine->nr_threads = 0;
-	INIT_LIST_HEAD(&machine->dead_threads);
-	machine->last_match = NULL;
+	machine__threads_init(machine);
 
 	machine->vdso_info = NULL;
 	machine->env = NULL;
@@ -141,27 +151,37 @@ static void dsos__exit(struct dsos *dsos)
 void machine__delete_threads(struct machine *machine)
 {
 	struct rb_node *nd;
+	int i;
 
-	pthread_rwlock_wrlock(&machine->threads_lock);
-	nd = rb_first(&machine->threads);
-	while (nd) {
-		struct thread *t = rb_entry(nd, struct thread, rb_node);
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		pthread_rwlock_wrlock(&threads->lock);
+		nd = rb_first(&threads->entries);
+		while (nd) {
+			struct thread *t = rb_entry(nd, struct thread, rb_node);
 
-		nd = rb_next(nd);
-		__machine__remove_thread(machine, t, false);
+			nd = rb_next(nd);
+			__machine__remove_thread(machine, t, false);
+		}
+		pthread_rwlock_unlock(&threads->lock);
 	}
-	pthread_rwlock_unlock(&machine->threads_lock);
 }
 
 void machine__exit(struct machine *machine)
 {
+	int i;
+
 	machine__destroy_kernel_maps(machine);
 	map_groups__exit(&machine->kmaps);
 	dsos__exit(&machine->dsos);
 	machine__exit_vdso(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
-	pthread_rwlock_destroy(&machine->threads_lock);
+
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		pthread_rwlock_destroy(&threads->lock);
+	}
 }
 
 void machine__delete(struct machine *machine)
@@ -382,7 +402,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 					 pid_t pid, pid_t tid,
 					 bool create)
 {
-	struct rb_node **p = &machine->threads.rb_node;
+	struct threads *threads = machine__threads(machine, tid);
+	struct rb_node **p = &threads->entries.rb_node;
 	struct rb_node *parent = NULL;
 	struct thread *th;
 
@@ -391,22 +412,22 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 	 * so most of the time we dont have to look up
 	 * the full rbtree:
 	 */
-	th = machine->last_match;
+	th = threads->last_match;
 	if (th != NULL) {
 		if (th->tid == tid) {
 			machine__update_thread_pid(machine, th, pid);
 			return thread__get(th);
 		}
 
-		machine->last_match = NULL;
+		threads->last_match = NULL;
 	}
 
 	while (*p != NULL) {
 		parent = *p;
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->tid == tid) {
-			machine->last_match = th;
+			threads->last_match = th;
 			machine__update_thread_pid(machine, th, pid);
 			return thread__get(th);
 		}
@@ -423,7 +444,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 	th = thread__new(pid, tid);
 	if (th != NULL) {
 		rb_link_node(&th->rb_node, parent, p);
-		rb_insert_color(&th->rb_node, &machine->threads);
+		rb_insert_color(&th->rb_node, &threads->entries);
 
 		/*
 		 * We have to initialize map_groups separately
@@ -434,7 +455,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * leader and that would screwed the rb tree.
 		 */
 		if (thread__init_map_groups(th, machine)) {
-			rb_erase_init(&th->rb_node, &machine->threads);
+			rb_erase_init(&th->rb_node, &threads->entries);
 			RB_CLEAR_NODE(&th->rb_node);
 			thread__put(th);
 			return NULL;
@@ -443,8 +464,8 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * It is now in the rbtree, get a ref
 		 */
 		thread__get(th);
-		machine->last_match = th;
-		++machine->nr_threads;
+		threads->last_match = th;
+		++threads->nr;
 	}
 
 	return th;
Expand All @@ -458,21 +479,24 @@ struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid
struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
struct threads *threads = machine__threads(machine, tid);
struct thread *th;

pthread_rwlock_wrlock(&machine->threads_lock);
pthread_rwlock_wrlock(&threads->lock);
th = __machine__findnew_thread(machine, pid, tid);
pthread_rwlock_unlock(&machine->threads_lock);
pthread_rwlock_unlock(&threads->lock);
return th;
}

struct thread *machine__find_thread(struct machine *machine, pid_t pid,
pid_t tid)
{
struct threads *threads = machine__threads(machine, tid);
struct thread *th;
pthread_rwlock_rdlock(&machine->threads_lock);

pthread_rwlock_rdlock(&threads->lock);
th = ____machine__findnew_thread(machine, pid, tid, false);
pthread_rwlock_unlock(&machine->threads_lock);
pthread_rwlock_unlock(&threads->lock);
return th;
}

@@ -719,21 +743,24 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp)
 
 size_t machine__fprintf(struct machine *machine, FILE *fp)
 {
-	size_t ret;
 	struct rb_node *nd;
+	size_t ret;
+	int i;
 
-	pthread_rwlock_rdlock(&machine->threads_lock);
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		struct threads *threads = &machine->threads[i];
+		pthread_rwlock_rdlock(&threads->lock);
 
-	ret = fprintf(fp, "Threads: %u\n", machine->nr_threads);
+		ret = fprintf(fp, "Threads: %u\n", threads->nr);
 
-	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-		struct thread *pos = rb_entry(nd, struct thread, rb_node);
+		for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+			struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
-		ret += thread__fprintf(pos, fp);
-	}
+			ret += thread__fprintf(pos, fp);
+		}
 
-	pthread_rwlock_unlock(&machine->threads_lock);
-
+		pthread_rwlock_unlock(&threads->lock);
+	}
 	return ret;
 }
@@ -1479,23 +1506,25 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
 {
-	if (machine->last_match == th)
-		machine->last_match = NULL;
+	struct threads *threads = machine__threads(machine, th->tid);
+
+	if (threads->last_match == th)
+		threads->last_match = NULL;
 
 	BUG_ON(refcount_read(&th->refcnt) == 0);
 	if (lock)
-		pthread_rwlock_wrlock(&machine->threads_lock);
-	rb_erase_init(&th->rb_node, &machine->threads);
+		pthread_rwlock_wrlock(&threads->lock);
+	rb_erase_init(&th->rb_node, &threads->entries);
 	RB_CLEAR_NODE(&th->rb_node);
-	--machine->nr_threads;
+	--threads->nr;
 	/*
 	 * Move it first to the dead_threads list, then drop the reference,
 	 * if this is the last reference, then the thread__delete destructor
 	 * will be called and we will remove it from the dead_threads list.
 	 */
-	list_add_tail(&th->node, &machine->dead_threads);
+	list_add_tail(&th->node, &threads->dead);
 	if (lock)
-		pthread_rwlock_unlock(&machine->threads_lock);
+		pthread_rwlock_unlock(&threads->lock);
 	thread__put(th);
 }
 
@@ -2140,21 +2169,26 @@ int machine__for_each_thread(struct machine *machine,
 			     int (*fn)(struct thread *thread, void *p),
 			     void *priv)
 {
+	struct threads *threads;
 	struct rb_node *nd;
 	struct thread *thread;
 	int rc = 0;
+	int i;
 
-	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
-		thread = rb_entry(nd, struct thread, rb_node);
-		rc = fn(thread, priv);
-		if (rc != 0)
-			return rc;
-	}
+	for (i = 0; i < THREADS__TABLE_SIZE; i++) {
+		threads = &machine->threads[i];
+		for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) {
+			thread = rb_entry(nd, struct thread, rb_node);
+			rc = fn(thread, priv);
+			if (rc != 0)
+				return rc;
+		}
 
-	list_for_each_entry(thread, &machine->dead_threads, node) {
-		rc = fn(thread, priv);
-		if (rc != 0)
-			return rc;
+		list_for_each_entry(thread, &threads->dead, node) {
+			rc = fn(thread, priv);
+			if (rc != 0)
+				return rc;
+		}
 	}
 	return rc;
 }
23 changes: 18 additions & 5 deletions tools/perf/util/machine.h
@@ -23,18 +23,25 @@ extern const char *ref_reloc_sym_names[];
 
 struct vdso_info;
 
+#define THREADS__TABLE_BITS	8
+#define THREADS__TABLE_SIZE	(1 << THREADS__TABLE_BITS)
+
+struct threads {
+	struct rb_root	  entries;
+	pthread_rwlock_t  lock;
+	unsigned int	  nr;
+	struct list_head  dead;
+	struct thread	  *last_match;
+};
+
 struct machine {
 	struct rb_node	  rb_node;
 	pid_t		  pid;
 	u16		  id_hdr_size;
 	bool		  comm_exec;
 	bool		  kptr_restrict_warned;
 	char		  *root_dir;
-	struct rb_root	  threads;
-	pthread_rwlock_t  threads_lock;
-	unsigned int	  nr_threads;
-	struct list_head  dead_threads;
-	struct thread	  *last_match;
+	struct threads    threads[THREADS__TABLE_SIZE];
 	struct vdso_info  *vdso_info;
 	struct perf_env   *env;
 	struct dsos	  dsos;
@@ -48,6 +55,12 @@ struct machine {
 	};
 };
 
+static inline struct threads *machine__threads(struct machine *machine, pid_t tid)
+{
+	/* Cast it to handle tid == -1 */
+	return &machine->threads[(unsigned int)tid % THREADS__TABLE_SIZE];
+}
+
 static inline
 struct map *__machine__kernel_map(struct machine *machine, enum map_type type)
 {
5 changes: 3 additions & 2 deletions tools/perf/util/rb_resort.h
@@ -143,7 +143,8 @@ struct __name##_sorted *__name = __name##_sorted__new
 				  __ilist->rblist.nr_entries)
 
 /* For 'struct machine->threads' */
-#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine)			\
-	DECLARE_RESORT_RB(__name)(&__machine->threads, __machine->nr_threads)
+#define DECLARE_RESORT_RB_MACHINE_THREADS(__name, __machine, hash_bucket)	\
+	DECLARE_RESORT_RB(__name)(&__machine->threads[hash_bucket].entries,	\
+				  __machine->threads[hash_bucket].nr)
 
 #endif /* _PERF_RESORT_RB_H_ */
