Skip to content

Commit

Permalink
Merge tag 'perf-core-for-mingo' of git://git.kernel.org/pub/scm/linux…
Browse files Browse the repository at this point in the history
…/kernel/git/acme/linux into perf/core

Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:

User visible changes:

  * Fix mmap return address truncation to 32-bit in 'perf trace'. (Chang Hyun Park)

  * Support operations for shared futexes. (Davidlohr Bueso)

  * Fix error message for --filter option not coming after tracepoint. (Arnaldo Carvalho de Melo)

Infrastructure changes:

  * Refactor unit and scale function parameters for PMU parsing routines. (Matt Fleming)

  * Improve DSO long name lookup with an rbtree, resulting in a great speedup
    for workloads with lots of DSOs. (Waiman Long)

  * Fix build breakage on arm64 targets. (Will Deacon)

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Ingo Molnar committed Oct 3, 2014
2 parents 07394b5 + 281f92f commit 69e8f5b
Show file tree
Hide file tree
Showing 15 changed files with 200 additions and 86 deletions.
1 change: 1 addition & 0 deletions tools/perf/arch/arm64/util/unwind-libunwind.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <libunwind.h>
#include "perf_regs.h"
#include "../../util/unwind.h"
#include "../../util/debug.h"

int libunwind__arch_reg_id(int regnum)
{
Expand Down
7 changes: 5 additions & 2 deletions tools/perf/bench/futex-hash.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ static unsigned int nsecs = 10;
/* amount of futexes per thread */
static unsigned int nfutexes = 1024;
static bool fshared = false, done = false, silent = false;
static int futex_flag = 0;

struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
Expand Down Expand Up @@ -75,8 +76,7 @@ static void *workerfn(void *arg)
* such as internal waitqueue handling, thus enlarging
* the critical region protected by hb->lock.
*/
ret = futex_wait(&w->futex[i], 1234, NULL,
fshared ? 0 : FUTEX_PRIVATE_FLAG);
ret = futex_wait(&w->futex[i], 1234, NULL, futex_flag);
if (!silent &&
(!ret || errno != EAGAIN || errno != EWOULDBLOCK))
warn("Non-expected futex return call");
Expand Down Expand Up @@ -135,6 +135,9 @@ int bench_futex_hash(int argc, const char **argv,
if (!worker)
goto errmem;

if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;

printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);

Expand Down
28 changes: 18 additions & 10 deletions tools/perf/bench/futex-requeue.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,18 @@ static u_int32_t futex1 = 0, futex2 = 0;
static unsigned int nrequeue = 1;

static pthread_t *worker;
static bool done = 0, silent = 0;
static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats requeuetime_stats, requeued_stats;
static unsigned int ncpus, threads_starting, nthreads = 0;
static int futex_flag = 0;

static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};

Expand Down Expand Up @@ -70,7 +72,7 @@ static void *workerfn(void *arg __maybe_unused)
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);

futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
futex_wait(&futex1, 0, NULL, futex_flag);
return NULL;
}

Expand Down Expand Up @@ -127,9 +129,12 @@ int bench_futex_requeue(int argc, const char **argv,
if (!worker)
err(EXIT_FAILURE, "calloc");

printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
"%d at a time.\n\n",
getpid(), nthreads, &futex1, &futex2, nrequeue);
if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;

printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), "
"%d at a time.\n\n", getpid(), nthreads,
fshared ? "shared":"private", &futex1, &futex2, nrequeue);

init_stats(&requeued_stats);
init_stats(&requeuetime_stats);
Expand All @@ -156,16 +161,20 @@ int bench_futex_requeue(int argc, const char **argv,

/* Ok, all threads are patiently blocked, start requeueing */
gettimeofday(&start, NULL);
for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue)
for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) {
/*
* Do not wakeup any tasks blocked on futex1, allowing
* us to really measure futex_wait functionality.
*/
futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue,
FUTEX_PRIVATE_FLAG);
futex_cmp_requeue(&futex1, 0, &futex2, 0,
nrequeue, futex_flag);
}
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);

if (nrequeued > nthreads)
nrequeued = nthreads;

update_stats(&requeued_stats, nrequeued);
update_stats(&requeuetime_stats, runtime.tv_usec);

Expand All @@ -175,7 +184,7 @@ int bench_futex_requeue(int argc, const char **argv,
}

/* everybody should be blocked on futex2, wake'em up */
nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG);
nrequeued = futex_wake(&futex2, nthreads, futex_flag);
if (nthreads != nrequeued)
warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);

Expand All @@ -184,7 +193,6 @@ int bench_futex_requeue(int argc, const char **argv,
if (ret)
err(EXIT_FAILURE, "pthread_join");
}

}

/* cleanup & report results */
Expand Down
15 changes: 10 additions & 5 deletions tools/perf/bench/futex-wake.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,18 @@ static u_int32_t futex1 = 0;
static unsigned int nwakes = 1;

pthread_t *worker;
static bool done = false, silent = false;
static bool done = false, silent = false, fshared = false;
static pthread_mutex_t thread_lock;
static pthread_cond_t thread_parent, thread_worker;
static struct stats waketime_stats, wakeup_stats;
static unsigned int ncpus, threads_starting, nthreads = 0;
static int futex_flag = 0;

static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};

Expand All @@ -58,7 +60,7 @@ static void *workerfn(void *arg __maybe_unused)
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);

futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
futex_wait(&futex1, 0, NULL, futex_flag);
return NULL;
}

Expand Down Expand Up @@ -130,9 +132,12 @@ int bench_futex_wake(int argc, const char **argv,
if (!worker)
err(EXIT_FAILURE, "calloc");

printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;

printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
"waking up %d at a time.\n\n",
getpid(), nthreads, &futex1, nwakes);
getpid(), nthreads, fshared ? "shared":"private", &futex1, nwakes);

init_stats(&wakeup_stats);
init_stats(&waketime_stats);
Expand Down Expand Up @@ -160,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv,
/* Ok, all threads are patiently blocked, start waking folks up */
gettimeofday(&start, NULL);
while (nwoken != nthreads)
nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
nwoken += futex_wake(&futex1, nwakes, futex_flag);
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);

Expand Down
6 changes: 3 additions & 3 deletions tools/perf/builtin-trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -1695,7 +1695,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
int ret;
long ret;
u64 duration = 0;
struct thread *thread;
int id = perf_evsel__sc_tp_uint(evsel, id, sample);
Expand Down Expand Up @@ -1748,7 +1748,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,

if (sc->fmt == NULL) {
signed_print:
fprintf(trace->output, ") = %d", ret);
fprintf(trace->output, ") = %ld", ret);
} else if (ret < 0 && sc->fmt->errmsg) {
char bf[STRERR_BUFSIZE];
const char *emsg = strerror_r(-ret, bf, sizeof(bf)),
Expand All @@ -1758,7 +1758,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
} else if (ret == 0 && sc->fmt->timeout)
fprintf(trace->output, ") = 0 Timeout");
else if (sc->fmt->hexret)
fprintf(trace->output, ") = %#x", ret);
fprintf(trace->output, ") = %#lx", ret);
else
goto signed_print;

Expand Down
85 changes: 74 additions & 11 deletions tools/perf/util/dso.c
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,65 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
return dso;
}

/*
 * Find a matching entry and/or link current entry to RB tree.
 * Either one of the dso or name parameter must be non-NULL or the
 * function will not work.
 *
 * @root: rbtree whose nodes are ordered by long_name; entries sharing a
 *        long name are further ordered by short_name when inserted.
 * @dso:  entry to link into the tree, or NULL to perform a pure lookup.
 * @name: long name to search for; when NULL, dso->long_name is used
 *        (which dereferences dso, hence the requirement above).
 *
 * Returns the node already in the tree when its long name matches and
 * either no dso was supplied (lookup) or the supplied dso is that very
 * node. Returns NULL in every other case: nothing matched, the dso was
 * newly linked into the tree, or the dso was rejected because both its
 * long and short names duplicate an existing node.
 */
static struct dso *dso__findlink_by_longname(struct rb_root *root,
struct dso *dso, const char *name)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;

if (!name)
name = dso->long_name;
/*
 * Find node with the matching name
 */
while (*p) {
struct dso *this = rb_entry(*p, struct dso, rb_node);
int rc = strcmp(name, this->long_name);

/* Remember the node we are at; it becomes the link parent on a miss. */
parent = *p;
if (rc == 0) {
/*
 * A node with the same long name exists. For a pure
 * lookup (dso == NULL), or when the dso being added is
 * this very node, that node is the result and nothing
 * is inserted.
 */
if (!dso || (dso == this))
return this; /* Find matching dso */
/*
 * The core kernel DSOs may have duplicated long name.
 * In this case, the short name should be different.
 * Comparing the short names to differentiate the DSOs.
 */
rc = strcmp(dso->short_name, this->short_name);
if (rc == 0) {
/* Same long and short name: report it and do not link dso. */
pr_err("Duplicated dso name: %s\n", name);
return NULL;
}
}
/* rc is now the long-name or, on a tie, the short-name ordering. */
if (rc < 0)
p = &parent->rb_left;
else
p = &parent->rb_right;
}
if (dso) {
/* Add new node and rebalance tree */
rb_link_node(&dso->rb_node, parent, p);
rb_insert_color(&dso->rb_node, root);
}
/* NULL both for "not found" (lookup) and for "newly inserted". */
return NULL;
}

/*
 * Look up a dso by long name without modifying the tree.
 *
 * Passes a NULL dso to dso__findlink_by_longname(), so only the search
 * part of that routine runs. The const qualifier on @root is cast away
 * solely to match the callee's signature.
 *
 * Returns the matching dso, or NULL when no node has that long name.
 */
static inline struct dso *
dso__find_by_longname(const struct rb_root *root, const char *name)
{
	struct rb_root *tree = (struct rb_root *)root;
	struct dso *found = dso__findlink_by_longname(tree, NULL, name);

	return found;
}

void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
{
if (name == NULL)
Expand Down Expand Up @@ -755,6 +814,7 @@ struct dso *dso__new(const char *name)
dso->a2l_fails = 1;
dso->kernel = DSO_TYPE_USER;
dso->needs_swap = DSO_SWAP__UNSET;
RB_CLEAR_NODE(&dso->rb_node);
INIT_LIST_HEAD(&dso->node);
INIT_LIST_HEAD(&dso->data.open_entry);
}
Expand All @@ -765,6 +825,10 @@ struct dso *dso__new(const char *name)
void dso__delete(struct dso *dso)
{
int i;

if (!RB_EMPTY_NODE(&dso->rb_node))
pr_err("DSO %s is still in rbtree when being deleted!\n",
dso->long_name);
for (i = 0; i < MAP__NR_TYPES; ++i)
symbols__delete(&dso->symbols[i]);

Expand Down Expand Up @@ -851,35 +915,34 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
return have_build_id;
}

void dsos__add(struct list_head *head, struct dso *dso)
void dsos__add(struct dsos *dsos, struct dso *dso)
{
list_add_tail(&dso->node, head);
list_add_tail(&dso->node, &dsos->head);
dso__findlink_by_longname(&dsos->root, dso, NULL);
}

struct dso *dsos__find(const struct list_head *head, const char *name, bool cmp_short)
struct dso *dsos__find(const struct dsos *dsos, const char *name,
bool cmp_short)
{
struct dso *pos;

if (cmp_short) {
list_for_each_entry(pos, head, node)
list_for_each_entry(pos, &dsos->head, node)
if (strcmp(pos->short_name, name) == 0)
return pos;
return NULL;
}
list_for_each_entry(pos, head, node)
if (strcmp(pos->long_name, name) == 0)
return pos;
return NULL;
return dso__find_by_longname(&dsos->root, name);
}

struct dso *__dsos__findnew(struct list_head *head, const char *name)
struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
{
struct dso *dso = dsos__find(head, name, false);
struct dso *dso = dsos__find(dsos, name, false);

if (!dso) {
dso = dso__new(name);
if (dso != NULL) {
dsos__add(head, dso);
dsos__add(dsos, dso);
dso__set_basename(dso);
}
}
Expand Down
16 changes: 13 additions & 3 deletions tools/perf/util/dso.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,18 @@ struct dso_cache {
char data[0];
};

/*
 * DSOs are put into both a list for fast iteration and rbtree for fast
 * long name lookup.
 *
 * Both containers reference the same struct dso objects: a dso is chained
 * on the list through its 'node' member and on the tree through its
 * 'rb_node' member.
 */
struct dsos {
struct list_head head; /* list of all dsos, appended in insertion order */
struct rb_root root; /* rbtree root sorted by long name */
};

struct dso {
struct list_head node;
struct rb_node rb_node; /* rbtree node sorted by long name */
struct rb_root symbols[MAP__NR_TYPES];
struct rb_root symbol_names[MAP__NR_TYPES];
void *a2l;
Expand Down Expand Up @@ -224,10 +234,10 @@ struct map *dso__new_map(const char *name);
struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
const char *short_name, int dso_type);

void dsos__add(struct list_head *head, struct dso *dso);
struct dso *dsos__find(const struct list_head *head, const char *name,
void dsos__add(struct dsos *dsos, struct dso *dso);
struct dso *dsos__find(const struct dsos *dsos, const char *name,
bool cmp_short);
struct dso *__dsos__findnew(struct list_head *head, const char *name);
struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
bool __dsos__read_build_ids(struct list_head *head, bool with_hits);

size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
Expand Down
Loading

0 comments on commit 69e8f5b

Please sign in to comment.