Skip to content

Commit

Permalink
perf bench futex: Add lock_pi stresser
Browse files Browse the repository at this point in the history
Allows a way of measuring low level kernel implementation of FUTEX_LOCK_PI and
FUTEX_UNLOCK_PI.

The program comes in two flavors:

(i) single futex (default), all threads contend on the same uaddr.  For the
sake of the benchmark, we call into kernel space even when the lock is
uncontended.  The kernel will set it to TID, any waters that come in and
contend for the pi futex will be handled respectively by the kernel.

(ii) -M option for multiple futexes, each thread deals with its own futex. This
is a trivial scenario and only measures kernel handling of 0->TID transition.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Mel Gorman <mgorman@suse.de>
Link: http://lkml.kernel.org/r/1436259353.12255.78.camel@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
  • Loading branch information
Davidlohr Bueso authored and Arnaldo Carvalho de Melo committed Jul 20, 2015
1 parent 52c0a18 commit d2f3f5d
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 0 deletions.
4 changes: 4 additions & 0 deletions tools/perf/Documentation/perf-bench.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ Suite for evaluating parallel wake calls.
*requeue*::
Suite for evaluating requeue calls.

*lock-pi*::
Suite for evaluating futex lock_pi calls.


SEE ALSO
--------
linkperf:perf[1]
1 change: 1 addition & 0 deletions tools/perf/bench/Build
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ perf-y += futex-hash.o
perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o

perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/bench/bench.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
extern int bench_futex_wake_parallel(int argc, const char **argv,
const char *prefix);
extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
/* pi futexes */
extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);

#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
Expand Down
219 changes: 219 additions & 0 deletions tools/perf/bench/futex-lock-pi.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,219 @@
/*
* Copyright (C) 2015 Davidlohr Bueso.
*/

#include "../perf.h"
#include "../util/util.h"
#include "../util/stat.h"
#include "../util/parse-options.h"
#include "../util/header.h"
#include "bench.h"
#include "futex.h"

#include <err.h>
#include <stdlib.h>
#include <sys/time.h>
#include <pthread.h>

struct worker {
int tid;
u_int32_t *futex;
pthread_t thread;
unsigned long ops;
};

static u_int32_t global_futex = 0;
static struct worker *worker;
static unsigned int nsecs = 10;
static bool silent = false, multi = false;
static bool done = false, fshared = false;
static unsigned int ncpus, nthreads = 0;
static int futex_flag = 0;
struct timeval start, end, runtime;
static pthread_mutex_t thread_lock;
static unsigned int threads_starting;
static struct stats throughput_stats;
static pthread_cond_t thread_parent, thread_worker;

static const struct option options[] = {
OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
OPT_BOOLEAN( 'M', "multi", &multi, "Use multiple futexes"),
OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
OPT_END()
};

static const char * const bench_futex_lock_pi_usage[] = {
"perf bench futex requeue <options>",
NULL
};

static void print_summary(void)
{
unsigned long avg = avg_stats(&throughput_stats);
double stddev = stddev_stats(&throughput_stats);

printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
!silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
(int) runtime.tv_sec);
}

static void toggle_done(int sig __maybe_unused,
siginfo_t *info __maybe_unused,
void *uc __maybe_unused)
{
/* inform all threads that we're done for the day */
done = true;
gettimeofday(&end, NULL);
timersub(&end, &start, &runtime);
}

static void *workerfn(void *arg)
{
struct worker *w = (struct worker *) arg;

pthread_mutex_lock(&thread_lock);
threads_starting--;
if (!threads_starting)
pthread_cond_signal(&thread_parent);
pthread_cond_wait(&thread_worker, &thread_lock);
pthread_mutex_unlock(&thread_lock);

do {
int ret;
again:
ret = futex_lock_pi(w->futex, NULL, 0, futex_flag);

if (ret) { /* handle lock acquisition */
if (!silent)
warn("thread %d: Could not lock pi-lock for %p (%d)",
w->tid, w->futex, ret);
if (done)
break;

goto again;
}

usleep(1);
ret = futex_unlock_pi(w->futex, futex_flag);
if (ret && !silent)
warn("thread %d: Could not unlock pi-lock for %p (%d)",
w->tid, w->futex, ret);
w->ops++; /* account for thread's share of work */
} while (!done);

return NULL;
}

static void create_threads(struct worker *w, pthread_attr_t thread_attr)
{
cpu_set_t cpu;
unsigned int i;

threads_starting = nthreads;

for (i = 0; i < nthreads; i++) {
worker[i].tid = i;

if (multi) {
worker[i].futex = calloc(1, sizeof(u_int32_t));
if (!worker[i].futex)
err(EXIT_FAILURE, "calloc");
} else
worker[i].futex = &global_futex;

CPU_ZERO(&cpu);
CPU_SET(i % ncpus, &cpu);

if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
err(EXIT_FAILURE, "pthread_attr_setaffinity_np");

if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i]))
err(EXIT_FAILURE, "pthread_create");
}
}

int bench_futex_lock_pi(int argc, const char **argv,
const char *prefix __maybe_unused)
{
int ret = 0;
unsigned int i;
struct sigaction act;
pthread_attr_t thread_attr;

argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0);
if (argc)
goto err;

ncpus = sysconf(_SC_NPROCESSORS_ONLN);

sigfillset(&act.sa_mask);
act.sa_sigaction = toggle_done;
sigaction(SIGINT, &act, NULL);

if (!nthreads)
nthreads = ncpus;

worker = calloc(nthreads, sizeof(*worker));
if (!worker)
err(EXIT_FAILURE, "calloc");

if (!fshared)
futex_flag = FUTEX_PRIVATE_FLAG;

printf("Run summary [PID %d]: %d threads doing pi lock/unlock pairing for %d secs.\n\n",
getpid(), nthreads, nsecs);

init_stats(&throughput_stats);
pthread_mutex_init(&thread_lock, NULL);
pthread_cond_init(&thread_parent, NULL);
pthread_cond_init(&thread_worker, NULL);

threads_starting = nthreads;
pthread_attr_init(&thread_attr);
gettimeofday(&start, NULL);

create_threads(worker, thread_attr);
pthread_attr_destroy(&thread_attr);

pthread_mutex_lock(&thread_lock);
while (threads_starting)
pthread_cond_wait(&thread_parent, &thread_lock);
pthread_cond_broadcast(&thread_worker);
pthread_mutex_unlock(&thread_lock);

sleep(nsecs);
toggle_done(0, NULL, NULL);

for (i = 0; i < nthreads; i++) {
ret = pthread_join(worker[i].thread, NULL);
if (ret)
err(EXIT_FAILURE, "pthread_join");
}

/* cleanup & report results */
pthread_cond_destroy(&thread_parent);
pthread_cond_destroy(&thread_worker);
pthread_mutex_destroy(&thread_lock);

for (i = 0; i < nthreads; i++) {
unsigned long t = worker[i].ops/runtime.tv_sec;

update_stats(&throughput_stats, t);
if (!silent)
printf("[thread %3d] futex: %p [ %ld ops/sec ]\n",
worker[i].tid, worker[i].futex, t);

if (multi)
free(worker[i].futex);
}

print_summary();

free(worker);
return ret;
err:
usage_with_options(bench_futex_lock_pi_usage, options);
exit(EXIT_FAILURE);
}
20 changes: 20 additions & 0 deletions tools/perf/bench/futex.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,26 @@ futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
}

/**
* futex_lock_pi() - block on uaddr as a PI mutex
* @detect: whether (1) or not (0) to perform deadlock detection
*/
static inline int
futex_lock_pi(u_int32_t *uaddr, struct timespec *timeout, int detect,
int opflags)
{
return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
}

/**
* futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
*/
static inline int
futex_unlock_pi(u_int32_t *uaddr, int opflags)
{
return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
}

/**
* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
* @nr_wake: wake up to this many tasks
Expand Down
2 changes: 2 additions & 0 deletions tools/perf/builtin-bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ static struct bench futex_benchmarks[] = {
{ "wake", "Benchmark for futex wake calls", bench_futex_wake },
{ "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel },
{ "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
/* pi-futexes */
{ "lock-pi", "Benchmark for futex lock_pi calls", bench_futex_lock_pi },
{ "all", "Test all futex benchmarks", NULL },
{ NULL, NULL, NULL }
};
Expand Down

0 comments on commit d2f3f5d

Please sign in to comment.