Merge tag 'perf-tools-fixes-for-v5.18-2022-04-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux

Pull perf tools fixes from Arnaldo Carvalho de Melo:

 - 'perf record --per-thread' mode doesn't have the CPU mask setup, so
   it can't be used to figure out the number of mmaps, fix it.

 - Fix segfault accessing sample_id xyarray out of bounds, noticed while
   using Intel PT where we have a dummy event to capture text poke perf
   metadata events and we mix up the set of CPUs specified by the user
   with the all CPUs map needed for text poke.

 - Fix 'perf bench numa' to check if CPU used to bind task is online.

 - Fix 'perf bench numa' usage of affinity for machines with more than
   1000 CPUs.

 - Fix misleading add event PMU debug message, noticed while using the
   'intel_pt' PMU.

 - Fix error check return value of hashmap__new() in 'perf stat', it
   must use IS_ERR().
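
A minimal sketch of the check that last fix calls for, since its hunk is not among the file diffs shown below. This assumes the libbpf-style hashmap API carried in tools/perf, where hashmap__new() reports allocation failure with ERR_PTR(-ENOMEM) rather than NULL; the callback names are placeholders, not the ones used in the actual perf stat code:

	/* Illustrative only: a NULL test would never fire here. */
	struct hashmap *map = hashmap__new(my_hash_fn, my_equal_fn, NULL);

	if (IS_ERR(map))
		return -ENOMEM;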

* tag 'perf-tools-fixes-for-v5.18-2022-04-14' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
  perf bench: Fix numa bench to fix usage of affinity for machines with #CPUs > 1K
  perf bench: Fix numa testcase to check if CPU used to bind task is online
  perf record: Fix per-thread option
  perf tools: Fix segfault accessing sample_id xyarray
  perf stat: Fix error check return value of hashmap__new(), must use IS_ERR()
  perf tools: Fix misleading add event PMU debug message
Linus Torvalds committed Apr 15, 2022
2 parents 028192f + f58faed commit e2dec48
Showing 7 changed files with 176 additions and 45 deletions.
3 changes: 1 addition & 2 deletions tools/lib/perf/evlist.c
@@ -577,7 +577,6 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
{
struct perf_evsel *evsel;
const struct perf_cpu_map *cpus = evlist->user_requested_cpus;
const struct perf_thread_map *threads = evlist->threads;

if (!ops || !ops->get || !ops->mmap)
return -EINVAL;
@@ -589,7 +588,7 @@ int perf_evlist__mmap_ops(struct perf_evlist *evlist,
perf_evlist__for_each_entry(evlist, evsel) {
if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
evsel->sample_id == NULL &&
perf_evsel__alloc_id(evsel, perf_cpu_map__nr(cpus), threads->nr) < 0)
perf_evsel__alloc_id(evsel, evsel->fd->max_x, evsel->fd->max_y) < 0)
return -ENOMEM;
}

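As a side note on the evlist.c hunk above, a rough sketch of why the dimensions now come from the fd xyarray; the shapes below are assumptions about the tools/lib/perf internals of that era, not quotes from the tree:

	/*
	 * Sketch: evsel->fd and evsel->sample_id are both xyarrays
	 * indexed by the same (cpu index, thread index) pair, roughly
	 *
	 *	evsel->fd        = xyarray__new(ncpus, nthreads, sizeof(int));
	 *	evsel->sample_id = xyarray__new(ncpus, nthreads,
	 *					sizeof(struct perf_sample_id));
	 *
	 * Sizing sample_id from user_requested_cpus and evlist->threads
	 * can undershoot the dimensions fd was actually opened with
	 * (e.g. the all-CPUs map used for the text poke dummy event),
	 * which is how the out-of-bounds access happened; fd->max_x and
	 * fd->max_y are the dimensions fd was really allocated with.
	 */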
136 changes: 101 additions & 35 deletions tools/perf/bench/numa.c
@@ -34,6 +34,7 @@
#include <linux/numa.h>
#include <linux/zalloc.h>

#include "../util/header.h"
#include <numa.h>
#include <numaif.h>

@@ -54,7 +55,7 @@

struct thread_data {
int curr_cpu;
cpu_set_t bind_cpumask;
cpu_set_t *bind_cpumask;
int bind_node;
u8 *process_data;
int process_nr;
@@ -266,71 +267,115 @@ static bool node_has_cpus(int node)
return ret;
}

static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t *bind_to_cpu(int target_cpu)
{
cpu_set_t orig_mask, mask;
int ret;
int nrcpus = numa_num_possible_cpus();
cpu_set_t *orig_mask, *mask;
size_t size;

ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
orig_mask = CPU_ALLOC(nrcpus);
BUG_ON(!orig_mask);
size = CPU_ALLOC_SIZE(nrcpus);
CPU_ZERO_S(size, orig_mask);

if (sched_getaffinity(0, size, orig_mask))
goto err_out;

mask = CPU_ALLOC(nrcpus);
if (!mask)
goto err_out;

CPU_ZERO(&mask);
CPU_ZERO_S(size, mask);

if (target_cpu == -1) {
int cpu;

for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
CPU_SET_S(cpu, size, mask);
} else {
BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
CPU_SET(target_cpu, &mask);
if (target_cpu < 0 || target_cpu >= g->p.nr_cpus)
goto err;

CPU_SET_S(target_cpu, size, mask);
}

ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
if (sched_setaffinity(0, size, mask))
goto err;

return orig_mask;

err:
CPU_FREE(mask);
err_out:
CPU_FREE(orig_mask);

/* BUG_ON due to failure in allocation of orig_mask/mask */
BUG_ON(-1);
}

static cpu_set_t bind_to_node(int target_node)
static cpu_set_t *bind_to_node(int target_node)
{
cpu_set_t orig_mask, mask;
int nrcpus = numa_num_possible_cpus();
size_t size;
cpu_set_t *orig_mask, *mask;
int cpu;
int ret;

ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
orig_mask = CPU_ALLOC(nrcpus);
BUG_ON(!orig_mask);
size = CPU_ALLOC_SIZE(nrcpus);
CPU_ZERO_S(size, orig_mask);

CPU_ZERO(&mask);
if (sched_getaffinity(0, size, orig_mask))
goto err_out;

mask = CPU_ALLOC(nrcpus);
if (!mask)
goto err_out;

CPU_ZERO_S(size, mask);

if (target_node == NUMA_NO_NODE) {
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
CPU_SET_S(cpu, size, mask);
} else {
struct bitmask *cpumask = numa_allocate_cpumask();

BUG_ON(!cpumask);
if (!cpumask)
goto err;

if (!numa_node_to_cpus(target_node, cpumask)) {
for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
if (numa_bitmask_isbitset(cpumask, cpu))
CPU_SET(cpu, &mask);
CPU_SET_S(cpu, size, mask);
}
}
numa_free_cpumask(cpumask);
}

ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
if (sched_setaffinity(0, size, mask))
goto err;

return orig_mask;

err:
CPU_FREE(mask);
err_out:
CPU_FREE(orig_mask);

/* BUG_ON due to failure in allocation of orig_mask/mask */
BUG_ON(-1);
}

static void bind_to_cpumask(cpu_set_t mask)
static void bind_to_cpumask(cpu_set_t *mask)
{
int ret;
size_t size = CPU_ALLOC_SIZE(numa_num_possible_cpus());

ret = sched_setaffinity(0, sizeof(mask), &mask);
BUG_ON(ret);
ret = sched_setaffinity(0, size, mask);
if (ret) {
CPU_FREE(mask);
BUG_ON(ret);
}
}

static void mempol_restore(void)
@@ -376,7 +421,7 @@ do { \
static u8 *alloc_data(ssize_t bytes0, int map_flags,
int init_zero, int init_cpu0, int thp, int init_random)
{
cpu_set_t orig_mask;
cpu_set_t *orig_mask = NULL;
ssize_t bytes;
u8 *buf;
int ret;
@@ -434,6 +479,7 @@ static u8 *alloc_data(ssize_t bytes0, int map_flags,
/* Restore affinity: */
if (init_cpu0) {
bind_to_cpumask(orig_mask);
CPU_FREE(orig_mask);
mempol_restore();
}

@@ -585,10 +631,16 @@ static int parse_setup_cpu_list(void)
return -1;
}

if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
return -1;
}

BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
BUG_ON(bind_cpu_0 > bind_cpu_1);

for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
size_t size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int i;

for (i = 0; i < mul; i++) {
@@ -608,10 +660,15 @@ static int parse_setup_cpu_list(void)
tprintf("%2d", bind_cpu);
}

CPU_ZERO(&td->bind_cpumask);
td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
BUG_ON(!td->bind_cpumask);
CPU_ZERO_S(size, td->bind_cpumask);
for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
CPU_SET(cpu, &td->bind_cpumask);
if (cpu < 0 || cpu >= g->p.nr_cpus) {
CPU_FREE(td->bind_cpumask);
BUG_ON(-1);
}
CPU_SET_S(cpu, size, td->bind_cpumask);
}
t++;
}
@@ -752,8 +809,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return parse_node_list(arg);
}

#define BIT(x) (1ul << x)

static inline uint32_t lfsr_32(uint32_t lfsr)
{
const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
@@ -1241,7 +1296,7 @@ static void *worker_thread(void *__tdata)
* by migrating to CPU#0:
*/
if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
cpu_set_t orig_mask;
cpu_set_t *orig_mask;
int target_cpu;
int this_cpu;

@@ -1265,6 +1320,7 @@
printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);

bind_to_cpumask(orig_mask);
CPU_FREE(orig_mask);
}

if (details >= 3) {
@@ -1398,21 +1454,31 @@ static void init_thread_data(void)

for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
size_t cpuset_size = CPU_ALLOC_SIZE(g->p.nr_cpus);
int cpu;

/* Allow all nodes by default: */
td->bind_node = NUMA_NO_NODE;

/* Allow all CPUs by default: */
CPU_ZERO(&td->bind_cpumask);
td->bind_cpumask = CPU_ALLOC(g->p.nr_cpus);
BUG_ON(!td->bind_cpumask);
CPU_ZERO_S(cpuset_size, td->bind_cpumask);
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &td->bind_cpumask);
CPU_SET_S(cpu, cpuset_size, td->bind_cpumask);
}
}

static void deinit_thread_data(void)
{
ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
int t;

/* Free the bind_cpumask allocated for thread_data */
for (t = 0; t < g->p.nr_tasks; t++) {
struct thread_data *td = g->threads + t;
CPU_FREE(td->bind_cpumask);
}

free_data(g->threads, size);
}
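To make the affinity changes above easier to follow, here is a small standalone sketch of the glibc dynamically sized CPU set API that bench/numa.c migrates to (a demo program, not part of the patch; the 2048-CPU count is an arbitrary assumption):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	int nrcpus = 2048;			/* assume >1024 possible CPUs */
	size_t size = CPU_ALLOC_SIZE(nrcpus);	/* bytes needed for nrcpus bits */
	cpu_set_t *mask = CPU_ALLOC(nrcpus);

	if (!mask)
		return 1;

	CPU_ZERO_S(size, mask);
	CPU_SET_S(1500, size, mask);	/* the fixed-size CPU_SET() tops out at CPU 1023 */

	/* The _S macros and sched_*affinity() take the allocated size,
	 * not sizeof(cpu_set_t), which only covers 1024 CPUs. */
	if (sched_setaffinity(0, size, mask))
		perror("sched_setaffinity");

	CPU_FREE(mask);
	return 0;
}

The same CPU_ALLOC()/CPU_ALLOC_SIZE()/_S-macro/CPU_FREE() pattern is what replaces the fixed-size cpu_set_t copies and sizeof(mask) arguments throughout the hunks above.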
22 changes: 17 additions & 5 deletions tools/perf/builtin-record.c
@@ -989,8 +989,11 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
struct mmap *overwrite_mmap = evlist->overwrite_mmap;
struct perf_cpu_map *cpus = evlist->core.user_requested_cpus;

thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
thread_data->mask->maps.nbits);
if (cpu_map__is_dummy(cpus))
thread_data->nr_mmaps = nr_mmaps;
else
thread_data->nr_mmaps = bitmap_weight(thread_data->mask->maps.bits,
thread_data->mask->maps.nbits);
if (mmap) {
thread_data->maps = zalloc(thread_data->nr_mmaps * sizeof(struct mmap *));
if (!thread_data->maps)
@@ -1007,16 +1010,17 @@ static int record__thread_data_init_maps(struct record_thread *thread_data, stru
thread_data->nr_mmaps, thread_data->maps, thread_data->overwrite_maps);

for (m = 0, tm = 0; m < nr_mmaps && tm < thread_data->nr_mmaps; m++) {
if (test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
if (cpu_map__is_dummy(cpus) ||
test_bit(cpus->map[m].cpu, thread_data->mask->maps.bits)) {
if (thread_data->maps) {
thread_data->maps[tm] = &mmap[m];
pr_debug2("thread_data[%p]: cpu%d: maps[%d] -> mmap[%d]\n",
thread_data, cpus->map[m].cpu, tm, m);
thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
}
if (thread_data->overwrite_maps) {
thread_data->overwrite_maps[tm] = &overwrite_mmap[m];
pr_debug2("thread_data[%p]: cpu%d: ow_maps[%d] -> ow_mmap[%d]\n",
thread_data, cpus->map[m].cpu, tm, m);
thread_data, perf_cpu_map__cpu(cpus, m).cpu, tm, m);
}
tm++;
}
@@ -3329,6 +3333,9 @@ static void record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_c
{
int c;

if (cpu_map__is_dummy(cpus))
return;

for (c = 0; c < cpus->nr; c++)
set_bit(cpus->map[c].cpu, mask->bits);
}
@@ -3680,6 +3687,11 @@ static int record__init_thread_masks(struct record *rec)
if (!record__threads_enabled(rec))
return record__init_thread_default_masks(rec, cpus);

if (cpu_map__is_dummy(cpus)) {
pr_err("--per-thread option is mutually exclusive to parallel streaming mode.\n");
return -EINVAL;
}

switch (rec->opts.threads_spec) {
case THREAD_SPEC__CPU:
ret = record__init_thread_cpu_masks(rec, cpus);
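As background for the cpu_map__is_dummy() checks added in this file: with 'perf record --per-thread' the evlist's user-requested CPU map is a "dummy" map holding a single -1 entry meaning "any CPU", so there are no per-CPU mask bits to count and the code falls back to the plain mmap count. A sketch of the assumed helper, based on the v5.18-era tools/perf sources rather than this diff:

static inline bool cpu_map__is_dummy(struct perf_cpu_map *cpus)
{
	/* --per-thread: a single entry with cpu == -1, i.e. "any CPU" */
	return perf_cpu_map__nr(cpus) == 1 &&
	       perf_cpu_map__cpu(cpus, 0).cpu == -1;
}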
