Skip to content

Commit

Permalink
Merge branch 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu
Browse files Browse the repository at this point in the history

Pull percpu updates from Dennis Zhou:
 "Percpu had a cleanup come in that makes use of the cpu bitmask helpers
  instead of the current iterative approach.

  This cleanup then interacted badly with a change in clang's inlining
  sensitivity: not every call site was inlined, so modpost reported a
  section mismatch because the percpu setup code is marked __init.

  That was fixed by introducing __flatten to compiler_attributes.h"

* 'for-5.12' of git://git.kernel.org/pub/scm/linux/kernel/git/dennis/percpu:
  percpu: fix clang modpost section mismatch
  percpu: reduce the number of cpu distance comparisons
  • Loading branch information
Linus Torvalds committed Feb 23, 2021
2 parents 5cf0fd5 + 258e081 commit aa8e329
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 15 deletions.
6 changes: 6 additions & 0 deletions include/linux/compiler_attributes.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@
# define fallthrough do {} while (0) /* fallthrough */
#endif

/*
 * Shorthand for the compiler's "flatten" function attribute. Per the
 * documentation linked below, it asks the compiler to inline the calls
 * made inside the annotated function where possible, rather than leaving
 * that to the inlining heuristics.
 *
 *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes
 * clang: https://clang.llvm.org/docs/AttributeReference.html#flatten
 */
# define __flatten __attribute__((flatten))

/*
* Note the missing underscores.
*
Expand Down
36 changes: 21 additions & 15 deletions mm/percpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/lcm.h>
Expand Down Expand Up @@ -2662,13 +2663,14 @@ early_param("percpu_alloc", percpu_alloc_setup);
* On success, pointer to the new allocation_info is returned. On
* failure, ERR_PTR value is returned.
*/
static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
size_t reserved_size, size_t dyn_size,
size_t atom_size,
pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
{
static int group_map[NR_CPUS] __initdata;
static int group_cnt[NR_CPUS] __initdata;
static struct cpumask mask __initdata;
const size_t static_size = __per_cpu_end - __per_cpu_start;
int nr_groups = 1, nr_units = 0;
size_t size_sum, min_unit_size, alloc_size;
Expand All @@ -2681,6 +2683,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
/* this function may be called multiple times */
memset(group_map, 0, sizeof(group_map));
memset(group_cnt, 0, sizeof(group_cnt));
cpumask_clear(&mask);

/* calculate size_sum and ensure dyn_size is enough for early alloc */
size_sum = PFN_ALIGN(static_size + reserved_size +
Expand All @@ -2702,24 +2705,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
upa--;
max_upa = upa;

cpumask_copy(&mask, cpu_possible_mask);

/* group cpus according to their proximity */
for_each_possible_cpu(cpu) {
group = 0;
next_group:
for_each_possible_cpu(tcpu) {
if (cpu == tcpu)
break;
if (group_map[tcpu] == group && cpu_distance_fn &&
(cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
group++;
nr_groups = max(nr_groups, group + 1);
goto next_group;
}
}
for (group = 0; !cpumask_empty(&mask); group++) {
/* pop the group's first cpu */
cpu = cpumask_first(&mask);
group_map[cpu] = group;
group_cnt[group]++;
cpumask_clear_cpu(cpu, &mask);

for_each_cpu(tcpu, &mask) {
if (!cpu_distance_fn ||
(cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE &&
cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) {
group_map[tcpu] = group;
group_cnt[group]++;
cpumask_clear_cpu(tcpu, &mask);
}
}
}
nr_groups = group;

/*
* Wasted space is caused by a ratio imbalance of upa to group_cnt.
Expand Down

0 comments on commit aa8e329

Please sign in to comment.