diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 3db4866d7880f..7c8dc0443d6a4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -813,12 +813,10 @@ static void comp_irqs_release_pci(struct mlx5_core_dev *dev)
 static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
-	const struct cpumask *prev = cpu_none_mask;
-	const struct cpumask *mask;
 	int ncomp_eqs;
 	u16 *cpus;
 	int ret;
-	int cpu;
+	int cpu, hop;
 	int i;
 
 	ncomp_eqs = table->num_comp_eqs;
@@ -828,15 +826,11 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
 
 	i = 0;
 	rcu_read_lock();
-	for_each_numa_hop_mask(mask, dev->priv.numa_node) {
-		for_each_cpu_andnot(cpu, mask, prev) {
-			cpus[i] = cpu;
-			if (++i == ncomp_eqs)
-				goto spread_done;
-		}
-		prev = mask;
+	for_each_numa_online_cpu(cpu, hop, dev->priv.numa_node) {
+		cpus[i] = cpu;
+		if (++i == ncomp_eqs)
+			break;
 	}
-spread_done:
 	rcu_read_unlock();
 	ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
 	kfree(cpus);
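The mlx5 hunk above is the flagship user of the new iterator: instead of walking hop masks and manually subtracting the previously visited mask, the driver asks for online CPUs in NUMA-locality order and stops after ncomp_eqs of them. A minimal sketch of the same pattern for a generic driver follows; my_spread_irqs() and its @nvec parameter are illustrative and not part of this patch.

/* Sketch: collect up to @nvec online CPUs, nearest NUMA nodes first. */
static int my_spread_irqs(int node, u16 *cpus, int nvec)
{
	unsigned int cpu, hop;
	int i = 0;

	rcu_read_lock();	/* sched_domains_numa_masks is RCU-protected */
	for_each_numa_online_cpu(cpu, hop, node) {
		cpus[i] = cpu;
		if (++i == nvec)
			break;
	}
	rcu_read_unlock();

	return i;	/* may be less than @nvec on small systems */
}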
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 0d2e2a38b92d0..f10fb87d49dbe 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -175,8 +175,8 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
 
 /**
  * cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
+ * @srcp1: the first input
+ * @srcp2: the second input
  *
  * Returns >= nr_cpu_ids if no cpus set in both.  See also cpumask_next_and().
  */
@@ -1197,6 +1197,10 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
 /**
  * cpumap_print_list_to_buf - copies the cpumask into the buffer as
  *	comma-separated list of cpus
+ * @buf: the buffer to copy into
+ * @mask: the cpumask to copy
+ * @off: in the string from which we are copying, we copy to @buf
+ * @count: the maximum number of bytes to print
  *
  * Everything is same with the above cpumap_print_bitmask_to_buf()
  * except the print format.
diff --git a/include/linux/find.h b/include/linux/find.h
index 5e4f39ef2e72c..90b68d76c0737 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -16,6 +16,9 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l
 				    unsigned long nbits, unsigned long start);
 unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
 				unsigned long nbits, unsigned long start);
+unsigned long _find_next_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+					const unsigned long *addr3, unsigned long nbits,
+					unsigned long start);
 unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
 				  unsigned long start);
 extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
@@ -159,6 +162,40 @@ unsigned long find_next_or_bit(const unsigned long *addr1,
 }
 #endif
 
+#ifndef find_next_and_andnot_bit
+/**
+ * find_next_and_andnot_bit - find the next bit set in *addr1 and *addr2,
+ *			      excluding all the bits in *addr3
+ * @addr1: The first address to base the search on
+ * @addr2: The second address to base the search on
+ * @addr3: The third address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Return: the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+static __always_inline
+unsigned long find_next_and_andnot_bit(const unsigned long *addr1,
+				       const unsigned long *addr2,
+				       const unsigned long *addr3,
+				       unsigned long size,
+				       unsigned long offset)
+{
+	if (small_const_nbits(size)) {
+		unsigned long val;
+
+		if (unlikely(offset >= size))
+			return size;
+
+		val = *addr1 & *addr2 & ~*addr3 & GENMASK(size - 1, offset);
+		return val ? __ffs(val) : size;
+	}
+
+	return _find_next_and_andnot_bit(addr1, addr2, addr3, size, offset);
+}
+#endif
+
 #ifndef find_next_zero_bit
 /**
  * find_next_zero_bit - find the next cleared bit in a memory region
@@ -568,6 +605,12 @@ unsigned long find_next_bit_le(const void *addr, unsigned
 		(bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
 		(bit)++)
 
+#define for_each_and_andnot_bit(bit, addr1, addr2, addr3, size)	\
+	for ((bit) = 0;							\
+	     (bit) = find_next_and_andnot_bit((addr1), (addr2), (addr3), (size), (bit)),\
+	     (bit) < (size);						\
+	     (bit)++)
+
 #define for_each_or_bit(bit, addr1, addr2, size) \
 	for ((bit) = 0;	\
 	     (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
 	     (bit)++)
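The new find_next_and_andnot_bit() returns the next set bit of *addr1 & *addr2 & ~*addr3, and for_each_and_andnot_bit() wraps it into a loop. A small illustration with invented bitmap contents (the values and the example() function are made up for this sketch):

/* Sketch: visit bits set in both @a and @b but absent from @c. */
static void example(void)
{
	DECLARE_BITMAP(a, 8) = { 0xed };	/* 1110 1101: bits 0,2,3,5,6,7 */
	DECLARE_BITMAP(b, 8) = { 0x3f };	/* 0011 1111: bits 0-5 */
	DECLARE_BITMAP(c, 8) = { 0x01 };	/* bit 0 */
	unsigned long bit;

	/* (a & b) = bits 0,2,3,5; excluding c leaves bits 2, 3 and 5 */
	for_each_and_andnot_bit(bit, a, b, c, 8)
		pr_info("bit %lu\n", bit);
}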
diff --git a/include/linux/topology.h b/include/linux/topology.h
index fea32377f7c77..c5a9626521663 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -245,38 +245,49 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu)
 	return cpumask_of_node(cpu_to_node(cpu));
 }
 
+/*
+ * The sched_numa_find_*_cpu() family of functions traverses only accessible
+ * CPUs, i.e. those listed in cpu_online_mask.
+ */
 #ifdef CONFIG_NUMA
 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node);
-extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops);
+int sched_numa_find_next_cpu(const struct cpumask *cpus, int cpu, int node, unsigned int *hop);
 #else
 static __always_inline
 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
 {
-	return cpumask_nth(cpu, cpus);
+	return cpumask_nth_and(cpu, cpus, cpu_online_mask);
 }
 
-static inline const struct cpumask *
-sched_numa_hop_mask(unsigned int node, unsigned int hops)
+static __always_inline
+int sched_numa_find_next_cpu(const struct cpumask *cpus, int cpu, int node, unsigned int *hop)
 {
-	return ERR_PTR(-EOPNOTSUPP);
+	return find_next_and_bit(cpumask_bits(cpus), cpumask_bits(cpu_online_mask),
+				 small_cpumask_bits, cpu);
 }
 #endif /* CONFIG_NUMA */
 
 /**
- * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
- *                          from a given node.
- * @mask: the iteration variable.
+ * for_each_numa_cpu - iterate over cpus in increasing order taking into account
+ *		       NUMA distances from a given node.
+ * @cpu: the (optionally unsigned) integer iterator
+ * @hop: the iterator variable for hops, i.e. proximity order to @node
  * @node: the NUMA node to start the search from.
+ * @mask: the cpumask pointer
  *
- * Requires rcu_lock to be held.
+ * When considered as a replacement for for_each_cpu(), the following should be
+ * taken into consideration:
+ * - Only accessible (i.e. online) CPUs are enumerated.
+ * - CPU enumeration may not be a monotonically increasing sequence.
  *
- * Yields cpu_online_mask for @node == NUMA_NO_NODE.
+ * rcu_read_lock() must be held.
 */
-#define for_each_numa_hop_mask(mask, node)				\
-	for (unsigned int __hops = 0;					\
-	     mask = (node != NUMA_NO_NODE || __hops) ?			\
-		     sched_numa_hop_mask(node, __hops) :		\
-		     cpu_online_mask,					\
-	     !IS_ERR_OR_NULL(mask);					\
-	     __hops++)
+#define for_each_numa_cpu(cpu, hop, node, mask)				\
+	for ((cpu) = 0, (hop) = 0;					\
+	     (cpu) = sched_numa_find_next_cpu((mask), (cpu), (node), &(hop)),\
+	     (cpu) < nr_cpu_ids;					\
+	     (cpu)++)
+
+#define for_each_numa_online_cpu(cpu, hop, node)			\
+	for_each_numa_cpu(cpu, hop, node, cpu_online_mask)
 
 #endif /* _LINUX_TOPOLOGY_H */
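for_each_numa_cpu() enumerates CPUs from @mask hop by hop: all matching CPUs of the local node first (hop == 0), then CPUs one distance level further away (hop == 1), and so on. A hedged sketch of what a walk from node 0 would observe on a hypothetical two-node machine with CPUs 0-3 on node 0 and CPUs 4-7 on node 1, all online and all present in @mask:

static void walk_from_node0(const struct cpumask *mask)
{
	unsigned int cpu, hop;

	rcu_read_lock();
	for_each_numa_cpu(cpu, hop, 0, mask) {
		/* Prints CPUs 0-3 with hop == 0, then CPUs 4-7 with hop == 1 */
		pr_info("cpu %u at hop %u\n", cpu, hop);
	}
	rcu_read_unlock();
}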
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 05a5bc678c089..b128380208bbe 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2144,37 +2144,44 @@ int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
 }
 EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu);
 
-/**
- * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
- *                         @node
- * @node: The node to count hops from.
- * @hops: Include CPUs up to that many hops away. 0 means local node.
- *
- * Return: On success, a pointer to a cpumask of CPUs at most @hops away from
- * @node, an error value otherwise.
- *
- * Requires rcu_lock to be held. Returned cpumask is only valid within that
- * read-side section, copy it if required beyond that.
- *
- * Note that not all hops are equal in distance; see sched_init_numa() for how
- * distances and masks are handled.
- * Also note that this is a reflection of sched_domains_numa_masks, which may change
- * during the lifetime of the system (offline nodes are taken out of the masks).
+/*
+ * sched_numa_find_next_cpu() - given the NUMA topology, find the next cpu
+ * cpus: cpumask to find a CPU from
+ * cpu: current CPU
+ * node: local node
+ * hop: (in/out) indicates distance order of current CPU to a local node
+ *
+ * The function searches for the next CPU at a given NUMA distance, indicated
+ * by @hop, and if nothing is found, tries to find CPUs at a greater distance,
+ * starting from the beginning.
+ *
+ * Return: cpu, or >= nr_cpu_ids when nothing is found.
  */
-const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops)
+int sched_numa_find_next_cpu(const struct cpumask *cpus, int cpu, int node, unsigned int *hop)
 {
+	unsigned long *cur, *prev;
 	struct cpumask ***masks;
+	unsigned int ret;
 
-	if (node >= nr_node_ids || hops >= sched_domains_numa_levels)
-		return ERR_PTR(-EINVAL);
+	if (*hop >= sched_domains_numa_levels)
+		return nr_cpu_ids;
 
 	masks = rcu_dereference(sched_domains_numa_masks);
-	if (!masks)
-		return ERR_PTR(-EBUSY);
+	cur = cpumask_bits(masks[*hop][node]);
+	if (*hop == 0)
+		ret = find_next_and_bit(cpumask_bits(cpus), cur, nr_cpu_ids, cpu);
+	else {
+		prev = cpumask_bits(masks[*hop - 1][node]);
+		ret = find_next_and_andnot_bit(cpumask_bits(cpus), cur, prev, nr_cpu_ids, cpu);
+	}
+
+	if (ret < nr_cpu_ids)
+		return ret;
 
-	return masks[hops][node];
+	*hop += 1;
+	return sched_numa_find_next_cpu(cpus, 0, node, hop);
 }
-EXPORT_SYMBOL_GPL(sched_numa_hop_mask);
+EXPORT_SYMBOL_GPL(sched_numa_find_next_cpu);
 #endif /* CONFIG_NUMA */
diff --git a/lib/Makefile b/lib/Makefile
index 42d307ade225e..1ffae65bb7eed 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -82,7 +82,13 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_SCANF) += test_scanf.o
+
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
+ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy)
+# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled
+GCOV_PROFILE_test_bitmap.o := n
+endif
+
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
diff --git a/lib/cpumask.c b/lib/cpumask.c
index de356f16773a0..357b68be526b1 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -45,6 +45,7 @@ EXPORT_SYMBOL(cpumask_next_wrap);
 * alloc_cpumask_var_node - allocate a struct cpumask on a given node
 * @mask: pointer to cpumask_var_t where the cpumask is returned
 * @flags: GFP_ flags
+ * @node: memory node from which to allocate or %NUMA_NO_NODE
 *
 * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
 * a nop returning a constant 1 (in <linux/cpumask.h>)
@@ -127,11 +128,8 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 *
 * There's a better alternative based on for_each()-like iterators:
 *
- *	for_each_numa_hop_mask(mask, node) {
- *		for_each_cpu_andnot(cpu, mask, prev)
- *			do_something(cpu);
- *		prev = mask;
- *	}
+ *	for_each_numa_online_cpu(cpu, hop, node)
+ *		do_something(cpu);
 *
 * It's simpler and more verbose than above. Complexity of iterator-based
 * enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while
@@ -157,7 +155,9 @@ EXPORT_SYMBOL(cpumask_local_spread);
 static DEFINE_PER_CPU(int, distribute_cpu_mask_prev);
 
 /**
- * cpumask_any_and_distribute - Return an arbitrary cpu within srcp1 & srcp2.
+ * cpumask_any_and_distribute - Return an arbitrary cpu within src1p & src2p.
+ * @src1p: first &cpumask for intersection
+ * @src2p: second &cpumask for intersection
 *
 * Iterated calls using the same srcp1 and srcp2 will be distributed within
 * their intersection.
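The updated lib/cpumask.c comment contrasts the iterator with calling cpumask_local_spread() once per index, which re-walks the topology every time and picks up an extra log(nr_cpu_ids) factor. The discouraged pattern looks roughly like this; do_something() is a hypothetical consumer, not part of this patch:

extern void do_something(unsigned int cpu);	/* hypothetical */

/* Discouraged: each call is O(levels * nr_cpu_ids * log(nr_cpu_ids)) */
static void spread_slowly(int node)
{
	unsigned int i;

	for (i = 0; i < num_online_cpus(); i++)
		do_something(cpumask_local_spread(i, node));
}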
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 32f99e9a670e6..4403e00890b13 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -182,6 +182,18 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l
 EXPORT_SYMBOL(_find_next_andnot_bit);
 #endif
 
+#ifndef find_next_and_andnot_bit
+unsigned long _find_next_and_andnot_bit(const unsigned long *addr1,
+					const unsigned long *addr2,
+					const unsigned long *addr3,
+					unsigned long nbits,
+					unsigned long start)
+{
+	return FIND_NEXT_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], /* nop */, nbits, start);
+}
+EXPORT_SYMBOL(_find_next_and_andnot_bit);
+#endif
+
 #ifndef find_next_or_bit
 unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
 				unsigned long nbits, unsigned long start)
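FIND_NEXT_BIT() in lib/find_bit.c takes a per-word expression, with idx as the implicit word index, plus a fixup step (a no-op here, used by the little-endian variants). As a rough mental model only, not the kernel's actual macro expansion, the new helper behaves like:

/* Simplified model of _find_next_and_andnot_bit(); not the real expansion */
static unsigned long model_find(const unsigned long *a1, const unsigned long *a2,
				const unsigned long *a3, unsigned long nbits,
				unsigned long start)
{
	unsigned long idx, val;

	for (idx = start / BITS_PER_LONG; idx * BITS_PER_LONG < nbits; idx++) {
		val = a1[idx] & a2[idx] & ~a3[idx];
		if (idx == start / BITS_PER_LONG)	/* clip bits below @start */
			val &= BITMAP_FIRST_WORD_MASK(start);
		if (val)
			return min(idx * BITS_PER_LONG + __ffs(val), nbits);
	}

	return nbits;
}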
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 187f5b2db4cf1..08aed2e3bb238 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -12,6 +12,7 @@
 #include <linux/printk.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/topology.h>
 #include <linux/uaccess.h>
 
 #include "../tools/testing/selftests/kselftest_module.h"
@@ -71,6 +72,16 @@ __check_eq_uint(const char *srcfile, unsigned int line,
 	return true;
 }
 
+static bool __init
+__check_ge_uint(const char *srcfile, unsigned int line,
+		const unsigned int exp_uint, unsigned int x)
+{
+	if (exp_uint >= x)
+		return true;
+
+	pr_err("[%s:%u] expected >= %u, got %u\n", srcfile, line, exp_uint, x);
+	return false;
+}
 
 static bool __init
 __check_eq_bitmap(const char *srcfile, unsigned int line,
@@ -86,6 +97,18 @@ __check_eq_bitmap(const char *srcfile, unsigned int line,
 	return true;
 }
 
+static bool __init
+__check_eq_cpumask(const char *srcfile, unsigned int line,
+		   const struct cpumask *exp_cpumask, const struct cpumask *cpumask)
+{
+	if (cpumask_equal(exp_cpumask, cpumask))
+		return true;
+
+	pr_warn("[%s:%u] cpumasks contents differ: expected \"%*pbl\", got \"%*pbl\"\n",
+		srcfile, line, cpumask_pr_args(exp_cpumask), cpumask_pr_args(cpumask));
+	return false;
+}
+
 static bool __init
 __check_eq_pbl(const char *srcfile, unsigned int line,
 	       const char *expected_pbl,
@@ -173,11 +196,11 @@ __check_eq_str(const char *srcfile, unsigned int line,
 	return eq;
 }
 
-#define __expect_eq(suffix, ...)					\
+#define __expect(suffix, ...)						\
 ({									\
 	int result = 0;							\
 	total_tests++;							\
-	if (!__check_eq_ ## suffix(__FILE__, __LINE__,			\
+	if (!__check_ ## suffix(__FILE__, __LINE__,			\
 				   ##__VA_ARGS__)) {			\
 		failed_tests++;						\
 		result = 1;						\
 	}								\
 	result;								\
 })
@@ -185,13 +208,19 @@ __check_eq_str(const char *srcfile, unsigned int line,
 	result;								\
 })
 
+#define __expect_eq(suffix, ...) __expect(eq_ ## suffix, ##__VA_ARGS__)
+#define __expect_ge(suffix, ...) __expect(ge_ ## suffix, ##__VA_ARGS__)
+
 #define expect_eq_uint(...)		__expect_eq(uint, ##__VA_ARGS__)
 #define expect_eq_bitmap(...)		__expect_eq(bitmap, ##__VA_ARGS__)
+#define expect_eq_cpumask(...)		__expect_eq(cpumask, ##__VA_ARGS__)
 #define expect_eq_pbl(...)		__expect_eq(pbl, ##__VA_ARGS__)
 #define expect_eq_u32_array(...)	__expect_eq(u32_array, ##__VA_ARGS__)
 #define expect_eq_clump8(...)		__expect_eq(clump8, ##__VA_ARGS__)
 #define expect_eq_str(...)		__expect_eq(str, ##__VA_ARGS__)
 
+#define expect_ge_uint(...)		__expect_ge(uint, ##__VA_ARGS__)
+
 static void __init test_zero_clear(void)
 {
 	DECLARE_BITMAP(bmap, 1024);
@@ -763,6 +792,42 @@ static void __init test_for_each_set_bit_wrap(void)
 	}
 }
 
+static void __init test_for_each_numa_cpu(void)
+{
+	unsigned int node, cpu, hop;
+	cpumask_var_t mask;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL)) {
+		pr_err("Can't allocate cpumask. Skipping for_each_numa_cpu() test\n");
+		return;
+	}
+
+	for_each_node(node) {
+		unsigned int c = 0, dist, old_dist = node_distance(node, node);
+
+		cpumask_clear(mask);
+
+		rcu_read_lock();
+		for_each_numa_cpu(cpu, hop, node, cpu_possible_mask) {
+			dist = node_distance(cpu_to_node(cpu), node);
+
+			/* Distance between nodes must never decrease */
+			expect_ge_uint(dist, old_dist);
+
+			/* Test for coherence with cpumask_local_spread() */
+			expect_eq_uint(cpumask_local_spread(c++, node), cpu);
+
+			cpumask_set_cpu(cpu, mask);
+			old_dist = dist;
+		}
+		rcu_read_unlock();
+
+		/* Each online CPU must be visited exactly once */
+		expect_eq_uint(c, num_online_cpus());
+		expect_eq_cpumask(mask, cpu_online_mask);
+	}
+}
+
 static void __init test_for_each_set_bit(void)
 {
 	DECLARE_BITMAP(orig, 500);
@@ -1161,6 +1226,10 @@ static void __init test_bitmap_print_buf(void)
 	}
 }
 
+/*
+ * FIXME: Clang breaks compile-time evaluations when KASAN and GCOV are enabled.
+ * To work around it, GCOV is force-disabled in the Makefile for this configuration.
+ */
 static void __init test_bitmap_const_eval(void)
 {
 	DECLARE_BITMAP(bitmap, BITS_PER_LONG);
@@ -1186,11 +1255,7 @@ static void __init test_bitmap_const_eval(void)
 	 * the compiler is fixed.
 	 */
 	bitmap_clear(bitmap, 0, BITS_PER_LONG);
-#if defined(__s390__) && defined(__clang__)
-	if (!const_test_bit(7, bitmap))
-#else
 	if (!test_bit(7, bitmap))
-#endif
 		bitmap_set(bitmap, 5, 2);
 
 	/* Equals to `unsigned long bitopvar = BIT(20)` */
@@ -1249,6 +1314,7 @@ static void __init selftest(void)
 	test_for_each_clear_bitrange_from();
 	test_for_each_set_clump8();
 	test_for_each_set_bit_wrap();
+	test_for_each_numa_cpu();
 }
 
 KSTM_MODULE_LOADERS(test_bitmap);
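With comparisons routed through the generic __expect() wrapper, adding a new flavor is a matter of one checker plus two one-line macros. For instance, a hypothetical less-or-equal variant, not part of this patch, would follow the same recipe:

/* Hypothetical: a less-or-equal checker in the style of __check_ge_uint() */
static bool __init
__check_le_uint(const char *srcfile, unsigned int line,
		const unsigned int exp_uint, unsigned int x)
{
	if (exp_uint <= x)
		return true;

	pr_err("[%s:%u] expected <= %u, got %u\n", srcfile, line, exp_uint, x);
	return false;
}

#define __expect_le(suffix, ...)	__expect(le_ ## suffix, ##__VA_ARGS__)
#define expect_le_uint(...)		__expect_le(uint, ##__VA_ARGS__)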