Skip to content

Commit

Permalink
Merge branch 'bitmap-for-next' of https://github.com/norov/linux.git
Browse files Browse the repository at this point in the history
  • Loading branch information
Stephen Rothwell committed Aug 1, 2023
2 parents 7f91634 + 94b1547 commit 61d1532
Show file tree
Hide file tree
Showing 9 changed files with 208 additions and 65 deletions.
16 changes: 5 additions & 11 deletions drivers/net/ethernet/mellanox/mlx5/core/eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -813,12 +813,10 @@ static void comp_irqs_release_pci(struct mlx5_core_dev *dev)
static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
const struct cpumask *prev = cpu_none_mask;
const struct cpumask *mask;
int ncomp_eqs;
u16 *cpus;
int ret;
int cpu;
int cpu, hop;
int i;

ncomp_eqs = table->num_comp_eqs;
Expand All @@ -828,15 +826,11 @@ static int comp_irqs_request_pci(struct mlx5_core_dev *dev)

i = 0;
rcu_read_lock();
for_each_numa_hop_mask(mask, dev->priv.numa_node) {
for_each_cpu_andnot(cpu, mask, prev) {
cpus[i] = cpu;
if (++i == ncomp_eqs)
goto spread_done;
}
prev = mask;
for_each_numa_online_cpu(cpu, hop, dev->priv.numa_node) {
cpus[i] = cpu;
if (++i == ncomp_eqs)
break;
}
spread_done:
rcu_read_unlock();
ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
kfree(cpus);
Expand Down
8 changes: 6 additions & 2 deletions include/linux/cpumask.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)

/**
* cpumask_first_and - return the first cpu from *srcp1 & *srcp2
* @src1p: the first input
* @src2p: the second input
* @srcp1: the first input
* @srcp2: the second input
*
* Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
*/
Expand Down Expand Up @@ -1197,6 +1197,10 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
/**
* cpumap_print_list_to_buf - copies the cpumask into the buffer as
* comma-separated list of cpus
* @buf: the buffer to copy into
* @mask: the cpumask to copy
* @off: in the string from which we are copying, we copy to @buf
* @count: the maximum number of bytes to print
*
* Everything is same with the above cpumap_print_bitmask_to_buf()
* except the print format.
Expand Down
43 changes: 43 additions & 0 deletions include/linux/find.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l
unsigned long nbits, unsigned long start);
unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long nbits, unsigned long start);
unsigned long _find_next_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
const unsigned long *addr3, unsigned long nbits,
unsigned long start);
unsigned long _find_next_zero_bit(const unsigned long *addr, unsigned long nbits,
unsigned long start);
extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
Expand Down Expand Up @@ -159,6 +162,40 @@ unsigned long find_next_or_bit(const unsigned long *addr1,
}
#endif

#ifndef find_next_and_andnot_bit
/**
 * find_next_and_andnot_bit - find the next bit set in *addr1 and *addr2,
 *			      excluding all the bits in *addr3
 * @addr1: The first address to base the search on
 * @addr2: The second address to base the search on
 * @addr3: The third address to base the search on
 * @size: The bitmap size in bits
 * @offset: The bitnumber to start searching at
 *
 * Return: the bit number for the next set bit
 * If no bits are set, returns @size.
 */
static __always_inline
unsigned long find_next_and_andnot_bit(const unsigned long *addr1,
				       const unsigned long *addr2,
				       const unsigned long *addr3,
				       unsigned long size,
				       unsigned long offset)
{
	unsigned long bits;

	/* Bitmaps wider than one word take the out-of-line slow path. */
	if (!small_const_nbits(size))
		return _find_next_and_andnot_bit(addr1, addr2, addr3, size, offset);

	if (unlikely(offset >= size))
		return size;

	/* Single-word case: combine, then mask off the bits below @offset. */
	bits = *addr1 & *addr2 & ~*addr3 & GENMASK(size - 1, offset);
	return bits ? __ffs(bits) : size;
}
#endif

#ifndef find_next_zero_bit
/**
* find_next_zero_bit - find the next cleared bit in a memory region
Expand Down Expand Up @@ -568,6 +605,12 @@ unsigned long find_next_bit_le(const void *addr, unsigned
(bit) = find_next_andnot_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
(bit)++)

/*
 * for_each_and_andnot_bit - iterate, in increasing order, over every bit in
 * [0, @size) that is set in both *addr1 and *addr2 but clear in *addr3.
 * @bit is the iterator variable, advanced via find_next_and_andnot_bit().
 */
#define for_each_and_andnot_bit(bit, addr1, addr2, addr3, size) \
for ((bit) = 0; \
(bit) = find_next_and_andnot_bit((addr1), (addr2), (addr3), (size), (bit)),\
(bit) < (size); \
(bit)++)

#define for_each_or_bit(bit, addr1, addr2, size) \
for ((bit) = 0; \
(bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
Expand Down
45 changes: 28 additions & 17 deletions include/linux/topology.h
Original file line number Diff line number Diff line change
Expand Up @@ -245,38 +245,49 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu)
return cpumask_of_node(cpu_to_node(cpu));
}

/*
* sched_numa_find_*_cpu() functions family traverses only accessible CPUs,
* i.e. those listed in cpu_online_mask.
*/
#ifdef CONFIG_NUMA
int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node);
extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops);
int sched_numa_find_next_cpu(const struct cpumask *cpus, int cpu, int node, unsigned int *hop);
#else
static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
{
return cpumask_nth(cpu, cpus);
return cpumask_nth_and(cpu, cpus, cpu_online_mask);
}

static inline const struct cpumask *
sched_numa_hop_mask(unsigned int node, unsigned int hops)
static __always_inline
int sched_numa_find_next_cpu(const struct cpumask *cpus, int cpu, int node, unsigned int *hop)
{
return ERR_PTR(-EOPNOTSUPP);
return find_next_and_bit(cpumask_bits(cpus), cpumask_bits(cpu_online_mask),
small_cpumask_bits, cpu);
}
#endif /* CONFIG_NUMA */

/**
* for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
* from a given node.
* @mask: the iteration variable.
* for_each_numa_cpu - iterate over cpus in increasing order taking into account
* NUMA distances from a given node.
* @cpu: the (optionally unsigned) integer iterator
* @hop: the iterator variable for nodes, i.e. proximity order to the @node
* @node: the NUMA node to start the search from.
* @mask: the cpumask pointer
*
* Requires rcu_lock to be held.
* When considered as a replacement for for_each_cpu(), the following should be
* taken into consideration:
* - Only accessible (i.e. online) CPUs are enumerated.
* - CPU enumeration may not be a monotonically increasing sequence;
*
* Yields cpu_online_mask for @node == NUMA_NO_NODE.
* rcu_lock must be held;
*/
#define for_each_numa_hop_mask(mask, node) \
for (unsigned int __hops = 0; \
mask = (node != NUMA_NO_NODE || __hops) ? \
sched_numa_hop_mask(node, __hops) : \
cpu_online_mask, \
!IS_ERR_OR_NULL(mask); \
__hops++)
/*
 * for_each_numa_cpu - iterate over cpus in @mask via sched_numa_find_next_cpu(),
 * which walks them in order of increasing NUMA distance from @node; @hop is
 * the in/out distance-order iterator it maintains. Iteration ends when the
 * helper returns >= nr_cpu_ids.
 * NOTE(review): per the helper's kernel-doc, only online CPUs are enumerated
 * and rcu_lock must be held -- confirm at each call site.
 */
#define for_each_numa_cpu(cpu, hop, node, mask) \
for ((cpu) = 0, (hop) = 0; \
(cpu) = sched_numa_find_next_cpu((mask), (cpu), (node), &(hop)),\
(cpu) < nr_cpu_ids; \
(cpu)++)

/* Convenience wrapper: enumerate all online CPUs by NUMA distance from @node. */
#define for_each_numa_online_cpu(cpu, hop, node) \
for_each_numa_cpu(cpu, hop, node, cpu_online_mask)

#endif /* _LINUX_TOPOLOGY_H */
53 changes: 30 additions & 23 deletions kernel/sched/topology.c
Original file line number Diff line number Diff line change
Expand Up @@ -2144,37 +2144,44 @@ int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
}
EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu);

/**
* sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
* @node
* @node: The node to count hops from.
* @hops: Include CPUs up to that many hops away. 0 means local node.
*
* Return: On success, a pointer to a cpumask of CPUs at most @hops away from
* @node, an error value otherwise.
*
* Requires rcu_lock to be held. Returned cpumask is only valid within that
* read-side section, copy it if required beyond that.
*
* Note that not all hops are equal in distance; see sched_init_numa() for how
* distances and masks are handled.
* Also note that this is a reflection of sched_domains_numa_masks, which may change
* during the lifetime of the system (offline nodes are taken out of the masks).
/*
* sched_numa_find_next_cpu() - given the NUMA topology, find the next cpu
* cpumask: cpumask to find a CPU from
* cpu: current CPU
* node: local node
* hop: (in/out) indicates distance order of current CPU to a local node
*
* The function searches for a next CPU at a given NUMA distance, indicated
* by hop, and if nothing found, tries to find CPUs at a greater distance,
* starting from the beginning.
*
* Return: cpu, or >= nr_cpu_ids when nothing found.
*/
const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops)
int sched_numa_find_next_cpu(const struct cpumask *cpus, int cpu, int node, unsigned int *hop)
{
unsigned long *cur, *prev;
struct cpumask ***masks;
unsigned int ret;

if (node >= nr_node_ids || hops >= sched_domains_numa_levels)
return ERR_PTR(-EINVAL);
if (*hop >= sched_domains_numa_levels)
return nr_cpu_ids;

masks = rcu_dereference(sched_domains_numa_masks);
if (!masks)
return ERR_PTR(-EBUSY);
cur = cpumask_bits(masks[*hop][node]);
if (*hop == 0)
ret = find_next_and_bit(cpumask_bits(cpus), cur, nr_cpu_ids, cpu);
else {
prev = cpumask_bits(masks[*hop - 1][node]);
ret = find_next_and_andnot_bit(cpumask_bits(cpus), cur, prev, nr_cpu_ids, cpu);
}

if (ret < nr_cpu_ids)
return ret;

return masks[hops][node];
*hop += 1;
return sched_numa_find_next_cpu(cpus, 0, node, hop);
}
EXPORT_SYMBOL_GPL(sched_numa_hop_mask);
EXPORT_SYMBOL_GPL(sched_numa_find_next_cpu);

#endif /* CONFIG_NUMA */

Expand Down
6 changes: 6 additions & 0 deletions lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,13 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_SCANF) += test_scanf.o

obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy)
# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled
GCOV_PROFILE_test_bitmap.o := n
endif

obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
Expand Down
12 changes: 6 additions & 6 deletions lib/cpumask.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ EXPORT_SYMBOL(cpumask_next_wrap);
* alloc_cpumask_var_node - allocate a struct cpumask on a given node
* @mask: pointer to cpumask_var_t where the cpumask is returned
* @flags: GFP_ flags
* @node: memory node from which to allocate or %NUMA_NO_NODE
*
* Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
* a nop returning a constant 1 (in <linux/cpumask.h>)
Expand Down Expand Up @@ -127,11 +128,8 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
*
* There's a better alternative based on for_each()-like iterators:
*
* for_each_numa_hop_mask(mask, node) {
* for_each_cpu_andnot(cpu, mask, prev)
* do_something(cpu);
* prev = mask;
* }
* for_each_numa_online_cpu(cpu, hop, node)
* do_something(cpu);
*
* It's simpler and more verbose than above. Complexity of iterator-based
* enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while
Expand All @@ -157,7 +155,9 @@ EXPORT_SYMBOL(cpumask_local_spread);
static DEFINE_PER_CPU(int, distribute_cpu_mask_prev);

/**
* cpumask_any_and_distribute - Return an arbitrary cpu within srcp1 & srcp2.
* cpumask_any_and_distribute - Return an arbitrary cpu within src1p & src2p.
* @src1p: first &cpumask for intersection
* @src2p: second &cpumask for intersection
*
Iterated calls using the same src1p and src2p will be distributed within
* their intersection.
Expand Down
12 changes: 12 additions & 0 deletions lib/find_bit.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,18 @@ unsigned long _find_next_andnot_bit(const unsigned long *addr1, const unsigned l
EXPORT_SYMBOL(_find_next_andnot_bit);
#endif

#ifndef find_next_and_andnot_bit
/*
 * Out-of-line helper backing find_next_and_andnot_bit(): find the first bit
 * at or above @start that is set in both *addr1 and *addr2 but clear in
 * *addr3. Returns @nbits if no such bit exists.
 */
unsigned long _find_next_and_andnot_bit(const unsigned long *addr1,
const unsigned long *addr2,
const unsigned long *addr3,
unsigned long nbits,
unsigned long start)
{
return FIND_NEXT_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_and_andnot_bit);
#endif

#ifndef find_next_or_bit
unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
unsigned long nbits, unsigned long start)
Expand Down
Loading

0 comments on commit 61d1532

Please sign in to comment.