Merge tag 'bitmap-for-6.15' of https://github.com/norov/linux
Pull bitmap updates from Yury Norov:

 - cpumask_next_wrap() rework (me)

 - GENMASK() simplification (I Hsin)

 - rust bindings for cpumasks (Viresh and me)

 - scattered cleanups (Andy, Tamir, Vincent, Ignacio and Joel)

* tag 'bitmap-for-6.15' of https://github.com/norov/linux: (22 commits)
  cpumask: align text in comment
  riscv: fix test_and_{set,clear}_bit ordering documentation
  treewide: fix typo 'unsigned __init128' -> 'unsigned __int128'
  MAINTAINERS: add rust bindings entry for bitmap API
  rust: Add cpumask helpers
  uapi: Revert "bitops: avoid integer overflow in GENMASK(_ULL)"
  cpumask: drop cpumask_next_wrap_old()
  PCI: hv: Switch hv_compose_multi_msi_req_get_cpu() to using cpumask_next_wrap()
  scsi: lpfc: rework lpfc_next_{online,present}_cpu()
  scsi: lpfc: switch lpfc_irq_rebalance() to using cpumask_next_wrap()
  s390: switch stop_machine_yield() to using cpumask_next_wrap()
  padata: switch padata_find_next() to using cpumask_next_wrap()
  cpumask: use cpumask_next_wrap() where appropriate
  cpumask: re-introduce cpumask_next{,_and}_wrap()
  cpumask: deprecate cpumask_next_wrap()
  powerpc/xmon: simplify xmon_batch_next_cpu()
  ibmvnic: simplify ibmvnic_set_queue_affinity()
  virtio_net: simplify virtnet_set_affinity()
  objpool: rework objpool_pop()
  cpumask: add for_each_{possible,online}_cpu_wrap
  ...
Linus Torvalds committed Mar 25, 2025
2 parents f81c2b8 + 1cf8e15 commit 2f2d529
Showing 23 changed files with 147 additions and 144 deletions.
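
The thread running through most of these call-site patches is the cpumask_next_wrap() rework: the old four-argument form made every caller carry separate start/wrap state, while the reworked two-argument form folds the wrap-around into the helper. A schematic before/after (variable names here are illustrative, not from any one call site):

        /* before: caller threads explicit start/wrap state through the search */
        cpu = cpumask_next_wrap(prev, mask, start, wrap);

        /* after: wraps past the end automatically; returns >= nr_cpu_ids
         * only if the mask is empty
         */
        cpu = cpumask_next_wrap(prev, mask);

        /* open-coded search loops become the new iterators */
        for_each_online_cpu_wrap(cpu, start) {
                /* visits each online CPU exactly once, starting at 'start' */
        }
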
5 changes: 5 additions & 0 deletions MAINTAINERS
@@ -4026,6 +4026,11 @@ F:	tools/include/vdso/bits.h
 F:      tools/lib/bitmap.c
 F:      tools/lib/find_bit.c
 
+BITMAP API BINDINGS [RUST]
+M:      Yury Norov <yury.norov@gmail.com>
+S:      Maintained
+F:      rust/helpers/cpumask.c
+
 BITOPS API
 M:      Yury Norov <yury.norov@gmail.com>
 R:      Rasmus Villemoes <linux@rasmusvillemoes.dk>
6 changes: 1 addition & 5 deletions arch/powerpc/xmon/xmon.c
@@ -1271,11 +1271,7 @@ static int xmon_batch_next_cpu(void)
 {
         unsigned long cpu;
 
-        while (!cpumask_empty(&xmon_batch_cpus)) {
-                cpu = cpumask_next_wrap(smp_processor_id(), &xmon_batch_cpus,
-                                        xmon_batch_start_cpu, true);
-                if (cpu >= nr_cpu_ids)
-                        break;
+        for_each_cpu_wrap(cpu, &xmon_batch_cpus, xmon_batch_start_cpu) {
                 if (xmon_batch_start_cpu == -1)
                         xmon_batch_start_cpu = cpu;
                 if (xmon_switch_cpu(cpu))
4 changes: 2 additions & 2 deletions arch/riscv/include/asm/bitops.h
@@ -226,7 +226,7 @@ static __always_inline int variable_fls(unsigned int x)
  * @nr: Bit to set
  * @addr: Address to count from
  *
- * This operation may be reordered on other architectures than x86.
+ * This is an atomic fully-ordered operation (implied full memory barrier).
  */
 static __always_inline int arch_test_and_set_bit(int nr, volatile unsigned long *addr)
 {
@@ -238,7 +238,7 @@ static __always_inline int arch_test_and_set_bit(int nr, volatile unsigned long
  * @nr: Bit to clear
  * @addr: Address to count from
  *
- * This operation can be reordered on other architectures other than x86.
+ * This is an atomic fully-ordered operation (implied full memory barrier).
  */
 static __always_inline int arch_test_and_clear_bit(int nr, volatile unsigned long *addr)
 {
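
The corrected kernel-doc now matches the cross-architecture contract (Documentation/atomic_bitops.txt): value-returning bit RMWs are fully ordered, on riscv as everywhere else. A minimal sketch of code that may rely on that implied barrier (names and the payload value are invented for illustration, not taken from the tree):

        #include <linux/bitops.h>

        #define DATA_READY      0

        static unsigned long state;
        static int payload;             /* illustrative shared data */

        static void producer(void)
        {
                payload = 42;           /* plain store */
                /*
                 * Fully ordered RMW: the payload store above is visible
                 * before the bit appears set to other CPUs.
                 */
                test_and_set_bit(DATA_READY, &state);
        }

        static int consumer(void)
        {
                /*
                 * Likewise fully ordered: the payload read below cannot
                 * be hoisted before the bit is observed and cleared.
                 */
                if (test_and_clear_bit(DATA_READY, &state))
                        return payload;
                return -1;
        }
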
2 changes: 1 addition & 1 deletion arch/s390/kernel/processor.c
@@ -72,7 +72,7 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
         this_cpu = smp_processor_id();
         if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
                 __this_cpu_write(cpu_relax_retry, 0);
-                cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
+                cpu = cpumask_next_wrap(this_cpu, cpumask);
                 if (cpu >= nr_cpu_ids)
                         return;
                 if (arch_vcpu_is_preempted(cpu))
18 changes: 11 additions & 7 deletions drivers/net/ethernet/ibm/ibmvnic.c
@@ -234,11 +234,17 @@ static int ibmvnic_set_queue_affinity(struct ibmvnic_sub_crq_queue *queue,
                 (*stragglers)--;
         }
         /* atomic write is safer than writing bit by bit directly */
-        for (i = 0; i < stride; i++) {
-                cpumask_set_cpu(*cpu, mask);
-                *cpu = cpumask_next_wrap(*cpu, cpu_online_mask,
-                                         nr_cpu_ids, false);
+        for_each_online_cpu_wrap(i, *cpu) {
+                if (!stride--) {
+                        /* For the next queue we start from the first
+                         * unused CPU in this queue
+                         */
+                        *cpu = i;
+                        break;
+                }
+                cpumask_set_cpu(i, mask);
         }
+
         /* set queue affinity mask */
         cpumask_copy(queue->affinity_mask, mask);
         rc = irq_set_affinity_and_hint(queue->irq, queue->affinity_mask);
@@ -256,7 +262,7 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
         int num_rxqs = adapter->num_active_rx_scrqs, i_rxqs = 0;
         int num_txqs = adapter->num_active_tx_scrqs, i_txqs = 0;
         int total_queues, stride, stragglers, i;
-        unsigned int num_cpu, cpu;
+        unsigned int num_cpu, cpu = 0;
         bool is_rx_queue;
         int rc = 0;
@@ -274,8 +280,6 @@ static void ibmvnic_set_affinity(struct ibmvnic_adapter *adapter)
         stride = max_t(int, num_cpu / total_queues, 1);
         /* number of leftover cpu's */
         stragglers = num_cpu >= total_queues ? num_cpu % total_queues : 0;
-        /* next available cpu to assign irq to */
-        cpu = cpumask_next(-1, cpu_online_mask);
 
         for (i = 0; i < total_queues; i++) {
                 is_rx_queue = false;
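
The loop shape adopted here (and in virtio_net below) is a general round-robin idiom: walk online CPUs starting at the saved position, consume a stride of them, then park the iterator on the first unused CPU for the next queue. A condensed, self-contained sketch of the same idiom; assign_queue_cpu() and all other names are hypothetical, not from the driver:

        #include <linux/cpumask.h>

        /* Hypothetical callback: bind queue q to CPU cpu. */
        static void assign_queue_cpu(int q, unsigned int cpu);

        static void distribute_queues(int nqueues, int per_queue)
        {
                unsigned int cpu, start = 0;
                int q, left;

                for (q = 0; q < nqueues; q++) {
                        left = per_queue;
                        for_each_online_cpu_wrap(cpu, start) {
                                if (!left--) {
                                        /* next queue resumes at the first unused CPU */
                                        start = cpu;
                                        break;
                                }
                                assign_queue_cpu(q, cpu);
                        }
                }
        }
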
12 changes: 7 additions & 5 deletions drivers/net/virtio_net.c
@@ -3826,7 +3826,7 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
         cpumask_var_t mask;
         int stragglers;
         int group_size;
-        int i, j, cpu;
+        int i, start = 0, cpu;
         int num_cpu;
         int stride;
@@ -3840,16 +3840,18 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
         stragglers = num_cpu >= vi->curr_queue_pairs ?
                         num_cpu % vi->curr_queue_pairs :
                         0;
-        cpu = cpumask_first(cpu_online_mask);
 
         for (i = 0; i < vi->curr_queue_pairs; i++) {
                 group_size = stride + (i < stragglers ? 1 : 0);
 
-                for (j = 0; j < group_size; j++) {
+                for_each_online_cpu_wrap(cpu, start) {
+                        if (!group_size--) {
+                                start = cpu;
+                                break;
+                        }
                         cpumask_set_cpu(cpu, mask);
-                        cpu = cpumask_next_wrap(cpu, cpu_online_mask,
-                                                nr_cpu_ids, false);
                 }
 
                 virtqueue_set_affinity(vi->rq[i].vq, mask);
                 virtqueue_set_affinity(vi->sq[i].vq, mask);
                 __netif_set_xps_queue(vi->dev, cpumask_bits(mask), i, XPS_CPUS);
3 changes: 1 addition & 2 deletions drivers/pci/controller/pci-hyperv.c
@@ -1757,8 +1757,7 @@ static int hv_compose_multi_msi_req_get_cpu(void)
 
         spin_lock_irqsave(&multi_msi_cpu_lock, flags);
 
-        cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
-                                     false);
+        cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask);
         cpu = cpu_next;
 
         spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
23 changes: 5 additions & 18 deletions drivers/scsi/lpfc/lpfc.h
@@ -1715,35 +1715,22 @@ lpfc_phba_elsring(struct lpfc_hba *phba)
  * Note: If no valid cpu found, then nr_cpu_ids is returned.
  *
  **/
-static inline unsigned int
+static __always_inline unsigned int
 lpfc_next_online_cpu(const struct cpumask *mask, unsigned int start)
 {
-        unsigned int cpu_it;
-
-        for_each_cpu_wrap(cpu_it, mask, start) {
-                if (cpu_online(cpu_it))
-                        break;
-        }
-
-        return cpu_it;
+        return cpumask_next_and_wrap(start, mask, cpu_online_mask);
 }
 
 /**
  * lpfc_next_present_cpu - Finds next present CPU after n
  * @n: the cpu prior to search
  *
  * Note: If no next present cpu, then fallback to first present cpu.
  *
  **/
-static inline unsigned int lpfc_next_present_cpu(int n)
+static __always_inline unsigned int lpfc_next_present_cpu(int n)
 {
-        unsigned int cpu;
-
-        cpu = cpumask_next(n, cpu_present_mask);
-
-        if (cpu >= nr_cpu_ids)
-                cpu = cpumask_first(cpu_present_mask);
-
-        return cpu;
+        return cpumask_next_wrap(n, cpu_present_mask);
 }
 
 /**
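
Both helpers now reduce to one core call each: lpfc_next_online_cpu() becomes cpumask_next_and_wrap() over the driver mask and cpu_online_mask, and lpfc_next_present_cpu() keeps its documented fallback-to-first behavior because cpumask_next_wrap() wraps by itself. A usage sketch of the AND-variant (the function and mask names here are hypothetical, not lpfc code):

        #include <linux/cpumask.h>

        /* Pick the next online CPU from an IRQ affinity mask, wrapping. */
        static unsigned int next_irq_cpu(const struct cpumask *irq_mask,
                                         unsigned int prev)
        {
                unsigned int cpu;

                cpu = cpumask_next_and_wrap(prev, irq_mask, cpu_online_mask);
                if (cpu >= nr_cpu_ids)  /* irq_mask contains no online CPU */
                        return prev;
                return cpu;
        }
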
2 changes: 1 addition & 1 deletion drivers/scsi/lpfc/lpfc_init.c
@@ -12873,7 +12873,7 @@ lpfc_irq_rebalance(struct lpfc_hba *phba, unsigned int cpu, bool offline)
 
         if (offline) {
                 /* Find next online CPU on original mask */
-                cpu_next = cpumask_next_wrap(cpu, orig_mask, cpu, true);
+                cpu_next = cpumask_next_wrap(cpu, orig_mask);
                 cpu_select = lpfc_next_online_cpu(orig_mask, cpu_next);
 
                 /* Found a valid CPU */
8 changes: 5 additions & 3 deletions include/linux/bitmap.h
@@ -560,9 +560,9 @@ void bitmap_replace(unsigned long *dst,
  *	...0..11...0..10
  *	dst: 0000001100000010
  *
- * A relationship exists between bitmap_scatter() and bitmap_gather().
+ * A relationship exists between bitmap_scatter() and bitmap_gather(). See
+ * bitmap_gather() for the bitmap gather detailed operations. TL;DR:
  * bitmap_gather() can be seen as the 'reverse' bitmap_scatter() operation.
- * See bitmap_scatter() for details related to this relationship.
  */
 static __always_inline
 void bitmap_scatter(unsigned long *dst, const unsigned long *src,
@@ -608,7 +608,9 @@ void bitmap_scatter(unsigned long *dst, const unsigned long *src,
  * dst: 0000000000011010
  *
  * A relationship exists between bitmap_gather() and bitmap_scatter(). See
- * bitmap_scatter() for the bitmap scatter detailed operations.
+ * bitmap_scatter() for the bitmap scatter detailed operations. TL;DR:
+ * bitmap_scatter() can be seen as the 'reverse' bitmap_gather() operation.
+ *
  * Suppose scattered computed using bitmap_scatter(scattered, src, mask, n).
  * The operation bitmap_gather(result, scattered, mask, n) leads to a result
  * equal or equivalent to src.
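
The TL;DR added to both kernel-doc blocks can be made concrete with a round trip. A small sketch under assumed values (not part of the patch): src bits 0..2 land on the first three set positions of mask, and gather() brings them back:

        #include <linux/bitmap.h>

        static void scatter_gather_demo(void)
        {
                DECLARE_BITMAP(src, 16);
                DECLARE_BITMAP(mask, 16);
                DECLARE_BITMAP(tmp, 16);
                DECLARE_BITMAP(res, 16);

                bitmap_zero(src, 16);
                bitmap_zero(mask, 16);
                bitmap_set(src, 0, 3);          /* src  = ...00000111 */
                bitmap_set(mask, 4, 4);         /* mask = ...11110000 */

                bitmap_scatter(tmp, src, mask, 16);     /* tmp = ...01110000 */

                /* gather() reverses scatter(): res ends up equal to src */
                bitmap_gather(res, tmp, mask, 16);      /* res = ...00000111 */
        }
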
2 changes: 1 addition & 1 deletion include/linux/bits.h
@@ -40,7 +40,7 @@
  * Missing asm support
  *
  * __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __init128' data
+ * in the asm code, as it shifts an 'unsigned __int128' data
  * type instead of direct representation of 128 bit constants
  * such as long and unsigned long. The fundamental problem is
  * that a 128 bit constant will get silently truncated by the
71 changes: 49 additions & 22 deletions include/linux/cpumask.h
@@ -81,7 +81,7 @@ static __always_inline void set_nr_cpu_ids(unsigned int nr)
  *
  *     cpu_possible_mask- has bit 'cpu' set iff cpu is populatable
  *     cpu_present_mask - has bit 'cpu' set iff cpu is populated
- *     cpu_enabled_mask  - has bit 'cpu' set iff cpu can be brought online
+ *     cpu_enabled_mask - has bit 'cpu' set iff cpu can be brought online
  *     cpu_online_mask  - has bit 'cpu' set iff cpu available to scheduler
  *     cpu_active_mask  - has bit 'cpu' set iff cpu available to migration
  *
@@ -285,35 +285,52 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p,
 }
 
 /**
- * for_each_cpu - iterate over every cpu in a mask
- * @cpu: the (optionally unsigned) integer iterator
- * @mask: the cpumask pointer
+ * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from
+ *                         @n+1. If nothing found, wrap around and start from
+ *                         the beginning
+ * @n: the cpu prior to the place to search (i.e. search starts from @n+1)
+ * @src1p: the first cpumask pointer
+ * @src2p: the second cpumask pointer
  *
- * After the loop, cpu is >= nr_cpu_ids.
+ * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src1p & @src2p is empty.
  */
-#define for_each_cpu(cpu, mask)                                \
-        for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
-
-#if NR_CPUS == 1
 static __always_inline
-unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap)
+unsigned int cpumask_next_and_wrap(int n, const struct cpumask *src1p,
+                                   const struct cpumask *src2p)
 {
-        cpumask_check(start);
+        /* -1 is a legal arg here. */
         if (n != -1)
                 cpumask_check(n);
+        return find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p),
+                                      small_cpumask_bits, n + 1);
+}
 
-        /*
-         * Return the first available CPU when wrapping, or when starting before cpu0,
-         * since there is only one valid option.
-         */
-        if (wrap && n >= 0)
-                return nr_cpumask_bits;
-
-        return cpumask_first(mask);
+/**
+ * cpumask_next_wrap - get the next cpu in *src, starting from @n+1. If nothing
+ *                     found, wrap around and start from the beginning
+ * @n: the cpu prior to the place to search (i.e. search starts from @n+1)
+ * @src: cpumask pointer
+ *
+ * Return: next set bit, wrapped if needed, or >= nr_cpu_ids if @src is empty.
+ */
+static __always_inline
+unsigned int cpumask_next_wrap(int n, const struct cpumask *src)
+{
+        /* -1 is a legal arg here. */
+        if (n != -1)
+                cpumask_check(n);
+        return find_next_bit_wrap(cpumask_bits(src), small_cpumask_bits, n + 1);
 }
-#else
-unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap);
-#endif
 
 /**
+ * for_each_cpu - iterate over every cpu in a mask
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu(cpu, mask)                                \
+        for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
+
+/**
  * for_each_cpu_wrap - iterate over every cpu in a mask, starting at a specified location
@@ -1033,11 +1050,21 @@ extern const DECLARE_BITMAP(cpu_all_bits, NR_CPUS);
 #define for_each_possible_cpu(cpu)     for ((cpu) = 0; (cpu) < 1; (cpu)++)
 #define for_each_online_cpu(cpu)       for ((cpu) = 0; (cpu) < 1; (cpu)++)
 #define for_each_present_cpu(cpu)      for ((cpu) = 0; (cpu) < 1; (cpu)++)
+
+#define for_each_possible_cpu_wrap(cpu, start)         \
+        for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
+#define for_each_online_cpu_wrap(cpu, start)           \
+        for ((void)(start), (cpu) = 0; (cpu) < 1; (cpu)++)
 #else
 #define for_each_possible_cpu(cpu) for_each_cpu((cpu), cpu_possible_mask)
 #define for_each_online_cpu(cpu) for_each_cpu((cpu), cpu_online_mask)
 #define for_each_enabled_cpu(cpu) for_each_cpu((cpu), cpu_enabled_mask)
 #define for_each_present_cpu(cpu) for_each_cpu((cpu), cpu_present_mask)
+
+#define for_each_possible_cpu_wrap(cpu, start)         \
+        for_each_cpu_wrap((cpu), cpu_possible_mask, (start))
+#define for_each_online_cpu_wrap(cpu, start)           \
+        for_each_cpu_wrap((cpu), cpu_online_mask, (start))
 #endif
 
 /* Wrappers for arch boot code to manipulate normally-constant masks */
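
Since cpumask_next_wrap() now wraps unconditionally and accepts -1 as "search from the beginning", the usual round-robin bookkeeping shrinks to a couple of lines. A minimal sketch (struct and function names are hypothetical, not from the tree):

        #include <linux/cpumask.h>
        #include <linux/errno.h>

        struct mydev {
                int last_cpu;   /* init to -1: first call picks first online CPU */
        };

        static int mydev_pick_cpu(struct mydev *dev)
        {
                int cpu = cpumask_next_wrap(dev->last_cpu, cpu_online_mask);

                if (cpu >= nr_cpu_ids)  /* no online CPUs at all */
                        return -ENODEV;
                dev->last_cpu = cpu;
                return cpu;
        }
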
7 changes: 3 additions & 4 deletions include/linux/objpool.h
@@ -170,17 +170,16 @@ static inline void *objpool_pop(struct objpool_head *pool)
 {
         void *obj = NULL;
         unsigned long flags;
-        int i, cpu;
+        int start, cpu;
 
         /* disable local irq to avoid preemption & interruption */
         raw_local_irq_save(flags);
 
-        cpu = raw_smp_processor_id();
-        for (i = 0; i < pool->nr_possible_cpus; i++) {
+        start = raw_smp_processor_id();
+        for_each_possible_cpu_wrap(cpu, start) {
                 obj = __objpool_try_get_slot(pool, cpu);
                 if (obj)
                         break;
-                cpu = cpumask_next_wrap(cpu, cpu_possible_mask, -1, 1);
         }
         raw_local_irq_restore(flags);
8 changes: 2 additions & 6 deletions include/uapi/linux/bits.h
@@ -4,13 +4,9 @@
 #ifndef _UAPI_LINUX_BITS_H
 #define _UAPI_LINUX_BITS_H
 
-#define __GENMASK(h, l) \
-        (((~_UL(0)) - (_UL(1) << (l)) + 1) & \
-         (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
+#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h))))
 
-#define __GENMASK_ULL(h, l) \
-        (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \
-         (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
+#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h))))
 
 #define __GENMASK_U128(h, l) \
         ((_BIT128((h)) << 1) - (_BIT128(l)))
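
The restored shift-based definition is the classic GENMASK() expansion. A worked example on a 64-bit build, with a small illustrative accessor (extract_field() is invented for the sketch):

        /* GENMASK(7, 4):
         *   ~0UL << 4             = 0xfffffffffffffff0
         *   ~0UL >> (64 - 1 - 7)  = 0x00000000000000ff
         *   ANDed together        = 0x00000000000000f0, i.e. bits 7..4
         */
        #include <linux/bits.h>

        static inline unsigned int extract_field(unsigned int reg)
        {
                return (reg & GENMASK(7, 4)) >> 4;      /* field in bits 7..4 */
        }

On the kernel side, GENMASK() additionally applies GENMASK_INPUT_CHECK() to reject h < l at build time.
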
2 changes: 1 addition & 1 deletion include/uapi/linux/const.h
@@ -33,7 +33,7 @@
  * Missing asm support
  *
  * __BIT128() would not work in the asm code, as it shifts an
- * 'unsigned __init128' data type as direct representation of
+ * 'unsigned __int128' data type as direct representation of
  * 128 bit constants is not supported in the gcc compiler, as
  * they get silently truncated.
  *
2 changes: 1 addition & 1 deletion kernel/padata.c
@@ -290,7 +290,7 @@ static struct padata_priv *padata_find_next(struct parallel_data *pd,
         if (remove_object) {
                 list_del_init(&padata->list);
                 ++pd->processed;
-                pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
+                pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu);
         }
 
         spin_unlock(&reorder->lock);