From 43245117806ff8914e37327b610fc08b5ddedc91 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 20 Jan 2023 20:24:28 -0800
Subject: [PATCH 1/9] lib/find: introduce find_nth_and_andnot_bit

In the following patches the function is used to implement in-place bitmaps
traversing without storing intermediate result in temporary bitmaps.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/find.h | 33 +++++++++++++++++++++++++++++++++
 lib/find_bit.c       |  9 +++++++++
 2 files changed, 42 insertions(+)

diff --git a/include/linux/find.h b/include/linux/find.h
index ccaf61a0f5fd5..4647864a5ffdb 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -22,6 +22,9 @@ unsigned long __find_nth_and_bit(const unsigned long *addr1, const unsigned long
 				unsigned long size, unsigned long n);
 unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
 					unsigned long size, unsigned long n);
+unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsigned long *addr2,
+					const unsigned long *addr3, unsigned long size,
+					unsigned long n);
 extern unsigned long _find_first_and_bit(const unsigned long *addr1,
 					 const unsigned long *addr2, unsigned long size);
 extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
@@ -255,6 +258,36 @@ unsigned long find_nth_andnot_bit(const unsigned long *addr1, const unsigned lon
 	return __find_nth_andnot_bit(addr1, addr2, size, n);
 }
 
+/**
+ * find_nth_and_andnot_bit - find N'th set bit in 2 memory regions,
+ *			     excluding those set in 3rd region
+ * @addr1: The 1st address to start the search at
+ * @addr2: The 2nd address to start the search at
+ * @addr3: The 3rd address to start the search at
+ * @size: The maximum number of bits to search
+ * @n: The number of set bit, which position is needed, counting from 0
+ *
+ * Returns the bit number of the N'th set bit.
+ * If no such, returns @size.
+ */
+static __always_inline
+unsigned long find_nth_and_andnot_bit(const unsigned long *addr1,
+					const unsigned long *addr2,
+					const unsigned long *addr3,
+					unsigned long size, unsigned long n)
+{
+	if (n >= size)
+		return size;
+
+	if (small_const_nbits(size)) {
+		unsigned long val =  *addr1 & *addr2 & (~*addr3) & GENMASK(size - 1, 0);
+
+		return val ? fns(val, n) : size;
+	}
+
+	return __find_nth_and_andnot_bit(addr1, addr2, addr3, size, n);
+}
+
 #ifndef find_first_and_bit
 /**
  * find_first_and_bit - find the first set bit in both memory regions
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 18bc0a7ac8eed..c10920e667889 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -155,6 +155,15 @@ unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned l
 }
 EXPORT_SYMBOL(__find_nth_andnot_bit);
 
+unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1,
+					const unsigned long *addr2,
+					const unsigned long *addr3,
+					unsigned long size, unsigned long n)
+{
+	return FIND_NTH_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], size, n);
+}
+EXPORT_SYMBOL(__find_nth_and_andnot_bit);
+
 #ifndef find_next_and_bit
 unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2,
 					unsigned long nbits, unsigned long start)

From 62f4386e564d31c7d0ed7d835843e2685f99ae71 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 20 Jan 2023 20:24:29 -0800
Subject: [PATCH 2/9] cpumask: introduce cpumask_nth_and_andnot

Introduce cpumask_nth_and_andnot() based on find_nth_and_andnot_bit().
It's used in the following patch to traverse cpumasks without storing
intermediate result in temporary cpumask.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/cpumask.h | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c2aa0aa26b457..7b16aede7ac58 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -391,6 +391,26 @@ unsigned int cpumask_nth_andnot(unsigned int cpu, const struct cpumask *srcp1,
 				nr_cpumask_bits, cpumask_check(cpu));
 }
 
+/**
+ * cpumask_nth_and_andnot - get the Nth cpu set in 1st and 2nd cpumask, and clear in 3rd.
+ * @srcp1: the cpumask pointer
+ * @srcp2: the cpumask pointer
+ * @srcp3: the cpumask pointer
+ * @cpu: the N'th cpu to find, starting from 0
+ *
+ * Returns >= nr_cpu_ids if such cpu doesn't exist.
+ */
+static __always_inline
+unsigned int cpumask_nth_and_andnot(unsigned int cpu, const struct cpumask *srcp1,
+							const struct cpumask *srcp2,
+							const struct cpumask *srcp3)
+{
+	return find_nth_and_andnot_bit(cpumask_bits(srcp1),
+					cpumask_bits(srcp2),
+					cpumask_bits(srcp3),
+					nr_cpumask_bits, cpumask_check(cpu));
+}
+
 #define CPU_BITS_NONE						\
 {								\
 	[0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL			\

From cd7f55359c90a4108e6528e326b8623fce1ad72a Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 20 Jan 2023 20:24:30 -0800
Subject: [PATCH 3/9] sched: add sched_numa_find_nth_cpu()

The function finds Nth set CPU in a given cpumask starting from a given
node.

Leveraging the fact that each hop in sched_domains_numa_masks includes the
same or greater number of CPUs than the previous one, we can use binary
search on hops instead of linear walk, which makes the overall complexity
of O(log n) in terms of number of cpumask_weight() calls.

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/topology.h |  8 ++++++
 kernel/sched/topology.c  | 57 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 4564faafd0e12..72f2645756982 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -245,5 +245,13 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu)
 	return cpumask_of_node(cpu_to_node(cpu));
 }
 
+#ifdef CONFIG_NUMA
+int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node);
+#else
+static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
+{
+	return cpumask_nth(cpu, cpus);
+}
+#endif	/* CONFIG_NUMA */
 
 #endif /* _LINUX_TOPOLOGY_H */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8739c2a5a54ea..2bf89186a10fa 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -3,6 +3,8 @@
  * Scheduler topology setup/handling methods
  */
 
+#include <linux/bsearch.h>
+
 DEFINE_MUTEX(sched_domains_mutex);
 
 /* Protected by sched_domains_mutex: */
@@ -2067,6 +2069,61 @@ int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
 	return found;
 }
 
+struct __cmp_key {
+	const struct cpumask *cpus;
+	struct cpumask ***masks;
+	int node;
+	int cpu;
+	int w;
+};
+
+static int hop_cmp(const void *a, const void *b)
+{
+	struct cpumask **prev_hop = *((struct cpumask ***)b - 1);
+	struct cpumask **cur_hop = *(struct cpumask ***)b;
+	struct __cmp_key *k = (struct __cmp_key *)a;
+
+	if (cpumask_weight_and(k->cpus, cur_hop[k->node]) <= k->cpu)
+		return 1;
+
+	k->w = (b == k->masks) ? 0 : cpumask_weight_and(k->cpus, prev_hop[k->node]);
+	if (k->w <= k->cpu)
+		return 0;
+
+	return -1;
+}
+
+/*
+ * sched_numa_find_nth_cpu() - given the NUMA topology, find the Nth next cpu
+ *                             closest to @cpu from @cpumask.
+ * cpumask: cpumask to find a cpu from
+ * cpu: Nth cpu to find
+ *
+ * returns: cpu, or nr_cpu_ids when nothing found.
+ */
+int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
+{
+	struct __cmp_key k = { .cpus = cpus, .node = node, .cpu = cpu };
+	struct cpumask ***hop_masks;
+	int hop, ret = nr_cpu_ids;
+
+	rcu_read_lock();
+
+	k.masks = rcu_dereference(sched_domains_numa_masks);
+	if (!k.masks)
+		goto unlock;
+
+	hop_masks = bsearch(&k, k.masks, sched_domains_numa_levels, sizeof(k.masks[0]), hop_cmp);
+	hop = hop_masks	- k.masks;
+
+	ret = hop ?
+		cpumask_nth_and_andnot(cpu - k.w, cpus, k.masks[hop][node], k.masks[hop-1][node]) :
+		cpumask_nth_and(cpu, cpus, k.masks[0][node]);
+unlock:
+	rcu_read_unlock();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu);
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)

From 406d394abfcd8f16dc1dbcc8fc1b828252befb6d Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 20 Jan 2023 20:24:31 -0800
Subject: [PATCH 4/9] cpumask: improve on cpumask_local_spread() locality

Switch cpumask_local_spread() to use newly added sched_numa_find_nth_cpu(),
which takes into account distances to each node in the system.

For the following NUMA configuration:

root@debian:~# numactl -H
available: 4 nodes (0-3)
node 0 cpus: 0 1 2 3
node 0 size: 3869 MB
node 0 free: 3740 MB
node 1 cpus: 4 5
node 1 size: 1969 MB
node 1 free: 1937 MB
node 2 cpus: 6 7
node 2 size: 1967 MB
node 2 free: 1873 MB
node 3 cpus: 8 9 10 11 12 13 14 15
node 3 size: 7842 MB
node 3 free: 7723 MB
node distances:
node   0   1   2   3
  0:  10  50  30  70
  1:  50  10  70  30
  2:  30  70  10  50
  3:  70  30  50  10

The new cpumask_local_spread() traverses cpus for each node like this:

node 0:   0   1   2   3   6   7   4   5   8   9  10  11  12  13  14  15
node 1:   4   5   8   9  10  11  12  13  14  15   0   1   2   3   6   7
node 2:   6   7   0   1   2   3   8   9  10  11  12  13  14  15   4   5
node 3:   8   9  10  11  12  13  14  15   4   5   6   7   0   1   2   3

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 lib/cpumask.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/lib/cpumask.c b/lib/cpumask.c
index c7c392514fd34..255974cd6734f 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -110,7 +110,7 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
 #endif
 
 /**
- * cpumask_local_spread - select the i'th cpu with local numa cpu's first
+ * cpumask_local_spread - select the i'th cpu based on NUMA distances
  * @i: index number
  * @node: local numa_node
  *
@@ -132,15 +132,7 @@ unsigned int cpumask_local_spread(unsigned int i, int node)
 		if (cpu < nr_cpu_ids)
 			return cpu;
 	} else {
-		/* NUMA first. */
-		cpu = cpumask_nth_and(i, cpu_online_mask, cpumask_of_node(node));
-		if (cpu < nr_cpu_ids)
-			return cpu;
-
-		i -= cpumask_weight_and(cpu_online_mask, cpumask_of_node(node));
-
-		/* Skip NUMA nodes, done above. */
-		cpu = cpumask_nth_andnot(i, cpu_online_mask, cpumask_of_node(node));
+		cpu = sched_numa_find_nth_cpu(cpu_online_mask, i, node);
 		if (cpu < nr_cpu_ids)
 			return cpu;
 	}

From b1beed72b8b75d365fdbc925da856c212195051b Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 20 Jan 2023 20:24:32 -0800
Subject: [PATCH 5/9] lib/cpumask: reorganize cpumask_local_spread() logic

Now after moving all NUMA logic into sched_numa_find_nth_cpu(),
else-branch of cpumask_local_spread() is just a function call, and
we can simplify logic by using ternary operator.

While here, replace BUG() with WARN_ON().

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Acked-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Peter Lafreniere <peter@n8pjl.ca>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 lib/cpumask.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 255974cd6734f..10aa15715c0d2 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -127,16 +127,12 @@ unsigned int cpumask_local_spread(unsigned int i, int node)
 	/* Wrap: we always want a cpu. */
 	i %= num_online_cpus();
 
-	if (node == NUMA_NO_NODE) {
-		cpu = cpumask_nth(i, cpu_online_mask);
-		if (cpu < nr_cpu_ids)
-			return cpu;
-	} else {
-		cpu = sched_numa_find_nth_cpu(cpu_online_mask, i, node);
-		if (cpu < nr_cpu_ids)
-			return cpu;
-	}
-	BUG();
+	cpu = (node == NUMA_NO_NODE) ?
+		cpumask_nth(i, cpu_online_mask) :
+		sched_numa_find_nth_cpu(cpu_online_mask, i, node);
+
+	WARN_ON(cpu >= nr_cpu_ids);
+	return cpu;
 }
 EXPORT_SYMBOL(cpumask_local_spread);
 

From 9feae65845f7b16376716fe70b7d4b9bf8721848 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <vschneid@redhat.com>
Date: Fri, 20 Jan 2023 20:24:33 -0800
Subject: [PATCH 6/9] sched/topology: Introduce sched_numa_hop_mask()

Tariq has pointed out that drivers allocating IRQ vectors would benefit
from having smarter NUMA-awareness - cpumask_local_spread() only knows
about the local node and everything outside is in the same bucket.

sched_domains_numa_masks is pretty much what we want to hand out (a cpumask
of CPUs reachable within a given distance budget), introduce
sched_numa_hop_mask() to export those cpumasks.

Link: http://lore.kernel.org/r/20220728191203.4055-1-tariqt@nvidia.com
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/topology.h |  7 +++++++
 kernel/sched/topology.c  | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 72f2645756982..344c2362755aa 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -247,11 +247,18 @@ static inline const struct cpumask *cpu_cpu_mask(int cpu)
 
 #ifdef CONFIG_NUMA
 int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node);
+extern const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops);
 #else
 static __always_inline int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
 {
 	return cpumask_nth(cpu, cpus);
 }
+
+static inline const struct cpumask *
+sched_numa_hop_mask(unsigned int node, unsigned int hops)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
 #endif	/* CONFIG_NUMA */
 
 #endif /* _LINUX_TOPOLOGY_H */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 2bf89186a10fa..1233affc106c6 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2124,6 +2124,39 @@ int sched_numa_find_nth_cpu(const struct cpumask *cpus, int cpu, int node)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(sched_numa_find_nth_cpu);
+
+/**
+ * sched_numa_hop_mask() - Get the cpumask of CPUs at most @hops hops away from
+ *                         @node
+ * @node: The node to count hops from.
+ * @hops: Include CPUs up to that many hops away. 0 means local node.
+ *
+ * Return: On success, a pointer to a cpumask of CPUs at most @hops away from
+ * @node, an error value otherwise.
+ *
+ * Requires rcu_lock to be held. Returned cpumask is only valid within that
+ * read-side section, copy it if required beyond that.
+ *
+ * Note that not all hops are equal in distance; see sched_init_numa() for how
+ * distances and masks are handled.
+ * Also note that this is a reflection of sched_domains_numa_masks, which may change
+ * during the lifetime of the system (offline nodes are taken out of the masks).
+ */
+const struct cpumask *sched_numa_hop_mask(unsigned int node, unsigned int hops)
+{
+	struct cpumask ***masks;
+
+	if (node >= nr_node_ids || hops >= sched_domains_numa_levels)
+		return ERR_PTR(-EINVAL);
+
+	masks = rcu_dereference(sched_domains_numa_masks);
+	if (!masks)
+		return ERR_PTR(-EBUSY);
+
+	return masks[hops][node];
+}
+EXPORT_SYMBOL_GPL(sched_numa_hop_mask);
+
 #endif /* CONFIG_NUMA */
 
 static int __sdt_alloc(const struct cpumask *cpu_map)

From 06ac01721f7d07da722abe0ec6f147b90bfc8c77 Mon Sep 17 00:00:00 2001
From: Valentin Schneider <vschneid@redhat.com>
Date: Fri, 20 Jan 2023 20:24:34 -0800
Subject: [PATCH 7/9] sched/topology: Introduce for_each_numa_hop_mask()

The recently introduced sched_numa_hop_mask() exposes cpumasks of CPUs
reachable within a given distance budget, wrap the logic for iterating over
all (distance, mask) values inside an iterator macro.

Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/topology.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/linux/topology.h b/include/linux/topology.h
index 344c2362755aa..fea32377f7c77 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -261,4 +261,22 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops)
 }
 #endif	/* CONFIG_NUMA */
 
+/**
+ * for_each_numa_hop_mask - iterate over cpumasks of increasing NUMA distance
+ *                          from a given node.
+ * @mask: the iteration variable.
+ * @node: the NUMA node to start the search from.
+ *
+ * Requires rcu_lock to be held.
+ *
+ * Yields cpu_online_mask for @node == NUMA_NO_NODE.
+ */
+#define for_each_numa_hop_mask(mask, node)				       \
+	for (unsigned int __hops = 0;					       \
+	     mask = (node != NUMA_NO_NODE || __hops) ?			       \
+		     sched_numa_hop_mask(node, __hops) :		       \
+		     cpu_online_mask,					       \
+	     !IS_ERR_OR_NULL(mask);					       \
+	     __hops++)
+
 #endif /* _LINUX_TOPOLOGY_H */

From 2acda57736de1e486036b90a648e67a3599080a1 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@nvidia.com>
Date: Fri, 20 Jan 2023 20:24:35 -0800
Subject: [PATCH 8/9] net/mlx5e: Improve remote NUMA preferences used for the
 IRQ affinity hints

In the IRQ affinity hints, replace the binary NUMA preference (local /
remote) with the improved for_each_numa_hop_cpu() API that minds the
actual distances, so that remote NUMAs with short distance are preferred
over farther ones.

This has significant performance implications when using NUMA-aware
allocated memory (follow [1] and derivatives for example).

[1]
drivers/net/ethernet/mellanox/mlx5/core/en_main.c :: mlx5e_open_channel()
   int cpu = cpumask_first(mlx5_comp_irq_get_affinity_mask(priv->mdev, ix));

Performance tests:

TCP multi-stream, using 16 iperf3 instances pinned to 16 cores (with aRFS on).
Active cores: 64,65,72,73,80,81,88,89,96,97,104,105,112,113,120,121

+-------------------------+-----------+------------------+------------------+
|                         | BW (Gbps) | TX side CPU util | RX side CPU util |
+-------------------------+-----------+------------------+------------------+
| Baseline                | 52.3      | 6.4 %            | 17.9 %           |
+-------------------------+-----------+------------------+------------------+
| Applied on TX side only | 52.6      | 5.2 %            | 18.5 %           |
+-------------------------+-----------+------------------+------------------+
| Applied on RX side only | 94.9      | 11.9 %           | 27.2 %           |
+-------------------------+-----------+------------------+------------------+
| Applied on both sides   | 95.1      | 8.4 %            | 27.3 %           |
+-------------------------+-----------+------------------+------------------+

Bottleneck in RX side is released, reached linerate (~1.8x speedup).
~30% less cpu util on TX.

* CPU util on active cores only.

Setups details (similar for both sides):

NIC: ConnectX6-DX dual port, 100 Gbps each.
Single port used in the tests.

$ lscpu
Architecture:        x86_64
CPU op-mode(s):      32-bit, 64-bit
Byte Order:          Little Endian
CPU(s):              256
On-line CPU(s) list: 0-255
Thread(s) per core:  2
Core(s) per socket:  64
Socket(s):           2
NUMA node(s):        16
Vendor ID:           AuthenticAMD
CPU family:          25
Model:               1
Model name:          AMD EPYC 7763 64-Core Processor
Stepping:            1
CPU MHz:             2594.804
BogoMIPS:            4890.73
Virtualization:      AMD-V
L1d cache:           32K
L1i cache:           32K
L2 cache:            512K
L3 cache:            32768K
NUMA node0 CPU(s):   0-7,128-135
NUMA node1 CPU(s):   8-15,136-143
NUMA node2 CPU(s):   16-23,144-151
NUMA node3 CPU(s):   24-31,152-159
NUMA node4 CPU(s):   32-39,160-167
NUMA node5 CPU(s):   40-47,168-175
NUMA node6 CPU(s):   48-55,176-183
NUMA node7 CPU(s):   56-63,184-191
NUMA node8 CPU(s):   64-71,192-199
NUMA node9 CPU(s):   72-79,200-207
NUMA node10 CPU(s):  80-87,208-215
NUMA node11 CPU(s):  88-95,216-223
NUMA node12 CPU(s):  96-103,224-231
NUMA node13 CPU(s):  104-111,232-239
NUMA node14 CPU(s):  112-119,240-247
NUMA node15 CPU(s):  120-127,248-255
..

$ numactl -H
..
node distances:
node   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15
  0:  10  11  11  11  12  12  12  12  32  32  32  32  32  32  32  32
  1:  11  10  11  11  12  12  12  12  32  32  32  32  32  32  32  32
  2:  11  11  10  11  12  12  12  12  32  32  32  32  32  32  32  32
  3:  11  11  11  10  12  12  12  12  32  32  32  32  32  32  32  32
  4:  12  12  12  12  10  11  11  11  32  32  32  32  32  32  32  32
  5:  12  12  12  12  11  10  11  11  32  32  32  32  32  32  32  32
  6:  12  12  12  12  11  11  10  11  32  32  32  32  32  32  32  32
  7:  12  12  12  12  11  11  11  10  32  32  32  32  32  32  32  32
  8:  32  32  32  32  32  32  32  32  10  11  11  11  12  12  12  12
  9:  32  32  32  32  32  32  32  32  11  10  11  11  12  12  12  12
 10:  32  32  32  32  32  32  32  32  11  11  10  11  12  12  12  12
 11:  32  32  32  32  32  32  32  32  11  11  11  10  12  12  12  12
 12:  32  32  32  32  32  32  32  32  12  12  12  12  10  11  11  11
 13:  32  32  32  32  32  32  32  32  12  12  12  12  11  10  11  11
 14:  32  32  32  32  32  32  32  32  12  12  12  12  11  11  10  11
 15:  32  32  32  32  32  32  32  32  12  12  12  12  11  11  11  10

$ cat /sys/class/net/ens5f0/device/numa_node
14

Affinity hints (127 IRQs):
Before:
331: 00000000,00000000,00000000,00000000,00010000,00000000,00000000,00000000
332: 00000000,00000000,00000000,00000000,00020000,00000000,00000000,00000000
333: 00000000,00000000,00000000,00000000,00040000,00000000,00000000,00000000
334: 00000000,00000000,00000000,00000000,00080000,00000000,00000000,00000000
335: 00000000,00000000,00000000,00000000,00100000,00000000,00000000,00000000
336: 00000000,00000000,00000000,00000000,00200000,00000000,00000000,00000000
337: 00000000,00000000,00000000,00000000,00400000,00000000,00000000,00000000
338: 00000000,00000000,00000000,00000000,00800000,00000000,00000000,00000000
339: 00010000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
340: 00020000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
341: 00040000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
342: 00080000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
343: 00100000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
344: 00200000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
345: 00400000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
346: 00800000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
347: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001
348: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000002
349: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000004
350: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000008
351: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000010
352: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000020
353: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000040
354: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000080
355: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000100
356: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000200
357: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000400
358: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000800
359: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00001000
360: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00002000
361: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00004000
362: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00008000
363: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00010000
364: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00020000
365: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00040000
366: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00080000
367: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00100000
368: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00200000
369: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00400000
370: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,00800000
371: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,01000000
372: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,02000000
373: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,04000000
374: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,08000000
375: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,10000000
376: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,20000000
377: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,40000000
378: 00000000,00000000,00000000,00000000,00000000,00000000,00000000,80000000
379: 00000000,00000000,00000000,00000000,00000000,00000000,00000001,00000000
380: 00000000,00000000,00000000,00000000,00000000,00000000,00000002,00000000
381: 00000000,00000000,00000000,00000000,00000000,00000000,00000004,00000000
382: 00000000,00000000,00000000,00000000,00000000,00000000,00000008,00000000
383: 00000000,00000000,00000000,00000000,00000000,00000000,00000010,00000000
384: 00000000,00000000,00000000,00000000,00000000,00000000,00000020,00000000
385: 00000000,00000000,00000000,00000000,00000000,00000000,00000040,00000000
386: 00000000,00000000,00000000,00000000,00000000,00000000,00000080,00000000
387: 00000000,00000000,00000000,00000000,00000000,00000000,00000100,00000000
388: 00000000,00000000,00000000,00000000,00000000,00000000,00000200,00000000
389: 00000000,00000000,00000000,00000000,00000000,00000000,00000400,00000000
390: 00000000,00000000,00000000,00000000,00000000,00000000,00000800,00000000
391: 00000000,00000000,00000000,00000000,00000000,00000000,00001000,00000000
392: 00000000,00000000,00000000,00000000,00000000,00000000,00002000,00000000
393: 00000000,00000000,00000000,00000000,00000000,00000000,00004000,00000000
394: 00000000,00000000,00000000,00000000,00000000,00000000,00008000,00000000
395: 00000000,00000000,00000000,00000000,00000000,00000000,00010000,00000000
396: 00000000,00000000,00000000,00000000,00000000,00000000,00020000,00000000
397: 00000000,00000000,00000000,00000000,00000000,00000000,00040000,00000000
398: 00000000,00000000,00000000,00000000,00000000,00000000,00080000,00000000
399: 00000000,00000000,00000000,00000000,00000000,00000000,00100000,00000000
400: 00000000,00000000,00000000,00000000,00000000,00000000,00200000,00000000
401: 00000000,00000000,00000000,00000000,00000000,00000000,00400000,00000000
402: 00000000,00000000,00000000,00000000,00000000,00000000,00800000,00000000
403: 00000000,00000000,00000000,00000000,00000000,00000000,01000000,00000000
404: 00000000,00000000,00000000,00000000,00000000,00000000,02000000,00000000
405: 00000000,00000000,00000000,00000000,00000000,00000000,04000000,00000000
406: 00000000,00000000,00000000,00000000,00000000,00000000,08000000,00000000
407: 00000000,00000000,00000000,00000000,00000000,00000000,10000000,00000000
408: 00000000,00000000,00000000,00000000,00000000,00000000,20000000,00000000
409: 00000000,00000000,00000000,00000000,00000000,00000000,40000000,00000000
410: 00000000,00000000,00000000,00000000,00000000,00000000,80000000,00000000
411: 00000000,00000000,00000000,00000000,00000000,00000001,00000000,00000000
412: 00000000,00000000,00000000,00000000,00000000,00000002,00000000,00000000
413: 00000000,00000000,00000000,00000000,00000000,00000004,00000000,00000000
414: 00000000,00000000,00000000,00000000,00000000,00000008,00000000,00000000
415: 00000000,00000000,00000000,00000000,00000000,00000010,00000000,00000000
416: 00000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000
417: 00000000,00000000,00000000,00000000,00000000,00000040,00000000,00000000
418: 00000000,00000000,00000000,00000000,00000000,00000080,00000000,00000000
419: 00000000,00000000,00000000,00000000,00000000,00000100,00000000,00000000
420: 00000000,00000000,00000000,00000000,00000000,00000200,00000000,00000000
421: 00000000,00000000,00000000,00000000,00000000,00000400,00000000,00000000
422: 00000000,00000000,00000000,00000000,00000000,00000800,00000000,00000000
423: 00000000,00000000,00000000,00000000,00000000,00001000,00000000,00000000
424: 00000000,00000000,00000000,00000000,00000000,00002000,00000000,00000000
425: 00000000,00000000,00000000,00000000,00000000,00004000,00000000,00000000
426: 00000000,00000000,00000000,00000000,00000000,00008000,00000000,00000000
427: 00000000,00000000,00000000,00000000,00000000,00010000,00000000,00000000
428: 00000000,00000000,00000000,00000000,00000000,00020000,00000000,00000000
429: 00000000,00000000,00000000,00000000,00000000,00040000,00000000,00000000
430: 00000000,00000000,00000000,00000000,00000000,00080000,00000000,00000000
431: 00000000,00000000,00000000,00000000,00000000,00100000,00000000,00000000
432: 00000000,00000000,00000000,00000000,00000000,00200000,00000000,00000000
433: 00000000,00000000,00000000,00000000,00000000,00400000,00000000,00000000
434: 00000000,00000000,00000000,00000000,00000000,00800000,00000000,00000000
435: 00000000,00000000,00000000,00000000,00000000,01000000,00000000,00000000
436: 00000000,00000000,00000000,00000000,00000000,02000000,00000000,00000000
437: 00000000,00000000,00000000,00000000,00000000,04000000,00000000,00000000
438: 00000000,00000000,00000000,00000000,00000000,08000000,00000000,00000000
439: 00000000,00000000,00000000,00000000,00000000,10000000,00000000,00000000
440: 00000000,00000000,00000000,00000000,00000000,20000000,00000000,00000000
441: 00000000,00000000,00000000,00000000,00000000,40000000,00000000,00000000
442: 00000000,00000000,00000000,00000000,00000000,80000000,00000000,00000000
443: 00000000,00000000,00000000,00000000,00000001,00000000,00000000,00000000
444: 00000000,00000000,00000000,00000000,00000002,00000000,00000000,00000000
445: 00000000,00000000,00000000,00000000,00000004,00000000,00000000,00000000
446: 00000000,00000000,00000000,00000000,00000008,00000000,00000000,00000000
447: 00000000,00000000,00000000,00000000,00000010,00000000,00000000,00000000
448: 00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000
449: 00000000,00000000,00000000,00000000,00000040,00000000,00000000,00000000
450: 00000000,00000000,00000000,00000000,00000080,00000000,00000000,00000000
451: 00000000,00000000,00000000,00000000,00000100,00000000,00000000,00000000
452: 00000000,00000000,00000000,00000000,00000200,00000000,00000000,00000000
453: 00000000,00000000,00000000,00000000,00000400,00000000,00000000,00000000
454: 00000000,00000000,00000000,00000000,00000800,00000000,00000000,00000000
455: 00000000,00000000,00000000,00000000,00001000,00000000,00000000,00000000
456: 00000000,00000000,00000000,00000000,00002000,00000000,00000000,00000000
457: 00000000,00000000,00000000,00000000,00004000,00000000,00000000,00000000

After:
331: 00000000,00000000,00000000,00000000,00010000,00000000,00000000,00000000
332: 00000000,00000000,00000000,00000000,00020000,00000000,00000000,00000000
333: 00000000,00000000,00000000,00000000,00040000,00000000,00000000,00000000
334: 00000000,00000000,00000000,00000000,00080000,00000000,00000000,00000000
335: 00000000,00000000,00000000,00000000,00100000,00000000,00000000,00000000
336: 00000000,00000000,00000000,00000000,00200000,00000000,00000000,00000000
337: 00000000,00000000,00000000,00000000,00400000,00000000,00000000,00000000
338: 00000000,00000000,00000000,00000000,00800000,00000000,00000000,00000000
339: 00010000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
340: 00020000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
341: 00040000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
342: 00080000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
343: 00100000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
344: 00200000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
345: 00400000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
346: 00800000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
347: 00000000,00000000,00000000,00000000,00000001,00000000,00000000,00000000
348: 00000000,00000000,00000000,00000000,00000002,00000000,00000000,00000000
349: 00000000,00000000,00000000,00000000,00000004,00000000,00000000,00000000
350: 00000000,00000000,00000000,00000000,00000008,00000000,00000000,00000000
351: 00000000,00000000,00000000,00000000,00000010,00000000,00000000,00000000
352: 00000000,00000000,00000000,00000000,00000020,00000000,00000000,00000000
353: 00000000,00000000,00000000,00000000,00000040,00000000,00000000,00000000
354: 00000000,00000000,00000000,00000000,00000080,00000000,00000000,00000000
355: 00000000,00000000,00000000,00000000,00000100,00000000,00000000,00000000
356: 00000000,00000000,00000000,00000000,00000200,00000000,00000000,00000000
357: 00000000,00000000,00000000,00000000,00000400,00000000,00000000,00000000
358: 00000000,00000000,00000000,00000000,00000800,00000000,00000000,00000000
359: 00000000,00000000,00000000,00000000,00001000,00000000,00000000,00000000
360: 00000000,00000000,00000000,00000000,00002000,00000000,00000000,00000000
361: 00000000,00000000,00000000,00000000,00004000,00000000,00000000,00000000
362: 00000000,00000000,00000000,00000000,00008000,00000000,00000000,00000000
363: 00000000,00000000,00000000,00000000,01000000,00000000,00000000,00000000
364: 00000000,00000000,00000000,00000000,02000000,00000000,00000000,00000000
365: 00000000,00000000,00000000,00000000,04000000,00000000,00000000,00000000
366: 00000000,00000000,00000000,00000000,08000000,00000000,00000000,00000000
367: 00000000,00000000,00000000,00000000,10000000,00000000,00000000,00000000
368: 00000000,00000000,00000000,00000000,20000000,00000000,00000000,00000000
369: 00000000,00000000,00000000,00000000,40000000,00000000,00000000,00000000
370: 00000000,00000000,00000000,00000000,80000000,00000000,00000000,00000000
371: 00000001,00000000,00000000,00000000,00000000,00000000,00000000,00000000
372: 00000002,00000000,00000000,00000000,00000000,00000000,00000000,00000000
373: 00000004,00000000,00000000,00000000,00000000,00000000,00000000,00000000
374: 00000008,00000000,00000000,00000000,00000000,00000000,00000000,00000000
375: 00000010,00000000,00000000,00000000,00000000,00000000,00000000,00000000
376: 00000020,00000000,00000000,00000000,00000000,00000000,00000000,00000000
377: 00000040,00000000,00000000,00000000,00000000,00000000,00000000,00000000
378: 00000080,00000000,00000000,00000000,00000000,00000000,00000000,00000000
379: 00000100,00000000,00000000,00000000,00000000,00000000,00000000,00000000
380: 00000200,00000000,00000000,00000000,00000000,00000000,00000000,00000000
381: 00000400,00000000,00000000,00000000,00000000,00000000,00000000,00000000
382: 00000800,00000000,00000000,00000000,00000000,00000000,00000000,00000000
383: 00001000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
384: 00002000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
385: 00004000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
386: 00008000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
387: 01000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
388: 02000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
389: 04000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
390: 08000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
391: 10000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
392: 20000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
393: 40000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
394: 80000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000
395: 00000000,00000000,00000000,00000000,00000000,00000001,00000000,00000000
396: 00000000,00000000,00000000,00000000,00000000,00000002,00000000,00000000
397: 00000000,00000000,00000000,00000000,00000000,00000004,00000000,00000000
398: 00000000,00000000,00000000,00000000,00000000,00000008,00000000,00000000
399: 00000000,00000000,00000000,00000000,00000000,00000010,00000000,00000000
400: 00000000,00000000,00000000,00000000,00000000,00000020,00000000,00000000
401: 00000000,00000000,00000000,00000000,00000000,00000040,00000000,00000000
402: 00000000,00000000,00000000,00000000,00000000,00000080,00000000,00000000
403: 00000000,00000000,00000000,00000000,00000000,00000100,00000000,00000000
404: 00000000,00000000,00000000,00000000,00000000,00000200,00000000,00000000
405: 00000000,00000000,00000000,00000000,00000000,00000400,00000000,00000000
406: 00000000,00000000,00000000,00000000,00000000,00000800,00000000,00000000
407: 00000000,00000000,00000000,00000000,00000000,00001000,00000000,00000000
408: 00000000,00000000,00000000,00000000,00000000,00002000,00000000,00000000
409: 00000000,00000000,00000000,00000000,00000000,00004000,00000000,00000000
410: 00000000,00000000,00000000,00000000,00000000,00008000,00000000,00000000
411: 00000000,00000000,00000000,00000000,00000000,00010000,00000000,00000000
412: 00000000,00000000,00000000,00000000,00000000,00020000,00000000,00000000
413: 00000000,00000000,00000000,00000000,00000000,00040000,00000000,00000000
414: 00000000,00000000,00000000,00000000,00000000,00080000,00000000,00000000
415: 00000000,00000000,00000000,00000000,00000000,00100000,00000000,00000000
416: 00000000,00000000,00000000,00000000,00000000,00200000,00000000,00000000
417: 00000000,00000000,00000000,00000000,00000000,00400000,00000000,00000000
418: 00000000,00000000,00000000,00000000,00000000,00800000,00000000,00000000
419: 00000000,00000000,00000000,00000000,00000000,01000000,00000000,00000000
420: 00000000,00000000,00000000,00000000,00000000,02000000,00000000,00000000
421: 00000000,00000000,00000000,00000000,00000000,04000000,00000000,00000000
422: 00000000,00000000,00000000,00000000,00000000,08000000,00000000,00000000
423: 00000000,00000000,00000000,00000000,00000000,10000000,00000000,00000000
424: 00000000,00000000,00000000,00000000,00000000,20000000,00000000,00000000
425: 00000000,00000000,00000000,00000000,00000000,40000000,00000000,00000000
426: 00000000,00000000,00000000,00000000,00000000,80000000,00000000,00000000
427: 00000000,00000001,00000000,00000000,00000000,00000000,00000000,00000000
428: 00000000,00000002,00000000,00000000,00000000,00000000,00000000,00000000
429: 00000000,00000004,00000000,00000000,00000000,00000000,00000000,00000000
430: 00000000,00000008,00000000,00000000,00000000,00000000,00000000,00000000
431: 00000000,00000010,00000000,00000000,00000000,00000000,00000000,00000000
432: 00000000,00000020,00000000,00000000,00000000,00000000,00000000,00000000
433: 00000000,00000040,00000000,00000000,00000000,00000000,00000000,00000000
434: 00000000,00000080,00000000,00000000,00000000,00000000,00000000,00000000
435: 00000000,00000100,00000000,00000000,00000000,00000000,00000000,00000000
436: 00000000,00000200,00000000,00000000,00000000,00000000,00000000,00000000
437: 00000000,00000400,00000000,00000000,00000000,00000000,00000000,00000000
438: 00000000,00000800,00000000,00000000,00000000,00000000,00000000,00000000
439: 00000000,00001000,00000000,00000000,00000000,00000000,00000000,00000000
440: 00000000,00002000,00000000,00000000,00000000,00000000,00000000,00000000
441: 00000000,00004000,00000000,00000000,00000000,00000000,00000000,00000000
442: 00000000,00008000,00000000,00000000,00000000,00000000,00000000,00000000
443: 00000000,00010000,00000000,00000000,00000000,00000000,00000000,00000000
444: 00000000,00020000,00000000,00000000,00000000,00000000,00000000,00000000
445: 00000000,00040000,00000000,00000000,00000000,00000000,00000000,00000000
446: 00000000,00080000,00000000,00000000,00000000,00000000,00000000,00000000
447: 00000000,00100000,00000000,00000000,00000000,00000000,00000000,00000000
448: 00000000,00200000,00000000,00000000,00000000,00000000,00000000,00000000
449: 00000000,00400000,00000000,00000000,00000000,00000000,00000000,00000000
450: 00000000,00800000,00000000,00000000,00000000,00000000,00000000,00000000
451: 00000000,01000000,00000000,00000000,00000000,00000000,00000000,00000000
452: 00000000,02000000,00000000,00000000,00000000,00000000,00000000,00000000
453: 00000000,04000000,00000000,00000000,00000000,00000000,00000000,00000000
454: 00000000,08000000,00000000,00000000,00000000,00000000,00000000,00000000
455: 00000000,10000000,00000000,00000000,00000000,00000000,00000000,00000000
456: 00000000,20000000,00000000,00000000,00000000,00000000,00000000,00000000
457: 00000000,40000000,00000000,00000000,00000000,00000000,00000000,00000000

Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
[Tweaked API use]
Suggested-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 9b44557e7271b..66ec7932f0084 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -817,9 +817,12 @@ static void comp_irqs_release(struct mlx5_core_dev *dev)
 static int comp_irqs_request(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = dev->priv.eq_table;
+	const struct cpumask *prev = cpu_none_mask;
+	const struct cpumask *mask;
 	int ncomp_eqs = table->num_comp_eqs;
 	u16 *cpus;
 	int ret;
+	int cpu;
 	int i;
 
 	ncomp_eqs = table->num_comp_eqs;
@@ -838,8 +841,19 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
 		ret = -ENOMEM;
 		goto free_irqs;
 	}
-	for (i = 0; i < ncomp_eqs; i++)
-		cpus[i] = cpumask_local_spread(i, dev->priv.numa_node);
+
+	i = 0;
+	rcu_read_lock();
+	for_each_numa_hop_mask(mask, dev->priv.numa_node) {
+		for_each_cpu_andnot(cpu, mask, prev) {
+			cpus[i] = cpu;
+			if (++i == ncomp_eqs)
+				goto spread_done;
+		}
+		prev = mask;
+	}
+spread_done:
+	rcu_read_unlock();
 	ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
 	kfree(cpus);
 	if (ret < 0)

From 2ac4980c57f54db7c5b416f7946d2921fc16d9d2 Mon Sep 17 00:00:00 2001
From: Yury Norov <yury.norov@gmail.com>
Date: Fri, 20 Jan 2023 20:24:36 -0800
Subject: [PATCH 9/9] lib/cpumask: update comment for cpumask_local_spread()

Now that we have an iterator-based alternative for a very common case
of using cpumask_local_spread for all cpus in a row, it's worth to
mention that in comment to cpumask_local_spread().

Signed-off-by: Yury Norov <yury.norov@gmail.com>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 lib/cpumask.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/lib/cpumask.c b/lib/cpumask.c
index 10aa15715c0d2..e7258836b60b7 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -114,11 +114,29 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
  * @i: index number
  * @node: local numa_node
  *
- * This function selects an online CPU according to a numa aware policy;
- * local cpus are returned first, followed by non-local ones, then it
- * wraps around.
+ * Returns online CPU according to a numa aware policy; local cpus are returned
+ * first, followed by non-local ones, then it wraps around.
  *
- * It's not very efficient, but useful for setup.
+ * For those who wants to enumerate all CPUs based on their NUMA distances,
+ * i.e. call this function in a loop, like:
+ *
+ * for (i = 0; i < num_online_cpus(); i++) {
+ *	cpu = cpumask_local_spread(i, node);
+ *	do_something(cpu);
+ * }
+ *
+ * There's a better alternative based on for_each()-like iterators:
+ *
+ *	for_each_numa_hop_mask(mask, node) {
+ *		for_each_cpu_andnot(cpu, mask, prev)
+ *			do_something(cpu);
+ *		prev = mask;
+ *	}
+ *
+ * It's simpler and more verbose than above. Complexity of iterator-based
+ * enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while
+ * cpumask_local_spread() when called for each cpu is
+ * O(sched_domains_numa_levels * nr_cpu_ids * log(nr_cpu_ids)).
  */
 unsigned int cpumask_local_spread(unsigned int i, int node)
 {