From 8222d5910dae08213b6d9d4bc9a7f8502855e624 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 9 Feb 2023 09:09:52 +0800
Subject: [PATCH 001/411] xfrm: Zero padding when dumping algos and encap

When copying data to user-space we should ensure that only valid
data is copied over.  Padding in structures may be filled with
random (possibly sensitve) data and should never be given directly
to user-space.

This patch fixes the copying of xfrm algorithms and the encap
template in xfrm_user so that padding is zeroed.

Reported-by: syzbot+fa5414772d5c445dac3c@syzkaller.appspotmail.com
Reported-by: Hyunwoo Kim <v4bel@theori.io>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_user.c | 45 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index cf5172d4ce68c..103af2b3e986f 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1012,7 +1012,9 @@ static int copy_to_user_aead(struct xfrm_algo_aead *aead, struct sk_buff *skb)
 		return -EMSGSIZE;
 
 	ap = nla_data(nla);
-	memcpy(ap, aead, sizeof(*aead));
+	strscpy_pad(ap->alg_name, aead->alg_name, sizeof(ap->alg_name));
+	ap->alg_key_len = aead->alg_key_len;
+	ap->alg_icv_len = aead->alg_icv_len;
 
 	if (redact_secret && aead->alg_key_len)
 		memset(ap->alg_key, 0, (aead->alg_key_len + 7) / 8);
@@ -1032,7 +1034,8 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
 		return -EMSGSIZE;
 
 	ap = nla_data(nla);
-	memcpy(ap, ealg, sizeof(*ealg));
+	strscpy_pad(ap->alg_name, ealg->alg_name, sizeof(ap->alg_name));
+	ap->alg_key_len = ealg->alg_key_len;
 
 	if (redact_secret && ealg->alg_key_len)
 		memset(ap->alg_key, 0, (ealg->alg_key_len + 7) / 8);
@@ -1043,6 +1046,40 @@ static int copy_to_user_ealg(struct xfrm_algo *ealg, struct sk_buff *skb)
 	return 0;
 }
 
+static int copy_to_user_calg(struct xfrm_algo *calg, struct sk_buff *skb)
+{
+	struct nlattr *nla = nla_reserve(skb, XFRMA_ALG_COMP, sizeof(*calg));
+	struct xfrm_algo *ap;
+
+	if (!nla)
+		return -EMSGSIZE;
+
+	ap = nla_data(nla);
+	strscpy_pad(ap->alg_name, calg->alg_name, sizeof(ap->alg_name));
+	ap->alg_key_len = 0;
+
+	return 0;
+}
+
+static int copy_to_user_encap(struct xfrm_encap_tmpl *ep, struct sk_buff *skb)
+{
+	struct nlattr *nla = nla_reserve(skb, XFRMA_ENCAP, sizeof(*ep));
+	struct xfrm_encap_tmpl *uep;
+
+	if (!nla)
+		return -EMSGSIZE;
+
+	uep = nla_data(nla);
+	memset(uep, 0, sizeof(*uep));
+
+	uep->encap_type = ep->encap_type;
+	uep->encap_sport = ep->encap_sport;
+	uep->encap_dport = ep->encap_dport;
+	uep->encap_oa = ep->encap_oa;
+
+	return 0;
+}
+
 static int xfrm_smark_put(struct sk_buff *skb, struct xfrm_mark *m)
 {
 	int ret = 0;
@@ -1098,12 +1135,12 @@ static int copy_to_user_state_extra(struct xfrm_state *x,
 			goto out;
 	}
 	if (x->calg) {
-		ret = nla_put(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
+		ret = copy_to_user_calg(x->calg, skb);
 		if (ret)
 			goto out;
 	}
 	if (x->encap) {
-		ret = nla_put(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
+		ret = copy_to_user_encap(x->encap, skb);
 		if (ret)
 			goto out;
 	}

From 5f6e839ebc951c50f1ca06791d016c256f0285a9 Mon Sep 17 00:00:00 2001
From: Krister Johansen <kjlx@templeofstupid.com>
Date: Wed, 22 Feb 2023 09:54:42 -0800
Subject: [PATCH 002/411] xen: update arch/x86/include/asm/xen/cpuid.h

Update arch/x86/include/asm/xen/cpuid.h from the Xen tree to get newest
definitions.  This picks up some TSC mode definitions and comment
formatting changes.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/94b9046dd0db3794f0633d134b7108508957758d.1677038165.git.kjlx@templeofstupid.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/include/asm/xen/cpuid.h | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/xen/cpuid.h b/arch/x86/include/asm/xen/cpuid.h
index 6daa9b0c8d114..a3c29b1496c83 100644
--- a/arch/x86/include/asm/xen/cpuid.h
+++ b/arch/x86/include/asm/xen/cpuid.h
@@ -89,11 +89,21 @@
  * Sub-leaf 2: EAX: host tsc frequency in kHz
  */
 
+#define XEN_CPUID_TSC_EMULATED               (1u << 0)
+#define XEN_CPUID_HOST_TSC_RELIABLE          (1u << 1)
+#define XEN_CPUID_RDTSCP_INSTR_AVAIL         (1u << 2)
+
+#define XEN_CPUID_TSC_MODE_DEFAULT           (0)
+#define XEN_CPUID_TSC_MODE_ALWAYS_EMULATE    (1u)
+#define XEN_CPUID_TSC_MODE_NEVER_EMULATE     (2u)
+#define XEN_CPUID_TSC_MODE_PVRDTSCP          (3u)
+
 /*
  * Leaf 5 (0x40000x04)
  * HVM-specific features
  * Sub-leaf 0: EAX: Features
  * Sub-leaf 0: EBX: vcpu id (iff EAX has XEN_HVM_CPUID_VCPU_ID_PRESENT flag)
+ * Sub-leaf 0: ECX: domain id (iff EAX has XEN_HVM_CPUID_DOMID_PRESENT flag)
  */
 #define XEN_HVM_CPUID_APIC_ACCESS_VIRT (1u << 0) /* Virtualized APIC registers */
 #define XEN_HVM_CPUID_X2APIC_VIRT      (1u << 1) /* Virtualized x2APIC accesses */
@@ -102,12 +112,16 @@
 #define XEN_HVM_CPUID_VCPU_ID_PRESENT  (1u << 3) /* vcpu id is present in EBX */
 #define XEN_HVM_CPUID_DOMID_PRESENT    (1u << 4) /* domid is present in ECX */
 /*
- * Bits 55:49 from the IO-APIC RTE and bits 11:5 from the MSI address can be
- * used to store high bits for the Destination ID. This expands the Destination
- * ID field from 8 to 15 bits, allowing to target APIC IDs up 32768.
+ * With interrupt format set to 0 (non-remappable) bits 55:49 from the
+ * IO-APIC RTE and bits 11:5 from the MSI address can be used to store
+ * high bits for the Destination ID. This expands the Destination ID
+ * field from 8 to 15 bits, allowing to target APIC IDs up 32768.
  */
 #define XEN_HVM_CPUID_EXT_DEST_ID      (1u << 5)
-/* Per-vCPU event channel upcalls */
+/*
+ * Per-vCPU event channel upcalls work correctly with physical IRQs
+ * bound to event channels.
+ */
 #define XEN_HVM_CPUID_UPCALL_VECTOR    (1u << 6)
 
 /*

From 99a7bcafbd0d04555074554573019096a8c10450 Mon Sep 17 00:00:00 2001
From: Krister Johansen <kjlx@templeofstupid.com>
Date: Wed, 22 Feb 2023 09:54:56 -0800
Subject: [PATCH 003/411] x86/xen/time: cleanup xen_tsc_safe_clocksource

Modifies xen_tsc_safe_clocksource() to use newly defined constants from
arch/x86/include/asm/xen/cpuid.h.  This replaces a numeric value with
XEN_CPUID_TSC_MODE_NEVER_EMULATE, and deletes a comment that is now self
explanatory.

There should be no change in the function's behavior.

Signed-off-by: Krister Johansen <kjlx@templeofstupid.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/a69ca370fecf85d312d2db633d9438ace2af6e5b.1677038165.git.kjlx@templeofstupid.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/time.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c
index 95140609c8a80..94056013a2a47 100644
--- a/arch/x86/xen/time.c
+++ b/arch/x86/xen/time.c
@@ -20,6 +20,7 @@
 #include <asm/pvclock.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#include <asm/xen/cpuid.h>
 
 #include <xen/events.h>
 #include <xen/features.h>
@@ -495,11 +496,7 @@ static int __init xen_tsc_safe_clocksource(void)
 	/* Leaf 4, sub-leaf 0 (0x40000x03) */
 	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
 
-	/* tsc_mode = no_emulate (2) */
-	if (ebx != 2)
-		return 0;
-
-	return 1;
+	return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
 }
 
 static void __init xen_time_init(void)

From c276a706ea1f51cf9723ed8484feceaf961b8f89 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 21 Feb 2023 13:54:00 +0800
Subject: [PATCH 004/411] xfrm: Allow transport-mode states with AF_UNSPEC
 selector

xfrm state selectors are matched against the inner-most flow
which can be of any address family.  Therefore middle states
in nested configurations need to carry a wildcard selector in
order to work at all.

However, this is currently forbidden for transport-mode states.

Fix this by removing the unnecessary check.

Fixes: 13996378e658 ("[IPSEC]: Rename mode to outer_mode and add inner_mode")
Reported-by: David George <David.George@sophos.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_state.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 00afe831c71c4..f238048bf786e 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -2815,11 +2815,6 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload,
 			goto error;
 		}
 
-		if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
-			NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector");
-			goto error;
-		}
-
 		x->inner_mode = *inner_mode;
 
 		if (x->props.family == AF_INET)

From a54bace095d00e9222161495649688bc43de4dde Mon Sep 17 00:00:00 2001
From: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Date: Fri, 24 Feb 2023 17:34:50 +0200
Subject: [PATCH 005/411] drm/virtio: Pass correct device to
 dma_sync_sgtable_for_device()

The "vdev->dev.parent" should be used instead of "vdev->dev" as a device
for which to perform the DMA operation in both
virtio_gpu_cmd_transfer_to_host_2d(3d).

Because the virtio-gpu device "vdev->dev" doesn't really have DMA OPS
assigned to it, but parent (virtio-pci or virtio-mmio) device
"vdev->dev.parent" has. The more, the sgtable in question the code is
trying to sync here was mapped for the parent device (by using its DMA OPS)
previously at:
virtio_gpu_object_shmem_init()->drm_gem_shmem_get_pages_sgt()->
dma_map_sgtable(), so should be synced here for the same parent device.

Fixes: b5c9ed70d1a9 ("drm/virtio: Improve DMA API usage for shmem BOs")
Signed-off-by: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230224153450.526222-1-olekstysh@gmail.com
---
 drivers/gpu/drm/virtio/virtgpu_vq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
index 9ff8660b50ade..208e9434cb28d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_vq.c
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -597,7 +597,7 @@ void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
 	bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev);
 
 	if (virtio_gpu_is_shmem(bo) && use_dma_api)
-		dma_sync_sgtable_for_device(&vgdev->vdev->dev,
+		dma_sync_sgtable_for_device(vgdev->vdev->dev.parent,
 					    bo->base.sgt, DMA_TO_DEVICE);
 
 	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
@@ -1019,7 +1019,7 @@ void virtio_gpu_cmd_transfer_to_host_3d(struct virtio_gpu_device *vgdev,
 	bool use_dma_api = !virtio_has_dma_quirk(vgdev->vdev);
 
 	if (virtio_gpu_is_shmem(bo) && use_dma_api)
-		dma_sync_sgtable_for_device(&vgdev->vdev->dev,
+		dma_sync_sgtable_for_device(vgdev->vdev->dev.parent,
 					    bo->base.sgt, DMA_TO_DEVICE);
 
 	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));

From ee9adb7a45516cfa536ca92253d7ae59d56db9e4 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Date: Mon, 9 Jan 2023 00:13:11 +0300
Subject: [PATCH 006/411] drm/shmem-helper: Remove another errant put in error
 path

drm_gem_shmem_mmap() doesn't own reference in error code path, resulting
in the dma-buf shmem GEM object getting prematurely freed leading to a
later use-after-free.

Fixes: f49a51bfdc8e ("drm/shme-helpers: Fix dma_buf_mmap forwarding bug")
Cc: stable@vger.kernel.org
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Reviewed-by: Rob Clark <robdclark@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230108211311.3950107-1-dmitry.osipenko@collabora.com
---
 drivers/gpu/drm/drm_gem_shmem_helper.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index b602cd72a1205..1a681de9a6eab 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -622,11 +622,14 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
 	int ret;
 
 	if (obj->import_attach) {
-		/* Drop the reference drm_gem_mmap_obj() acquired.*/
-		drm_gem_object_put(obj);
 		vma->vm_private_data = NULL;
+		ret = dma_buf_mmap(obj->dma_buf, vma, 0);
+
+		/* Drop the reference drm_gem_mmap_obj() acquired.*/
+		if (!ret)
+			drm_gem_object_put(obj);
 
-		return dma_buf_mmap(obj->dma_buf, vma, 0);
+		return ret;
 	}
 
 	ret = drm_gem_shmem_get_pages(shmem);

From 9630b585b607bd26f505d34620b14d75b9a5af7d Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Date: Fri, 4 Nov 2022 19:04:59 +0300
Subject: [PATCH 007/411] drm/msm/gem: Prevent blocking within shrinker loop

Consider this scenario:

1. APP1 continuously creates lots of small GEMs
2. APP2 triggers `drop_caches`
3. Shrinker starts to evict APP1 GEMs, while APP1 produces new purgeable
   GEMs
4. msm_gem_shrinker_scan() returns non-zero number of freed pages
   and causes shrinker to try shrink more
5. msm_gem_shrinker_scan() returns non-zero number of freed pages again,
   goto 4
6. The APP2 is blocked in `drop_caches` until APP1 stops producing
   purgeable GEMs

To prevent this blocking scenario, check number of remaining pages
that GPU shrinker couldn't release due to a GEM locking contention
or shrinking rejection. If there are no remaining pages left to shrink,
then there is no need to free up more pages and shrinker may break out
from the loop.

This problem was found during shrinker/madvise IOCTL testing of
virtio-gpu driver. The MSM driver is affected in the same way.

Reviewed-by: Rob Clark <robdclark@gmail.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: b352ba54a820 ("drm/msm/gem: Convert to using drm_gem_lru")
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Link: https://lore.kernel.org/all/20230108210445.3948344-2-dmitry.osipenko@collabora.com/
---
 drivers/gpu/drm/drm_gem.c              |  9 +++++++--
 drivers/gpu/drm/msm/msm_gem_shrinker.c | 11 +++++++++--
 include/drm/drm_gem.h                  |  4 +++-
 3 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index b8db675e7fb5e..b0246a8480068 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -1375,10 +1375,13 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
  *
  * @lru: The LRU to scan
  * @nr_to_scan: The number of pages to try to reclaim
+ * @remaining: The number of pages left to reclaim, should be initialized by caller
  * @shrink: Callback to try to shrink/reclaim the object.
  */
 unsigned long
-drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
+drm_gem_lru_scan(struct drm_gem_lru *lru,
+		 unsigned int nr_to_scan,
+		 unsigned long *remaining,
 		 bool (*shrink)(struct drm_gem_object *obj))
 {
 	struct drm_gem_lru still_in_lru;
@@ -1417,8 +1420,10 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
 		 * hit shrinker in response to trying to get backing pages
 		 * for this obj (ie. while it's lock is already held)
 		 */
-		if (!dma_resv_trylock(obj->resv))
+		if (!dma_resv_trylock(obj->resv)) {
+			*remaining += obj->size >> PAGE_SHIFT;
 			goto tail;
+		}
 
 		if (shrink(obj)) {
 			freed += obj->size >> PAGE_SHIFT;
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 051bdbc093cf9..f38296ad87434 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -107,6 +107,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 		bool (*shrink)(struct drm_gem_object *obj);
 		bool cond;
 		unsigned long freed;
+		unsigned long remaining;
 	} stages[] = {
 		/* Stages of progressively more aggressive/expensive reclaim: */
 		{ &priv->lru.dontneed, purge,        true },
@@ -116,14 +117,18 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 	};
 	long nr = sc->nr_to_scan;
 	unsigned long freed = 0;
+	unsigned long remaining = 0;
 
 	for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) {
 		if (!stages[i].cond)
 			continue;
 		stages[i].freed =
-			drm_gem_lru_scan(stages[i].lru, nr, stages[i].shrink);
+			drm_gem_lru_scan(stages[i].lru, nr,
+					&stages[i].remaining,
+					 stages[i].shrink);
 		nr -= stages[i].freed;
 		freed += stages[i].freed;
+		remaining += stages[i].remaining;
 	}
 
 	if (freed) {
@@ -132,7 +137,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 				     stages[3].freed);
 	}
 
-	return (freed > 0) ? freed : SHRINK_STOP;
+	return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP;
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -182,10 +187,12 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
 		NULL,
 	};
 	unsigned idx, unmapped = 0;
+	unsigned long remaining = 0;
 
 	for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) {
 		unmapped += drm_gem_lru_scan(lrus[idx],
 					     vmap_shrink_limit - unmapped,
+					     &remaining,
 					     vmap_shrink);
 	}
 
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index a17c2f903f81e..b46ade8124436 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -475,7 +475,9 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
 void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock);
 void drm_gem_lru_remove(struct drm_gem_object *obj);
 void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj);
-unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
+unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru,
+			       unsigned int nr_to_scan,
+			       unsigned long *remaining,
 			       bool (*shrink)(struct drm_gem_object *obj));
 
 #endif /* __DRM_GEM_H__ */

From ba3be66f11c3c49afaa9f49b99e21d88756229ef Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Date: Thu, 17 Nov 2022 04:40:38 +0300
Subject: [PATCH 008/411] drm/panfrost: Don't sync rpm suspension after mmu
 flushing

Lockdep warns about potential circular locking dependency of devfreq
with the fs_reclaim caused by immediate device suspension when mapping is
released by shrinker. Fix it by doing the suspension asynchronously.

Reviewed-by: Steven Price <steven.price@arm.com>
Fixes: ec7eba47da86 ("drm/panfrost: Rework page table flushing and runtime PM interaction")
Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Link: https://lore.kernel.org/all/20230108210445.3948344-3-dmitry.osipenko@collabora.com/
---
 drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index 4e83a1891f3ed..666a5e53fe193 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -282,7 +282,7 @@ static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
 	if (pm_runtime_active(pfdev->dev))
 		mmu_hw_do_operation(pfdev, mmu, iova, size, AS_COMMAND_FLUSH_PT);
 
-	pm_runtime_put_sync_autosuspend(pfdev->dev);
+	pm_runtime_put_autosuspend(pfdev->dev);
 }
 
 static int mmu_map_sg(struct panfrost_device *pfdev, struct panfrost_mmu *mmu,

From 77bc762451c2dc72bdbea07b857c916c9e7f4952 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Mon, 27 Feb 2023 13:33:30 +0300
Subject: [PATCH 009/411] fbdev: chipsfb: Fix error codes in chipsfb_pci_init()

The error codes are not set on these error paths.

Fixes: 145eed48de27 ("fbdev: Remove conflicting devices on PCI bus")
Signed-off-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/Y/yG+sm2mhdJeTZW@kili
---
 drivers/video/fbdev/chipsfb.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/chipsfb.c b/drivers/video/fbdev/chipsfb.c
index cc37ec3f8fc1f..7799d52a651f3 100644
--- a/drivers/video/fbdev/chipsfb.c
+++ b/drivers/video/fbdev/chipsfb.c
@@ -358,16 +358,21 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 
-	if (pci_enable_device(dp) < 0) {
+	rc = pci_enable_device(dp);
+	if (rc < 0) {
 		dev_err(&dp->dev, "Cannot enable PCI device\n");
 		goto err_out;
 	}
 
-	if ((dp->resource[0].flags & IORESOURCE_MEM) == 0)
+	if ((dp->resource[0].flags & IORESOURCE_MEM) == 0) {
+		rc = -ENODEV;
 		goto err_disable;
+	}
 	addr = pci_resource_start(dp, 0);
-	if (addr == 0)
+	if (addr == 0) {
+		rc = -ENODEV;
 		goto err_disable;
+	}
 
 	p = framebuffer_alloc(0, &dp->dev);
 	if (p == NULL) {
@@ -417,7 +422,8 @@ static int chipsfb_pci_init(struct pci_dev *dp, const struct pci_device_id *ent)
 
 	init_chips(p, addr);
 
-	if (register_framebuffer(p) < 0) {
+	rc = register_framebuffer(p);
+	if (rc < 0) {
 		dev_err(&dp->dev,"C&T 65550 framebuffer failed to register\n");
 		goto err_unmap;
 	}

From 4d8457fe0eb9c80ff7795cf8a30962128b71d853 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Thu, 2 Mar 2023 08:47:04 +0100
Subject: [PATCH 010/411] drm/edid: fix info leak when failing to get panel id

Make sure to clear the transfer buffer before fetching the EDID to
avoid leaking slab data to the logs on errors that leave the buffer
unchanged.

Fixes: 69c7717c20cc ("drm/edid: Dump the EDID when drm_edid_get_panel_id() has an error")
Cc: stable@vger.kernel.org	# 6.2
Cc: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Jani Nikula <jani.nikula@intel.com>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230302074704.11371-1-johan+linaro@kernel.org
---
 drivers/gpu/drm/drm_edid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 3841aba17abdc..8707fe72a0283 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -2797,7 +2797,7 @@ u32 drm_edid_get_panel_id(struct i2c_adapter *adapter)
 	 * the EDID then we'll just return 0.
 	 */
 
-	base_block = kmalloc(EDID_LENGTH, GFP_KERNEL);
+	base_block = kzalloc(EDID_LENGTH, GFP_KERNEL);
 	if (!base_block)
 		return 0;
 

From 5f8d1e3b6f9b5971f9c06d5846ce00c49e3a8d94 Mon Sep 17 00:00:00 2001
From: Tony O'Brien <tony.obrien@alliedtelesis.co.nz>
Date: Wed, 22 Feb 2023 13:52:27 +1300
Subject: [PATCH 011/411] hwmon: (adt7475) Display smoothing attributes in
 correct order

Throughout the ADT7475 driver, attributes relating to the temperature
sensors are displayed in the order Remote 1, Local, Remote 2.  Make
temp_st_show() conform to this expectation so that values set by
temp_st_store() can be displayed using the correct attribute.

Fixes: 8f05bcc33e74 ("hwmon: (adt7475) temperature smoothing")
Signed-off-by: Tony O'Brien <tony.obrien@alliedtelesis.co.nz>
Link: https://lore.kernel.org/r/20230222005228.158661-2-tony.obrien@alliedtelesis.co.nz
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7475.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 51b3d16c32233..77222c35a38ec 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -556,11 +556,11 @@ static ssize_t temp_st_show(struct device *dev, struct device_attribute *attr,
 		val = data->enh_acoustics[0] & 0xf;
 		break;
 	case 1:
-		val = (data->enh_acoustics[1] >> 4) & 0xf;
+		val = data->enh_acoustics[1] & 0xf;
 		break;
 	case 2:
 	default:
-		val = data->enh_acoustics[1] & 0xf;
+		val = (data->enh_acoustics[1] >> 4) & 0xf;
 		break;
 	}
 

From 48e8186870d9d0902e712d601ccb7098cb220688 Mon Sep 17 00:00:00 2001
From: Tony O'Brien <tony.obrien@alliedtelesis.co.nz>
Date: Wed, 22 Feb 2023 13:52:28 +1300
Subject: [PATCH 012/411] hwmon: (adt7475) Fix masking of hysteresis registers

The wrong bits are masked in the hysteresis register; indices 0 and 2
should zero bits [7:4] and preserve bits [3:0], and index 1 should zero
bits [3:0] and preserve bits [7:4].

Fixes: 1c301fc5394f ("hwmon: Add a driver for the ADT7475 hardware monitoring chip")
Signed-off-by: Tony O'Brien <tony.obrien@alliedtelesis.co.nz>
Link: https://lore.kernel.org/r/20230222005228.158661-3-tony.obrien@alliedtelesis.co.nz
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/adt7475.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c
index 77222c35a38ec..6e4c92b500b8e 100644
--- a/drivers/hwmon/adt7475.c
+++ b/drivers/hwmon/adt7475.c
@@ -488,10 +488,10 @@ static ssize_t temp_store(struct device *dev, struct device_attribute *attr,
 		val = (temp - val) / 1000;
 
 		if (sattr->index != 1) {
-			data->temp[HYSTERSIS][sattr->index] &= 0xF0;
+			data->temp[HYSTERSIS][sattr->index] &= 0x0F;
 			data->temp[HYSTERSIS][sattr->index] |= (val & 0xF) << 4;
 		} else {
-			data->temp[HYSTERSIS][sattr->index] &= 0x0F;
+			data->temp[HYSTERSIS][sattr->index] &= 0xF0;
 			data->temp[HYSTERSIS][sattr->index] |= (val & 0xF);
 		}
 

From 03d0f97fdb45c99cf6f808832db8bd5534e22374 Mon Sep 17 00:00:00 2001
From: Luca Ceresoli <luca.ceresoli@bootlin.com>
Date: Fri, 3 Mar 2023 10:34:10 +0100
Subject: [PATCH 013/411] ASoC: clarify that SND_SOC_IMX_SGTL5000 is the old
 driver

Both SND_SOC_IMX_SGTL5000 and SND_SOC_FSL_ASOC_CARD implement the
fsl,imx-audio-sgtl5000 compatible string, which is confusing. It took a
little research to find out that the latter is much newer and it is
supposed to be the preferred choice since several years.

Add a clarification note to avoid wasting time for future readers.

Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
Link: https://lore.kernel.org/r/20230303093410.357621-1-luca.ceresoli@bootlin.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/fsl/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig
index 614eceda6b9e3..33b67db8794e3 100644
--- a/sound/soc/fsl/Kconfig
+++ b/sound/soc/fsl/Kconfig
@@ -294,6 +294,10 @@ config SND_SOC_IMX_SGTL5000
 	  Say Y if you want to add support for SoC audio on an i.MX board with
 	  a sgtl5000 codec.
 
+	  Note that this is an old driver. Consider enabling
+	  SND_SOC_FSL_ASOC_CARD and SND_SOC_SGTL5000 to use the newer
+	  driver.
+
 config SND_SOC_IMX_SPDIF
 	tristate "SoC Audio support for i.MX boards with S/PDIF"
 	select SND_SOC_IMX_PCM_DMA

From 65882134bc622a1e57bd5928ac588855ea2e3ddd Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 2 Mar 2023 13:29:08 +0100
Subject: [PATCH 014/411] ASoC: qcom: q6prm: fix incorrect clk_root passed to
 ADSP

The second to last argument is clk_root (root of the clock), however the
code called q6prm_request_lpass_clock() with clk_attr instead
(copy-paste error).  This effectively was passing value of 1 as root
clock which worked on some of the SoCs (e.g. SM8450) but fails on
others, depending on the ADSP.  For example on SM8550 this "1" as root
clock is not accepted and results in errors coming from ADSP.

Fixes: 2f20640491ed ("ASoC: qdsp6: qdsp6: q6prm: handle clk disable correctly")
Cc: <stable@vger.kernel.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20230302122908.221398-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/qcom/qdsp6/q6prm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/qcom/qdsp6/q6prm.c b/sound/soc/qcom/qdsp6/q6prm.c
index 3aa63aac4a68e..81554d2026589 100644
--- a/sound/soc/qcom/qdsp6/q6prm.c
+++ b/sound/soc/qcom/qdsp6/q6prm.c
@@ -184,9 +184,9 @@ int q6prm_set_lpass_clock(struct device *dev, int clk_id, int clk_attr, int clk_
 			  unsigned int freq)
 {
 	if (freq)
-		return q6prm_request_lpass_clock(dev, clk_id, clk_attr, clk_attr, freq);
+		return q6prm_request_lpass_clock(dev, clk_id, clk_attr, clk_root, freq);
 
-	return q6prm_release_lpass_clock(dev, clk_id, clk_attr, clk_attr, freq);
+	return q6prm_release_lpass_clock(dev, clk_id, clk_attr, clk_root, freq);
 }
 EXPORT_SYMBOL_GPL(q6prm_set_lpass_clock);
 

From e5e7e398f6bb7918dab0612eb6991f7bae95520d Mon Sep 17 00:00:00 2001
From: Ravulapati Vishnu Vardhan Rao <quic_visr@quicinc.com>
Date: Sat, 4 Mar 2023 13:37:02 +0530
Subject: [PATCH 015/411] ASoC: codecs: tx-macro: Fix for KASAN:
 slab-out-of-bounds

When we run syzkaller we get below Out of Bound.
    "KASAN: slab-out-of-bounds Read in regcache_flat_read"

    Below is the backtrace of the issue:

    dump_backtrace+0x0/0x4c8
    show_stack+0x34/0x44
    dump_stack_lvl+0xd8/0x118
    print_address_description+0x30/0x2d8
    kasan_report+0x158/0x198
    __asan_report_load4_noabort+0x44/0x50
    regcache_flat_read+0x10c/0x110
    regcache_read+0xf4/0x180
    _regmap_read+0xc4/0x278
    _regmap_update_bits+0x130/0x290
    regmap_update_bits_base+0xc0/0x15c
    snd_soc_component_update_bits+0xa8/0x22c
    snd_soc_component_write_field+0x68/0xd4
    tx_macro_digital_mute+0xec/0x140

    Actually There is no need to have decimator with 32 bits.
    By limiting the variable with short type u8 issue is resolved.

Signed-off-by: Ravulapati Vishnu Vardhan Rao <quic_visr@quicinc.com>
Link: https://lore.kernel.org/r/20230304080702.609-1-quic_visr@quicinc.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/lpass-tx-macro.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/sound/soc/codecs/lpass-tx-macro.c b/sound/soc/codecs/lpass-tx-macro.c
index bf27bdd5be206..473d3cd395548 100644
--- a/sound/soc/codecs/lpass-tx-macro.c
+++ b/sound/soc/codecs/lpass-tx-macro.c
@@ -242,7 +242,7 @@ enum {
 
 struct tx_mute_work {
 	struct tx_macro *tx;
-	u32 decimator;
+	u8 decimator;
 	struct delayed_work dwork;
 };
 
@@ -635,7 +635,7 @@ static int tx_macro_mclk_enable(struct tx_macro *tx,
 	return 0;
 }
 
-static bool is_amic_enabled(struct snd_soc_component *component, int decimator)
+static bool is_amic_enabled(struct snd_soc_component *component, u8 decimator)
 {
 	u16 adc_mux_reg, adc_reg, adc_n;
 
@@ -849,7 +849,7 @@ static int tx_macro_enable_dec(struct snd_soc_dapm_widget *w,
 			       struct snd_kcontrol *kcontrol, int event)
 {
 	struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
-	unsigned int decimator;
+	u8 decimator;
 	u16 tx_vol_ctl_reg, dec_cfg_reg, hpf_gate_reg, tx_gain_ctl_reg;
 	u8 hpf_cut_off_freq;
 	int hpf_delay = TX_MACRO_DMIC_HPF_DELAY_MS;
@@ -1064,7 +1064,8 @@ static int tx_macro_hw_params(struct snd_pcm_substream *substream,
 			      struct snd_soc_dai *dai)
 {
 	struct snd_soc_component *component = dai->component;
-	u32 decimator, sample_rate;
+	u32 sample_rate;
+	u8 decimator;
 	int tx_fs_rate;
 	struct tx_macro *tx = snd_soc_component_get_drvdata(component);
 
@@ -1128,7 +1129,7 @@ static int tx_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream)
 {
 	struct snd_soc_component *component = dai->component;
 	struct tx_macro *tx = snd_soc_component_get_drvdata(component);
-	u16 decimator;
+	u8 decimator;
 
 	/* active decimator not set yet */
 	if (tx->active_decimator[dai->id] == -1)

From d0dc41119905f740e8d5594adce277f7c0de8c92 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Wed, 16 Nov 2022 11:11:36 +0800
Subject: [PATCH 016/411] cifs: Move the in_send statistic to __smb_send_rqst()

When send SMB_COM_NT_CANCEL and RFC1002_SESSION_REQUEST, the
in_send statistic was lost.

Let's move the in_send statistic to the send function to avoid
this scenario.

Fixes: 7ee1af765dfa ("[CIFS]")
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/transport.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b42050c68e6c9..24bdd5f4d3bcc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -278,7 +278,7 @@ static int
 __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 		struct smb_rqst *rqst)
 {
-	int rc = 0;
+	int rc;
 	struct kvec *iov;
 	int n_vec;
 	unsigned int send_length = 0;
@@ -289,6 +289,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 	struct msghdr smb_msg = {};
 	__be32 rfc1002_marker;
 
+	cifs_in_send_inc(server);
 	if (cifs_rdma_enabled(server)) {
 		/* return -EAGAIN when connecting or reconnecting */
 		rc = -EAGAIN;
@@ -297,14 +298,17 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 		goto smbd_done;
 	}
 
+	rc = -EAGAIN;
 	if (ssocket == NULL)
-		return -EAGAIN;
+		goto out;
 
+	rc = -ERESTARTSYS;
 	if (fatal_signal_pending(current)) {
 		cifs_dbg(FYI, "signal pending before send request\n");
-		return -ERESTARTSYS;
+		goto out;
 	}
 
+	rc = 0;
 	/* cork the socket */
 	tcp_sock_set_cork(ssocket->sk, true);
 
@@ -407,7 +411,8 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 			 rc);
 	else if (rc > 0)
 		rc = 0;
-
+out:
+	cifs_in_send_dec(server);
 	return rc;
 }
 
@@ -826,9 +831,7 @@ cifs_call_async(struct TCP_Server_Info *server, struct smb_rqst *rqst,
 	 * I/O response may come back and free the mid entry on another thread.
 	 */
 	cifs_save_when_sent(mid);
-	cifs_in_send_inc(server);
 	rc = smb_send_rqst(server, 1, rqst, flags);
-	cifs_in_send_dec(server);
 
 	if (rc < 0) {
 		revert_current_mid(server, mid->credits);
@@ -1144,9 +1147,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
 		else
 			midQ[i]->callback = cifs_compound_last_callback;
 	}
-	cifs_in_send_inc(server);
 	rc = smb_send_rqst(server, num_rqst, rqst, flags);
-	cifs_in_send_dec(server);
 
 	for (i = 0; i < num_rqst; i++)
 		cifs_save_when_sent(midQ[i]);
@@ -1396,9 +1397,7 @@ SendReceive(const unsigned int xid, struct cifs_ses *ses,
 
 	midQ->mid_state = MID_REQUEST_SUBMITTED;
 
-	cifs_in_send_inc(server);
 	rc = smb_send(server, in_buf, len);
-	cifs_in_send_dec(server);
 	cifs_save_when_sent(midQ);
 
 	if (rc < 0)
@@ -1539,9 +1538,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 
 	midQ->mid_state = MID_REQUEST_SUBMITTED;
-	cifs_in_send_inc(server);
 	rc = smb_send(server, in_buf, len);
-	cifs_in_send_dec(server);
 	cifs_save_when_sent(midQ);
 
 	if (rc < 0)

From 5c8cf1664f288098a971a1d1e65716a2b6a279e1 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Fri, 3 Mar 2023 12:33:12 +0000
Subject: [PATCH 017/411] drm/meson: fix 1px pink line on GXM when scaling
 video overlay

Playing media with a resolution smaller than the crtc size requires the
video overlay to be scaled for output and GXM boards display a 1px pink
line on the bottom of the scaled overlay. Comparing with the downstream
vendor driver revealed VPP_DUMMY_DATA not being set [0].

Setting VPP_DUMMY_DATA prevents the 1px pink line from being seen.

[0] https://github.com/endlessm/linux-s905x/blob/master/drivers/amlogic/amports/video.c#L7869

Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller")
Suggested-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230303123312.155164-1-christianshewitt@gmail.com
---
 drivers/gpu/drm/meson/meson_vpp.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c
index 154837688ab0d..5df1957c8e41f 100644
--- a/drivers/gpu/drm/meson/meson_vpp.c
+++ b/drivers/gpu/drm/meson/meson_vpp.c
@@ -100,6 +100,8 @@ void meson_vpp_init(struct meson_drm *priv)
 			       priv->io_base + _REG(VPP_DOLBY_CTRL));
 		writel_relaxed(0x1020080,
 				priv->io_base + _REG(VPP_DUMMY_DATA1));
+		writel_relaxed(0x42020,
+				priv->io_base + _REG(VPP_DUMMY_DATA));
 	} else if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
 		writel_relaxed(0xf, priv->io_base + _REG(DOLBY_PATH_CTRL));
 

From f77ebdda0ee652124061c2ac42399bb6c367e729 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 5 Jan 2023 02:22:19 +0200
Subject: [PATCH 018/411] interconnect: qcom: osm-l3: fix icc_onecell_data
 allocation

This is a struct with a trailing zero-length array of icc_node pointers
but it's allocated as if it were a single array of icc_nodes instead.

Fortunately this overallocates memory rather then allocating less memory
than required.

Fix by replacing devm_kcalloc() with devm_kzalloc() and struct_size()
macro.

Fixes: 5bc9900addaf ("interconnect: qcom: Add OSM L3 interconnect provider support")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20230105002221.1416479-2-dmitry.baryshkov@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/osm-l3.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index 5fa1710874258..1bc01ff6e02a4 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -236,7 +236,7 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
 	qnodes = desc->nodes;
 	num_nodes = desc->num_nodes;
 
-	data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+	data = devm_kzalloc(&pdev->dev, struct_size(data, nodes, num_nodes), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 

From 87e8fab1917a2b3f6e3dedfd1cdf22a1416e6676 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 5 Jan 2023 02:22:20 +0200
Subject: [PATCH 019/411] interconnect: qcom: sm8450: switch to qcom_icc_rpmh_*
 function

Change sm8450 interconnect driver to use generic qcom_icc_rpmh_*
functions rather than embedding a copy of thema. This also fixes an
overallocation of memory for icc_onecell_data structure.

Fixes: fafc114a468e ("interconnect: qcom: Add SM8450 interconnect provider driver")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20230105002221.1416479-3-dmitry.baryshkov@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/sm8450.c | 98 +-----------------------------
 1 file changed, 2 insertions(+), 96 deletions(-)

diff --git a/drivers/interconnect/qcom/sm8450.c b/drivers/interconnect/qcom/sm8450.c
index e3a12e3d6e061..2d7a8e7b85ec2 100644
--- a/drivers/interconnect/qcom/sm8450.c
+++ b/drivers/interconnect/qcom/sm8450.c
@@ -1844,100 +1844,6 @@ static const struct qcom_icc_desc sm8450_system_noc = {
 	.num_bcms = ARRAY_SIZE(system_noc_bcms),
 };
 
-static int qnoc_probe(struct platform_device *pdev)
-{
-	const struct qcom_icc_desc *desc;
-	struct icc_onecell_data *data;
-	struct icc_provider *provider;
-	struct qcom_icc_node * const *qnodes;
-	struct qcom_icc_provider *qp;
-	struct icc_node *node;
-	size_t num_nodes, i;
-	int ret;
-
-	desc = device_get_match_data(&pdev->dev);
-	if (!desc)
-		return -EINVAL;
-
-	qnodes = desc->nodes;
-	num_nodes = desc->num_nodes;
-
-	qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
-	if (!qp)
-		return -ENOMEM;
-
-	data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	provider = &qp->provider;
-	provider->dev = &pdev->dev;
-	provider->set = qcom_icc_set;
-	provider->pre_aggregate = qcom_icc_pre_aggregate;
-	provider->aggregate = qcom_icc_aggregate;
-	provider->xlate_extended = qcom_icc_xlate_extended;
-	INIT_LIST_HEAD(&provider->nodes);
-	provider->data = data;
-
-	qp->dev = &pdev->dev;
-	qp->bcms = desc->bcms;
-	qp->num_bcms = desc->num_bcms;
-
-	qp->voter = of_bcm_voter_get(qp->dev, NULL);
-	if (IS_ERR(qp->voter))
-		return PTR_ERR(qp->voter);
-
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(&pdev->dev, "error adding interconnect provider\n");
-		return ret;
-	}
-
-	for (i = 0; i < qp->num_bcms; i++)
-		qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
-	for (i = 0; i < num_nodes; i++) {
-		size_t j;
-
-		if (!qnodes[i])
-			continue;
-
-		node = icc_node_create(qnodes[i]->id);
-		if (IS_ERR(node)) {
-			ret = PTR_ERR(node);
-			goto err;
-		}
-
-		node->name = qnodes[i]->name;
-		node->data = qnodes[i];
-		icc_node_add(node, provider);
-
-		for (j = 0; j < qnodes[i]->num_links; j++)
-			icc_link_create(node, qnodes[i]->links[j]);
-
-		data->nodes[i] = node;
-	}
-	data->num_nodes = num_nodes;
-
-	platform_set_drvdata(pdev, qp);
-
-	return 0;
-err:
-	icc_nodes_remove(provider);
-	icc_provider_del(provider);
-	return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
-	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
-	icc_nodes_remove(&qp->provider);
-	icc_provider_del(&qp->provider);
-
-	return 0;
-}
-
 static const struct of_device_id qnoc_of_match[] = {
 	{ .compatible = "qcom,sm8450-aggre1-noc",
 	  .data = &sm8450_aggre1_noc},
@@ -1966,8 +1872,8 @@ static const struct of_device_id qnoc_of_match[] = {
 MODULE_DEVICE_TABLE(of, qnoc_of_match);
 
 static struct platform_driver qnoc_driver = {
-	.probe = qnoc_probe,
-	.remove = qnoc_remove,
+	.probe = qcom_icc_rpmh_probe,
+	.remove = qcom_icc_rpmh_remove,
 	.driver = {
 		.name = "qnoc-sm8450",
 		.of_match_table = qnoc_of_match,

From 0d00cd114f20e4a6db37e4b08435c27acc1d1db0 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 5 Jan 2023 02:22:21 +0200
Subject: [PATCH 020/411] interconnect: qcom: sm8550: switch to qcom_icc_rpmh_*
 function

Change sm8550 interconnect driver to use generic qcom_icc_rpmh_*
functions rather than embedding a copy of thema. This also fixes an
overallocation of memory for icc_onecell_data structure.

Fixes: e6f0d6a30f73 ("interconnect: qcom: Add SM8550 interconnect provider driver")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20230105002221.1416479-4-dmitry.baryshkov@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/sm8550.c | 99 +-----------------------------
 1 file changed, 2 insertions(+), 97 deletions(-)

diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c
index 54fa027ab961f..d823ba988ef68 100644
--- a/drivers/interconnect/qcom/sm8550.c
+++ b/drivers/interconnect/qcom/sm8550.c
@@ -2165,101 +2165,6 @@ static const struct qcom_icc_desc sm8550_system_noc = {
 	.num_bcms = ARRAY_SIZE(system_noc_bcms),
 };
 
-static int qnoc_probe(struct platform_device *pdev)
-{
-	const struct qcom_icc_desc *desc;
-	struct icc_onecell_data *data;
-	struct icc_provider *provider;
-	struct qcom_icc_node * const *qnodes;
-	struct qcom_icc_provider *qp;
-	struct icc_node *node;
-	size_t num_nodes, i;
-	int ret;
-
-	desc = device_get_match_data(&pdev->dev);
-	if (!desc)
-		return -EINVAL;
-
-	qnodes = desc->nodes;
-	num_nodes = desc->num_nodes;
-
-	qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
-	if (!qp)
-		return -ENOMEM;
-
-	data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
-	if (!data)
-		return -ENOMEM;
-
-	provider = &qp->provider;
-	provider->dev = &pdev->dev;
-	provider->set = qcom_icc_set;
-	provider->pre_aggregate = qcom_icc_pre_aggregate;
-	provider->aggregate = qcom_icc_aggregate;
-	provider->xlate_extended = qcom_icc_xlate_extended;
-	INIT_LIST_HEAD(&provider->nodes);
-	provider->data = data;
-
-	qp->dev = &pdev->dev;
-	qp->bcms = desc->bcms;
-	qp->num_bcms = desc->num_bcms;
-
-	qp->voter = of_bcm_voter_get(qp->dev, NULL);
-	if (IS_ERR(qp->voter))
-		return PTR_ERR(qp->voter);
-
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err_probe(&pdev->dev, ret,
-			      "error adding interconnect provider\n");
-		return ret;
-	}
-
-	for (i = 0; i < qp->num_bcms; i++)
-		qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
-
-	for (i = 0; i < num_nodes; i++) {
-		size_t j;
-
-		if (!qnodes[i])
-			continue;
-
-		node = icc_node_create(qnodes[i]->id);
-		if (IS_ERR(node)) {
-			ret = PTR_ERR(node);
-			goto err;
-		}
-
-		node->name = qnodes[i]->name;
-		node->data = qnodes[i];
-		icc_node_add(node, provider);
-
-		for (j = 0; j < qnodes[i]->num_links; j++)
-			icc_link_create(node, qnodes[i]->links[j]);
-
-		data->nodes[i] = node;
-	}
-	data->num_nodes = num_nodes;
-
-	platform_set_drvdata(pdev, qp);
-
-	return 0;
-err:
-	icc_nodes_remove(provider);
-	icc_provider_del(provider);
-	return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
-	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
-	icc_nodes_remove(&qp->provider);
-	icc_provider_del(&qp->provider);
-
-	return 0;
-}
-
 static const struct of_device_id qnoc_of_match[] = {
 	{ .compatible = "qcom,sm8550-aggre1-noc",
 	  .data = &sm8550_aggre1_noc},
@@ -2294,8 +2199,8 @@ static const struct of_device_id qnoc_of_match[] = {
 MODULE_DEVICE_TABLE(of, qnoc_of_match);
 
 static struct platform_driver qnoc_driver = {
-	.probe = qnoc_probe,
-	.remove = qnoc_remove,
+	.probe = qcom_icc_rpmh_probe,
+	.remove = qcom_icc_rpmh_remove,
 	.driver = {
 		.name = "qnoc-sm8550",
 		.of_match_table = qnoc_of_match,

From d16c893425d07ada1fdd817ec06d322efcf69480 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
 <amadeuszx.slawinski@linux.intel.com>
Date: Fri, 3 Mar 2023 14:48:50 +0100
Subject: [PATCH 021/411] ASoC: Intel: avs: max98357a: Explicitly define codec
 format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

max98357a is speaker codec configured in 48000/2/S16_LE format
regardless of front end format, so force it to be so.

Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Link: https://lore.kernel.org/r/20230303134854.2277146-2-amadeuszx.slawinski@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/avs/boards/max98357a.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/sound/soc/intel/avs/boards/max98357a.c b/sound/soc/intel/avs/boards/max98357a.c
index 921f42caf7e09..183123d08c5a3 100644
--- a/sound/soc/intel/avs/boards/max98357a.c
+++ b/sound/soc/intel/avs/boards/max98357a.c
@@ -8,6 +8,7 @@
 
 #include <linux/module.h>
 #include <linux/platform_device.h>
+#include <sound/pcm_params.h>
 #include <sound/soc.h>
 #include <sound/soc-acpi.h>
 #include <sound/soc-dapm.h>
@@ -24,6 +25,26 @@ static const struct snd_soc_dapm_route card_base_routes[] = {
 	{ "Spk", NULL, "Speaker" },
 };
 
+static int
+avs_max98357a_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params)
+{
+	struct snd_interval *rate, *channels;
+	struct snd_mask *fmt;
+
+	rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+	channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+	fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+
+	/* The ADSP will convert the FE rate to 48k, stereo */
+	rate->min = rate->max = 48000;
+	channels->min = channels->max = 2;
+
+	/* set SSP0 to 16 bit */
+	snd_mask_none(fmt);
+	snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S16_LE);
+	return 0;
+}
+
 static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port,
 			       struct snd_soc_dai_link **dai_link)
 {
@@ -55,6 +76,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in
 	dl->num_platforms = 1;
 	dl->id = 0;
 	dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS;
+	dl->be_hw_params_fixup = avs_max98357a_be_fixup;
 	dl->nonatomic = 1;
 	dl->no_pcm = 1;
 	dl->dpcm_playback = 1;

From 61f368624fe4d0c25c6e9c917574b8ace51d776e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
 <amadeuszx.slawinski@linux.intel.com>
Date: Fri, 3 Mar 2023 14:48:51 +0100
Subject: [PATCH 022/411] ASoC: Intel: avs: da7219: Explicitly define codec
 format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

da7219 is headset codec configured in 48000/2/S24_LE format regardless
of front end format, so force it to be so.

Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Link: https://lore.kernel.org/r/20230303134854.2277146-3-amadeuszx.slawinski@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/avs/boards/da7219.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/sound/soc/intel/avs/boards/da7219.c b/sound/soc/intel/avs/boards/da7219.c
index acd43b6108e99..1a1d572cc1d02 100644
--- a/sound/soc/intel/avs/boards/da7219.c
+++ b/sound/soc/intel/avs/boards/da7219.c
@@ -117,6 +117,26 @@ static void avs_da7219_codec_exit(struct snd_soc_pcm_runtime *rtd)
 	snd_soc_component_set_jack(asoc_rtd_to_codec(rtd, 0)->component, NULL, NULL);
 }
 
+static int
+avs_da7219_be_fixup(struct snd_soc_pcm_runtime *runrime, struct snd_pcm_hw_params *params)
+{
+	struct snd_interval *rate, *channels;
+	struct snd_mask *fmt;
+
+	rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+	channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+	fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+
+	/* The ADSP will convert the FE rate to 48k, stereo */
+	rate->min = rate->max = 48000;
+	channels->min = channels->max = 2;
+
+	/* set SSP0 to 24 bit */
+	snd_mask_none(fmt);
+	snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE);
+	return 0;
+}
+
 static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port,
 			       struct snd_soc_dai_link **dai_link)
 {
@@ -148,6 +168,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in
 	dl->num_platforms = 1;
 	dl->id = 0;
 	dl->dai_fmt = SND_SOC_DAIFMT_I2S | SND_SOC_DAIFMT_NB_NF | SND_SOC_DAIFMT_CBS_CFS;
+	dl->be_hw_params_fixup = avs_da7219_be_fixup;
 	dl->init = avs_da7219_codec_init;
 	dl->exit = avs_da7219_codec_exit;
 	dl->nonatomic = 1;

From d24dbc865c2bd5946bef62bb862a65df092dfc79 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
 <amadeuszx.slawinski@linux.intel.com>
Date: Fri, 3 Mar 2023 14:48:52 +0100
Subject: [PATCH 023/411] ASoC: Intel: avs: rt5682: Explicitly define codec
 format
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

rt5682 is headset codec configured in 48000/2/S24_LE format regardless
of front end format, so force it to be so.

Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Link: https://lore.kernel.org/r/20230303134854.2277146-4-amadeuszx.slawinski@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/avs/boards/rt5682.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/sound/soc/intel/avs/boards/rt5682.c b/sound/soc/intel/avs/boards/rt5682.c
index 473e9fe5d0bf7..b2c2ba93dcb56 100644
--- a/sound/soc/intel/avs/boards/rt5682.c
+++ b/sound/soc/intel/avs/boards/rt5682.c
@@ -169,6 +169,27 @@ static const struct snd_soc_ops avs_rt5682_ops = {
 	.hw_params = avs_rt5682_hw_params,
 };
 
+static int
+avs_rt5682_be_fixup(struct snd_soc_pcm_runtime *runtime, struct snd_pcm_hw_params *params)
+{
+	struct snd_interval *rate, *channels;
+	struct snd_mask *fmt;
+
+	rate = hw_param_interval(params, SNDRV_PCM_HW_PARAM_RATE);
+	channels = hw_param_interval(params, SNDRV_PCM_HW_PARAM_CHANNELS);
+	fmt = hw_param_mask(params, SNDRV_PCM_HW_PARAM_FORMAT);
+
+	/* The ADSP will convert the FE rate to 48k, stereo */
+	rate->min = rate->max = 48000;
+	channels->min = channels->max = 2;
+
+	/* set SSPN to 24 bit */
+	snd_mask_none(fmt);
+	snd_mask_set_format(fmt, SNDRV_PCM_FORMAT_S24_LE);
+
+	return 0;
+}
+
 static int avs_create_dai_link(struct device *dev, const char *platform_name, int ssp_port,
 			       struct snd_soc_dai_link **dai_link)
 {
@@ -201,6 +222,7 @@ static int avs_create_dai_link(struct device *dev, const char *platform_name, in
 	dl->id = 0;
 	dl->init = avs_rt5682_codec_init;
 	dl->exit = avs_rt5682_codec_exit;
+	dl->be_hw_params_fixup = avs_rt5682_be_fixup;
 	dl->ops = &avs_rt5682_ops;
 	dl->nonatomic = 1;
 	dl->no_pcm = 1;

From 933de2d127281731166cf2880fa1e23c5a0f7faa Mon Sep 17 00:00:00 2001
From: Cezary Rojewski <cezary.rojewski@intel.com>
Date: Fri, 3 Mar 2023 14:48:53 +0100
Subject: [PATCH 024/411] ASoC: Intel: avs: ssm4567: Remove nau8825 bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some of the nau8825 clock control got into the ssm4567, remove it.

Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Link: https://lore.kernel.org/r/20230303134854.2277146-5-amadeuszx.slawinski@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/avs/boards/ssm4567.c | 31 ----------------------------
 1 file changed, 31 deletions(-)

diff --git a/sound/soc/intel/avs/boards/ssm4567.c b/sound/soc/intel/avs/boards/ssm4567.c
index c5db696127624..2b7f5ad92aca7 100644
--- a/sound/soc/intel/avs/boards/ssm4567.c
+++ b/sound/soc/intel/avs/boards/ssm4567.c
@@ -15,7 +15,6 @@
 #include <sound/soc-acpi.h>
 #include "../../../codecs/nau8825.h"
 
-#define SKL_NUVOTON_CODEC_DAI	"nau8825-hifi"
 #define SKL_SSM_CODEC_DAI	"ssm4567-hifi"
 
 static struct snd_soc_codec_conf card_codec_conf[] = {
@@ -34,41 +33,11 @@ static const struct snd_kcontrol_new card_controls[] = {
 	SOC_DAPM_PIN_SWITCH("Right Speaker"),
 };
 
-static int
-platform_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *control, int event)
-{
-	struct snd_soc_dapm_context *dapm = w->dapm;
-	struct snd_soc_card *card = dapm->card;
-	struct snd_soc_dai *codec_dai;
-	int ret;
-
-	codec_dai = snd_soc_card_get_codec_dai(card, SKL_NUVOTON_CODEC_DAI);
-	if (!codec_dai) {
-		dev_err(card->dev, "Codec dai not found\n");
-		return -EINVAL;
-	}
-
-	if (SND_SOC_DAPM_EVENT_ON(event)) {
-		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000,
-					     SND_SOC_CLOCK_IN);
-		if (ret < 0)
-			dev_err(card->dev, "set sysclk err = %d\n", ret);
-	} else {
-		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN);
-		if (ret < 0)
-			dev_err(card->dev, "set sysclk err = %d\n", ret);
-	}
-
-	return ret;
-}
-
 static const struct snd_soc_dapm_widget card_widgets[] = {
 	SND_SOC_DAPM_SPK("Left Speaker", NULL),
 	SND_SOC_DAPM_SPK("Right Speaker", NULL),
 	SND_SOC_DAPM_SPK("DP1", NULL),
 	SND_SOC_DAPM_SPK("DP2", NULL),
-	SND_SOC_DAPM_SUPPLY("Platform Clock", SND_SOC_NOPM, 0, 0, platform_clock_control,
-			    SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
 };
 
 static const struct snd_soc_dapm_route card_base_routes[] = {

From 6206b2e787da2ed567922c37bb588a44f6fb6705 Mon Sep 17 00:00:00 2001
From: Cezary Rojewski <cezary.rojewski@intel.com>
Date: Fri, 3 Mar 2023 14:48:54 +0100
Subject: [PATCH 025/411] ASoC: Intel: avs: nau8825: Adjust clock control
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Internal clock shall be adjusted also in cases when DAPM event other
than 'ON' is triggered.

Signed-off-by: Cezary Rojewski <cezary.rojewski@intel.com>
Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Link: https://lore.kernel.org/r/20230303134854.2277146-6-amadeuszx.slawinski@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/avs/boards/nau8825.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/sound/soc/intel/avs/boards/nau8825.c b/sound/soc/intel/avs/boards/nau8825.c
index b31fa931ba8b6..b69fc5567135d 100644
--- a/sound/soc/intel/avs/boards/nau8825.c
+++ b/sound/soc/intel/avs/boards/nau8825.c
@@ -33,15 +33,15 @@ avs_nau8825_clock_control(struct snd_soc_dapm_widget *w, struct snd_kcontrol *co
 		return -EINVAL;
 	}
 
-	if (!SND_SOC_DAPM_EVENT_ON(event)) {
+	if (SND_SOC_DAPM_EVENT_ON(event))
+		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_MCLK, 24000000,
+					     SND_SOC_CLOCK_IN);
+	else
 		ret = snd_soc_dai_set_sysclk(codec_dai, NAU8825_CLK_INTERNAL, 0, SND_SOC_CLOCK_IN);
-		if (ret < 0) {
-			dev_err(card->dev, "set sysclk err = %d\n", ret);
-			return ret;
-		}
-	}
+	if (ret < 0)
+		dev_err(card->dev, "Set sysclk failed: %d\n", ret);
 
-	return 0;
+	return ret;
 }
 
 static const struct snd_kcontrol_new card_controls[] = {

From c28cd1f3433c7e339315d1ddacaeacf0fdfbe252 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 1 Mar 2023 17:46:38 -0800
Subject: [PATCH 026/411] clk: Mark a fwnode as initialized when using
 CLK_OF_DECLARE() macro

We already mark fwnodes as initialized when they are registered as clock
providers. We do this so that fw_devlink can tell when a clock driver
doesn't use the driver core framework to probe/initialize its device.
This ensures fw_devlink doesn't block the consumers of such a clock
provider indefinitely.

However, some users of CLK_OF_DECLARE() macros don't use the same node
that matches the macro as the node for the clock provider, but they
initialize the entire node. To cover these cases, also mark the nodes
that match the macros as initialized when the init callback function is
called.

An example of this is "stericsson,u8500-clks" that's handled using
CLK_OF_DECLARE() and looks something like this:

clocks {
	compatible = "stericsson,u8500-clks";

	prcmu_clk: prcmu-clock {
		#clock-cells = <1>;
	};

	prcc_pclk: prcc-periph-clock {
		#clock-cells = <2>;
	};

	prcc_kclk: prcc-kernel-clock {
		#clock-cells = <2>;
	};

	prcc_reset: prcc-reset-controller {
		#reset-cells = <2>;
	};
	...
};

This patch makes sure that "clocks" is marked as initialized so that
fw_devlink knows that all nodes under it have been initialized. If the
driver creates struct devices for some of the subnodes, fw_devlink is
smart enough to know to wait for those devices to probe, so no special
handling is required for those cases.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Reported-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/lkml/CACRpkdamxDX6EBVjKX5=D3rkHp17f5pwGdBVhzFU90-0MHY6dQ@mail.gmail.com/
Fixes: 4a032827daa8 ("of: property: Simplify of_link_to_phandle()")
Signed-off-by: Saravana Kannan <saravanak@google.com>
Link: https://lore.kernel.org/r/20230302014639.297514-1-saravanak@google.com
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 842e72a5348fa..c9f5276006a09 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -1363,7 +1363,13 @@ struct clk_hw_onecell_data {
 	struct clk_hw *hws[];
 };
 
-#define CLK_OF_DECLARE(name, compat, fn) OF_DECLARE_1(clk, name, compat, fn)
+#define CLK_OF_DECLARE(name, compat, fn) \
+	static void __init name##_of_clk_init_declare(struct device_node *np) \
+	{								\
+		fn(np);							\
+		fwnode_dev_initialized(of_fwnode_handle(np), true);	\
+	}								\
+	OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare)
 
 /*
  * Use this macro when you have a driver that requires two initialization

From 9d941b8abf863751c1c634d96539c3fab220a99c Mon Sep 17 00:00:00 2001
From: Nick Alcock <nick.alcock@oracle.com>
Date: Wed, 22 Feb 2023 12:14:37 +0000
Subject: [PATCH 027/411] kbuild, clk: bcm2835: remove MODULE_LICENSE in
 non-modules

Since commit 8b41fc4454e ("kbuild: create modules.builtin without
Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations
are used to identify modules. As a consequence, uses of the macro
in non-modules will cause modprobe to misidentify their containing
object file as a module when it is not (false positives), and modprobe
might succeed rather than failing with a suitable error message.

So remove it in the files in this commit, none of which can be built as
modules.

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Suggested-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: linux-modules@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Ray Jui <rjui@broadcom.com>
Cc: Scott Branden <sbranden@broadcom.com>
Cc: linux-clk@vger.kernel.org
Cc: linux-rpi-kernel@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230222121453.91915-12-nick.alcock@oracle.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/bcm/clk-bcm2835-aux.c | 1 -
 drivers/clk/bcm/clk-bcm2835.c     | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/clk/bcm/clk-bcm2835-aux.c b/drivers/clk/bcm/clk-bcm2835-aux.c
index 290a2846a86b6..0fafa5cba4427 100644
--- a/drivers/clk/bcm/clk-bcm2835-aux.c
+++ b/drivers/clk/bcm/clk-bcm2835-aux.c
@@ -69,4 +69,3 @@ builtin_platform_driver(bcm2835_aux_clk_driver);
 
 MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
 MODULE_DESCRIPTION("BCM2835 auxiliary peripheral clock driver");
-MODULE_LICENSE("GPL");
diff --git a/drivers/clk/bcm/clk-bcm2835.c b/drivers/clk/bcm/clk-bcm2835.c
index e74fe6219d14e..8dc476ef5bf97 100644
--- a/drivers/clk/bcm/clk-bcm2835.c
+++ b/drivers/clk/bcm/clk-bcm2835.c
@@ -2350,4 +2350,3 @@ builtin_platform_driver(bcm2835_clk_driver);
 
 MODULE_AUTHOR("Eric Anholt <eric@anholt.net>");
 MODULE_DESCRIPTION("BCM2835 clock driver");
-MODULE_LICENSE("GPL");

From 94511ebc6810142eb36c8b935e1713a15db94d28 Mon Sep 17 00:00:00 2001
From: Nick Alcock <nick.alcock@oracle.com>
Date: Wed, 22 Feb 2023 12:14:38 +0000
Subject: [PATCH 028/411] kbuild, clk: remove MODULE_LICENSE in non-modules

Since commit 8b41fc4454e ("kbuild: create modules.builtin without
Makefile.modbuiltin or tristate.conf"), MODULE_LICENSE declarations
are used to identify modules. As a consequence, uses of the macro
in non-modules will cause modprobe to misidentify their containing
object file as a module when it is not (false positives), and modprobe
might succeed rather than failing with a suitable error message.

So remove it in the files in this commit, none of which can be built as
modules.

Signed-off-by: Nick Alcock <nick.alcock@oracle.com>
Suggested-by: Luis Chamberlain <mcgrof@kernel.org>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: linux-modules@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: Hitomi Hasegawa <hasegawa-hitomi@fujitsu.com>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: linux-clk@vger.kernel.org
Link: https://lore.kernel.org/r/20230222121453.91915-13-nick.alcock@oracle.com
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-fixed-mmio.c         | 1 -
 drivers/clk/clk-fsl-sai.c            | 1 -
 drivers/clk/hisilicon/clk-hi3559a.c  | 1 -
 drivers/clk/microchip/clk-mpfs-ccc.c | 1 -
 4 files changed, 4 deletions(-)

diff --git a/drivers/clk/clk-fixed-mmio.c b/drivers/clk/clk-fixed-mmio.c
index 5225d17d6b3f3..8609fca29cc4e 100644
--- a/drivers/clk/clk-fixed-mmio.c
+++ b/drivers/clk/clk-fixed-mmio.c
@@ -99,4 +99,3 @@ module_platform_driver(of_fixed_mmio_clk_driver);
 
 MODULE_AUTHOR("Jan Kotas <jank@cadence.com>");
 MODULE_DESCRIPTION("Memory Mapped IO Fixed clock driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/clk/clk-fsl-sai.c b/drivers/clk/clk-fsl-sai.c
index 6238fcea04673..ee5baf993ff21 100644
--- a/drivers/clk/clk-fsl-sai.c
+++ b/drivers/clk/clk-fsl-sai.c
@@ -88,5 +88,4 @@ module_platform_driver(fsl_sai_clk_driver);
 
 MODULE_DESCRIPTION("Freescale SAI bitclock-as-a-clock driver");
 MODULE_AUTHOR("Michael Walle <michael@walle.cc>");
-MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:fsl-sai-clk");
diff --git a/drivers/clk/hisilicon/clk-hi3559a.c b/drivers/clk/hisilicon/clk-hi3559a.c
index 9ea1a80acbe8b..8036bd8cbb0ac 100644
--- a/drivers/clk/hisilicon/clk-hi3559a.c
+++ b/drivers/clk/hisilicon/clk-hi3559a.c
@@ -841,5 +841,4 @@ static void __exit hi3559av100_crg_exit(void)
 module_exit(hi3559av100_crg_exit);
 
 
-MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("HiSilicon Hi3559AV100 CRG Driver");
diff --git a/drivers/clk/microchip/clk-mpfs-ccc.c b/drivers/clk/microchip/clk-mpfs-ccc.c
index 0ddc73e07be42..bce61c45e9674 100644
--- a/drivers/clk/microchip/clk-mpfs-ccc.c
+++ b/drivers/clk/microchip/clk-mpfs-ccc.c
@@ -291,4 +291,3 @@ module_exit(clk_ccc_exit);
 
 MODULE_DESCRIPTION("Microchip PolarFire SoC Clock Conditioning Circuitry Driver");
 MODULE_AUTHOR("Conor Dooley <conor.dooley@microchip.com>");
-MODULE_LICENSE("GPL");

From 0ffad67784a097beccf34d297ddd1b0773b3b8a3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 25 Feb 2023 21:39:47 -0800
Subject: [PATCH 029/411] clk: HI655X: select REGMAP instead of depending on it

REGMAP is a hidden (not user visible) symbol. Users cannot set it
directly thru "make *config", so drivers should select it instead of
depending on it if they need it.

Consistently using "select" or "depends on" can also help reduce
Kconfig circular dependency issues.

Therefore, change the use of "depends on REGMAP" to "select REGMAP".

Fixes: 3a49afb84ca0 ("clk: enable hi655x common clk automatically")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Riku Voipio <riku.voipio@linaro.org>
Cc: Stephen Boyd <sboyd@kernel.org>
Cc: Michael Turquette <mturquette@baylibre.com>
Cc: linux-clk@vger.kernel.org
Link: https://lore.kernel.org/r/20230226053953.4681-3-rdunlap@infradead.org
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index b6c5bf69a2b2c..1eef05bb1f995 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -91,7 +91,7 @@ config COMMON_CLK_RK808
 config COMMON_CLK_HI655X
 	tristate "Clock driver for Hi655x" if EXPERT
 	depends on (MFD_HI655X_PMIC || COMPILE_TEST)
-	depends on REGMAP
+	select REGMAP
 	default MFD_HI655X_PMIC
 	help
 	  This driver supports the hi655x PMIC clock. This

From 26243872fe26ec0df7d81766253d00213990e382 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Wed, 22 Feb 2023 12:46:11 +0000
Subject: [PATCH 030/411] MAINTAINERS: add missing clock driver coverage for
 Microchip FPGAs

When the CCC support was added, the clock binding coverage was
converted to a regex in commit 71c8517e004b ("MAINTAINERS: update
polarfire soc clock binding"), but the coverage for the clock drivers
themselves was not updated. Rectify that now.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230222124610.257101-1-conor.dooley@microchip.com
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d5bc223f3053..cfd630ce68e75 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17989,7 +17989,7 @@ F:	Documentation/devicetree/bindings/spi/microchip,mpfs-spi.yaml
 F:	Documentation/devicetree/bindings/usb/microchip,mpfs-musb.yaml
 F:	arch/riscv/boot/dts/microchip/
 F:	drivers/char/hw_random/mpfs-rng.c
-F:	drivers/clk/microchip/clk-mpfs.c
+F:	drivers/clk/microchip/clk-mpfs*.c
 F:	drivers/i2c/busses/i2c-microchip-corei2c.c
 F:	drivers/mailbox/mailbox-mpfs.c
 F:	drivers/pci/controller/pcie-microchip-host.c

From 633a12fda6536a1a17bcea29502e777e86a4547e Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Tue, 3 Jan 2023 15:21:20 +0100
Subject: [PATCH 031/411] interconnect: qcom: qcm2290: Fix MASTER_SNOC_BIMC_NRT

Due to what seems to be a copy-paste error, the _NRT master was
identical to the _RT master, which should not be the case.. Fix it
using the values available from the downstream kernel [1].

[1] https://android.googlesource.com/kernel/msm-extra/devicetree/+/refs/heads/android-msm-bramble-4.19-android11-qpr1/qcom/scuba-bus.dtsi#127
Fixes: 1a14b1ac3935 ("interconnect: qcom: Add QCM2290 driver support")
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Acked-by: Shawn Guo <shawn.guo@linaro.org>
Link: https://lore.kernel.org/r/20230103142120.15605-1-konrad.dybcio@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/qcm2290.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/interconnect/qcom/qcm2290.c b/drivers/interconnect/qcom/qcm2290.c
index 0da612d6398c5..a29cdb4fac03f 100644
--- a/drivers/interconnect/qcom/qcm2290.c
+++ b/drivers/interconnect/qcom/qcm2290.c
@@ -147,9 +147,9 @@ static struct qcom_icc_node mas_snoc_bimc_nrt = {
 	.name = "mas_snoc_bimc_nrt",
 	.buswidth = 16,
 	.qos.ap_owned = true,
-	.qos.qos_port = 2,
+	.qos.qos_port = 3,
 	.qos.qos_mode = NOC_QOS_MODE_BYPASS,
-	.mas_rpm_id = 163,
+	.mas_rpm_id = 164,
 	.slv_rpm_id = -1,
 	.num_links = ARRAY_SIZE(mas_snoc_bimc_nrt_links),
 	.links = mas_snoc_bimc_nrt_links,

From 89dc65a7cc8a119c395c0931b12d7a514f9d2bcc Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Tue, 28 Feb 2023 19:26:55 -0500
Subject: [PATCH 032/411] clk: k210: remove an implicit 64-bit division

The K210 clock driver depends on SOC_CANAAN, which is only selectable
when !MMU on RISC-V. !MMU is not possible on 32-bit yet, but patches
have been sent for its enabling. The kernel test robot reported this
implicit 64-bit division there.

Replace the implicit division with an explicit one.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/linux-riscv/202301201538.zNlqgE4L-lkp@intel.com/
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Jesse Taube <Mr.Bossman075@gmail.com>
Link: https://lore.kernel.org/r/20230301002657.352637-2-Mr.Bossman075@gmail.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/clk-k210.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/clk/clk-k210.c b/drivers/clk/clk-k210.c
index 67a7cb3503c36..4eed667eddaf2 100644
--- a/drivers/clk/clk-k210.c
+++ b/drivers/clk/clk-k210.c
@@ -495,7 +495,7 @@ static unsigned long k210_pll_get_rate(struct clk_hw *hw,
 	f = FIELD_GET(K210_PLL_CLKF, reg) + 1;
 	od = FIELD_GET(K210_PLL_CLKOD, reg) + 1;
 
-	return (u64)parent_rate * f / (r * od);
+	return div_u64((u64)parent_rate * f, r * od);
 }
 
 static const struct clk_ops k210_pll_ops = {

From 780f6a9afe8b0e303406a39f6968cf1daa6c3d51 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 4 Jan 2023 13:20:52 -0800
Subject: [PATCH 033/411] lib: zstd: Fix -Wstringop-overflow warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following -Wstringop-overflow warning when building with GCC 11+:

lib/zstd/decompress/huf_decompress.c: In function ‘HUF_readDTableX2_wksp’:
lib/zstd/decompress/huf_decompress.c:700:5: warning: ‘HUF_fillDTableX2.constprop’ accessing 624 bytes in a region of size 52 [-Wstringop-overflow=]
  700 |     HUF_fillDTableX2(dt, maxTableLog,
      |     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  701 |                    wksp->sortedSymbol, sizeOfSort,
      |                    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  702 |                    wksp->rankStart0, wksp->rankVal, maxW,
      |                    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  703 |                    tableLog+1,
      |                    ~~~~~~~~~~~
  704 |                    wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
      |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
lib/zstd/decompress/huf_decompress.c:700:5: note: referencing argument 6 of type ‘U32 (*)[13]’ {aka ‘unsigned int (*)[13]’}
lib/zstd/decompress/huf_decompress.c:571:13: note: in a call to function ‘HUF_fillDTableX2.constprop’
  571 | static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
      |             ^~~~~~~~~~~~~~~~

by using pointer notation instead of array notation.

This is one of the last remaining warnings to be fixed before globally
enabling -Wstringop-overflow.

Co-developed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Cc: Nick Terrell <terrelln@fb.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/decompress/huf_decompress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
index 89b269a641c7e..60958afebc415 100644
--- a/lib/zstd/decompress/huf_decompress.c
+++ b/lib/zstd/decompress/huf_decompress.c
@@ -985,7 +985,7 @@ static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32
 
 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
                            const sortedSymbol_t* sortedList,
-                           const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
+                           const U32* rankStart, rankValCol_t *rankValOrigin, const U32 maxWeight,
                            const U32 nbBitsBaseline)
 {
     U32* const rankVal = rankValOrigin[0];

From 038505c41f0aad26ef101f4f7f6e111531c3914f Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Wed, 15 Feb 2023 15:19:17 -0800
Subject: [PATCH 034/411] lib: zstd: Backport fix for in-place decompression

Backport the relevant part of upstream commit 5b266196 [0].

This fixes in-place decompression for x86-64 kernel decompression. It
uses a bound of 131072 + (uncompressed_size >> 8), which can be violated
after upstream commit 6a7ede3d [1], as zstd can use part of the output
buffer as temporary storage, and without this patch needs a bound of
~262144.

The fix is for zstd to detect that the input and output buffers overlap,
so that zstd knows it can't use the overlapping portion of the output
buffer as tempoary storage. If the margin is not large enough, this will
ensure that zstd will fail the decompression, rather than overwriting
part of the input data, and causing corruption.

This fix has been landed upstream and is in release v1.5.4. That commit
also adds unit and fuzz tests to verify that the margin we use is
respected, and correct. That means that the fix is well tested upstream.

I have not been able to reproduce the potential bug in x86-64 kernel
decompression locally, nor have I recieved reports of failures to
decompress the kernel. It is possible that compression saves enough
space to make it very hard for the issue to appear.

I've boot tested the zstd compressed kernel on x86-64 and i386 with this
patch, which uses in-place decompression, and sanity tested zstd compression
in btrfs / squashfs to make sure that we don't see any issues, but other
uses of zstd shouldn't be affected, because they don't use in-place
decompression.

Thanks to Vasily Gorbik <gor@linux.ibm.com> for debugging a related issue
on s390, which was triggered by the same commit, but was a bug in how
__decompress() was called [2]. And to Sasha Levin <sashal@kernel.org>
for the CC alerting me of the issue.

[0] https://github.com/facebook/zstd/commit/5b266196a41e6a15e21bd4f0eeab43b938db1d90
[1] https://github.com/facebook/zstd/commit/6a7ede3dfccbf3e0a5928b4224a039c260dcff72
[2] https://lore.kernel.org/r/patch-1.thread-41c676.git-41c676c2d153.your-ad-here.call-01675030179-ext-9637@work.hours

CC: Vasily Gorbik <gor@linux.ibm.com>
CC: Heiko Carstens <hca@linux.ibm.com>
CC: Sasha Levin <sashal@kernel.org>
CC: Yann Collet <cyan@fb.com>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/decompress/zstd_decompress.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
index b9b935a9f5c0d..6b3177c947114 100644
--- a/lib/zstd/decompress/zstd_decompress.c
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -798,7 +798,7 @@ static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
         if (srcSize == 0) return 0;
         RETURN_ERROR(dstBuffer_null, "");
     }
-    ZSTD_memcpy(dst, src, srcSize);
+    ZSTD_memmove(dst, src, srcSize);
     return srcSize;
 }
 
@@ -858,6 +858,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
 
     /* Loop on each block */
     while (1) {
+        BYTE* oBlockEnd = oend;
         size_t decodedSize;
         blockProperties_t blockProperties;
         size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties);
@@ -867,16 +868,34 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         remainingSrcSize -= ZSTD_blockHeaderSize;
         RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, "");
 
+        if (ip >= op && ip < oBlockEnd) {
+            /* We are decompressing in-place. Limit the output pointer so that we
+             * don't overwrite the block that we are currently reading. This will
+             * fail decompression if the input & output pointers aren't spaced
+             * far enough apart.
+             *
+             * This is important to set, even when the pointers are far enough
+             * apart, because ZSTD_decompressBlock_internal() can decide to store
+             * literals in the output buffer, after the block it is decompressing.
+             * Since we don't want anything to overwrite our input, we have to tell
+             * ZSTD_decompressBlock_internal to never write past ip.
+             *
+             * See ZSTD_allocateLiteralsBuffer() for reference.
+             */
+            oBlockEnd = op + (ip - op);
+        }
+
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
             break;
         case bt_raw :
+            /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
             decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
             break;
         case bt_rle :
-            decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize);
+            decodedSize = ZSTD_setRleBlock(op, (size_t)(oBlockEnd-op), *ip, blockProperties.origSize);
             break;
         case bt_reserved :
         default:

From 6906598f1ce93761716d780b6e3f171e13f0f4ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sun, 29 Jan 2023 14:14:36 +0100
Subject: [PATCH 035/411] zstd: Fix definition of assert()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

assert(x) should emit a warning if x is false. WARN_ON(x) emits a
warning if x is true. Thus, assert(x) should be defined as WARN_ON(!x)
rather than WARN_ON(x).

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/common/zstd_deps.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h
index 7a5bf44839c9c..f06df065dec01 100644
--- a/lib/zstd/common/zstd_deps.h
+++ b/lib/zstd/common/zstd_deps.h
@@ -84,7 +84,7 @@ static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) {
 
 #include <linux/kernel.h>
 
-#define assert(x) WARN_ON((x))
+#define assert(x) WARN_ON(!(x))
 
 #endif /* ZSTD_DEPS_ASSERT */
 #endif /* ZSTD_DEPS_NEED_ASSERT */

From 2da789cda462bda93679f53ee38f9aa2309d47e8 Mon Sep 17 00:00:00 2001
From: Guillaume Tucker <guillaume.tucker@collabora.com>
Date: Sat, 4 Feb 2023 14:34:54 +0100
Subject: [PATCH 036/411] selftests: amd-pstate: fix TEST_FILES

Bring back the Python scripts that were initially added with
TEST_GEN_FILES but now with TEST_FILES to avoid having them deleted
when doing a clean.  Also fix the way the architecture is being
determined as they should also be installed when ARCH=x86_64 is
provided explicitly.  Then also append extra files to TEST_FILES and
TEST_PROGS with += so they don't get discarded.

Fixes: ba2d788aa873 ("selftests: amd-pstate: Trigger tbench benchmark and test cpus")
Fixes: a49fb7218ed8 ("selftests: amd-pstate: Don't delete source files via Makefile")
Signed-off-by: Guillaume Tucker <guillaume.tucker@collabora.com>
Acked-by: Huang Rui <ray.huang@amd.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/amd-pstate/Makefile | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile
index 5fd1424db37d8..c382f579fe94a 100644
--- a/tools/testing/selftests/amd-pstate/Makefile
+++ b/tools/testing/selftests/amd-pstate/Makefile
@@ -4,10 +4,15 @@
 # No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
 all:
 
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
 
-TEST_PROGS := run.sh
-TEST_FILES := basic.sh tbench.sh gitsource.sh
+ifeq (x86,$(ARCH))
+TEST_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+TEST_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+endif
+
+TEST_PROGS += run.sh
+TEST_FILES += basic.sh tbench.sh gitsource.sh
 
 include ../lib.mk

From ced0f245ed951e2b8bd68f79c15238d7dd253662 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 6 Mar 2023 11:14:50 +0100
Subject: [PATCH 037/411] kallsyms: add kallsyms_seqs_of_names to list of
 special symbols

My randconfig build setup ran into another kallsyms warning:

Inconsistent kallsyms data
Try make KALLSYMS_EXTRA_PASS=1 as a workaround

After adding some debugging code to kallsyms.c, I saw that the recently
added kallsyms_seqs_of_names symbol can sometimes cause the second stage
table to be slightly longer than the first stage, which makes the
build inconsistent.

Add it to the exception table that contains all other kallsyms-generated
symbols.

Fixes: 60443c88f3a8 ("kallsyms: Improve the performance of kallsyms_lookup_name()")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kallsyms.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 8a68179a98a39..a239a87e7bec1 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -119,6 +119,7 @@ static bool is_ignored_symbol(const char *name, char type)
 		"kallsyms_markers",
 		"kallsyms_token_table",
 		"kallsyms_token_index",
+		"kallsyms_seqs_of_names",
 		/* Exclude linker generated symbols which vary between passes */
 		"_SDA_BASE_",		/* ppc */
 		"_SDA2_BASE_",		/* ppc */

From 6b0313c2fa3d2cf991c9ffef6fae6e7ef592ce6d Mon Sep 17 00:00:00 2001
From: Shawn Guo <shawn.guo@linaro.org>
Date: Sat, 4 Mar 2023 15:41:07 +0800
Subject: [PATCH 038/411] cpuidle: psci: Iterate backwards over list in
 psci_pd_remove()

In case that psci_pd_init_topology() fails for some reason,
psci_pd_remove() will be responsible for deleting provider and removing
genpd from psci_pd_providers list.  There will be a failure when removing
the cluster PD, because the cpu (child) PDs haven't been removed.

[    0.050232] CPUidle PSCI: init PM domain cpu0
[    0.050278] CPUidle PSCI: init PM domain cpu1
[    0.050329] CPUidle PSCI: init PM domain cpu2
[    0.050370] CPUidle PSCI: init PM domain cpu3
[    0.050422] CPUidle PSCI: init PM domain cpu-cluster0
[    0.050475] PM: genpd_remove: unable to remove cpu-cluster0
[    0.051412] PM: genpd_remove: removed cpu3
[    0.051449] PM: genpd_remove: removed cpu2
[    0.051499] PM: genpd_remove: removed cpu1
[    0.051546] PM: genpd_remove: removed cpu0

Fix the problem by iterating the provider list reversely, so that parent
PD gets removed after child's PDs like below.

[    0.029052] CPUidle PSCI: init PM domain cpu0
[    0.029076] CPUidle PSCI: init PM domain cpu1
[    0.029103] CPUidle PSCI: init PM domain cpu2
[    0.029124] CPUidle PSCI: init PM domain cpu3
[    0.029151] CPUidle PSCI: init PM domain cpu-cluster0
[    0.029647] PM: genpd_remove: removed cpu0
[    0.029666] PM: genpd_remove: removed cpu1
[    0.029690] PM: genpd_remove: removed cpu2
[    0.029714] PM: genpd_remove: removed cpu3
[    0.029738] PM: genpd_remove: removed cpu-cluster0

Fixes: a65a397f2451 ("cpuidle: psci: Add support for PM domains by using genpd")
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Shawn Guo <shawn.guo@linaro.org>
Cc: 5.10+ <stable@vger.kernel.org> # 5.10+
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 drivers/cpuidle/cpuidle-psci-domain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index 6ad2954948a5a..11316c3b14ca4 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -106,7 +106,8 @@ static void psci_pd_remove(void)
 	struct psci_pd_provider *pd_provider, *it;
 	struct generic_pm_domain *genpd;
 
-	list_for_each_entry_safe(pd_provider, it, &psci_pd_providers, link) {
+	list_for_each_entry_safe_reverse(pd_provider, it,
+					 &psci_pd_providers, link) {
 		of_genpd_del_provider(pd_provider->node);
 
 		genpd = of_genpd_remove_last(pd_provider->node);

From c29b97725c91add5021a3257c14ad1ed32eedf76 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 2 Mar 2023 18:12:12 +0200
Subject: [PATCH 039/411] =?UTF-8?q?ACPI:=20docs:=20enumeration:=20Correct?=
 =?UTF-8?q?=20reference=20to=20the=20I=C2=B2C=20device=20data=20type?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I²C peripheral devices that are connected to the controller are
represented in the Linux kernel as objects of the struct i2c_client.
Fix this in the documentation.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 Documentation/firmware-guide/acpi/enumeration.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/firmware-guide/acpi/enumeration.rst b/Documentation/firmware-guide/acpi/enumeration.rst
index b9dc0c603f367..56d9913a3370c 100644
--- a/Documentation/firmware-guide/acpi/enumeration.rst
+++ b/Documentation/firmware-guide/acpi/enumeration.rst
@@ -19,7 +19,7 @@ possible we decided to do following:
     platform devices.
 
   - Devices behind real busses where there is a connector resource
-    are represented as struct spi_device or struct i2c_device. Note
+    are represented as struct spi_device or struct i2c_client. Note
     that standard UARTs are not busses so there is no struct uart_device,
     although some of them may be represented by struct serdev_device.
 

From 89b0411481967a2e8c91190a211a359966cfcf4b Mon Sep 17 00:00:00 2001
From: "Chia-Lin Kao (AceLan)" <acelan.kao@canonical.com>
Date: Thu, 2 Mar 2023 17:33:00 +0800
Subject: [PATCH 040/411] ACPI: video: Add backlight=native DMI quirk for Dell
 Vostro 15 3535

Sometimes the system boots up with a acpi_video0 backlight interface
which doesn't work. So add Dell Vostro 15 3535 into the
video_detect_dmi_table to set it to native explicitly.

Signed-off-by: Chia-Lin Kao (AceLan) <acelan.kao@canonical.com>
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 drivers/acpi/video_detect.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
index 710ac640267dd..14d6d81e536fe 100644
--- a/drivers/acpi/video_detect.c
+++ b/drivers/acpi/video_detect.c
@@ -716,6 +716,13 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
 		DMI_MATCH(DMI_PRODUCT_NAME, "Dell G15 5515"),
 		},
 	},
+	{
+	 .callback = video_detect_force_native,
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "Vostro 15 3535"),
+		},
+	},
 
 	/*
 	 * Desktops which falsely report a backlight and which our heuristics

From 5adc409340b1fc82bc1175e602d14ac82ac685e3 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 1 Mar 2023 11:04:34 +0100
Subject: [PATCH 041/411] ACPI: x86: Introduce an
 acpi_quirk_skip_gpio_event_handlers() helper

x86 ACPI boards which ship with only Android as their factory image usually
have pretty broken ACPI tables, relying on everything being hardcoded in
the factory kernel image and often disabling parts of the ACPI enumeration
kernel code to avoid the broken tables causing issues.

Part of this broken ACPI code is that sometimes these boards have _AEI
ACPI GPIO event handlers which are broken.

So far this has been dealt with in the platform/x86/x86-android-tablets.c
module, which contains various workarounds for these devices, by it calling
acpi_gpiochip_free_interrupts() on gpiochip-s with troublesome handlers to
disable the handlers.

But in some cases this is too late, if the handlers are of the edge type
then gpiolib-acpi.c's code will already have run them at boot.
This can cause issues such as GPIOs ending up as owned by "ACPI:OpRegion",
making them unavailable for drivers which actually need them.

Boards with these broken ACPI tables are already listed in
drivers/acpi/x86/utils.c for e.g. acpi_quirk_skip_i2c_client_enumeration().
Extend the quirks mechanism for a new acpi_quirk_skip_gpio_event_handlers()
helper, this re-uses the DMI-ids rather then having to duplicate the same
DMI table in gpiolib-acpi.c .

Also add the new ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS quirk to existing
boards with troublesome ACPI gpio event handlers, so that the current
acpi_gpiochip_free_interrupts() hack can be removed from
x86-android-tablets.c .

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 drivers/acpi/x86/utils.c    | 24 +++++++++++++++++++++---
 drivers/gpio/gpiolib-acpi.c |  3 +++
 include/acpi/acpi_bus.h     |  5 +++++
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index e45285d4e62a4..4bf57cce30bbf 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -251,6 +251,7 @@ bool force_storage_d3(void)
 #define ACPI_QUIRK_UART1_TTY_UART2_SKIP				BIT(1)
 #define ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY			BIT(2)
 #define ACPI_QUIRK_USE_ACPI_AC_AND_BATTERY			BIT(3)
+#define ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS			BIT(4)
 
 static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 	/*
@@ -286,7 +287,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 		},
 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
 					ACPI_QUIRK_UART1_TTY_UART2_SKIP |
-					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
 	},
 	{
 		.matches = {
@@ -294,7 +296,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "TF103C"),
 		},
 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
-					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
 	},
 	{
 		/* Lenovo Yoga Tablet 2 1050F/L */
@@ -336,7 +339,8 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "M890BAP"),
 		},
 		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
-					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY),
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
 	},
 	{
 		/* Whitelabel (sold as various brands) TM800A550L */
@@ -413,6 +417,20 @@ int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *s
 	return 0;
 }
 EXPORT_SYMBOL_GPL(acpi_quirk_skip_serdev_enumeration);
+
+bool acpi_quirk_skip_gpio_event_handlers(void)
+{
+	const struct dmi_system_id *dmi_id;
+	long quirks;
+
+	dmi_id = dmi_first_match(acpi_quirk_skip_dmi_ids);
+	if (!dmi_id)
+		return false;
+
+	quirks = (unsigned long)dmi_id->driver_data;
+	return (quirks & ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS);
+}
+EXPORT_SYMBOL_GPL(acpi_quirk_skip_gpio_event_handlers);
 #endif
 
 /* Lists of PMIC ACPI HIDs with an (often better) native charger driver */
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index d8a421ce26a83..31ae0adbb295a 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -536,6 +536,9 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
 	if (ACPI_FAILURE(status))
 		return;
 
+	if (acpi_quirk_skip_gpio_event_handlers())
+		return;
+
 	acpi_walk_resources(handle, METHOD_NAME__AEI,
 			    acpi_gpiochip_alloc_event, acpi_gpio);
 
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 0584e9f6e3397..57acb895c0381 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -657,6 +657,7 @@ static inline bool acpi_quirk_skip_acpi_ac_and_battery(void)
 #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
 bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev);
 int acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip);
+bool acpi_quirk_skip_gpio_event_handlers(void);
 #else
 static inline bool acpi_quirk_skip_i2c_client_enumeration(struct acpi_device *adev)
 {
@@ -668,6 +669,10 @@ acpi_quirk_skip_serdev_enumeration(struct device *controller_parent, bool *skip)
 	*skip = false;
 	return 0;
 }
+static inline bool acpi_quirk_skip_gpio_event_handlers(void)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_PM

From a5cb0695c5f0ac2ab0cedf2c1c0d75826cb73448 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 1 Mar 2023 11:04:35 +0100
Subject: [PATCH 042/411] ACPI: x86: Add skip i2c clients quirk for Acer Iconia
 One 7 B1-750

The Acer Iconia One 7 B1-750 is a x86 tablet which ships with Android x86
as factory OS. The Android x86 kernel fork ignores I2C devices described
in the DSDT, except for the PMIC and Audio codecs.

As usual the Acer Iconia One 7 B1-750's DSDT contains a bunch of extra I2C
devices which are not actually there, causing various resource conflicts.
Add an ACPI_QUIRK_SKIP_I2C_CLIENTS quirk for the Acer Iconia One 7 B1-750
to the acpi_quirk_skip_dmi_ids table to woraround this.

The DSDT also contains broken ACPI GPIO event handlers, disable those too.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 drivers/acpi/x86/utils.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index 4bf57cce30bbf..b2b0e2701333a 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -280,6 +280,16 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 	 *    need the x86-android-tablets module to properly work.
 	 */
 #if IS_ENABLED(CONFIG_X86_ANDROID_TABLETS)
+	{
+		/* Acer Iconia One 7 B1-750 */
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Insyde"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "VESPA2"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+	},
 	{
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),

From 1a1e7540cf501dd5c8b57a577a155cdd13c7e202 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Wed, 1 Mar 2023 11:04:36 +0100
Subject: [PATCH 043/411] ACPI: x86: Add skip i2c clients quirk for Lenovo Yoga
 Book X90

The Lenovo Yoga Book X90 is a x86 tablet which ships with Android x86
as factory OS. The Android x86 kernel fork ignores I2C devices described
in the DSDT, except for the PMIC and Audio codecs.

As usual the Lenovo Yoga Book X90's DSDT contains a bunch of extra I2C
devices which are not actually there, causing various resource conflicts.
Add an ACPI_QUIRK_SKIP_I2C_CLIENTS quirk for the Lenovo Yoga Book X90
to the acpi_quirk_skip_dmi_ids table to woraround this.

The DSDT also contains broken ACPI GPIO event handlers, disable those too.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rjw@rjwysocki.net>
---
 drivers/acpi/x86/utils.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/acpi/x86/utils.c b/drivers/acpi/x86/utils.c
index b2b0e2701333a..da5727069d851 100644
--- a/drivers/acpi/x86/utils.c
+++ b/drivers/acpi/x86/utils.c
@@ -300,6 +300,17 @@ static const struct dmi_system_id acpi_quirk_skip_dmi_ids[] = {
 					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
 					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
 	},
+	{
+		/* Lenovo Yoga Book X90F/L */
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "CHERRYVIEW D1 PLATFORM"),
+			DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "YETI-11"),
+		},
+		.driver_data = (void *)(ACPI_QUIRK_SKIP_I2C_CLIENTS |
+					ACPI_QUIRK_SKIP_ACPI_AC_AND_BATTERY |
+					ACPI_QUIRK_SKIP_GPIO_EVENT_HANDLERS),
+	},
 	{
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),

From a659e35ca0af2765f567bdfdccfa247eff0cdab8 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 7 Mar 2023 11:39:11 +0200
Subject: [PATCH 044/411] ASoC: SOF: Intel: MTL: Fix the device description

Add the missing ops_free callback.

Fixes: 064520e8aeaa ("ASoC: SOF: Intel: Add support for MeteorLake (MTL)")

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307093914.25409-2-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/pci-mtl.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/soc/sof/intel/pci-mtl.c b/sound/soc/sof/intel/pci-mtl.c
index 6e4e6d4ef5a56..b183dc0014b4b 100644
--- a/sound/soc/sof/intel/pci-mtl.c
+++ b/sound/soc/sof/intel/pci-mtl.c
@@ -46,6 +46,7 @@ static const struct sof_dev_desc mtl_desc = {
 	.nocodec_tplg_filename = "sof-mtl-nocodec.tplg",
 	.ops = &sof_mtl_ops,
 	.ops_init = sof_mtl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 /* PCI IDs */

From 9eb2b4cac223095d2079a6d52b8bbddc6e064288 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 7 Mar 2023 11:39:12 +0200
Subject: [PATCH 045/411] ASoC: SOF: Intel: HDA: Fix device description

Add the missing ops_free callback for APL/CNL/CML/JSL/TGL/EHL platforms.

Fixes: 1da51943725f ("ASoC: SOF: Intel: hda: init NHLT for IPC4")

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307093914.25409-3-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/pci-apl.c | 1 +
 sound/soc/sof/intel/pci-cnl.c | 2 ++
 sound/soc/sof/intel/pci-icl.c | 1 +
 sound/soc/sof/intel/pci-tgl.c | 5 +++++
 4 files changed, 9 insertions(+)

diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c
index 69279dcc92dc1..aff6cb573c270 100644
--- a/sound/soc/sof/intel/pci-apl.c
+++ b/sound/soc/sof/intel/pci-apl.c
@@ -78,6 +78,7 @@ static const struct sof_dev_desc glk_desc = {
 	.nocodec_tplg_filename = "sof-glk-nocodec.tplg",
 	.ops = &sof_apl_ops,
 	.ops_init = sof_apl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 /* PCI IDs */
diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c
index 8db3f8d15b55e..4c0c1c369dcd8 100644
--- a/sound/soc/sof/intel/pci-cnl.c
+++ b/sound/soc/sof/intel/pci-cnl.c
@@ -48,6 +48,7 @@ static const struct sof_dev_desc cnl_desc = {
 	.nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
 	.ops = &sof_cnl_ops,
 	.ops_init = sof_cnl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc cfl_desc = {
@@ -111,6 +112,7 @@ static const struct sof_dev_desc cml_desc = {
 	.nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
 	.ops = &sof_cnl_ops,
 	.ops_init = sof_cnl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 /* PCI IDs */
diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c
index d6cf75e357dbf..6785669113b3c 100644
--- a/sound/soc/sof/intel/pci-icl.c
+++ b/sound/soc/sof/intel/pci-icl.c
@@ -79,6 +79,7 @@ static const struct sof_dev_desc jsl_desc = {
 	.nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
 	.ops = &sof_cnl_ops,
 	.ops_init = sof_cnl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 /* PCI IDs */
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
index e80c4dfef85a5..adc7314a1b578 100644
--- a/sound/soc/sof/intel/pci-tgl.c
+++ b/sound/soc/sof/intel/pci-tgl.c
@@ -48,6 +48,7 @@ static const struct sof_dev_desc tgl_desc = {
 	.nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc tglh_desc = {
@@ -110,6 +111,7 @@ static const struct sof_dev_desc ehl_desc = {
 	.nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc adls_desc = {
@@ -141,6 +143,7 @@ static const struct sof_dev_desc adls_desc = {
 	.nocodec_tplg_filename = "sof-adl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc adl_desc = {
@@ -172,6 +175,7 @@ static const struct sof_dev_desc adl_desc = {
 	.nocodec_tplg_filename = "sof-adl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc adl_n_desc = {
@@ -203,6 +207,7 @@ static const struct sof_dev_desc adl_n_desc = {
 	.nocodec_tplg_filename = "sof-adl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc rpls_desc = {

From 1f320bdb29b644a2c9fb301a6fb2d6170e6417e9 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 7 Mar 2023 11:39:13 +0200
Subject: [PATCH 046/411] ASoC: SOF: Intel: SKL: Fix device description

Add missing ops_free callback for SKL/KBL platforms.

Fixes: 52d7939d10f2 ("ASoC: SOF: Intel: add ops for SKL/KBL")

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307093914.25409-4-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/pci-skl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/sof/intel/pci-skl.c b/sound/soc/sof/intel/pci-skl.c
index 3a99dc444f92e..5b4bccf819658 100644
--- a/sound/soc/sof/intel/pci-skl.c
+++ b/sound/soc/sof/intel/pci-skl.c
@@ -38,6 +38,7 @@ static struct sof_dev_desc skl_desc = {
 	.nocodec_tplg_filename = "sof-skl-nocodec.tplg",
 	.ops = &sof_skl_ops,
 	.ops_init = sof_skl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static struct sof_dev_desc kbl_desc = {
@@ -61,6 +62,7 @@ static struct sof_dev_desc kbl_desc = {
 	.nocodec_tplg_filename = "sof-kbl-nocodec.tplg",
 	.ops = &sof_skl_ops,
 	.ops_init = sof_skl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 /* PCI IDs */

From 376f79bbf521fc37b871b536276319951b5bef3a Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 7 Mar 2023 11:39:14 +0200
Subject: [PATCH 047/411] ASOC: SOF: Intel: pci-tgl: Fix device description

Add the missing ops_free callback.

Fixes: 63d375b9f2a9 ("ASoC: SOF: Intel: pci-tgl: use RPL specific firmware definitions")

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307093914.25409-5-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/pci-tgl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
index adc7314a1b578..22e769e0831d9 100644
--- a/sound/soc/sof/intel/pci-tgl.c
+++ b/sound/soc/sof/intel/pci-tgl.c
@@ -239,6 +239,7 @@ static const struct sof_dev_desc rpls_desc = {
 	.nocodec_tplg_filename = "sof-rpl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 static const struct sof_dev_desc rpl_desc = {
@@ -270,6 +271,7 @@ static const struct sof_dev_desc rpl_desc = {
 	.nocodec_tplg_filename = "sof-rpl-nocodec.tplg",
 	.ops = &sof_tgl_ops,
 	.ops_init = sof_tgl_ops_init,
+	.ops_free = hda_ops_free,
 };
 
 /* PCI IDs */

From 989a3e4479177d0f4afab8be1960731bc0ffbbd0 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Date: Tue, 7 Mar 2023 13:49:17 +0200
Subject: [PATCH 048/411] ASoC: SOF: ipc3: Check for upper size limit for the
 received message

The sof_ipc3_rx_msg() checks for minimum size of a new rx message but it is
missing the check for upper limit.
Corrupted or compromised firmware might be able to take advantage of this
to cause out of bounds reads outside of the message area.

Reported-by: Curtis Malainey <cujomalainey@chromium.org>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Curtis Malainey <curtis@malainey.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307114917.5124-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc3.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c
index 3de64ea2dc9aa..4493bbd7faf12 100644
--- a/sound/soc/sof/ipc3.c
+++ b/sound/soc/sof/ipc3.c
@@ -970,8 +970,9 @@ static void sof_ipc3_rx_msg(struct snd_sof_dev *sdev)
 		return;
 	}
 
-	if (hdr.size < sizeof(hdr)) {
-		dev_err(sdev->dev, "The received message size is invalid\n");
+	if (hdr.size < sizeof(hdr) || hdr.size > SOF_IPC_MSG_MAX_SIZE) {
+		dev_err(sdev->dev, "The received message size is invalid: %u\n",
+			hdr.size);
 		return;
 	}
 

From 9e269e3aa9006440de639597079ee7140ef5b5f3 Mon Sep 17 00:00:00 2001
From: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Date: Tue, 7 Mar 2023 13:07:51 +0200
Subject: [PATCH 049/411] ASoC: SOF: ipc4-topology: Fix incorrect sample rate
 print unit

This patch fixes the sample rate print unit from KHz to Hz.
E.g. 48000KHz becomes 48000Hz.

Signed-off-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307110751.2053-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index 3e27c7a48ebd3..dc44ba2ec71c2 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -155,7 +155,7 @@ static void sof_ipc4_dbg_audio_format(struct device *dev,
 	for (i = 0; i < num_format; i++, ptr = (u8 *)ptr + object_size) {
 		fmt = ptr;
 		dev_dbg(dev,
-			" #%d: %uKHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n",
+			" #%d: %uHz, %ubit (ch_map %#x ch_cfg %u interleaving_style %u fmt_cfg %#x)\n",
 			i, fmt->sampling_frequency, fmt->bit_depth, fmt->ch_map,
 			fmt->ch_cfg, fmt->interleaving_style, fmt->fmt_cfg);
 	}

From 858a438a6cf919e5727d2a0f5f3f0e68b2d5354e Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 7 Mar 2023 12:07:33 +0200
Subject: [PATCH 050/411] ASoC: Intel: soc-acpi: fix copy-paste issue in
 topology names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For some reason the convention for topology names was not followed and
the name inspired by another unrelated hardware configuration. As a
result, the kernel will request a non-existent topology file.

Link: https://github.com/thesofproject/sof/pull/6878
Fixes: 2ec8b081d59f ("ASoC: Intel: soc-acpi: Add entry for sof_es8336 in ADL match table")
Cc: stable@vger.kernel.org
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307100733.15025-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/intel/common/soc-acpi-intel-adl-match.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/intel/common/soc-acpi-intel-adl-match.c b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
index 56ee5fef66a8b..28dd2046e4ac5 100644
--- a/sound/soc/intel/common/soc-acpi-intel-adl-match.c
+++ b/sound/soc/intel/common/soc-acpi-intel-adl-match.c
@@ -559,7 +559,7 @@ struct snd_soc_acpi_mach snd_soc_acpi_intel_adl_machines[] = {
 	{
 		.comp_ids = &essx_83x6,
 		.drv_name = "sof-essx8336",
-		.sof_tplg_filename = "sof-adl-es83x6", /* the tplg suffix is added at run time */
+		.sof_tplg_filename = "sof-adl-es8336", /* the tplg suffix is added at run time */
 		.tplg_quirk_mask = SND_SOC_ACPI_TPLG_INTEL_SSP_NUMBER |
 					SND_SOC_ACPI_TPLG_INTEL_SSP_MSB |
 					SND_SOC_ACPI_TPLG_INTEL_DMIC_NUMBER,

From 6ba8ddf86a3ada463e9952a19b069f978a70a748 Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 7 Mar 2023 13:48:15 +0200
Subject: [PATCH 051/411] ASoC: SOF: topology: Fix error handling in
 sof_widget_ready()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the error paths in sof_widget_ready() to free all allocated memory
and prevent memory leaks.

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307114815.4909-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/topology.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/sound/soc/sof/topology.c b/sound/soc/sof/topology.c
index 4a62ccc71fcbf..9f3a038fe21ad 100644
--- a/sound/soc/sof/topology.c
+++ b/sound/soc/sof/topology.c
@@ -1388,14 +1388,15 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 	if (ret < 0) {
 		dev_err(scomp->dev, "failed to parse component pin tokens for %s\n",
 			w->name);
-		return ret;
+		goto widget_free;
 	}
 
 	if (swidget->num_sink_pins > SOF_WIDGET_MAX_NUM_PINS ||
 	    swidget->num_source_pins > SOF_WIDGET_MAX_NUM_PINS) {
 		dev_err(scomp->dev, "invalid pins for %s: [sink: %d, src: %d]\n",
 			swidget->widget->name, swidget->num_sink_pins, swidget->num_source_pins);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto widget_free;
 	}
 
 	if (swidget->num_sink_pins > 1) {
@@ -1404,7 +1405,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 		if (ret < 0) {
 			dev_err(scomp->dev, "failed to parse sink pin binding for %s\n",
 				w->name);
-			return ret;
+			goto widget_free;
 		}
 	}
 
@@ -1414,7 +1415,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 		if (ret < 0) {
 			dev_err(scomp->dev, "failed to parse source pin binding for %s\n",
 				w->name);
-			return ret;
+			goto widget_free;
 		}
 	}
 
@@ -1436,9 +1437,8 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 	case snd_soc_dapm_dai_out:
 		dai = kzalloc(sizeof(*dai), GFP_KERNEL);
 		if (!dai) {
-			kfree(swidget);
-			return -ENOMEM;
-
+			ret = -ENOMEM;
+			goto widget_free;
 		}
 
 		ret = sof_widget_parse_tokens(scomp, swidget, tw, token_list, token_list_size);
@@ -1496,8 +1496,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 			tw->shift, swidget->id, tw->name,
 			strnlen(tw->sname, SNDRV_CTL_ELEM_ID_NAME_MAXLEN) > 0
 				? tw->sname : "none");
-		kfree(swidget);
-		return ret;
+		goto widget_free;
 	}
 
 	if (sof_debug_check_flag(SOF_DBG_DISABLE_MULTICORE)) {
@@ -1518,10 +1517,7 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 			if (ret) {
 				dev_err(scomp->dev, "widget event binding failed for %s\n",
 					swidget->widget->name);
-				kfree(swidget->private);
-				kfree(swidget->tuples);
-				kfree(swidget);
-				return ret;
+				goto free;
 			}
 		}
 	}
@@ -1532,10 +1528,8 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 
 		spipe = kzalloc(sizeof(*spipe), GFP_KERNEL);
 		if (!spipe) {
-			kfree(swidget->private);
-			kfree(swidget->tuples);
-			kfree(swidget);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto free;
 		}
 
 		spipe->pipe_widget = swidget;
@@ -1546,6 +1540,12 @@ static int sof_widget_ready(struct snd_soc_component *scomp, int index,
 	w->dobj.private = swidget;
 	list_add(&swidget->list, &sdev->widget_list);
 	return ret;
+free:
+	kfree(swidget->private);
+	kfree(swidget->tuples);
+widget_free:
+	kfree(swidget);
+	return ret;
 }
 
 static int sof_route_unload(struct snd_soc_component *scomp,

From ca09e2a351fbc7836ba9418304ff0c3e72addfe0 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 7 Mar 2023 11:53:41 +0200
Subject: [PATCH 052/411] ASoC: SOF: Intel: pci-tng: revert invalid bar size
 setting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The logic for the ioremap is to find the resource index 3 (IRAM) and
infer the BAR address by subtracting the IRAM offset. The BAR size
defined in hardware specifications is 2MB.

The commit 5947b2726beb6 ("ASoC: SOF: Intel: Check the bar size before
remapping") tried to find the BAR size by querying the resource length
instead of a pre-canned value, but by requesting the size for index 3
it only gets the size of the IRAM. That's obviously wrong and prevents
the probe from proceeding.

This commit attempted to fix an issue in a fuzzing/simulated
environment but created another on actual devices, so the best course
of action is to revert that change.

Reported-by: Ferry Toth <fntoth@gmail.com>
Tested-by: Ferry Toth <fntoth@gmail.com> (Intel Edison-Arduino)
Link: https://github.com/thesofproject/linux/issues/3901
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307095341.3222-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/pci-tng.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c
index 5b2b409752c58..8c22a00266c06 100644
--- a/sound/soc/sof/intel/pci-tng.c
+++ b/sound/soc/sof/intel/pci-tng.c
@@ -75,11 +75,7 @@ static int tangier_pci_probe(struct snd_sof_dev *sdev)
 
 	/* LPE base */
 	base = pci_resource_start(pci, desc->resindex_lpe_base) - IRAM_OFFSET;
-	size = pci_resource_len(pci, desc->resindex_lpe_base);
-	if (size < PCI_BAR_SIZE) {
-		dev_err(sdev->dev, "error: I/O region is too small.\n");
-		return -ENODEV;
-	}
+	size = PCI_BAR_SIZE;
 
 	dev_dbg(sdev->dev, "LPE PHY base at 0x%x size 0x%x", base, size);
 	sdev->bar[DSP_BAR] = devm_ioremap(sdev->dev, base, size);

From b66bfc3a9810caed5d55dd8907110bdc8028b06b Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Tue, 7 Mar 2023 13:46:39 +0200
Subject: [PATCH 053/411] ASoC: SOF: sof-audio: Fix broken early bclk feature
 for SSP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the removal of widget setup during BE hw_params, the DAI config IPC
is never sent with the SOF_DAI_CONFIG_FLAGS_HW_PARAMS. This means that
the early bit clock feature required for certain codecs will be broken.

Fix this by saving the config flags sent during BE DAI hw_params and
reusing it when the DAI_CONFIG IPC is sent after the DAI widget is set
up. Also, free the DAI config before the widget is freed.

The DAI_CONFIG IPC sent during the sof_widget_free() does not have the
DAI index information. So, save the dai_index in the config during
hw_params and reuse it during hw_free.

For IPC4, do not clear the node ID during hw_free. It will be needed for
freeing the group_ida during unprepare.

Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307114639.4553-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc3-topology.c | 32 ++++++++++++++++++++++++++++++--
 sound/soc/sof/ipc4-topology.c | 15 +++++++++++++--
 sound/soc/sof/sof-audio.c     | 28 +++++++++++++++++++++++++---
 3 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/sound/soc/sof/ipc3-topology.c b/sound/soc/sof/ipc3-topology.c
index dceb78bfe17c6..b1f425b39db94 100644
--- a/sound/soc/sof/ipc3-topology.c
+++ b/sound/soc/sof/ipc3-topology.c
@@ -2081,7 +2081,9 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget *
 		break;
 	case SOF_DAI_INTEL_ALH:
 		if (data) {
-			config->dai_index = data->dai_index;
+			/* save the dai_index during hw_params and reuse it for hw_free */
+			if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS)
+				config->dai_index = data->dai_index;
 			config->alh.stream_id = data->dai_data;
 		}
 		break;
@@ -2089,7 +2091,30 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget *
 		break;
 	}
 
-	config->flags = flags;
+	/*
+	 * The dai_config op is invoked several times and the flags argument varies as below:
+	 * BE DAI hw_params: When the op is invoked during the BE DAI hw_params, flags contains
+	 * SOF_DAI_CONFIG_FLAGS_HW_PARAMS along with quirks
+	 * FE DAI hw_params: When invoked during FE DAI hw_params after the DAI widget has
+	 * just been set up in the DSP, flags is set to SOF_DAI_CONFIG_FLAGS_HW_PARAMS with no
+	 * quirks
+	 * BE DAI trigger: When invoked during the BE DAI trigger, flags is set to
+	 * SOF_DAI_CONFIG_FLAGS_PAUSE and contains no quirks
+	 * BE DAI hw_free: When invoked during the BE DAI hw_free, flags is set to
+	 * SOF_DAI_CONFIG_FLAGS_HW_FREE and contains no quirks
+	 * FE DAI hw_free: When invoked during the FE DAI hw_free, flags is set to
+	 * SOF_DAI_CONFIG_FLAGS_HW_FREE and contains no quirks
+	 *
+	 * The DAI_CONFIG IPC is sent to the DSP, only after the widget is set up during the FE
+	 * DAI hw_params. But since the BE DAI hw_params precedes the FE DAI hw_params, the quirks
+	 * need to be preserved when assigning the flags before sending the IPC.
+	 * For the case of PAUSE/HW_FREE, since there are no quirks, flags can be used as is.
+	 */
+
+	if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS)
+		config->flags |= flags;
+	else
+		config->flags = flags;
 
 	/* only send the IPC if the widget is set up in the DSP */
 	if (swidget->use_count > 0) {
@@ -2097,6 +2122,9 @@ static int sof_ipc3_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget *
 					 &reply, sizeof(reply));
 		if (ret < 0)
 			dev_err(sdev->dev, "Failed to set dai config for %s\n", dai->name);
+
+		/* clear the flags once the IPC has been sent even if it fails */
+		config->flags = SOF_DAI_CONFIG_FLAGS_NONE;
 	}
 
 	return ret;
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index dc44ba2ec71c2..ae02cc152f879 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -980,6 +980,7 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget)
 
 		ipc4_copier = dai->private;
 		if (ipc4_copier->dai_type == SOF_DAI_INTEL_ALH) {
+			struct sof_ipc4_copier_data *copier_data = &ipc4_copier->data;
 			struct sof_ipc4_alh_configuration_blob *blob;
 			unsigned int group_id;
 
@@ -989,6 +990,9 @@ static void sof_ipc4_unprepare_copier_module(struct snd_sof_widget *swidget)
 					   ALH_MULTI_GTW_BASE;
 				ida_free(&alh_group_ida, group_id);
 			}
+
+			/* clear the node ID */
+			copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK;
 		}
 	}
 
@@ -1940,8 +1944,15 @@ static int sof_ipc4_dai_config(struct snd_sof_dev *sdev, struct snd_sof_widget *
 		pipeline->skip_during_fe_trigger = true;
 		fallthrough;
 	case SOF_DAI_INTEL_ALH:
-		copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK;
-		copier_data->gtw_cfg.node_id |= SOF_IPC4_NODE_INDEX(data->dai_data);
+		/*
+		 * Do not clear the node ID when this op is invoked with
+		 * SOF_DAI_CONFIG_FLAGS_HW_FREE. It is needed to free the group_ida during
+		 * unprepare.
+		 */
+		if (flags & SOF_DAI_CONFIG_FLAGS_HW_PARAMS) {
+			copier_data->gtw_cfg.node_id &= ~SOF_IPC4_NODE_INDEX_MASK;
+			copier_data->gtw_cfg.node_id |= SOF_IPC4_NODE_INDEX(data->dai_data);
+		}
 		break;
 	case SOF_DAI_INTEL_DMIC:
 	case SOF_DAI_INTEL_SSP:
diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index 760621bfc8028..d7df29f2ada85 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -50,9 +50,27 @@ static int sof_widget_free_unlocked(struct snd_sof_dev *sdev,
 	/* reset route setup status for all routes that contain this widget */
 	sof_reset_route_setup_status(sdev, swidget);
 
+	/* free DAI config and continue to free widget even if it fails */
+	if (WIDGET_IS_DAI(swidget->id)) {
+		struct snd_sof_dai_config_data data;
+		unsigned int flags = SOF_DAI_CONFIG_FLAGS_HW_FREE;
+
+		data.dai_data = DMA_CHAN_INVALID;
+
+		if (tplg_ops && tplg_ops->dai_config) {
+			err = tplg_ops->dai_config(sdev, swidget, flags, &data);
+			if (err < 0)
+				dev_err(sdev->dev, "failed to free config for widget %s\n",
+					swidget->widget->name);
+		}
+	}
+
 	/* continue to disable core even if IPC fails */
-	if (tplg_ops && tplg_ops->widget_free)
-		err = tplg_ops->widget_free(sdev, swidget);
+	if (tplg_ops && tplg_ops->widget_free) {
+		ret = tplg_ops->widget_free(sdev, swidget);
+		if (ret < 0 && !err)
+			err = ret;
+	}
 
 	/*
 	 * disable widget core. continue to route setup status and complete flag
@@ -151,8 +169,12 @@ static int sof_widget_setup_unlocked(struct snd_sof_dev *sdev,
 
 	/* send config for DAI components */
 	if (WIDGET_IS_DAI(swidget->id)) {
-		unsigned int flags = SOF_DAI_CONFIG_FLAGS_NONE;
+		unsigned int flags = SOF_DAI_CONFIG_FLAGS_HW_PARAMS;
 
+		/*
+		 * The config flags saved during BE DAI hw_params will be used for IPC3. IPC4 does
+		 * not use the flags argument.
+		 */
 		if (tplg_ops && tplg_ops->dai_config) {
 			ret = tplg_ops->dai_config(sdev, swidget, flags, NULL);
 			if (ret < 0)

From c99e48f4ce9b986ab7992ec7283a06dae875f668 Mon Sep 17 00:00:00 2001
From: Jaska Uimonen <jaska.uimonen@linux.intel.com>
Date: Tue, 7 Mar 2023 13:07:30 +0200
Subject: [PATCH 054/411] ASoC: SOF: ipc4-topology: set dmic dai index from
 copier
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dmic dai index was set incorrectly to bits 5-7, when it is actually using
just the lowest 3. Fix the macro for setting the bits.

Fixes: aa84ffb72158 ("ASoC: SOF: ipc4-topology: Add support for SSP/DMIC DAI's")
Signed-off-by: Jaska Uimonen <jaska.uimonen@linux.intel.com>
Reviewed-by: Adrian Bonislawski <adrian.bonislawski@intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307110730.1995-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-topology.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h
index 72529179ac223..c0e457f7f51ad 100644
--- a/sound/soc/sof/ipc4-topology.h
+++ b/sound/soc/sof/ipc4-topology.h
@@ -46,7 +46,7 @@
 #define SOF_IPC4_NODE_INDEX_INTEL_SSP(x) (((x) & 0xf) << 4)
 
 /* Node ID for DMIC type DAI copiers */
-#define SOF_IPC4_NODE_INDEX_INTEL_DMIC(x) (((x) & 0x7) << 5)
+#define SOF_IPC4_NODE_INDEX_INTEL_DMIC(x) ((x) & 0x7)
 
 #define SOF_IPC4_GAIN_ALL_CHANNELS_MASK 0xffffffff
 #define SOF_IPC4_VOL_ZERO_DB	0x7fffffff

From 52a55779ed14792a150421339664193d6eb8e036 Mon Sep 17 00:00:00 2001
From: Rander Wang <rander.wang@intel.com>
Date: Tue, 7 Mar 2023 11:54:52 +0200
Subject: [PATCH 055/411] ASoC: SOF: Intel: hda-dsp: harden D0i3 programming
 sequence
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add delay between set and wait command according to hardware programming
sequence. Also add debug log to detect error.

Signed-off-by: Rander Wang <rander.wang@intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307095453.3719-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/hda-dsp.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c
index 68eb06f13a1fd..a6f2822401e03 100644
--- a/sound/soc/sof/intel/hda-dsp.c
+++ b/sound/soc/sof/intel/hda-dsp.c
@@ -392,6 +392,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value)
 	snd_sof_dsp_update8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset,
 			    SOF_HDA_VS_D0I3C_I3, value);
 
+	/*
+	 * The value written to the D0I3C::I3 bit may not be taken into account immediately.
+	 * A delay is recommended before checking if D0I3C::CIP is cleared
+	 */
+	usleep_range(30, 40);
+
 	/* Wait for cmd in progress to be cleared before exiting the function */
 	ret = hda_dsp_wait_d0i3c_done(sdev);
 	if (ret < 0) {
@@ -400,6 +406,12 @@ static int hda_dsp_update_d0i3c_register(struct snd_sof_dev *sdev, u8 value)
 	}
 
 	reg = snd_sof_dsp_read8(sdev, HDA_DSP_HDA_BAR, chip->d0i3_offset);
+	/* Confirm d0i3 state changed with paranoia check */
+	if ((reg ^ value) & SOF_HDA_VS_D0I3C_I3) {
+		dev_err(sdev->dev, "failed to update D0I3C!\n");
+		return -EIO;
+	}
+
 	trace_sof_intel_D0I3C_updated(sdev, reg);
 
 	return 0;

From 8bac40b8ed17ab1be9133e9620f65fae80262b7e Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 7 Mar 2023 11:54:12 +0200
Subject: [PATCH 056/411] ASoC: SOF: Intel: hda-ctrl: re-add sleep after
 entering and exiting reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit a09d82ce0a867 ("ASoC: SOF: Intel: hda-ctrl: remove
useless sleep")

It was a mistake to remove those delays, in light of comments in the
HDaudio spec captured in snd_hdac_bus_reset_link() that the codec
needs time for its initialization and PLL lock.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307095412.3416-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/intel/hda-ctrl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/sound/soc/sof/intel/hda-ctrl.c b/sound/soc/sof/intel/hda-ctrl.c
index 3aea36c077c9d..f3bdeba284122 100644
--- a/sound/soc/sof/intel/hda-ctrl.c
+++ b/sound/soc/sof/intel/hda-ctrl.c
@@ -196,12 +196,15 @@ int hda_dsp_ctrl_init_chip(struct snd_sof_dev *sdev)
 		goto err;
 	}
 
+	usleep_range(500, 1000);
+
 	/* exit HDA controller reset */
 	ret = hda_dsp_ctrl_link_reset(sdev, false);
 	if (ret < 0) {
 		dev_err(sdev->dev, "error: failed to exit HDA controller reset\n");
 		goto err;
 	}
+	usleep_range(1000, 1200);
 
 	hda_codec_detect_mask(sdev);
 

From c7e328f1cbf22efe23bc3cd7dd6bb14efccc28d0 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 7 Mar 2023 13:46:59 +0200
Subject: [PATCH 057/411] ASoC: SOF: sof-audio: don't squelch errors in
 WIDGET_SETUP phase

When an IPC error happens while setting-up a widget during the FE
hw_params phase, the existing logic will unwind all previous
configurations but will overwrite the return status. The ALSA/ASoC
logic will then proceed with the prepare and trigger phases, even
though the firmware resources are not available.

Fix by returning the initial error code and ignoring the code returned
in the UNPREPARE phase.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Chao Song <chao.song@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307114659.4614-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/sof-audio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/soc/sof/sof-audio.c b/sound/soc/sof/sof-audio.c
index d7df29f2ada85..6de388a8d0b8d 100644
--- a/sound/soc/sof/sof-audio.c
+++ b/sound/soc/sof/sof-audio.c
@@ -610,8 +610,8 @@ int sof_widget_list_setup(struct snd_sof_dev *sdev, struct snd_sof_pcm *spcm,
 	ret = sof_walk_widgets_in_order(sdev, spcm, fe_params, platform_params,
 					dir, SOF_WIDGET_SETUP);
 	if (ret < 0) {
-		ret = sof_walk_widgets_in_order(sdev, spcm, fe_params, platform_params,
-						dir, SOF_WIDGET_UNPREPARE);
+		sof_walk_widgets_in_order(sdev, spcm, fe_params, platform_params,
+					  dir, SOF_WIDGET_UNPREPARE);
 		return ret;
 	}
 

From e45cd86c3a78bfb9875a5eb8ab5dab459b59bbe2 Mon Sep 17 00:00:00 2001
From: Rander Wang <rander.wang@intel.com>
Date: Tue, 7 Mar 2023 13:06:56 +0200
Subject: [PATCH 058/411] ASoC: SOF: IPC4: update gain ipc msg definition to
 align with fw
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recent firmware changes modified the curve duration from 32 to 64 bits,
which breaks volume ramps. A simple solution would be to change the
definition, but unfortunately the ASoC topology framework only supports
up to 32 bit tokens.

This patch suggests breaking the 64 bit value in low and high parts, with
only the low-part extracted from topology and high-part only zeroes. Since
the curve duration is represented in hundred of nanoseconds, we can still
represent a 400s ramp, which is just fine. The defacto ABI change has no
effect on existing users since the IPC4 firmware has not been released just
yet.

Link: https://github.com/thesofproject/linux/issues/4026

Signed-off-by: Rander Wang <rander.wang@intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Reviewed-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
Link: https://lore.kernel.org/r/20230307110656.1816-1-peter.ujfalusi@linux.intel.com
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/sof/ipc4-control.c  | 3 ++-
 sound/soc/sof/ipc4-topology.c | 4 ++--
 sound/soc/sof/ipc4-topology.h | 6 ++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/sound/soc/sof/ipc4-control.c b/sound/soc/sof/ipc4-control.c
index 67bd2233fd9a6..9a71af1a613ac 100644
--- a/sound/soc/sof/ipc4-control.c
+++ b/sound/soc/sof/ipc4-control.c
@@ -97,7 +97,8 @@ sof_ipc4_set_volume_data(struct snd_sof_dev *sdev, struct snd_sof_widget *swidge
 		}
 
 		/* set curve type and duration from topology */
-		data.curve_duration = gain->data.curve_duration;
+		data.curve_duration_l = gain->data.curve_duration_l;
+		data.curve_duration_h = gain->data.curve_duration_h;
 		data.curve_type = gain->data.curve_type;
 
 		msg->data_ptr = &data;
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index ae02cc152f879..a623707c8ffc7 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -107,7 +107,7 @@ static const struct sof_topology_token gain_tokens[] = {
 		get_token_u32, offsetof(struct sof_ipc4_gain_data, curve_type)},
 	{SOF_TKN_GAIN_RAMP_DURATION,
 		SND_SOC_TPLG_TUPLE_TYPE_WORD, get_token_u32,
-		offsetof(struct sof_ipc4_gain_data, curve_duration)},
+		offsetof(struct sof_ipc4_gain_data, curve_duration_l)},
 	{SOF_TKN_GAIN_VAL, SND_SOC_TPLG_TUPLE_TYPE_WORD,
 		get_token_u32, offsetof(struct sof_ipc4_gain_data, init_val)},
 };
@@ -692,7 +692,7 @@ static int sof_ipc4_widget_setup_comp_pga(struct snd_sof_widget *swidget)
 
 	dev_dbg(scomp->dev,
 		"pga widget %s: ramp type: %d, ramp duration %d, initial gain value: %#x, cpc %d\n",
-		swidget->widget->name, gain->data.curve_type, gain->data.curve_duration,
+		swidget->widget->name, gain->data.curve_type, gain->data.curve_duration_l,
 		gain->data.init_val, gain->base_config.cpc);
 
 	ret = sof_ipc4_widget_setup_msg(swidget, &gain->msg);
diff --git a/sound/soc/sof/ipc4-topology.h b/sound/soc/sof/ipc4-topology.h
index c0e457f7f51ad..123f1096f3261 100644
--- a/sound/soc/sof/ipc4-topology.h
+++ b/sound/soc/sof/ipc4-topology.h
@@ -277,14 +277,16 @@ struct sof_ipc4_control_data {
  * @init_val: Initial value
  * @curve_type: Curve type
  * @reserved: reserved for future use
- * @curve_duration: Curve duration
+ * @curve_duration_l: Curve duration low part
+ * @curve_duration_h: Curve duration high part
  */
 struct sof_ipc4_gain_data {
 	uint32_t channels;
 	uint32_t init_val;
 	uint32_t curve_type;
 	uint32_t reserved;
-	uint32_t curve_duration;
+	uint32_t curve_duration_l;
+	uint32_t curve_duration_h;
 } __aligned(8);
 
 /**

From c22f2ff8724b49dce2ae797e9fbf4bc0fa91112f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 11:32:42 +0100
Subject: [PATCH 059/411] drm/sun4i: fix missing component unbind on bind
 errors

Make sure to unbind all subcomponents when binding the aggregate device
fails.

Fixes: 9026e0d122ac ("drm: Add Allwinner A10 Display Engine support")
Cc: stable@vger.kernel.org      # 4.7
Cc: Maxime Ripard <mripard@kernel.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Maxime Ripard <maxime@cerno.tech>
Link: https://patchwork.freedesktop.org/patch/msgid/20230306103242.4775-1-johan+linaro@kernel.org
---
 drivers/gpu/drm/sun4i/sun4i_drv.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index cc94efbbf2d4e..d6c741716167a 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -95,12 +95,12 @@ static int sun4i_drv_bind(struct device *dev)
 	/* drm_vblank_init calls kcalloc, which can fail */
 	ret = drm_vblank_init(drm, drm->mode_config.num_crtc);
 	if (ret)
-		goto cleanup_mode_config;
+		goto unbind_all;
 
 	/* Remove early framebuffers (ie. simplefb) */
 	ret = drm_aperture_remove_framebuffers(false, &sun4i_drv_driver);
 	if (ret)
-		goto cleanup_mode_config;
+		goto unbind_all;
 
 	sun4i_framebuffer_init(drm);
 
@@ -119,6 +119,8 @@ static int sun4i_drv_bind(struct device *dev)
 
 finish_poll:
 	drm_kms_helper_poll_fini(drm);
+unbind_all:
+	component_unbind_all(dev, NULL);
 cleanup_mode_config:
 	drm_mode_config_cleanup(drm);
 	of_reserved_mem_device_release(dev);

From 5d89176af1ae201c01c10a89b68b27cfc683b76c Mon Sep 17 00:00:00 2001
From: Song Shuai <suagrfillet@gmail.com>
Date: Mon, 27 Feb 2023 18:59:41 +0800
Subject: [PATCH 060/411] sched/doc: supplement CPU capacity with RISC-V

This commit 7d2078310cbf ("dt-bindings: arm: move cpu-capacity to a
shared loation") updates some references about capacity-dmips-mhz
property in this document.

The list of architectures using capacity-dmips-mhz omits RISC-V, so
supplements it here.

Signed-off-by: Song Shuai <suagrfillet@gmail.com>
Reviewed-by: Palmer Dabbelt <palmer@rivosinc.com> # English
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Reviewed-by: Alex Shi <alexs@kernel.org>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20230227105941.2749193-1-suagrfillet@gmail.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/scheduler/sched-capacity.rst                    | 2 +-
 Documentation/translations/zh_CN/scheduler/sched-capacity.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/scheduler/sched-capacity.rst b/Documentation/scheduler/sched-capacity.rst
index 8e2b8538bc2b7..e2c1cf7431588 100644
--- a/Documentation/scheduler/sched-capacity.rst
+++ b/Documentation/scheduler/sched-capacity.rst
@@ -258,7 +258,7 @@ Linux cannot currently figure out CPU capacity on its own, this information thus
 needs to be handed to it. Architectures must define arch_scale_cpu_capacity()
 for that purpose.
 
-The arm and arm64 architectures directly map this to the arch_topology driver
+The arm, arm64, and RISC-V architectures directly map this to the arch_topology driver
 CPU scaling data, which is derived from the capacity-dmips-mhz CPU binding; see
 Documentation/devicetree/bindings/cpu/cpu-capacity.txt.
 
diff --git a/Documentation/translations/zh_CN/scheduler/sched-capacity.rst b/Documentation/translations/zh_CN/scheduler/sched-capacity.rst
index e07ffdd391d32..8cba135dcd1a6 100644
--- a/Documentation/translations/zh_CN/scheduler/sched-capacity.rst
+++ b/Documentation/translations/zh_CN/scheduler/sched-capacity.rst
@@ -231,7 +231,7 @@ CFS调度类基于实体负载跟踪机制（Per-Entity Load Tracking, PELT）
 当前，Linux无法凭自身算出CPU算力，因此必须要有把这个信息传递给Linux的方式。每个架构必须为此
 定义arch_scale_cpu_capacity()函数。
 
-arm和arm64架构直接把这个信息映射到arch_topology驱动的CPU scaling数据中（译注：参考
+arm、arm64和RISC-V架构直接把这个信息映射到arch_topology驱动的CPU scaling数据中（译注：参考
 arch_topology.h的percpu变量cpu_scale），它是从capacity-dmips-mhz CPU binding中衍生计算
 出来的。参见Documentation/devicetree/bindings/cpu/cpu-capacity.txt。
 

From 74596085796fae0cfce3e42ee46bf4f8acbdac55 Mon Sep 17 00:00:00 2001
From: Glenn Washburn <development@efficientek.com>
Date: Mon, 27 Feb 2023 12:40:42 -0600
Subject: [PATCH 061/411] docs: Correct missing "d_" prefix for
 dentry_operations member d_weak_revalidate

The details for struct dentry_operations member d_weak_revalidate is
missing a "d_" prefix.

Fixes: af96c1e304f7 ("docs: filesystems: vfs: Convert vfs.txt to RST")
Signed-off-by: Glenn Washburn <development@efficientek.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Link: https://lore.kernel.org/r/20230227184042.2375235-1-development@efficientek.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/vfs.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index c53f30251a660..f3b344f0c0a4b 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -1222,7 +1222,7 @@ defined:
 	return
 	-ECHILD and it will be called again in ref-walk mode.
 
-``_weak_revalidate``
+``d_weak_revalidate``
 	called when the VFS needs to revalidate a "jumped" dentry.  This
 	is called when a path-walk ends at dentry that was not acquired
 	by doing a lookup in the parent directory.  This includes "/",

From 38484a1d0c50596c8080a00c269466f60fa4a051 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Mon, 6 Mar 2023 20:17:11 +0100
Subject: [PATCH 062/411] docs: programming-language: remove mention of the
 Intel compiler

The Intel compiler support has been removed in commit 95207db8166a
("Remove Intel compiler support").

Thus remove its mention in the Documentation too.

Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Vincenzo Palazzo <vincenzopalazzodev@gmail.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/r/20230306191712.230658-1-ojeda@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/programming-language.rst | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
index 5fc9160ca1fa5..10dc772671d84 100644
--- a/Documentation/process/programming-language.rst
+++ b/Documentation/process/programming-language.rst
@@ -12,10 +12,6 @@ under ``-std=gnu11`` [gcc-c-dialect-options]_: the GNU dialect of ISO C11.
 This dialect contains many extensions to the language [gnu-extensions]_,
 and many of them are used within the kernel as a matter of course.
 
-There is some support for compiling the kernel with ``icc`` [icc]_ for several
-of the architectures, although at the time of writing it is not completed,
-requiring third-party patches.
-
 Attributes
 ----------
 
@@ -38,7 +34,6 @@ Please refer to ``include/linux/compiler_attributes.h`` for more information.
 .. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
 .. [gcc] https://gcc.gnu.org
 .. [clang] https://clang.llvm.org
-.. [icc] https://software.intel.com/en-us/c-compilers
 .. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
 .. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
 .. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html

From 0b02076f995332fe1e457da29dd61c3b66c862f7 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Mon, 6 Mar 2023 20:17:12 +0100
Subject: [PATCH 063/411] docs: programming-language: add Rust programming
 language section

Following the C text in the file, add a mention about the Rust
programming language, the currently supported compiler and
the edition used (similar to the "dialect" mention for C).

Similarly, add a mention about the unstable features used (similar
to the "extensions" mentions for C).

In addition, add some links to complement the information.

Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://lore.kernel.org/r/20230306191712.230658-2-ojeda@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 .../process/programming-language.rst          | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/process/programming-language.rst b/Documentation/process/programming-language.rst
index 10dc772671d84..bc56dee6d0bcb 100644
--- a/Documentation/process/programming-language.rst
+++ b/Documentation/process/programming-language.rst
@@ -31,6 +31,20 @@ in order to feature detect which ones can be used and/or to shorten the code.
 
 Please refer to ``include/linux/compiler_attributes.h`` for more information.
 
+Rust
+----
+
+The kernel has experimental support for the Rust programming language
+[rust-language]_ under ``CONFIG_RUST``. It is compiled with ``rustc`` [rustc]_
+under ``--edition=2021`` [rust-editions]_. Editions are a way to introduce
+small changes to the language that are not backwards compatible.
+
+On top of that, some unstable features [rust-unstable-features]_ are used in
+the kernel. Unstable features may change in the future, thus it is an important
+goal to reach a point where only stable features are used.
+
+Please refer to Documentation/rust/index.rst for more information.
+
 .. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
 .. [gcc] https://gcc.gnu.org
 .. [clang] https://clang.llvm.org
@@ -38,4 +52,7 @@ Please refer to ``include/linux/compiler_attributes.h`` for more information.
 .. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
 .. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
 .. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
-
+.. [rust-language] https://www.rust-lang.org
+.. [rustc] https://doc.rust-lang.org/rustc/
+.. [rust-editions] https://doc.rust-lang.org/edition-guide/editions/
+.. [rust-unstable-features] https://github.com/Rust-for-Linux/linux/issues/2

From a414684e3b735a4114c19295a07e8cb2eb889dae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <uwe@kleine-koenig.org>
Date: Tue, 28 Feb 2023 14:46:57 +0100
Subject: [PATCH 064/411] docs: rebasing-and-merging: Drop wrong statement
 about git
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The "^0" syntax is no longer needed to fast-forward to a mainline commit;
take that out and add --ff-only to force an error if fast-forward is not
possible.

Signed-off-by: Uwe Kleine-König <uwe@kleine-koenig.org>
[jc: rewrote changelog]
Link: https://lore.kernel.org/r/20230228134657.1797871-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/maintainer/rebasing-and-merging.rst | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/Documentation/maintainer/rebasing-and-merging.rst b/Documentation/maintainer/rebasing-and-merging.rst
index 09f988e7fa719..85800ce95ae5f 100644
--- a/Documentation/maintainer/rebasing-and-merging.rst
+++ b/Documentation/maintainer/rebasing-and-merging.rst
@@ -213,11 +213,7 @@ point rather than some random spot.  If your upstream-bound branch has
 emptied entirely into the mainline during the merge window, you can pull it
 forward with a command like::
 
-  git merge v5.2-rc1^0
-
-The "^0" will cause Git to do a fast-forward merge (which should be
-possible in this situation), thus avoiding the addition of a spurious merge
-commit.
+  git merge --ff-only v5.2-rc1
 
 The guidelines laid out above are just that: guidelines.  There will always
 be situations that call out for a different solution, and these guidelines

From 9a9a8fe26751334b7739193a94eba741073b8a55 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Hellstr=C3=B6m?= <thomas.hellstrom@linux.intel.com>
Date: Tue, 7 Mar 2023 15:46:15 +0100
Subject: [PATCH 065/411] drm/ttm: Fix a NULL pointer dereference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The LRU mechanism may look up a resource in the process of being removed
from an object. The locking rules here are a bit unclear but it looks
currently like res->bo assignment is protected by the LRU lock, whereas
bo->resource is protected by the object lock, while *clearing* of
bo->resource is also protected by the LRU lock. This means that if
we check that bo->resource points to the LRU resource under the LRU
lock we should be safe.
So perform that check before deciding to swap out a bo. That avoids
dereferencing a NULL bo->resource in ttm_bo_swapout().

Fixes: 6a9b02899402 ("drm/ttm: move the LRU into resource handling v4")
Cc: Christian König <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Huang Rui <ray.huang@amd.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Philip Yang <Philip.Yang@amd.com>
Cc: Qiang Yu <qiang.yu@amd.com>
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Nirmoy Das <nirmoy.das@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: "Thomas Hellström" <thomas.hellstrom@linux.intel.com>
Cc: Anshuman Gupta <anshuman.gupta@intel.com>
Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam@amd.com>
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v5.19+
Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230307144621.10748-2-thomas.hellstrom@linux.intel.com
---
 drivers/gpu/drm/ttm/ttm_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c
index e7147e3046378..b84f74807ca13 100644
--- a/drivers/gpu/drm/ttm/ttm_device.c
+++ b/drivers/gpu/drm/ttm/ttm_device.c
@@ -158,7 +158,7 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx,
 			struct ttm_buffer_object *bo = res->bo;
 			uint32_t num_pages;
 
-			if (!bo)
+			if (!bo || bo->resource != res)
 				continue;
 
 			num_pages = PFN_UP(bo->base.size);

From 3b80a03d455143cc9135dac86722bbdd079daff3 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (IBM)" <rppt@kernel.org>
Date: Fri, 24 Feb 2023 12:03:05 +0200
Subject: [PATCH 066/411] docs/mm: Physical Memory: fix a reference to a file
 that doesn't exist

kbuild reports:

>> Warning: Documentation/mm/physical_memory.rst references a file that doesn't exist: Documentation/admin-guide/mm/memory_hotplug.rst

Fix the filename to be 'Documentation/admin-guide/mm/memory-hotplug.rst'.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/oe-kbuild-all/202302231311.567PAoS2-lkp@intel.com/
Fixes: 353c7dd636ed ("docs/mm: Physical Memory: remove useless markup")
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
Link: https://lore.kernel.org/r/20230224100306.2287696-1-rppt@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/mm/physical_memory.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/mm/physical_memory.rst b/Documentation/mm/physical_memory.rst
index f9d7ea4b9dca7..1bc888d36ea13 100644
--- a/Documentation/mm/physical_memory.rst
+++ b/Documentation/mm/physical_memory.rst
@@ -66,7 +66,7 @@ one of the types described below.
   also populated on boot using one of ``kernelcore``, ``movablecore`` and
   ``movable_node`` kernel command line parameters. See
   Documentation/mm/page_migration.rst and
-  Documentation/admin-guide/mm/memory_hotplug.rst for additional details.
+  Documentation/admin-guide/mm/memory-hotplug.rst for additional details.
 
 * ``ZONE_DEVICE`` represents memory residing on devices such as PMEM and GPU.
   It has different characteristics than RAM zone types and it exists to provide

From 87eae260995577d203a70d72d46976521a5687e1 Mon Sep 17 00:00:00 2001
From: "Mike Rapoport (IBM)" <rppt@kernel.org>
Date: Fri, 24 Feb 2023 12:03:06 +0200
Subject: [PATCH 067/411] docs/mm: hugetlbfs_reserv: fix a reference to a file
 that doesn't exist

kbuild reports:

>> Warning: Documentation/mm/hugetlbfs_reserv.rst references a file that doesn't exist: Documentation/mm/hugetlbpage.rst
>> Warning: Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst references a file that doesn't exist: Documentation/mm/hugetlbpage.rst

Fix the filename to be 'Documentation/admin-guide/mm/hugetlbpage.rst'.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/oe-kbuild-all/202302231854.sKlCmx9K-lkp@intel.com/
Fixes: ee86588960e2 ("docs/mm: remove useless markup")
Signed-off-by: Mike Rapoport (IBM) <rppt@kernel.org>
Link: https://lore.kernel.org/r/20230224100306.2287696-2-rppt@kernel.org
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/mm/hugetlbfs_reserv.rst                    | 8 ++++----
 Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst | 3 ++-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/mm/hugetlbfs_reserv.rst b/Documentation/mm/hugetlbfs_reserv.rst
index 3d05d64de9b46..d9c2b0f01dcd0 100644
--- a/Documentation/mm/hugetlbfs_reserv.rst
+++ b/Documentation/mm/hugetlbfs_reserv.rst
@@ -5,10 +5,10 @@ Hugetlbfs Reservation
 Overview
 ========
 
-Huge pages as described at Documentation/mm/hugetlbpage.rst are typically
-preallocated for application use.  These huge pages are instantiated in a
-task's address space at page fault time if the VMA indicates huge pages are
-to be used.  If no huge page exists at page fault time, the task is sent
+Huge pages as described at Documentation/admin-guide/mm/hugetlbpage.rst are
+typically preallocated for application use.  These huge pages are instantiated
+in a task's address space at page fault time if the VMA indicates huge pages
+are to be used.  If no huge page exists at page fault time, the task is sent
 a SIGBUS and often dies an unhappy death.  Shortly after huge page support
 was added, it was determined that it would be better to detect a shortage
 of huge pages at mmap() time.  The idea is that if there were not enough
diff --git a/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst b/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst
index c1fa35315d8b2..b7a0544224ad1 100644
--- a/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst
+++ b/Documentation/translations/zh_CN/mm/hugetlbfs_reserv.rst
@@ -15,7 +15,8 @@ Hugetlbfs 预留
 概述
 ====
 
-Documentation/mm/hugetlbpage.rst 中描述的巨页通常是预先分配给应用程序使用的。如果VMA指
+Documentation/admin-guide/mm/hugetlbpage.rst
+中描述的巨页通常是预先分配给应用程序使用的 。如果VMA指
 示要使用巨页，这些巨页会在缺页异常时被实例化到任务的地址空间。如果在缺页异常
 时没有巨页存在，任务就会被发送一个SIGBUS，并经常不高兴地死去。在加入巨页支
 持后不久，人们决定，在mmap()时检测巨页的短缺情况会更好。这个想法是，如果

From a5904f415e1af72fa8fe6665aa4f554dc2099a95 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:29 +0100
Subject: [PATCH 068/411] interconnect: fix mem leak when freeing nodes

The node link array is allocated when adding links to a node but is not
deallocated when nodes are destroyed.

Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API")
Cc: stable@vger.kernel.org      # 5.1
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Tested-by: Luca Ceresoli <luca.ceresoli@bootlin.com> # i.MX8MP MSC SM2-MB-EP1 Board
Link: https://lore.kernel.org/r/20230306075651.2449-2-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/core.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 0f392f59b1353..5217f449eeec2 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -850,6 +850,10 @@ void icc_node_destroy(int id)
 
 	mutex_unlock(&icc_lock);
 
+	if (!node)
+		return;
+
+	kfree(node->links);
 	kfree(node);
 }
 EXPORT_SYMBOL_GPL(icc_node_destroy);

From e0e7089bf9a87bc5e3997422e4e24563424f9018 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:30 +0100
Subject: [PATCH 069/411] interconnect: fix icc_provider_del() error handling

The interconnect framework currently expects that providers are only
removed when there are no users and after all nodes have been removed.

There is currently nothing that guarantees this to be the case and the
framework does not do any reference counting, but refusing to remove the
provider is never correct as that would leave a dangling pointer to a
resource that is about to be released in the global provider list (e.g.
accessible through debugfs).

Replace the current sanity checks with WARN_ON() so that the provider is
always removed.

Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API")
Cc: stable@vger.kernel.org      # 5.1: 680f8666baf6: interconnect: Make icc_provider_del() return void
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Tested-by: Luca Ceresoli <luca.ceresoli@bootlin.com> # i.MX8MP MSC SM2-MB-EP1 Board
Link: https://lore.kernel.org/r/20230306075651.2449-3-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/core.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index 5217f449eeec2..cabb6f5df83ef 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -1065,18 +1065,8 @@ EXPORT_SYMBOL_GPL(icc_provider_add);
 void icc_provider_del(struct icc_provider *provider)
 {
 	mutex_lock(&icc_lock);
-	if (provider->users) {
-		pr_warn("interconnect provider still has %d users\n",
-			provider->users);
-		mutex_unlock(&icc_lock);
-		return;
-	}
-
-	if (!list_empty(&provider->nodes)) {
-		pr_warn("interconnect provider still has nodes\n");
-		mutex_unlock(&icc_lock);
-		return;
-	}
+	WARN_ON(provider->users);
+	WARN_ON(!list_empty(&provider->nodes));
 
 	list_del(&provider->provider_list);
 	mutex_unlock(&icc_lock);

From eb59eca0d8ac15f8c1b7f1cd35999455a90292c0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:31 +0100
Subject: [PATCH 070/411] interconnect: fix provider registration API

The current interconnect provider interface is inherently racy as
providers are expected to be added before being fully initialised.

Specifically, nodes are currently not added and the provider data is not
initialised until after registering the provider which can cause racing
DT lookups to fail.

Add a new provider API which will be used to fix up the interconnect
drivers.

The old API is reimplemented using the new interface and will be removed
once all drivers have been fixed.

Fixes: 11f1ceca7031 ("interconnect: Add generic on-chip interconnect API")
Fixes: 87e3031b6fbd ("interconnect: Allow endpoints translation via DT")
Cc: stable@vger.kernel.org      # 5.1
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Tested-by: Luca Ceresoli <luca.ceresoli@bootlin.com> # i.MX8MP MSC SM2-MB-EP1 Board
Link: https://lore.kernel.org/r/20230306075651.2449-4-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/core.c           | 52 +++++++++++++++++++--------
 include/linux/interconnect-provider.h | 12 +++++++
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c
index cabb6f5df83ef..7a24c1444ace3 100644
--- a/drivers/interconnect/core.c
+++ b/drivers/interconnect/core.c
@@ -1033,44 +1033,68 @@ int icc_nodes_remove(struct icc_provider *provider)
 EXPORT_SYMBOL_GPL(icc_nodes_remove);
 
 /**
- * icc_provider_add() - add a new interconnect provider
- * @provider: the interconnect provider that will be added into topology
+ * icc_provider_init() - initialize a new interconnect provider
+ * @provider: the interconnect provider to initialize
+ *
+ * Must be called before adding nodes to the provider.
+ */
+void icc_provider_init(struct icc_provider *provider)
+{
+	WARN_ON(!provider->set);
+
+	INIT_LIST_HEAD(&provider->nodes);
+}
+EXPORT_SYMBOL_GPL(icc_provider_init);
+
+/**
+ * icc_provider_register() - register a new interconnect provider
+ * @provider: the interconnect provider to register
  *
  * Return: 0 on success, or an error code otherwise
  */
-int icc_provider_add(struct icc_provider *provider)
+int icc_provider_register(struct icc_provider *provider)
 {
-	if (WARN_ON(!provider->set))
-		return -EINVAL;
 	if (WARN_ON(!provider->xlate && !provider->xlate_extended))
 		return -EINVAL;
 
 	mutex_lock(&icc_lock);
-
-	INIT_LIST_HEAD(&provider->nodes);
 	list_add_tail(&provider->provider_list, &icc_providers);
-
 	mutex_unlock(&icc_lock);
 
-	dev_dbg(provider->dev, "interconnect provider added to topology\n");
+	dev_dbg(provider->dev, "interconnect provider registered\n");
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(icc_provider_add);
+EXPORT_SYMBOL_GPL(icc_provider_register);
 
 /**
- * icc_provider_del() - delete previously added interconnect provider
- * @provider: the interconnect provider that will be removed from topology
+ * icc_provider_deregister() - deregister an interconnect provider
+ * @provider: the interconnect provider to deregister
  */
-void icc_provider_del(struct icc_provider *provider)
+void icc_provider_deregister(struct icc_provider *provider)
 {
 	mutex_lock(&icc_lock);
 	WARN_ON(provider->users);
-	WARN_ON(!list_empty(&provider->nodes));
 
 	list_del(&provider->provider_list);
 	mutex_unlock(&icc_lock);
 }
+EXPORT_SYMBOL_GPL(icc_provider_deregister);
+
+int icc_provider_add(struct icc_provider *provider)
+{
+	icc_provider_init(provider);
+
+	return icc_provider_register(provider);
+}
+EXPORT_SYMBOL_GPL(icc_provider_add);
+
+void icc_provider_del(struct icc_provider *provider)
+{
+	WARN_ON(!list_empty(&provider->nodes));
+
+	icc_provider_deregister(provider);
+}
 EXPORT_SYMBOL_GPL(icc_provider_del);
 
 static const struct of_device_id __maybe_unused ignore_list[] = {
diff --git a/include/linux/interconnect-provider.h b/include/linux/interconnect-provider.h
index cd5c5a27557f5..d12cd18aab3f4 100644
--- a/include/linux/interconnect-provider.h
+++ b/include/linux/interconnect-provider.h
@@ -122,6 +122,9 @@ int icc_link_destroy(struct icc_node *src, struct icc_node *dst);
 void icc_node_add(struct icc_node *node, struct icc_provider *provider);
 void icc_node_del(struct icc_node *node);
 int icc_nodes_remove(struct icc_provider *provider);
+void icc_provider_init(struct icc_provider *provider);
+int icc_provider_register(struct icc_provider *provider);
+void icc_provider_deregister(struct icc_provider *provider);
 int icc_provider_add(struct icc_provider *provider);
 void icc_provider_del(struct icc_provider *provider);
 struct icc_node_data *of_icc_get_from_provider(struct of_phandle_args *spec);
@@ -167,6 +170,15 @@ static inline int icc_nodes_remove(struct icc_provider *provider)
 	return -ENOTSUPP;
 }
 
+static inline void icc_provider_init(struct icc_provider *provider) { }
+
+static inline int icc_provider_register(struct icc_provider *provider)
+{
+	return -ENOTSUPP;
+}
+
+static inline void icc_provider_deregister(struct icc_provider *provider) { }
+
 static inline int icc_provider_add(struct icc_provider *provider)
 {
 	return -ENOTSUPP;

From 9fbd35520f1f7f3cbe1873939a27ad9b009f21f9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:32 +0100
Subject: [PATCH 071/411] interconnect: imx: fix registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: f0d8048525d7 ("interconnect: Add imx core driver")
Cc: stable@vger.kernel.org      # 5.8
Cc: Alexandre Bailon <abailon@baylibre.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Tested-by: Luca Ceresoli <luca.ceresoli@bootlin.com> # i.MX8MP MSC SM2-MB-EP1 Board
Link: https://lore.kernel.org/r/20230306075651.2449-5-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/imx/imx.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/interconnect/imx/imx.c b/drivers/interconnect/imx/imx.c
index 823d9be9771a1..979ed610f704b 100644
--- a/drivers/interconnect/imx/imx.c
+++ b/drivers/interconnect/imx/imx.c
@@ -295,6 +295,9 @@ int imx_icc_register(struct platform_device *pdev,
 	provider->xlate = of_icc_xlate_onecell;
 	provider->data = data;
 	provider->dev = dev->parent;
+
+	icc_provider_init(provider);
+
 	platform_set_drvdata(pdev, imx_provider);
 
 	if (settings) {
@@ -306,20 +309,18 @@ int imx_icc_register(struct platform_device *pdev,
 		}
 	}
 
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(dev, "error adding interconnect provider: %d\n", ret);
+	ret = imx_icc_register_nodes(imx_provider, nodes, nodes_count, settings);
+	if (ret)
 		return ret;
-	}
 
-	ret = imx_icc_register_nodes(imx_provider, nodes, nodes_count, settings);
+	ret = icc_provider_register(provider);
 	if (ret)
-		goto provider_del;
+		goto err_unregister_nodes;
 
 	return 0;
 
-provider_del:
-	icc_provider_del(provider);
+err_unregister_nodes:
+	imx_icc_unregister_nodes(&imx_provider->provider);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(imx_icc_register);
@@ -328,9 +329,8 @@ void imx_icc_unregister(struct platform_device *pdev)
 {
 	struct imx_icc_provider *imx_provider = platform_get_drvdata(pdev);
 
+	icc_provider_deregister(&imx_provider->provider);
 	imx_icc_unregister_nodes(&imx_provider->provider);
-
-	icc_provider_del(&imx_provider->provider);
 }
 EXPORT_SYMBOL_GPL(imx_icc_unregister);
 

From 174941ed28a3573db075da46d95b4dcf9d4c49c2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:33 +0100
Subject: [PATCH 072/411] interconnect: qcom: osm-l3: fix registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail:

	of_icc_xlate_onecell: invalid index 0
	cpu cpu0: error -EINVAL: error finding src node
	cpu cpu0: dev_pm_opp_of_find_icc_paths: Unable to get path0: -22
	qcom-cpufreq-hw: probe of 18591000.cpufreq failed with error -22

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 5bc9900addaf ("interconnect: qcom: Add OSM L3 interconnect provider support")
Cc: stable@vger.kernel.org      # 5.7
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-6-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/osm-l3.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index 1bc01ff6e02a4..1bafb54f14329 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -158,8 +158,8 @@ static int qcom_osm_l3_remove(struct platform_device *pdev)
 {
 	struct qcom_osm_l3_icc_provider *qp = platform_get_drvdata(pdev);
 
+	icc_provider_deregister(&qp->provider);
 	icc_nodes_remove(&qp->provider);
-	icc_provider_del(&qp->provider);
 
 	return 0;
 }
@@ -245,14 +245,9 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
 	provider->set = qcom_osm_l3_set;
 	provider->aggregate = icc_std_aggregate;
 	provider->xlate = of_icc_xlate_onecell;
-	INIT_LIST_HEAD(&provider->nodes);
 	provider->data = data;
 
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(&pdev->dev, "error adding interconnect provider\n");
-		return ret;
-	}
+	icc_provider_init(provider);
 
 	for (i = 0; i < num_nodes; i++) {
 		size_t j;
@@ -275,12 +270,15 @@ static int qcom_osm_l3_probe(struct platform_device *pdev)
 	}
 	data->num_nodes = num_nodes;
 
+	ret = icc_provider_register(provider);
+	if (ret)
+		goto err;
+
 	platform_set_drvdata(pdev, qp);
 
 	return 0;
 err:
 	icc_nodes_remove(provider);
-	icc_provider_del(provider);
 
 	return ret;
 }

From bc463201f60803fa6bf2741d59441031cd0910e4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:34 +0100
Subject: [PATCH 073/411] interconnect: qcom: rpm: fix probe child-node error
 handling

Make sure to clean up and release resources properly also in case probe
fails when populating child devices.

Fixes: e39bf2972c6e ("interconnect: icc-rpm: Support child NoC device probe")
Cc: stable@vger.kernel.org      # 5.17
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-7-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index df3196f725368..91778cfcbc65d 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -541,8 +541,11 @@ int qnoc_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, qp);
 
 	/* Populate child NoC devices if any */
-	if (of_get_child_count(dev->of_node) > 0)
-		return of_platform_populate(dev->of_node, NULL, NULL, dev);
+	if (of_get_child_count(dev->of_node) > 0) {
+		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
+		if (ret)
+			goto err;
+	}
 
 	return 0;
 err:

From 63cf584203f3367c8b073d417c8e5cbbfc450506 Mon Sep 17 00:00:00 2001
From: James Houghton <jthoughton@google.com>
Date: Thu, 2 Mar 2023 22:24:04 +0000
Subject: [PATCH 074/411] mm: teach mincore_hugetlb about pte markers

By checking huge_pte_none(), we incorrectly classify PTE markers as
"present".  Instead, check huge_pte_none_mostly(), classifying PTE markers
the same as if the PTE were completely blank.

PTE markers, unlike other kinds of swap entries, don't reference any
physical page and don't indicate that a physical page was mapped
previously.  As such, treat them as non-present for the sake of mincore().

Link: https://lkml.kernel.org/r/20230302222404.175303-1-jthoughton@google.com
Fixes: 5c041f5d1f23 ("mm: teach core mm about pte markers")
Signed-off-by: James Houghton <jthoughton@google.com>
Acked-by: Peter Xu <peterx@redhat.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: James Houghton <jthoughton@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mincore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/mincore.c b/mm/mincore.c
index cd69b9db00812..d359650b0f75b 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -33,7 +33,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 	 * Hugepages under user process are always in RAM and never
 	 * swapped out, but theoretically it needs to be checked.
 	 */
-	present = pte && !huge_pte_none(huge_ptep_get(pte));
+	present = pte && !huge_pte_none_mostly(huge_ptep_get(pte));
 	for (; addr != end; vec++, addr += PAGE_SIZE)
 		*vec = present;
 	walk->private = vec;

From 42b2af2c9b7eede8ef21d0943f84d135e21a32a3 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Thu, 2 Mar 2023 18:54:23 +0100
Subject: [PATCH 075/411] mm/userfaultfd: propagate uffd-wp bit when
 PTE-mapping the huge zeropage

Currently, we'd lose the userfaultfd-wp marker when PTE-mapping a huge
zeropage, resulting in the next write faults in the PMD range not
triggering uffd-wp events.

Various actions (partial MADV_DONTNEED, partial mremap, partial munmap,
partial mprotect) could trigger this.  However, most importantly,
un-protecting a single sub-page from the userfaultfd-wp handler when
processing a uffd-wp event will PTE-map the shared huge zeropage and lose
the uffd-wp bit for the remainder of the PMD.

Let's properly propagate the uffd-wp bit to the PMDs.

 #define _GNU_SOURCE
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <inttypes.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
 #include <poll.h>
 #include <pthread.h>
 #include <sys/mman.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <linux/userfaultfd.h>

 static size_t pagesize;
 static int uffd;
 static volatile bool uffd_triggered;

 #define barrier() __asm__ __volatile__("": : :"memory")

 static void uffd_wp_range(char *start, size_t size, bool wp)
 {
 	struct uffdio_writeprotect uffd_writeprotect;

 	uffd_writeprotect.range.start = (unsigned long) start;
 	uffd_writeprotect.range.len = size;
 	if (wp) {
 		uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
 	} else {
 		uffd_writeprotect.mode = 0;
 	}
 	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
 		fprintf(stderr, "UFFDIO_WRITEPROTECT failed: %d\n", errno);
 		exit(1);
 	}
 }

 static void *uffd_thread_fn(void *arg)
 {
 	static struct uffd_msg msg;
 	ssize_t nread;

 	while (1) {
 		struct pollfd pollfd;
 		int nready;

 		pollfd.fd = uffd;
 		pollfd.events = POLLIN;
 		nready = poll(&pollfd, 1, -1);
 		if (nready == -1) {
 			fprintf(stderr, "poll() failed: %d\n", errno);
 			exit(1);
 		}

 		nread = read(uffd, &msg, sizeof(msg));
 		if (nread <= 0)
 			continue;

 		if (msg.event != UFFD_EVENT_PAGEFAULT ||
 		    !(msg.arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP)) {
 			printf("FAIL: wrong uffd-wp event fired\n");
 			exit(1);
 		}

 		/* un-protect the single page. */
 		uffd_triggered = true;
 		uffd_wp_range((char *)(uintptr_t)msg.arg.pagefault.address,
 			      pagesize, false);
 	}
 	return arg;
 }

 static int setup_uffd(char *map, size_t size)
 {
 	struct uffdio_api uffdio_api;
 	struct uffdio_register uffdio_register;
 	pthread_t thread;

 	uffd = syscall(__NR_userfaultfd,
 		       O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
 	if (uffd < 0) {
 		fprintf(stderr, "syscall() failed: %d\n", errno);
 		return -errno;
 	}

 	uffdio_api.api = UFFD_API;
 	uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
 	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
 		fprintf(stderr, "UFFDIO_API failed: %d\n", errno);
 		return -errno;
 	}

 	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
 		fprintf(stderr, "UFFD_FEATURE_WRITEPROTECT missing\n");
 		return -ENOSYS;
 	}

 	uffdio_register.range.start = (unsigned long) map;
 	uffdio_register.range.len = size;
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) {
 		fprintf(stderr, "UFFDIO_REGISTER failed: %d\n", errno);
 		return -errno;
 	}

 	pthread_create(&thread, NULL, uffd_thread_fn, NULL);

 	return 0;
 }

 int main(void)
 {
 	const size_t size = 4 * 1024 * 1024ull;
 	char *map, *cur;

 	pagesize = getpagesize();

 	map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
 	if (map == MAP_FAILED) {
 		fprintf(stderr, "mmap() failed\n");
 		return -errno;
 	}

 	if (madvise(map, size, MADV_HUGEPAGE)) {
 		fprintf(stderr, "MADV_HUGEPAGE failed\n");
 		return -errno;
 	}

 	if (setup_uffd(map, size))
 		return 1;

 	/* Read the whole range, populating zeropages. */
 	madvise(map, size, MADV_POPULATE_READ);

 	/* Write-protect the whole range. */
 	uffd_wp_range(map, size, true);

 	/* Make sure uffd-wp triggers on each page. */
 	for (cur = map; cur < map + size; cur += pagesize) {
 		uffd_triggered = false;

 		barrier();
 		/* Trigger a write fault. */
 		*cur = 1;
 		barrier();

 		if (!uffd_triggered) {
 			printf("FAIL: uffd-wp did not trigger\n");
 			return 1;
 		}
 	}

 	printf("PASS: uffd-wp triggered\n");
 	return 0;
 }

Link: https://lkml.kernel.org/r/20230302175423.589164-1-david@redhat.com
Fixes: e06f1e1dd499 ("userfaultfd: wp: enabled write protection in userfaultfd API")
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Shaohua Li <shli@fb.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4fc43859e59a3..032fb0ef9cd19 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2037,7 +2037,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	pgtable_t pgtable;
-	pmd_t _pmd;
+	pmd_t _pmd, old_pmd;
 	int i;
 
 	/*
@@ -2048,7 +2048,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 	 *
 	 * See Documentation/mm/mmu_notifier.rst
 	 */
-	pmdp_huge_clear_flush(vma, haddr, pmd);
+	old_pmd = pmdp_huge_clear_flush(vma, haddr, pmd);
 
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
@@ -2057,6 +2057,8 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 		pte_t *pte, entry;
 		entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
 		entry = pte_mkspecial(entry);
+		if (pmd_uffd_wp(old_pmd))
+			entry = pte_mkuffd_wp(entry);
 		pte = pte_offset_map(&_pmd, haddr);
 		VM_BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, haddr, pte, entry);

From af665b40dfa2b49f1e3e11ecd1096506ef40348d Mon Sep 17 00:00:00 2001
From: Jarkko Sakkinen <jarkko@kernel.org>
Date: Thu, 2 Mar 2023 01:54:42 +0200
Subject: [PATCH 076/411] mailmap: updates for Jarkko Sakkinen

Update to my current employer:

https://research.tuni.fi/nisec/

Link: https://lkml.kernel.org/r/20230301235443.6663-1-jarkko@kernel.org
Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Ben Widawsky <bwidawsk@kernel.org>
Cc: Bjorn Andersson <andersson@kernel.org>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Kirill Tkhai <tkhai@ya.ru>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Vasily Averin <vasily.averin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 5367faaf78312..7581773e1e161 100644
--- a/.mailmap
+++ b/.mailmap
@@ -191,6 +191,7 @@ Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
 Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
 Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
 Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@tuni.fi>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>

From 071ca76d2c1dddc6bf2f4917a016207e00bcc8e3 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 1 Mar 2023 12:00:12 +0100
Subject: [PATCH 077/411] mailmap: correct Dikshita Agarwal's Qualcomm email
 address

I recently sent a patch to map Dikshita's old CAF address to his current
one @ Qualcomm.  It turned out however, that he has two of them, with the
@quicinc.com one meant for upstream contributions.  Fix it.

Link: https://lkml.kernel.org/r/20230301110012.1290379-1-konrad.dybcio@linaro.org
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Cc: Dikshita Agarwal <quic_dikshita@quicinc.com>
Cc: Andy Gross <agross@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Bjorn Andersson <andersson@kernel.org>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Kirill Tkhai <tkhai@ya.ru>
Cc: Marijn Suijten <marijn.suijten@somainline.org>
Cc: Qais Yousef <qyousef@layalina.io>
Cc: Vasily Averin <vasily.averin@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 7581773e1e161..16b195032269b 100644
--- a/.mailmap
+++ b/.mailmap
@@ -121,7 +121,7 @@ Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@gmail.com>
 Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@imgtec.com>
 Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@mips.com>
 <dev.kurt@vandijck-laurijssen.be> <kurt.van.dijck@eia.be>
-Dikshita Agarwal <dikshita@qti.qualcomm.com> <dikshita@codeaurora.org>
+Dikshita Agarwal <quic_dikshita@quicinc.com> <dikshita@codeaurora.org>
 Dmitry Baryshkov <dbaryshkov@gmail.com>
 Dmitry Baryshkov <dbaryshkov@gmail.com> <[dbaryshkov@gmail.com]>
 Dmitry Baryshkov <dbaryshkov@gmail.com> <dmitry_baryshkov@mentor.com>

From 89a004508081e1d1a498f10ea6d4f7f97a820438 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alex@ghiti.fr>
Date: Wed, 1 Mar 2023 10:01:32 +0100
Subject: [PATCH 078/411] .mailmap: add Alexandre Ghiti personal email address

I'm no longer employed by Canonical which results in email bouncing so add
an entry to my personal email address.

Link: https://lkml.kernel.org/r/20230301090132.280475-1-alexghiti@rivosinc.com
Signed-off-by: Alexandre Ghiti <alex@ghiti.fr>
Reported-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .mailmap | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.mailmap b/.mailmap
index 16b195032269b..c1def9105d0c7 100644
--- a/.mailmap
+++ b/.mailmap
@@ -28,6 +28,7 @@ Alexander Lobakin <alobakin@pm.me> <bloodyreaper@yandex.ru>
 Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo.com>
 Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
 Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
+Alexandre Ghiti <alex@ghiti.fr> <alexandre.ghiti@canonical.com>
 Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
 Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
 Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>

From fb3592c41a4427601f9643b2a84e55bb99f5cd7c Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 3 Mar 2023 11:01:53 +0800
Subject: [PATCH 079/411] migrate_pages: fix deadlock in batched migration

Patch series "migrate_pages: fix deadlock in batched synchronous
migration", v2.

Two deadlock bugs were reported for the migrate_pages() batching series.
Thanks Hugh and Pengfei.  Analysis shows that if we have locked some other
folios except the one we are migrating, it's not safe in general to wait
synchronously, for example, to wait the writeback to complete or wait to
lock the buffer head.

So 1/3 fixes the deadlock in a simple way, where the batching support for
the synchronous migration is disabled.  The change is straightforward and
easy to be understood.  While 3/3 re-introduce the batching for
synchronous migration via trying to migrate asynchronously in batch
optimistically, then fall back to migrate synchronously one by one for
fail-to-migrate folios.  Test shows that this can restore the TLB flushing
batching performance for synchronous migration effectively.


This patch (of 3):

Two deadlock bugs were reported for the migrate_pages() batching series.
Thanks Hugh and Pengfei!  For example, in the following deadlock trace
snippet,

 INFO: task kworker/u4:0:9 blocked for more than 147 seconds.
       Not tainted 6.2.0-rc4-kvm+ #1314
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 task:kworker/u4:0    state:D stack:0     pid:9     ppid:2      flags:0x00004000
 Workqueue: loop4 loop_rootcg_workfn
 Call Trace:
  <TASK>
  __schedule+0x43b/0xd00
  schedule+0x6a/0xf0
  io_schedule+0x4a/0x80
  folio_wait_bit_common+0x1b5/0x4e0
  ? __pfx_wake_page_function+0x10/0x10
  __filemap_get_folio+0x73d/0x770
  shmem_get_folio_gfp+0x1fd/0xc80
  shmem_write_begin+0x91/0x220
  generic_perform_write+0x10e/0x2e0
  __generic_file_write_iter+0x17e/0x290
  ? generic_write_checks+0x12b/0x1a0
  generic_file_write_iter+0x97/0x180
  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
  do_iter_readv_writev+0x13c/0x210
  ? __sanitizer_cov_trace_const_cmp4+0x1a/0x20
  do_iter_write+0xf6/0x330
  vfs_iter_write+0x46/0x70
  loop_process_work+0x723/0xfe0
  loop_rootcg_workfn+0x28/0x40
  process_one_work+0x3cc/0x8d0
  worker_thread+0x66/0x630
  ? __pfx_worker_thread+0x10/0x10
  kthread+0x153/0x190
  ? __pfx_kthread+0x10/0x10
  ret_from_fork+0x29/0x50
  </TASK>

 INFO: task repro:1023 blocked for more than 147 seconds.
       Not tainted 6.2.0-rc4-kvm+ #1314
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 task:repro           state:D stack:0     pid:1023  ppid:360    flags:0x00004004
 Call Trace:
  <TASK>
  __schedule+0x43b/0xd00
  schedule+0x6a/0xf0
  io_schedule+0x4a/0x80
  folio_wait_bit_common+0x1b5/0x4e0
  ? compaction_alloc+0x77/0x1150
  ? __pfx_wake_page_function+0x10/0x10
  folio_wait_bit+0x30/0x40
  folio_wait_writeback+0x2e/0x1e0
  migrate_pages_batch+0x555/0x1ac0
  ? __pfx_compaction_alloc+0x10/0x10
  ? __pfx_compaction_free+0x10/0x10
  ? __this_cpu_preempt_check+0x17/0x20
  ? lock_is_held_type+0xe6/0x140
  migrate_pages+0x100e/0x1180
  ? __pfx_compaction_free+0x10/0x10
  ? __pfx_compaction_alloc+0x10/0x10
  compact_zone+0xe10/0x1b50
  ? lock_is_held_type+0xe6/0x140
  ? check_preemption_disabled+0x80/0xf0
  compact_node+0xa3/0x100
  ? __sanitizer_cov_trace_const_cmp8+0x1c/0x30
  ? _find_first_bit+0x7b/0x90
  sysctl_compaction_handler+0x5d/0xb0
  proc_sys_call_handler+0x29d/0x420
  proc_sys_write+0x2b/0x40
  vfs_write+0x3a3/0x780
  ksys_write+0xb7/0x180
  __x64_sys_write+0x26/0x30
  do_syscall_64+0x3b/0x90
  entry_SYSCALL_64_after_hwframe+0x72/0xdc
 RIP: 0033:0x7f3a2471f59d
 RSP: 002b:00007ffe567f7288 EFLAGS: 00000217 ORIG_RAX: 0000000000000001
 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3a2471f59d
 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000005
 RBP: 00007ffe567f72a0 R08: 0000000000000010 R09: 0000000000000010
 R10: 0000000000000010 R11: 0000000000000217 R12: 00000000004012e0
 R13: 00007ffe567f73e0 R14: 0000000000000000 R15: 0000000000000000
  </TASK>

The page migration task has held the lock of the shmem folio A, and is
waiting the writeback of the folio B of the file system on the loop block
device to complete.  While the loop worker task which writes back the
folio B is waiting to lock the shmem folio A, because the folio A backs
the folio B in the loop device.  Thus deadlock is triggered.

In general, if we have locked some other folios except the one we are
migrating, it's not safe to wait synchronously, for example, to wait the
writeback to complete or wait to lock the buffer head.

To fix the deadlock, in this patch, we avoid to batch the page migration
except for MIGRATE_ASYNC mode.  In MIGRATE_ASYNC mode, synchronous waiting
is avoided.

The fix can be improved further.  We will do that as soon as possible.

Link: https://lkml.kernel.org/r/20230303030155.160983-1-ying.huang@intel.com
Link: https://lore.kernel.org/linux-mm/87a6c8c-c5c1-67dc-1e32-eb30831d6e3d@google.com/
Link: https://lore.kernel.org/linux-mm/874jrg7kke.fsf@yhuang6-desk2.ccr.corp.intel.com/
Link: https://lore.kernel.org/linux-mm/20230227110614.dngdub2j3exr6dfp@quack3/
Link: https://lkml.kernel.org/r/20230303030155.160983-2-ying.huang@intel.com
Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reported-by: Hugh Dickins <hughd@google.com>
Reported-by: "Xu, Pengfei" <pengfei.xu@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: Tejun Heo <tj@kernel.org>
Cc: Xin Hao <xhao@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 69 ++++++++++++++++++++--------------------------------
 1 file changed, 26 insertions(+), 43 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 98f1c11197a8c..f348e0a7b1df5 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1112,7 +1112,7 @@ static void migrate_folio_done(struct folio *src,
 /* Obtain the lock on page, remove all ptes. */
 static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
 			       unsigned long private, struct folio *src,
-			       struct folio **dstp, int force, bool avoid_force_lock,
+			       struct folio **dstp, int force,
 			       enum migrate_mode mode, enum migrate_reason reason,
 			       struct list_head *ret)
 {
@@ -1163,17 +1163,6 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 		if (current->flags & PF_MEMALLOC)
 			goto out;
 
-		/*
-		 * We have locked some folios and are going to wait to lock
-		 * this folio.  To avoid a potential deadlock, let's bail
-		 * out and not do that. The locked folios will be moved and
-		 * unlocked, then we can wait to lock this folio.
-		 */
-		if (avoid_force_lock) {
-			rc = -EDEADLOCK;
-			goto out;
-		}
-
 		folio_lock(src);
 	}
 	locked = true;
@@ -1253,7 +1242,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 		/* Establish migration ptes */
 		VM_BUG_ON_FOLIO(folio_test_anon(src) &&
 			       !folio_test_ksm(src) && !anon_vma, src);
-		try_to_migrate(src, TTU_BATCH_FLUSH);
+		try_to_migrate(src, mode == MIGRATE_ASYNC ? TTU_BATCH_FLUSH : 0);
 		page_was_mapped = 1;
 	}
 
@@ -1267,7 +1256,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 	 * A folio that has not been unmapped will be restored to
 	 * right list unless we want to retry.
 	 */
-	if (rc == -EAGAIN || rc == -EDEADLOCK)
+	if (rc == -EAGAIN)
 		ret = NULL;
 
 	migrate_folio_undo_src(src, page_was_mapped, anon_vma, locked, ret);
@@ -1618,6 +1607,11 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
 /*
  * migrate_pages_batch() first unmaps folios in the from list as many as
  * possible, then move the unmapped folios.
+ *
+ * We only batch migration if mode == MIGRATE_ASYNC to avoid to wait a
+ * lock or bit when we have locked more than one folio.  Which may cause
+ * deadlock (e.g., for loop device).  So, if mode != MIGRATE_ASYNC, the
+ * length of the from list must be <= 1.
  */
 static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		free_page_t put_new_page, unsigned long private,
@@ -1640,11 +1634,11 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	LIST_HEAD(dst_folios);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
 	bool no_split_folio_counting = false;
-	bool avoid_force_lock;
 
+	VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC &&
+			!list_empty(from) && !list_is_singular(from));
 retry:
 	rc_saved = 0;
-	avoid_force_lock = false;
 	retry = 1;
 	for (pass = 0;
 	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
@@ -1689,15 +1683,14 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			}
 
 			rc = migrate_folio_unmap(get_new_page, put_new_page, private,
-						 folio, &dst, pass > 2, avoid_force_lock,
-						 mode, reason, ret_folios);
+						 folio, &dst, pass > 2, mode,
+						 reason, ret_folios);
 			/*
 			 * The rules are:
 			 *	Success: folio will be freed
 			 *	Unmap: folio will be put on unmap_folios list,
 			 *	       dst folio put on dst_folios list
 			 *	-EAGAIN: stay on the from list
-			 *	-EDEADLOCK: stay on the from list
 			 *	-ENOMEM: stay on the from list
 			 *	Other errno: put on ret_folios list
 			 */
@@ -1749,14 +1742,6 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 					goto out;
 				else
 					goto move;
-			case -EDEADLOCK:
-				/*
-				 * The folio cannot be locked for potential deadlock.
-				 * Go move (and unlock) all locked folios.  Then we can
-				 * try again.
-				 */
-				rc_saved = rc;
-				goto move;
 			case -EAGAIN:
 				if (is_large) {
 					large_retry++;
@@ -1771,11 +1756,6 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				stats->nr_thp_succeeded += is_thp;
 				break;
 			case MIGRATEPAGE_UNMAP:
-				/*
-				 * We have locked some folios, don't force lock
-				 * to avoid deadlock.
-				 */
-				avoid_force_lock = true;
 				list_move_tail(&folio->lru, &unmap_folios);
 				list_add_tail(&dst->lru, &dst_folios);
 				break;
@@ -1900,17 +1880,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		 */
 		list_splice_init(from, ret_folios);
 		list_splice_init(&split_folios, from);
+		/*
+		 * Force async mode to avoid to wait lock or bit when we have
+		 * locked more than one folios.
+		 */
+		mode = MIGRATE_ASYNC;
 		no_split_folio_counting = true;
 		goto retry;
 	}
 
-	/*
-	 * We have unlocked all locked folios, so we can force lock now, let's
-	 * try again.
-	 */
-	if (rc == -EDEADLOCK)
-		goto retry;
-
 	return rc;
 }
 
@@ -1945,7 +1923,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
 {
 	int rc, rc_gather;
-	int nr_pages;
+	int nr_pages, batch;
 	struct folio *folio, *folio2;
 	LIST_HEAD(folios);
 	LIST_HEAD(ret_folios);
@@ -1959,6 +1937,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 				     mode, reason, &stats, &ret_folios);
 	if (rc_gather < 0)
 		goto out;
+
+	if (mode == MIGRATE_ASYNC)
+		batch = NR_MAX_BATCHED_MIGRATION;
+	else
+		batch = 1;
 again:
 	nr_pages = 0;
 	list_for_each_entry_safe(folio, folio2, from, lru) {
@@ -1969,11 +1952,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		}
 
 		nr_pages += folio_nr_pages(folio);
-		if (nr_pages > NR_MAX_BATCHED_MIGRATION)
+		if (nr_pages >= batch)
 			break;
 	}
-	if (nr_pages > NR_MAX_BATCHED_MIGRATION)
-		list_cut_before(&folios, from, &folio->lru);
+	if (nr_pages >= batch)
+		list_cut_before(&folios, from, &folio2->lru);
 	else
 		list_splice_init(from, &folios);
 	rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,

From a21d2133215b58fbf254ea2bb77eb3143ffedf60 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 3 Mar 2023 11:01:54 +0800
Subject: [PATCH 080/411] migrate_pages: move split folios processing out of
 migrate_pages_batch()

To simplify the code logic and reduce the line number.

Link: https://lkml.kernel.org/r/20230303030155.160983-3-ying.huang@intel.com
Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Xu, Pengfei" <pengfei.xu@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: Tejun Heo <tj@kernel.org>
Cc: Xin Hao <xhao@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 78 +++++++++++++++++++---------------------------------
 1 file changed, 28 insertions(+), 50 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index f348e0a7b1df5..b61abe529107f 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1616,9 +1616,10 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
 static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		free_page_t put_new_page, unsigned long private,
 		enum migrate_mode mode, int reason, struct list_head *ret_folios,
-		struct migrate_pages_stats *stats)
+		struct list_head *split_folios, struct migrate_pages_stats *stats,
+		int nr_pass)
 {
-	int retry;
+	int retry = 1;
 	int large_retry = 1;
 	int thp_retry = 1;
 	int nr_failed = 0;
@@ -1628,21 +1629,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	bool is_large = false;
 	bool is_thp = false;
 	struct folio *folio, *folio2, *dst = NULL, *dst2;
-	int rc, rc_saved, nr_pages;
-	LIST_HEAD(split_folios);
+	int rc, rc_saved = 0, nr_pages;
 	LIST_HEAD(unmap_folios);
 	LIST_HEAD(dst_folios);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
-	bool no_split_folio_counting = false;
 
 	VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC &&
 			!list_empty(from) && !list_is_singular(from));
-retry:
-	rc_saved = 0;
-	retry = 1;
-	for (pass = 0;
-	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
-	     pass++) {
+
+	for (pass = 0; pass < nr_pass && (retry || large_retry); pass++) {
 		retry = 0;
 		large_retry = 0;
 		thp_retry = 0;
@@ -1673,7 +1668,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			if (!thp_migration_supported() && is_thp) {
 				nr_large_failed++;
 				stats->nr_thp_failed++;
-				if (!try_split_folio(folio, &split_folios)) {
+				if (!try_split_folio(folio, split_folios)) {
 					stats->nr_thp_split++;
 					continue;
 				}
@@ -1705,7 +1700,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 					stats->nr_thp_failed += is_thp;
 					/* Large folio NUMA faulting doesn't split to retry. */
 					if (!nosplit) {
-						int ret = try_split_folio(folio, &split_folios);
+						int ret = try_split_folio(folio, split_folios);
 
 						if (!ret) {
 							stats->nr_thp_split += is_thp;
@@ -1722,18 +1717,11 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 							break;
 						}
 					}
-				} else if (!no_split_folio_counting) {
+				} else {
 					nr_failed++;
 				}
 
 				stats->nr_failed_pages += nr_pages + nr_retry_pages;
-				/*
-				 * There might be some split folios of fail-to-migrate large
-				 * folios left in split_folios list. Move them to ret_folios
-				 * list so that they could be put back to the right list by
-				 * the caller otherwise the folio refcnt will be leaked.
-				 */
-				list_splice_init(&split_folios, ret_folios);
 				/* nr_failed isn't updated for not used */
 				nr_large_failed += large_retry;
 				stats->nr_thp_failed += thp_retry;
@@ -1746,7 +1734,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				if (is_large) {
 					large_retry++;
 					thp_retry += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					retry++;
 				}
 				nr_retry_pages += nr_pages;
@@ -1769,7 +1757,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				if (is_large) {
 					nr_large_failed++;
 					stats->nr_thp_failed += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					nr_failed++;
 				}
 
@@ -1787,9 +1775,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	try_to_unmap_flush();
 
 	retry = 1;
-	for (pass = 0;
-	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
-	     pass++) {
+	for (pass = 0; pass < nr_pass && (retry || large_retry); pass++) {
 		retry = 0;
 		large_retry = 0;
 		thp_retry = 0;
@@ -1818,7 +1804,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				if (is_large) {
 					large_retry++;
 					thp_retry += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					retry++;
 				}
 				nr_retry_pages += nr_pages;
@@ -1831,7 +1817,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 				if (is_large) {
 					nr_large_failed++;
 					stats->nr_thp_failed += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					nr_failed++;
 				}
 
@@ -1868,27 +1854,6 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		dst2 = list_next_entry(dst, lru);
 	}
 
-	/*
-	 * Try to migrate split folios of fail-to-migrate large folios, no
-	 * nr_failed counting in this round, since all split folios of a
-	 * large folio is counted as 1 failure in the first round.
-	 */
-	if (rc >= 0 && !list_empty(&split_folios)) {
-		/*
-		 * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
-		 * retries) to ret_folios to avoid migrating them again.
-		 */
-		list_splice_init(from, ret_folios);
-		list_splice_init(&split_folios, from);
-		/*
-		 * Force async mode to avoid to wait lock or bit when we have
-		 * locked more than one folios.
-		 */
-		mode = MIGRATE_ASYNC;
-		no_split_folio_counting = true;
-		goto retry;
-	}
-
 	return rc;
 }
 
@@ -1927,6 +1892,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 	struct folio *folio, *folio2;
 	LIST_HEAD(folios);
 	LIST_HEAD(ret_folios);
+	LIST_HEAD(split_folios);
 	struct migrate_pages_stats stats;
 
 	trace_mm_migrate_pages_start(mode, reason);
@@ -1960,12 +1926,24 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 	else
 		list_splice_init(from, &folios);
 	rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
-				 mode, reason, &ret_folios, &stats);
+				 mode, reason, &ret_folios, &split_folios, &stats,
+				 NR_MAX_MIGRATE_PAGES_RETRY);
 	list_splice_tail_init(&folios, &ret_folios);
 	if (rc < 0) {
 		rc_gather = rc;
+		list_splice_tail(&split_folios, &ret_folios);
 		goto out;
 	}
+	if (!list_empty(&split_folios)) {
+		/*
+		 * Failure isn't counted since all split folios of a large folio
+		 * is counted as 1 failure already.  And, we only try to migrate
+		 * with minimal effort, force MIGRATE_ASYNC mode and retry once.
+		 */
+		migrate_pages_batch(&split_folios, get_new_page, put_new_page, private,
+				    MIGRATE_ASYNC, reason, &ret_folios, NULL, &stats, 1);
+		list_splice_tail_init(&split_folios, &ret_folios);
+	}
 	rc_gather += rc;
 	if (!list_empty(from))
 		goto again;

From 2ef7dbb269902bde34c82f027806992195d1d1ee Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Fri, 3 Mar 2023 11:01:55 +0800
Subject: [PATCH 081/411] migrate_pages: try migrate in batch asynchronously
 firstly

When we have locked more than one folios, we cannot wait the lock or bit
(e.g., page lock, buffer head lock, writeback bit) synchronously.
Otherwise deadlock may be triggered.  This make it hard to batch the
synchronous migration directly.

This patch re-enables batching synchronous migration via trying to migrate
in batch asynchronously firstly.  And any folios that are failed to be
migrated asynchronously will be migrated synchronously one by one.

Test shows that this can restore the TLB flushing batching performance for
synchronous migration effectively.

Link: https://lkml.kernel.org/r/20230303030155.160983-4-ying.huang@intel.com
Fixes: 5dfab109d519 ("migrate_pages: batch _unmap and _move")
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Tested-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Xu, Pengfei" <pengfei.xu@intel.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Stefan Roesch <shr@devkernel.io>
Cc: Tejun Heo <tj@kernel.org>
Cc: Xin Hao <xhao@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 80 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 62 insertions(+), 18 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index b61abe529107f..db3f154446af4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1112,9 +1112,8 @@ static void migrate_folio_done(struct folio *src,
 /* Obtain the lock on page, remove all ptes. */
 static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
 			       unsigned long private, struct folio *src,
-			       struct folio **dstp, int force,
-			       enum migrate_mode mode, enum migrate_reason reason,
-			       struct list_head *ret)
+			       struct folio **dstp, enum migrate_mode mode,
+			       enum migrate_reason reason, struct list_head *ret)
 {
 	struct folio *dst;
 	int rc = -EAGAIN;
@@ -1144,7 +1143,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 	dst->private = NULL;
 
 	if (!folio_trylock(src)) {
-		if (!force || mode == MIGRATE_ASYNC)
+		if (mode == MIGRATE_ASYNC)
 			goto out;
 
 		/*
@@ -1182,8 +1181,6 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 			rc = -EBUSY;
 			goto out;
 		}
-		if (!force)
-			goto out;
 		folio_wait_writeback(src);
 	}
 
@@ -1497,6 +1494,9 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
 #define NR_MAX_BATCHED_MIGRATION	512
 #endif
 #define NR_MAX_MIGRATE_PAGES_RETRY	10
+#define NR_MAX_MIGRATE_ASYNC_RETRY	3
+#define NR_MAX_MIGRATE_SYNC_RETRY					\
+	(NR_MAX_MIGRATE_PAGES_RETRY - NR_MAX_MIGRATE_ASYNC_RETRY)
 
 struct migrate_pages_stats {
 	int nr_succeeded;	/* Normal and large folios migrated successfully, in
@@ -1678,8 +1678,7 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 			}
 
 			rc = migrate_folio_unmap(get_new_page, put_new_page, private,
-						 folio, &dst, pass > 2, mode,
-						 reason, ret_folios);
+						 folio, &dst, mode, reason, ret_folios);
 			/*
 			 * The rules are:
 			 *	Success: folio will be freed
@@ -1857,6 +1856,51 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	return rc;
 }
 
+static int migrate_pages_sync(struct list_head *from, new_page_t get_new_page,
+		free_page_t put_new_page, unsigned long private,
+		enum migrate_mode mode, int reason, struct list_head *ret_folios,
+		struct list_head *split_folios, struct migrate_pages_stats *stats)
+{
+	int rc, nr_failed = 0;
+	LIST_HEAD(folios);
+	struct migrate_pages_stats astats;
+
+	memset(&astats, 0, sizeof(astats));
+	/* Try to migrate in batch with MIGRATE_ASYNC mode firstly */
+	rc = migrate_pages_batch(from, get_new_page, put_new_page, private, MIGRATE_ASYNC,
+				 reason, &folios, split_folios, &astats,
+				 NR_MAX_MIGRATE_ASYNC_RETRY);
+	stats->nr_succeeded += astats.nr_succeeded;
+	stats->nr_thp_succeeded += astats.nr_thp_succeeded;
+	stats->nr_thp_split += astats.nr_thp_split;
+	if (rc < 0) {
+		stats->nr_failed_pages += astats.nr_failed_pages;
+		stats->nr_thp_failed += astats.nr_thp_failed;
+		list_splice_tail(&folios, ret_folios);
+		return rc;
+	}
+	stats->nr_thp_failed += astats.nr_thp_split;
+	nr_failed += astats.nr_thp_split;
+	/*
+	 * Fall back to migrate all failed folios one by one synchronously. All
+	 * failed folios except split THPs will be retried, so their failure
+	 * isn't counted
+	 */
+	list_splice_tail_init(&folios, from);
+	while (!list_empty(from)) {
+		list_move(from->next, &folios);
+		rc = migrate_pages_batch(&folios, get_new_page, put_new_page,
+					 private, mode, reason, ret_folios,
+					 split_folios, stats, NR_MAX_MIGRATE_SYNC_RETRY);
+		list_splice_tail_init(&folios, ret_folios);
+		if (rc < 0)
+			return rc;
+		nr_failed += rc;
+	}
+
+	return nr_failed;
+}
+
 /*
  * migrate_pages - migrate the folios specified in a list, to the free folios
  *		   supplied as the target for the page migration
@@ -1888,7 +1932,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
 {
 	int rc, rc_gather;
-	int nr_pages, batch;
+	int nr_pages;
 	struct folio *folio, *folio2;
 	LIST_HEAD(folios);
 	LIST_HEAD(ret_folios);
@@ -1904,10 +1948,6 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 	if (rc_gather < 0)
 		goto out;
 
-	if (mode == MIGRATE_ASYNC)
-		batch = NR_MAX_BATCHED_MIGRATION;
-	else
-		batch = 1;
 again:
 	nr_pages = 0;
 	list_for_each_entry_safe(folio, folio2, from, lru) {
@@ -1918,16 +1958,20 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		}
 
 		nr_pages += folio_nr_pages(folio);
-		if (nr_pages >= batch)
+		if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
 			break;
 	}
-	if (nr_pages >= batch)
+	if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
 		list_cut_before(&folios, from, &folio2->lru);
 	else
 		list_splice_init(from, &folios);
-	rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
-				 mode, reason, &ret_folios, &split_folios, &stats,
-				 NR_MAX_MIGRATE_PAGES_RETRY);
+	if (mode == MIGRATE_ASYNC)
+		rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
+					 mode, reason, &ret_folios, &split_folios, &stats,
+					 NR_MAX_MIGRATE_PAGES_RETRY);
+	else
+		rc = migrate_pages_sync(&folios, get_new_page, put_new_page, private,
+					mode, reason, &ret_folios, &split_folios, &stats);
 	list_splice_tail_init(&folios, &ret_folios);
 	if (rc < 0) {
 		rc_gather = rc;

From 90410bcf873cf05f54a32183afff0161f44f9715 Mon Sep 17 00:00:00 2001
From: Jan Kara via Ocfs2-devel <ocfs2-devel@oss.oracle.com>
Date: Thu, 2 Mar 2023 16:38:43 +0100
Subject: [PATCH 082/411] ocfs2: fix data corruption after failed write

When buffered write fails to copy data into underlying page cache page,
ocfs2_write_end_nolock() just zeroes out and dirties the page.  This can
leave dirty page beyond EOF and if page writeback tries to write this page
before write succeeds and expands i_size, page gets into inconsistent
state where page dirty bit is clear but buffer dirty bits stay set
resulting in page data never getting written and so data copied to the
page is lost.  Fix the problem by invalidating page beyond EOF after
failed write.

Link: https://lkml.kernel.org/r/20230302153843.18499-1-jack@suse.cz
Fixes: 6dbf7bb55598 ("fs: Don't invalidate page buffers in block_write_full_page()")
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <gechangwei@live.cn>
Cc: Gang He <ghe@suse.com>
Cc: Jun Piao <piaojun@huawei.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ocfs2/aops.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 1d65f6ef00ca8..0394505fdce3f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1977,11 +1977,26 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 	}
 
 	if (unlikely(copied < len) && wc->w_target_page) {
+		loff_t new_isize;
+
 		if (!PageUptodate(wc->w_target_page))
 			copied = 0;
 
-		ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
-				       start+len);
+		new_isize = max_t(loff_t, i_size_read(inode), pos + copied);
+		if (new_isize > page_offset(wc->w_target_page))
+			ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
+					       start+len);
+		else {
+			/*
+			 * When page is fully beyond new isize (data copy
+			 * failed), do not bother zeroing the page. Invalidate
+			 * it instead so that writeback does not get confused
+			 * put page & buffer dirty bits into inconsistent
+			 * state.
+			 */
+			block_invalidate_folio(page_folio(wc->w_target_page),
+						0, PAGE_SIZE);
+		}
 	}
 	if (wc->w_target_page)
 		flush_dcache_page(wc->w_target_page);

From 751688b8be9049f558f86982966ecaa61a9cbedf Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Sat, 4 Mar 2023 19:39:48 +0000
Subject: [PATCH 083/411] mm/damon/paddr: fix folio_size() call after
 folio_put() in damon_pa_young()

Patch series "mm/damon/paddr: Fix folio-use-after-put bugs".

There are two folio accesses after folio_put() in mm/damon/paddr.c file.
Fix those.


This patch (of 2):

damon_pa_young() is accessing a folio via folio_size() after folio_put()
for the folio has invoked.  Fix it.

Link: https://lkml.kernel.org/r/20230304193949.296391-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20230304193949.296391-2-sj@kernel.org
Fixes: 397b0c3a584b ("mm/damon/paddr: remove folio_sz field from damon_pa_access_chk_result")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: <stable@vger.kernel.org>	[6.2.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/paddr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 6c655d9b56391..49b267b033050 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -130,7 +130,6 @@ static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
 			accessed = false;
 		else
 			accessed = true;
-		folio_put(folio);
 		goto out;
 	}
 
@@ -144,10 +143,10 @@ static bool damon_pa_young(unsigned long paddr, unsigned long *folio_sz)
 
 	if (need_lock)
 		folio_unlock(folio);
-	folio_put(folio);
 
 out:
 	*folio_sz = folio_size(folio);
+	folio_put(folio);
 	return accessed;
 }
 

From dd52a61da0dd8bab8b90e808f0e5ad507b61ad6d Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Sat, 4 Mar 2023 19:39:49 +0000
Subject: [PATCH 084/411] mm/damon/paddr: fix folio_nr_pages() after
 folio_put() in damon_pa_mark_accessed_or_deactivate()

damon_pa_mark_accessed_or_deactivate() is accessing a folio via
folio_nr_pages() after folio_put() for the folio has invoked.  Fix it.

Link: https://lkml.kernel.org/r/20230304193949.296391-3-sj@kernel.org
Fixes: f70da5ee8fe1 ("mm/damon: convert damon_pa_mark_accessed_or_deactivate() to use folios")
Signed-off-by: SeongJae Park <sj@kernel.org>
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/paddr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 49b267b033050..dd9c33fbe8052 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -280,8 +280,8 @@ static inline unsigned long damon_pa_mark_accessed_or_deactivate(
 			folio_mark_accessed(folio);
 		else
 			folio_deactivate(folio);
-		folio_put(folio);
 		applied += folio_nr_pages(folio);
+		folio_put(folio);
 	}
 	return applied * PAGE_SIZE;
 }

From d3c57724f1569311e4b81e98fad0931028b9bdcd Mon Sep 17 00:00:00 2001
From: Wenchao Hao <haowenchao2@huawei.com>
Date: Sat, 25 Feb 2023 18:01:36 +0800
Subject: [PATCH 085/411] scsi: mpt3sas: Fix NULL pointer access in
 mpt3sas_transport_port_add()

Port is allocated by sas_port_alloc_num() and rphy is allocated by either
sas_end_device_alloc() or sas_expander_alloc(), all of which may return
NULL. So we need to check the rphy to avoid possible NULL pointer access.

If sas_rphy_add() returned with failure, rphy is set to NULL. We would
access the rphy in the following lines which would also result NULL pointer
access.

Fixes: 78316e9dfc24 ("scsi: mpt3sas: Fix possible resource leaks in mpt3sas_transport_port_add()")
Signed-off-by: Wenchao Hao <haowenchao2@huawei.com>
Link: https://lore.kernel.org/r/20230225100135.2109330-1-haowenchao2@huawei.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_transport.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index e5ecd6ada6cdd..e8a4750f6ec47 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -785,7 +785,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 		goto out_fail;
 	}
 	port = sas_port_alloc_num(sas_node->parent_dev);
-	if ((sas_port_add(port))) {
+	if (!port || (sas_port_add(port))) {
 		ioc_err(ioc, "failure at %s:%d/%s()!\n",
 			__FILE__, __LINE__, __func__);
 		goto out_fail;
@@ -824,6 +824,12 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 			    mpt3sas_port->remote_identify.sas_address;
 	}
 
+	if (!rphy) {
+		ioc_err(ioc, "failure at %s:%d/%s()!\n",
+			__FILE__, __LINE__, __func__);
+		goto out_delete_port;
+	}
+
 	rphy->identify = mpt3sas_port->remote_identify;
 
 	if ((sas_rphy_add(rphy))) {
@@ -831,6 +837,7 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 			__FILE__, __LINE__, __func__);
 		sas_rphy_free(rphy);
 		rphy = NULL;
+		goto out_delete_port;
 	}
 
 	if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) {
@@ -857,7 +864,10 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 		    rphy_to_expander_device(rphy), hba_port->port_id);
 	return mpt3sas_port;
 
- out_fail:
+out_delete_port:
+	sas_port_delete(port);
+
+out_fail:
 	list_for_each_entry_safe(mpt3sas_phy, next, &mpt3sas_port->phy_list,
 	    port_siblings)
 		list_del(&mpt3sas_phy->port_siblings);

From f305a7b6ca21a665e8d0cf70b5936991a298c93c Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 3 Mar 2023 00:43:31 +0100
Subject: [PATCH 086/411] scsi: mpi3mr: Fix throttle_groups memory leak

Add a missing kfree().

Fixes: f10af057325c ("scsi: mpi3mr: Resource Based Metering")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230302234336.25456-2-thenzl@redhat.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr_fw.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 29acf6111db30..514ccf29cd7fa 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -4389,6 +4389,9 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc)
 		mrioc->pel_seqnum_virt = NULL;
 	}
 
+	kfree(mrioc->throttle_groups);
+	mrioc->throttle_groups = NULL;
+
 	kfree(mrioc->logdata_buf);
 	mrioc->logdata_buf = NULL;
 

From 7d2b02172b6a2ae6aecd7ef6480b9c4bf3dc59f4 Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 3 Mar 2023 00:43:32 +0100
Subject: [PATCH 087/411] scsi: mpi3mr: Fix config page DMA memory leak

A fix for:

DMA-API: pci 0000:83:00.0: device driver has pending DMA allocations while released from device [count=1]

Fixes: 32d457d5a2af ("scsi: mpi3mr: Add framework to issue config requests")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230302234336.25456-3-thenzl@redhat.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr_fw.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 514ccf29cd7fa..0736e6a59e81c 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -4382,7 +4382,11 @@ void mpi3mr_free_mem(struct mpi3mr_ioc *mrioc)
 		    mrioc->admin_req_base, mrioc->admin_req_dma);
 		mrioc->admin_req_base = NULL;
 	}
-
+	if (mrioc->cfg_page) {
+		dma_free_coherent(&mrioc->pdev->dev, mrioc->cfg_page_sz,
+		    mrioc->cfg_page, mrioc->cfg_page_dma);
+		mrioc->cfg_page = NULL;
+	}
 	if (mrioc->pel_seqnum_virt) {
 		dma_free_coherent(&mrioc->pdev->dev, mrioc->pel_seqnum_sz,
 		    mrioc->pel_seqnum_virt, mrioc->pel_seqnum_dma);

From d0f3c3728da8af76dfe435f7f0cfa2b9d9e43ef0 Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 3 Mar 2023 00:43:33 +0100
Subject: [PATCH 088/411] scsi: mpi3mr: Fix mpi3mr_hba_port memory leak in
 mpi3mr_remove()

Free mpi3mr_hba_port at .remove.

Fixes: 42fc9fee116f ("scsi: mpi3mr: Add helper functions to manage device's port")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230302234336.25456-4-thenzl@redhat.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr_os.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index a794cc8a1c0b1..2ad03b993f05e 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -5078,6 +5078,7 @@ static void mpi3mr_remove(struct pci_dev *pdev)
 	struct workqueue_struct	*wq;
 	unsigned long flags;
 	struct mpi3mr_tgt_dev *tgtdev, *tgtdev_next;
+	struct mpi3mr_hba_port *port, *hba_port_next;
 
 	if (!shost)
 		return;
@@ -5117,6 +5118,16 @@ static void mpi3mr_remove(struct pci_dev *pdev)
 	mpi3mr_free_mem(mrioc);
 	mpi3mr_cleanup_resources(mrioc);
 
+	spin_lock_irqsave(&mrioc->sas_node_lock, flags);
+	list_for_each_entry_safe(port, hba_port_next, &mrioc->hba_port_table_list, list) {
+		ioc_info(mrioc,
+		    "removing hba_port entry: %p port: %d from hba_port list\n",
+		    port, port->port_id);
+		list_del(&port->list);
+		kfree(port);
+	}
+	spin_unlock_irqrestore(&mrioc->sas_node_lock, flags);
+
 	spin_lock(&mrioc_list_lock);
 	list_del(&mrioc->list);
 	spin_unlock(&mrioc_list_lock);

From d4caa1a4255cc44be56bcab3db2c97c632e6cc10 Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 3 Mar 2023 00:43:34 +0100
Subject: [PATCH 089/411] scsi: mpi3mr: Fix sas_hba.phy memory leak in
 mpi3mr_remove()

Free mrioc->sas_hba.phy at .remove.

Fixes: 42fc9fee116f ("scsi: mpi3mr: Add helper functions to manage device's port")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230302234336.25456-5-thenzl@redhat.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr_os.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index 2ad03b993f05e..2e546c80d98ce 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -5128,6 +5128,12 @@ static void mpi3mr_remove(struct pci_dev *pdev)
 	}
 	spin_unlock_irqrestore(&mrioc->sas_node_lock, flags);
 
+	if (mrioc->sas_hba.num_phys) {
+		kfree(mrioc->sas_hba.phy);
+		mrioc->sas_hba.phy = NULL;
+		mrioc->sas_hba.num_phys = 0;
+	}
+
 	spin_lock(&mrioc_list_lock);
 	list_del(&mrioc->list);
 	spin_unlock(&mrioc_list_lock);

From c798304470cab88723d895726d17fcb96472e0e9 Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 3 Mar 2023 00:43:35 +0100
Subject: [PATCH 090/411] scsi: mpi3mr: Fix memory leaks in mpi3mr_init_ioc()

Don't allocate memory again when IOC is being reinitialized.

Fixes: fe6db6151565 ("scsi: mpi3mr: Handle offline FW activation in graceful manner")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230302234336.25456-6-thenzl@redhat.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr_fw.c | 41 ++++++++++++++++++---------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/mpi3mr/mpi3mr_fw.c b/drivers/scsi/mpi3mr/mpi3mr_fw.c
index 0736e6a59e81c..a565817aa56d4 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_fw.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_fw.c
@@ -3837,29 +3837,34 @@ int mpi3mr_init_ioc(struct mpi3mr_ioc *mrioc)
 
 	mpi3mr_print_ioc_info(mrioc);
 
-	dprint_init(mrioc, "allocating config page buffers\n");
-	mrioc->cfg_page = dma_alloc_coherent(&mrioc->pdev->dev,
-	    MPI3MR_DEFAULT_CFG_PAGE_SZ, &mrioc->cfg_page_dma, GFP_KERNEL);
 	if (!mrioc->cfg_page) {
-		retval = -1;
-		goto out_failed_noretry;
+		dprint_init(mrioc, "allocating config page buffers\n");
+		mrioc->cfg_page_sz = MPI3MR_DEFAULT_CFG_PAGE_SZ;
+		mrioc->cfg_page = dma_alloc_coherent(&mrioc->pdev->dev,
+		    mrioc->cfg_page_sz, &mrioc->cfg_page_dma, GFP_KERNEL);
+		if (!mrioc->cfg_page) {
+			retval = -1;
+			goto out_failed_noretry;
+		}
 	}
 
-	mrioc->cfg_page_sz = MPI3MR_DEFAULT_CFG_PAGE_SZ;
-
-	retval = mpi3mr_alloc_reply_sense_bufs(mrioc);
-	if (retval) {
-		ioc_err(mrioc,
-		    "%s :Failed to allocated reply sense buffers %d\n",
-		    __func__, retval);
-		goto out_failed_noretry;
+	if (!mrioc->init_cmds.reply) {
+		retval = mpi3mr_alloc_reply_sense_bufs(mrioc);
+		if (retval) {
+			ioc_err(mrioc,
+			    "%s :Failed to allocated reply sense buffers %d\n",
+			    __func__, retval);
+			goto out_failed_noretry;
+		}
 	}
 
-	retval = mpi3mr_alloc_chain_bufs(mrioc);
-	if (retval) {
-		ioc_err(mrioc, "Failed to allocated chain buffers %d\n",
-		    retval);
-		goto out_failed_noretry;
+	if (!mrioc->chain_sgl_list) {
+		retval = mpi3mr_alloc_chain_bufs(mrioc);
+		if (retval) {
+			ioc_err(mrioc, "Failed to allocated chain buffers %d\n",
+			    retval);
+			goto out_failed_noretry;
+		}
 	}
 
 	retval = mpi3mr_issue_iocinit(mrioc);

From ce756daa36e1ba271bb3334267295e447aa57a5c Mon Sep 17 00:00:00 2001
From: Tomas Henzl <thenzl@redhat.com>
Date: Fri, 3 Mar 2023 00:43:36 +0100
Subject: [PATCH 091/411] scsi: mpi3mr: Fix expander node leak in
 mpi3mr_remove()

Add a missing resource clean up in .remove.

Fixes: e22bae30667a ("scsi: mpi3mr: Add expander devices to STL")
Signed-off-by: Tomas Henzl <thenzl@redhat.com>
Link: https://lore.kernel.org/r/20230302234336.25456-7-thenzl@redhat.com
Acked-by: Sathya Prakash Veerichetty <sathya.prakash@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpi3mr/mpi3mr.h           | 2 ++
 drivers/scsi/mpi3mr/mpi3mr_os.c        | 7 +++++++
 drivers/scsi/mpi3mr/mpi3mr_transport.c | 5 +----
 3 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 40f238fa80cc1..364fb1b5e45ac 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -1393,4 +1393,6 @@ void mpi3mr_flush_drv_cmds(struct mpi3mr_ioc *mrioc);
 void mpi3mr_flush_cmds_for_unrecovered_controller(struct mpi3mr_ioc *mrioc);
 void mpi3mr_free_enclosure_list(struct mpi3mr_ioc *mrioc);
 int mpi3mr_process_admin_reply_q(struct mpi3mr_ioc *mrioc);
+void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc,
+	struct mpi3mr_sas_node *sas_expander);
 #endif /*MPI3MR_H_INCLUDED*/
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index 2e546c80d98ce..6d55698ea4d16 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -5079,6 +5079,7 @@ static void mpi3mr_remove(struct pci_dev *pdev)
 	unsigned long flags;
 	struct mpi3mr_tgt_dev *tgtdev, *tgtdev_next;
 	struct mpi3mr_hba_port *port, *hba_port_next;
+	struct mpi3mr_sas_node *sas_expander, *sas_expander_next;
 
 	if (!shost)
 		return;
@@ -5119,6 +5120,12 @@ static void mpi3mr_remove(struct pci_dev *pdev)
 	mpi3mr_cleanup_resources(mrioc);
 
 	spin_lock_irqsave(&mrioc->sas_node_lock, flags);
+	list_for_each_entry_safe_reverse(sas_expander, sas_expander_next,
+	    &mrioc->sas_expander_list, list) {
+		spin_unlock_irqrestore(&mrioc->sas_node_lock, flags);
+		mpi3mr_expander_node_remove(mrioc, sas_expander);
+		spin_lock_irqsave(&mrioc->sas_node_lock, flags);
+	}
 	list_for_each_entry_safe(port, hba_port_next, &mrioc->hba_port_table_list, list) {
 		ioc_info(mrioc,
 		    "removing hba_port entry: %p port: %d from hba_port list\n",
diff --git a/drivers/scsi/mpi3mr/mpi3mr_transport.c b/drivers/scsi/mpi3mr/mpi3mr_transport.c
index be25f242fa794..5748bd9369ff7 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_transport.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_transport.c
@@ -9,9 +9,6 @@
 
 #include "mpi3mr.h"
 
-static void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc,
-	struct mpi3mr_sas_node *sas_expander);
-
 /**
  * mpi3mr_post_transport_req - Issue transport requests and wait
  * @mrioc: Adapter instance reference
@@ -2164,7 +2161,7 @@ int mpi3mr_expander_add(struct mpi3mr_ioc *mrioc, u16 handle)
  *
  * Return nothing.
  */
-static void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc,
+void mpi3mr_expander_node_remove(struct mpi3mr_ioc *mrioc,
 	struct mpi3mr_sas_node *sas_expander)
 {
 	struct mpi3mr_sas_port *mr_sas_port, *next;

From bbdf904b13a62bb8b1272d92a7dde082dff86fbb Mon Sep 17 00:00:00 2001
From: Bard Liao <yung-chuan.liao@linux.intel.com>
Date: Mon, 6 Mar 2023 15:41:01 +0800
Subject: [PATCH 092/411] ALSA: hda: intel-dsp-config: add MTL PCI id

Use SOF as default audio driver.

Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Reviewed-by: Gongjun Song <gongjun.song@intel.com>
Reviewed-by: Kai Vehmanen <kai.vehmanen@linux.intel.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20230306074101.3906707-1-yung-chuan.liao@linux.intel.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/hda/intel-dsp-config.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index ae31bb1275940..317bdf6dcbef4 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -472,6 +472,15 @@ static const struct config_entry config_table[] = {
 	},
 #endif
 
+/* Meteor Lake */
+#if IS_ENABLED(CONFIG_SND_SOC_SOF_METEORLAKE)
+	/* Meteorlake-P */
+	{
+		.flags = FLAG_SOF | FLAG_SOF_ONLY_IF_DMIC_OR_SOUNDWIRE,
+		.device = 0x7e28,
+	},
+#endif
+
 };
 
 static const struct config_entry *snd_intel_dsp_find_config

From 7bb62340951a9af20235a3bde8c98e2e292915df Mon Sep 17 00:00:00 2001
From: Jeremy Szu <jeremy.szu@canonical.com>
Date: Tue, 7 Mar 2023 21:53:16 +0800
Subject: [PATCH 093/411] ALSA: hda/realtek: fix speaker, mute/micmute LEDs not
 work on a HP platform

There is a HP platform needs ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED quirk to
make mic-mute/audio-mute/speaker working.

Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20230307135317.37621-1-jeremy.szu@canonical.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 3c629f4ae0807..5d530b489c48c 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9447,6 +9447,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x103c, 0x8b8a, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b8b, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b8d, "HP", ALC236_FIXUP_HP_GPIO_LED),
+	SND_PCI_QUIRK(0x103c, 0x8b8f, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8b92, "HP", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
 	SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),

From a86e79e3015f5dd8e1b01ccfa49bd5c6e41047a1 Mon Sep 17 00:00:00 2001
From: "Hamidreza H. Fard" <nitocris@posteo.net>
Date: Tue, 7 Mar 2023 16:37:41 +0000
Subject: [PATCH 094/411] ALSA: hda/realtek: Fix the speaker output on Samsung
 Galaxy Book2 Pro

Samsung Galaxy Book2 Pro (13" 2022 NP930XED-KA1DE) with codec SSID
144d:c868 requires the same workaround for enabling the speaker amp
like other Samsung models with ALC298 code.

Signed-off-by: Hamidreza H. Fard <nitocris@posteo.net>
Cc: <stable@vger.kernel.org>
Link: https://lore.kernel.org/r/20230307163741.3878-1-nitocris@posteo.net
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_realtek.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 5d530b489c48c..f09a1d7c1b186 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -9540,6 +9540,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
 	SND_PCI_QUIRK(0x144d, 0xc830, "Samsung Galaxy Book Ion (NT950XCJ-X716A)", ALC298_FIXUP_SAMSUNG_AMP),
 	SND_PCI_QUIRK(0x144d, 0xc832, "Samsung Galaxy Book Flex Alpha (NP730QCJ)", ALC256_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET),
 	SND_PCI_QUIRK(0x144d, 0xca03, "Samsung Galaxy Book2 Pro 360 (NP930QED)", ALC298_FIXUP_SAMSUNG_AMP),
+	SND_PCI_QUIRK(0x144d, 0xc868, "Samsung Galaxy Book2 Pro (NP930XED)", ALC298_FIXUP_SAMSUNG_AMP),
 	SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC),
 	SND_PCI_QUIRK(0x1462, 0xb171, "Cubi N 8GL (MS-B171)", ALC283_FIXUP_HEADSET_MIC),

From ff447886e675979d66b2bc01810035d3baea1b3a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 7 Mar 2023 15:40:54 -0600
Subject: [PATCH 095/411] ALSA: hda: Match only Intel devices with
 CONTROLLER_IN_GPU()

CONTROLLER_IN_GPU() is clearly intended to match only Intel devices, but
previously it checked only the PCI Device ID, not the Vendor ID, so it
could match devices from other vendors that happened to use the same Device
ID.

Update CONTROLLER_IN_GPU() so it matches only Intel devices.

Fixes: 535115b5ff51 ("ALSA: hda - Abort the probe without i915 binding for HSW/B")
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20230307214054.886721-1-helgaas@kernel.org
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_intel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 81c4a45254ff2..77a592f219472 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c
@@ -328,14 +328,15 @@ enum {
 #define needs_eld_notify_link(chip)	false
 #endif
 
-#define CONTROLLER_IN_GPU(pci) (((pci)->device == 0x0a0c) || \
+#define CONTROLLER_IN_GPU(pci) (((pci)->vendor == 0x8086) &&         \
+				       (((pci)->device == 0x0a0c) || \
 					((pci)->device == 0x0c0c) || \
 					((pci)->device == 0x0d0c) || \
 					((pci)->device == 0x160c) || \
 					((pci)->device == 0x490d) || \
 					((pci)->device == 0x4f90) || \
 					((pci)->device == 0x4f91) || \
-					((pci)->device == 0x4f92))
+					((pci)->device == 0x4f92)))
 
 #define IS_BXT(pci) ((pci)->vendor == 0x8086 && (pci)->device == 0x5a98)
 

From 068d82e75d537b444303b8c449a11e51ea659565 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Tue, 7 Mar 2023 23:22:56 +0000
Subject: [PATCH 096/411] netfilter: nft_nat: correct length for loading
 protocol registers

The values in the protocol registers are two bytes wide.  However, when
parsing the register loads, the code currently uses the larger 16-byte
size of a `union nf_inet_addr`.  Change it to use the (correct) size of
a `union nf_conntrack_man_proto` instead.

Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()")
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 0479991503900..5c29915ab0289 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -226,7 +226,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		priv->flags |= NF_NAT_RANGE_MAP_IPS;
 	}
 
-	plen = sizeof_field(struct nf_nat_range, min_addr.all);
+	plen = sizeof_field(struct nf_nat_range, min_proto.all);
 	if (tb[NFTA_NAT_REG_PROTO_MIN]) {
 		err = nft_parse_register_load(tb[NFTA_NAT_REG_PROTO_MIN],
 					      &priv->sreg_proto_min, plen);

From ec2c5917eb858428b2083d1c74f445aabbe8316b Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Tue, 7 Mar 2023 23:22:57 +0000
Subject: [PATCH 097/411] netfilter: nft_masq: correct length for loading
 protocol registers

The values in the protocol registers are two bytes wide.  However, when
parsing the register loads, the code currently uses the larger 16-byte
size of a `union nf_inet_addr`.  Change it to use the (correct) size of
a `union nf_conntrack_man_proto` instead.

Fixes: 8a6bf5da1aef ("netfilter: nft_masq: support port range")
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_masq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index e55e455275c48..9544c2f16998b 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -43,7 +43,7 @@ static int nft_masq_init(const struct nft_ctx *ctx,
 			 const struct nft_expr *expr,
 			 const struct nlattr * const tb[])
 {
-	u32 plen = sizeof_field(struct nf_nat_range, min_addr.all);
+	u32 plen = sizeof_field(struct nf_nat_range, min_proto.all);
 	struct nft_masq *priv = nft_expr_priv(expr);
 	int err;
 

From 1f617b6b4c7a3d5ea7a56abb83a4c27733b60c2f Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Tue, 7 Mar 2023 23:22:58 +0000
Subject: [PATCH 098/411] netfilter: nft_redir: correct length for loading
 protocol registers

The values in the protocol registers are two bytes wide.  However, when
parsing the register loads, the code currently uses the larger 16-byte
size of a `union nf_inet_addr`.  Change it to use the (correct) size of
a `union nf_conntrack_man_proto` instead.

Fixes: d07db9884a5f ("netfilter: nf_tables: introduce nft_validate_register_load()")
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_redir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index 5f77399875593..dbc642f5d32a3 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -48,7 +48,7 @@ static int nft_redir_init(const struct nft_ctx *ctx,
 	unsigned int plen;
 	int err;
 
-	plen = sizeof_field(struct nf_nat_range, min_addr.all);
+	plen = sizeof_field(struct nf_nat_range, min_proto.all);
 	if (tb[NFTA_REDIR_REG_PROTO_MIN]) {
 		err = nft_parse_register_load(tb[NFTA_REDIR_REG_PROTO_MIN],
 					      &priv->sreg_proto_min, plen);

From 493924519b1fe3faab13ee621a43b0d0939abab1 Mon Sep 17 00:00:00 2001
From: Jeremy Sowden <jeremy@azazel.net>
Date: Tue, 7 Mar 2023 23:22:59 +0000
Subject: [PATCH 099/411] netfilter: nft_redir: correct value of inet type
 `.maxattrs`

`nft_redir_inet_type.maxattrs` was being set, presumably because of a
cut-and-paste error, to `NFTA_MASQ_MAX`, instead of `NFTA_REDIR_MAX`.

Fixes: 63ce3940f3ab ("netfilter: nft_redir: add inet support")
Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
Reviewed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_redir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index dbc642f5d32a3..67cec56bc84a3 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -236,7 +236,7 @@ static struct nft_expr_type nft_redir_inet_type __read_mostly = {
 	.name		= "redir",
 	.ops		= &nft_redir_inet_ops,
 	.policy		= nft_redir_policy,
-	.maxattr	= NFTA_MASQ_MAX,
+	.maxattr	= NFTA_REDIR_MAX,
 	.owner		= THIS_MODULE,
 };
 

From af0f46e5b9a462aaa1d76e82781a5316f03828eb Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Tue, 7 Mar 2023 07:51:11 -0800
Subject: [PATCH 100/411] ASoC: da7219: Initialize jack_det_mutex

The following traceback is reported if mutex debugging is enabled.

DEBUG_LOCKS_WARN_ON(lock->magic != lock)
WARNING: CPU: 0 PID: 17 at kernel/locking/mutex.c:950 __mutex_lock_common+0x31c/0x11d4
Modules linked in:
CPU: 0 PID: 17 Comm: kworker/0:1 Not tainted 5.10.172-lockdep-21846-g849884cfca5a #1 fd2de466502012eb58bc8beb467f07d0b925611f
Hardware name: MediaTek kakadu rev0/rev1 board (DT)
Workqueue: events da7219_aad_jack_det_work
pstate: 60400005 (nZCv daif +PAN -UAO -TCO BTYPE=--)
pc : __mutex_lock_common+0x31c/0x11d4
lr : __mutex_lock_common+0x31c/0x11d4
sp : ffffff80c0317ae0
x29: ffffff80c0317b50 x28: ffffff80c0317b20
x27: 0000000000000000 x26: 0000000000000000
x25: 0000000000000000 x24: 0000000100000000
x23: ffffffd0121d296c x22: dfffffd000000000
x21: 0000000000000000 x20: 0000000000000000
x19: ffffff80c73d7190 x18: 1ffffff018050f52
x17: 0000000000000000 x16: 0000000000000000
x15: 0000000000000000 x14: 0000000000000000
x13: 0000000000000001 x12: 0000000000000001
x11: 0000000000000000 x10: 0000000000000000
x9 : 83f0d991da544b00 x8 : 83f0d991da544b00
x7 : 0000000000000000 x6 : 0000000000000001
x5 : ffffff80c03176a0 x4 : 0000000000000000
x3 : ffffffd01067fd78 x2 : 0000000100000000
x1 : ffffff80c030ba80 x0 : 0000000000000028
Call trace:
__mutex_lock_common+0x31c/0x11d4
mutex_lock_nested+0x98/0xac
da7219_aad_jack_det_work+0x54/0xf0
process_one_work+0x6cc/0x19dc
worker_thread+0x458/0xddc
kthread+0x2fc/0x370
ret_from_fork+0x10/0x30
irq event stamp: 579
hardirqs last enabled at (579): [<ffffffd012442b30>] exit_to_kernel_mode+0x108/0x138
hardirqs last disabled at (577): [<ffffffd010001144>] __do_softirq+0x53c/0x125c
softirqs last enabled at (578): [<ffffffd01009995c>] __irq_exit_rcu+0x264/0x4f4
softirqs last disabled at (573): [<ffffffd01009995c>] __irq_exit_rcu+0x264/0x4f4
---[ end trace 26da674636181c40 ]---

Initialize the mutex to fix the problem.

Cc: David Rau <David.Rau.opensource@dm.renesas.com>
Fixes: 7fde88eda855 ("ASoC: da7219: Improve the IRQ process to increase the stability")
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20230307155111.1985522-1-linux@roeck-us.net
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/da7219-aad.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c
index 4a4f09f924bc5..e3d398b8f54e4 100644
--- a/sound/soc/codecs/da7219-aad.c
+++ b/sound/soc/codecs/da7219-aad.c
@@ -968,6 +968,8 @@ int da7219_aad_init(struct snd_soc_component *component)
 	INIT_WORK(&da7219_aad->hptest_work, da7219_aad_hptest_work);
 	INIT_WORK(&da7219_aad->jack_det_work, da7219_aad_jack_det_work);
 
+	mutex_init(&da7219_aad->jack_det_mutex);
+
 	ret = request_threaded_irq(da7219_aad->irq, da7219_aad_pre_irq_thread,
 				   da7219_aad_irq_thread,
 				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,

From e041a2a550582106cba6a7c862c90dfc2ad14492 Mon Sep 17 00:00:00 2001
From: Emil Abildgaard Svendsen <EMAS@bang-olufsen.dk>
Date: Thu, 9 Mar 2023 06:54:41 +0000
Subject: [PATCH 101/411] ASoC: hdmi-codec: only startup/shutdown on supported
 streams

Currently only one stream is supported. This isn't usally a problem
until you have a multi codec audio card. Because the audio card will run
startup and shutdown on both capture and playback streams. So if your
hdmi-codec only support either playback or capture. Then ALSA can't open
for playback and capture.

This patch will ignore if startup and shutdown are called with a non
supported stream. Thus, allowing an audio card like this:

           +-+
 cpu1 <--@-| |-> codec1 (HDMI-CODEC)
           | |<- codec2 (NOT HDMI-CODEC)
           +-+

Signed-off-by: Emil Svendsen <emas@bang-olufsen.dk>
Link: https://lore.kernel.org/r/20230309065432.4150700-2-emas@bang-olufsen.dk
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 sound/soc/codecs/hdmi-codec.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/sound/soc/codecs/hdmi-codec.c b/sound/soc/codecs/hdmi-codec.c
index 01e8ffda2a4bf..6d980fbc42077 100644
--- a/sound/soc/codecs/hdmi-codec.c
+++ b/sound/soc/codecs/hdmi-codec.c
@@ -428,8 +428,13 @@ static int hdmi_codec_startup(struct snd_pcm_substream *substream,
 {
 	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
 	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
+	bool has_capture = !hcp->hcd.no_i2s_capture;
+	bool has_playback = !hcp->hcd.no_i2s_playback;
 	int ret = 0;
 
+	if (!((has_playback && tx) || (has_capture && !tx)))
+		return 0;
+
 	mutex_lock(&hcp->lock);
 	if (hcp->busy) {
 		dev_err(dai->dev, "Only one simultaneous stream supported!\n");
@@ -468,6 +473,12 @@ static void hdmi_codec_shutdown(struct snd_pcm_substream *substream,
 				struct snd_soc_dai *dai)
 {
 	struct hdmi_codec_priv *hcp = snd_soc_dai_get_drvdata(dai);
+	bool tx = substream->stream == SNDRV_PCM_STREAM_PLAYBACK;
+	bool has_capture = !hcp->hcd.no_i2s_capture;
+	bool has_playback = !hcp->hcd.no_i2s_playback;
+
+	if (!((has_playback && tx) || (has_capture && !tx)))
+		return;
 
 	hcp->chmap_idx = HDMI_CODEC_CHMAP_IDX_UNKNOWN;
 	hcp->hcd.ops->audio_shutdown(dai->dev->parent, hcp->hcd.data);

From 11440da77d6020831ee6f9ce4551b545dea789ee Mon Sep 17 00:00:00 2001
From: Francesco Dolcini <francesco.dolcini@toradex.com>
Date: Mon, 6 Mar 2023 17:27:51 +0100
Subject: [PATCH 102/411] mmc: sdhci_am654: lower power-on failed message
 severity

Lower the power-on failed message severity from warn to info when the
controller does not power-up. It's normal to have this situation when
the SD card slot is empty, therefore we should not warn the user about
it.

Fixes: 7ca0f166f5b2 ("mmc: sdhci_am654: Add workaround for card detect debounce timer")
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230306162751.163369-1-francesco@dolcini.it
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/sdhci_am654.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/sdhci_am654.c b/drivers/mmc/host/sdhci_am654.c
index 7ef828942df35..89953093e20c7 100644
--- a/drivers/mmc/host/sdhci_am654.c
+++ b/drivers/mmc/host/sdhci_am654.c
@@ -369,7 +369,7 @@ static void sdhci_am654_write_b(struct sdhci_host *host, u8 val, int reg)
 					MAX_POWER_ON_TIMEOUT, false, host, val,
 					reg);
 		if (ret)
-			dev_warn(mmc_dev(host->mmc), "Power on failed\n");
+			dev_info(mmc_dev(host->mmc), "Power on failed\n");
 	}
 }
 

From 92771cdd90de64b15e65f3c88d6c6199bd5f33f5 Mon Sep 17 00:00:00 2001
From: William Qiu <william.qiu@starfivetech.com>
Date: Tue, 7 Mar 2023 10:46:46 +0800
Subject: [PATCH 103/411] mmc: dw_mmc-starfive: Fix initialization of prev_err

Fix a bug by making sure prev_err doesn't get used when being
uninitialized.

Signed-off-by: William Qiu <william.qiu@starfivetech.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
Fixes: 9e622229bbf4 ("mmc: starfive: Add sdio/emmc driver support")
Link: https://lore.kernel.org/r/20230307024646.10216-3-william.qiu@starfivetech.com
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/mmc/host/dw_mmc-starfive.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mmc/host/dw_mmc-starfive.c b/drivers/mmc/host/dw_mmc-starfive.c
index 40f5969b07a66..dab1508bf83c6 100644
--- a/drivers/mmc/host/dw_mmc-starfive.c
+++ b/drivers/mmc/host/dw_mmc-starfive.c
@@ -51,7 +51,7 @@ static int dw_mci_starfive_execute_tuning(struct dw_mci_slot *slot,
 	struct dw_mci *host = slot->host;
 	struct starfive_priv *priv = host->priv;
 	int rise_point = -1, fall_point = -1;
-	int err, prev_err;
+	int err, prev_err = 0;
 	int i;
 	bool found = 0;
 	u32 regval;

From 2d638be71155b2e036aca1966b6129e2d661e91f Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sun, 26 Feb 2023 12:38:46 -0500
Subject: [PATCH 104/411] Revert "tty: serial: fsl_lpuart: adjust
 SERIAL_FSL_LPUART_CONSOLE config dependency"

This reverts commit 5779a072c248db7a40cfd0f5ea958097fd1d9a30.

This results in a link error of

ld: drivers/tty/serial/earlycon.o: in function `parse_options':
drivers/tty/serial/earlycon.c:99: undefined reference to `uart_parse_earlycon'

When the config is in this state

CONFIG_SERIAL_CORE=m
CONFIG_SERIAL_CORE_CONSOLE=y
CONFIG_SERIAL_EARLYCON=y
CONFIG_SERIAL_FSL_LPUART=m
CONFIG_SERIAL_FSL_LPUART_CONSOLE=y

Fixes: 5779a072c248 ("tty: serial: fsl_lpuart: adjust SERIAL_FSL_LPUART_CONSOLE config dependency")
Cc: stable <stable@kernel.org>
Signed-off-by: Tom Rix <trix@redhat.com>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20230226173846.236691-1-trix@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 625358f444197..0072892ca7fc9 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1313,7 +1313,7 @@ config SERIAL_FSL_LPUART
 
 config SERIAL_FSL_LPUART_CONSOLE
 	bool "Console on Freescale lpuart serial port"
-	depends on SERIAL_FSL_LPUART
+	depends on SERIAL_FSL_LPUART=y
 	select SERIAL_CORE_CONSOLE
 	select SERIAL_EARLYCON
 	help

From 2411fd94ceaa6e11326e95d6ebf876cbfed28d23 Mon Sep 17 00:00:00 2001
From: Sherry Sun <sherry.sun@nxp.com>
Date: Thu, 23 Feb 2023 17:39:41 +0800
Subject: [PATCH 105/411] tty: serial: fsl_lpuart: skip waiting for
 transmission complete when UARTCTRL_SBK is asserted

According to LPUART RM, Transmission Complete Flag becomes 0 if queuing
a break character by writing 1 to CTRL[SBK], so here need to skip
waiting for transmission complete when UARTCTRL_SBK is asserted,
otherwise the kernel may stuck here.
And actually set_termios() adds transmission completion waiting to avoid
data loss or data breakage when changing the baud rate, but we don't
need to worry about this when queuing break characters.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Cc: stable <stable@kernel.org>
Link: https://lore.kernel.org/r/20230223093941.31790-1-sherry.sun@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index e945f41b93d43..f9e164abf9200 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -2240,9 +2240,15 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios,
 	/* update the per-port timeout */
 	uart_update_timeout(port, termios->c_cflag, baud);
 
-	/* wait transmit engin complete */
-	lpuart32_write(&sport->port, 0, UARTMODIR);
-	lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC);
+	/*
+	 * LPUART Transmission Complete Flag may never be set while queuing a break
+	 * character, so skip waiting for transmission complete when UARTCTRL_SBK is
+	 * asserted.
+	 */
+	if (!(old_ctrl & UARTCTRL_SBK)) {
+		lpuart32_write(&sport->port, 0, UARTMODIR);
+		lpuart32_wait_bit_set(&sport->port, UARTSTAT, UARTSTAT_TC);
+	}
 
 	/* disable transmit and receive */
 	lpuart32_write(&sport->port, old_ctrl & ~(UARTCTRL_TE | UARTCTRL_RE),

From f8086d1a65ac693e3fd863128352b4b11ee7324d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 25 Feb 2023 21:39:53 -0800
Subject: [PATCH 106/411] serial: 8250: ASPEED_VUART: select REGMAP instead of
 depending on it

REGMAP is a hidden (not user visible) symbol. Users cannot set it
directly thru "make *config", so drivers should select it instead of
depending on it if they need it.

Consistently using "select" or "depends on" can also help reduce
Kconfig circular dependency issues.

Therefore, change the use of "depends on REGMAP" to "select REGMAP".

Fixes: 8d310c9107a2 ("drivers/tty/serial/8250: Make Aspeed VUART SIRQ polarity configurable")
Cc: stable <stable@kernel.org>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Oskar Senft <osk@google.com>
Cc: linux-serial@vger.kernel.org
Link: https://lore.kernel.org/r/20230226053953.4681-9-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index 978dc196c29be..caeff76e58f29 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -257,8 +257,9 @@ config SERIAL_8250_ASPEED_VUART
 	tristate "Aspeed Virtual UART"
 	depends on SERIAL_8250
 	depends on OF
-	depends on REGMAP && MFD_SYSCON
+	depends on MFD_SYSCON
 	depends on ARCH_ASPEED || COMPILE_TEST
+	select REGMAP
 	help
 	  If you want to use the virtual UART (VUART) device on Aspeed
 	  BMC platforms, enable this option. This enables the 16550A-

From 32e293be736b853f168cd065d9cbc1b0c69f545d Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Mon, 27 Feb 2023 11:41:46 +0000
Subject: [PATCH 107/411] serial: 8250_em: Fix UART port type

As per HW manual for  EMEV2 "R19UH0040EJ0400 Rev.4.00", the UART
IP found on EMMA mobile SoC is Register-compatible with the
general-purpose 16750 UART chip. Fix UART port type as 16750 and
enable 64-bytes fifo support.

Fixes: 22886ee96895 ("serial8250-em: Emma Mobile UART driver V2")
Cc: stable@vger.kernel.org
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Link: https://lore.kernel.org/r/20230227114152.22265-2-biju.das.jz@bp.renesas.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_em.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_em.c b/drivers/tty/serial/8250/8250_em.c
index f8e99995eee91..d94c3811a8f7a 100644
--- a/drivers/tty/serial/8250/8250_em.c
+++ b/drivers/tty/serial/8250/8250_em.c
@@ -106,8 +106,8 @@ static int serial8250_em_probe(struct platform_device *pdev)
 	memset(&up, 0, sizeof(up));
 	up.port.mapbase = regs->start;
 	up.port.irq = irq;
-	up.port.type = PORT_UNKNOWN;
-	up.port.flags = UPF_BOOT_AUTOCONF | UPF_FIXED_PORT | UPF_IOREMAP;
+	up.port.type = PORT_16750;
+	up.port.flags = UPF_FIXED_PORT | UPF_IOREMAP | UPF_FIXED_TYPE;
 	up.port.dev = &pdev->dev;
 	up.port.private_data = priv;
 

From 6e01f9a594ee0f69fb52cc8d11971612b4817f0b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 27 Feb 2023 09:50:46 +0100
Subject: [PATCH 108/411] serial: 8250_fsl: fix handle_irq locking

The 8250 handle_irq callback is not just called from the interrupt
handler but also from a timer callback when polling (e.g. for ports
without an interrupt line). Consequently the callback must explicitly
disable interrupts to avoid a potential deadlock with another interrupt
in polled mode.

Fix up the two paths in the freescale callback that failed to re-enable
interrupts when polling.

Fixes: 853a9ae29e97 ("serial: 8250: fix handle_irq locking")
Cc: stable@vger.kernel.org	# 5.13
Reported-by: Dan Carpenter <error27@gmail.com>
Link: https://lore.kernel.org/r/Y/xYzqp4ogmOF5t0@kili
Signed-off-by: Johan Hovold <johan@kernel.org>
Acked-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20230227085046.24282-1-johan@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_fsl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_fsl.c b/drivers/tty/serial/8250/8250_fsl.c
index 8aad15622a2e5..8adfaa183f778 100644
--- a/drivers/tty/serial/8250/8250_fsl.c
+++ b/drivers/tty/serial/8250/8250_fsl.c
@@ -34,7 +34,7 @@ int fsl8250_handle_irq(struct uart_port *port)
 
 	iir = port->serial_in(port, UART_IIR);
 	if (iir & UART_IIR_NO_INT) {
-		spin_unlock(&up->port.lock);
+		spin_unlock_irqrestore(&up->port.lock, flags);
 		return 0;
 	}
 
@@ -42,7 +42,7 @@ int fsl8250_handle_irq(struct uart_port *port)
 	if (unlikely(up->lsr_saved_flags & UART_LSR_BI)) {
 		up->lsr_saved_flags &= ~UART_LSR_BI;
 		port->serial_in(port, UART_RX);
-		spin_unlock(&up->port.lock);
+		spin_unlock_irqrestore(&up->port.lock, flags);
 		return 1;
 	}
 

From 5d943b5d69c032de7ce9cd625ac083a5c277b9c5 Mon Sep 17 00:00:00 2001
From: Kumaravel Thiagarajan <kumaravel.thiagarajan@microchip.com>
Date: Sun, 5 Mar 2023 20:21:24 +0530
Subject: [PATCH 109/411] serial: 8250_pci1xxxx: Disable SERIAL_8250_PCI1XXXX
 config by default

Commit 32bb477fa7bf ("serial: 8250_pci1xxxx: Add driver for quad-uart
support") made the SERIAL_8250_PCI1XXXX driver enabled when SERIAL_8250
is enabled, disable it as this driver does not need to be enabled by
default

Fixes: 32bb477fa7bf ("serial: 8250_pci1xxxx: Add driver for quad-uart support")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/lkml/CAHk-=whhFCeeuo6vTEmNSx6S-KKkugxgzN_W5Z6v-9yH9gc3Zw@mail.gmail.com/
Signed-off-by: Kumaravel Thiagarajan <kumaravel.thiagarajan@microchip.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20230305145124.13444-1-kumaravel.thiagarajan@microchip.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig
index caeff76e58f29..5313aa31930f4 100644
--- a/drivers/tty/serial/8250/Kconfig
+++ b/drivers/tty/serial/8250/Kconfig
@@ -300,7 +300,6 @@ config SERIAL_8250_PCI1XXXX
 	tristate "Microchip 8250 based serial port"
 	depends on SERIAL_8250 && PCI
 	select SERIAL_8250_PCILIB
-	default SERIAL_8250
 	help
 	 Select this option if you have a setup with Microchip PCIe
 	 Switch with serial port enabled and wish to enable 8250

From 1be6f2b15f902c02e055ae0b419ca789200473c9 Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@siemens.com>
Date: Thu, 9 Mar 2023 14:43:02 +0100
Subject: [PATCH 110/411] tty: serial: fsl_lpuart: fix race on RX DMA shutdown

From time to time DMA completion can come in the middle of DMA shutdown:

<process ctx>:				<IRQ>:
lpuart32_shutdown()
  lpuart_dma_shutdown()
    del_timer_sync()
					lpuart_dma_rx_complete()
					  lpuart_copy_rx_to_tty()
					    mod_timer()
    lpuart_dma_rx_free()

When the timer fires a bit later, sport->dma_rx_desc is NULL:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000004
pc : lpuart_copy_rx_to_tty+0xcc/0x5bc
lr : lpuart_timer_func+0x1c/0x2c
Call trace:
 lpuart_copy_rx_to_tty
 lpuart_timer_func
 call_timer_fn
 __run_timers.part.0
 run_timer_softirq
 __do_softirq
 __irq_exit_rcu
 irq_exit
 handle_domain_irq
 gic_handle_irq
 call_on_irq_stack
 do_interrupt_handler
 ...

To fix this fold del_timer_sync() into lpuart_dma_rx_free() after
dmaengine_terminate_sync() to make sure timer will not be re-started in
lpuart_copy_rx_to_tty() <= lpuart_dma_rx_complete().

Fixes: 4a8588a1cf86 ("serial: fsl_lpuart: delete timer on shutdown")
Cc: stable <stable@kernel.org>
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@siemens.com>
Link: https://lore.kernel.org/r/20230309134302.74940-2-alexander.sverdlin@siemens.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index f9e164abf9200..56e6ba3250cd1 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -1354,6 +1354,7 @@ static void lpuart_dma_rx_free(struct uart_port *port)
 	struct dma_chan *chan = sport->dma_rx_chan;
 
 	dmaengine_terminate_sync(chan);
+	del_timer_sync(&sport->lpuart_timer);
 	dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
 	kfree(sport->rx_ring.buf);
 	sport->rx_ring.tail = 0;
@@ -1813,7 +1814,6 @@ static int lpuart32_startup(struct uart_port *port)
 static void lpuart_dma_shutdown(struct lpuart_port *sport)
 {
 	if (sport->lpuart_dma_rx_use) {
-		del_timer_sync(&sport->lpuart_timer);
 		lpuart_dma_rx_free(&sport->port);
 		sport->lpuart_dma_rx_use = false;
 	}
@@ -1973,10 +1973,8 @@ lpuart_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * Since timer function acqures sport->port.lock, need to stop before
 	 * acquring same lock because otherwise del_timer_sync() can deadlock.
 	 */
-	if (old && sport->lpuart_dma_rx_use) {
-		del_timer_sync(&sport->lpuart_timer);
+	if (old && sport->lpuart_dma_rx_use)
 		lpuart_dma_rx_free(&sport->port);
-	}
 
 	spin_lock_irqsave(&sport->port.lock, flags);
 
@@ -2210,10 +2208,8 @@ lpuart32_set_termios(struct uart_port *port, struct ktermios *termios,
 	 * Since timer function acqures sport->port.lock, need to stop before
 	 * acquring same lock because otherwise del_timer_sync() can deadlock.
 	 */
-	if (old && sport->lpuart_dma_rx_use) {
-		del_timer_sync(&sport->lpuart_timer);
+	if (old && sport->lpuart_dma_rx_use)
 		lpuart_dma_rx_free(&sport->port);
-	}
 
 	spin_lock_irqsave(&sport->port.lock, flags);
 
@@ -3020,7 +3016,6 @@ static int lpuart_suspend(struct device *dev)
 			 * cannot resume as expected, hence gracefully release the
 			 * Rx DMA path before suspend and start Rx DMA path on resume.
 			 */
-			del_timer_sync(&sport->lpuart_timer);
 			lpuart_dma_rx_free(&sport->port);
 
 			/* Disable Rx DMA to use UART port as wakeup source */

From 1a5ecc73b2bfeffe036212d4a6bfacee053ab0a1 Mon Sep 17 00:00:00 2001
From: Saravana Kannan <saravanak@google.com>
Date: Wed, 1 Mar 2023 18:35:09 -0800
Subject: [PATCH 111/411] serdev: Set fwnode for serdev devices

This allow fw_devlink to do dependency tracking for serdev devices.

Reported-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/lkml/03b70a8a-0591-f28b-a567-9d2f736f17e5@gmail.com/
Cc: Stefan Wahren <stefan.wahren@i2se.com>
Signed-off-by: Saravana Kannan <saravanak@google.com>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20230302023509.319903-1-saravanak@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serdev/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index aa80de3a81947..678014253b7b2 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -534,7 +534,7 @@ static int of_serdev_register_devices(struct serdev_controller *ctrl)
 		if (!serdev)
 			continue;
 
-		serdev->dev.of_node = node;
+		device_set_node(&serdev->dev, of_fwnode_handle(node));
 
 		err = serdev_device_add(serdev);
 		if (err) {

From 9aff74cc4e9eb841dde5fd009ed7ddca5db40e68 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 Mar 2023 17:44:02 +0100
Subject: [PATCH 112/411] serial: qcom-geni: fix console shutdown hang

A recent commit added back the calls top stop tx and rx to shutdown()
which had previously been removed by commit e83766334f96 ("tty: serial:
qcom_geni_serial: No need to stop tx/rx on UART shutdown") in order to
be able to use kgdb after stopping the getty.

Not only did this again break kgdb, but it also broke serial consoles
more generally by hanging TX when stopping the getty during reboot.

The underlying problem has been there since the driver was first merged
and fixing it is going to be a bit involved so simply stop calling the
broken stop functions during shutdown for consoles for now.

Fixes: d8aca2f96813 ("tty: serial: qcom-geni-serial: stop operations in progress at shutdown")
Cc: stable <stable@kernel.org>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sa8540p-ride
Link: https://lore.kernel.org/r/20230307164405.14218-2-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index d69592e5e2ec5..11da05d8f8485 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1070,6 +1070,10 @@ static int setup_fifos(struct qcom_geni_serial_port *port)
 static void qcom_geni_serial_shutdown(struct uart_port *uport)
 {
 	disable_irq(uport->irq);
+
+	if (uart_console(uport))
+		return;
+
 	qcom_geni_serial_stop_tx(uport);
 	qcom_geni_serial_stop_rx(uport);
 }

From 95fcfc08537763bff21ec8c450d3d3cb1a60ad09 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 Mar 2023 17:44:03 +0100
Subject: [PATCH 113/411] serial: qcom-geni: fix DMA mapping leak on shutdown

Fix what appears to be a copy-paste error that can lead to a leaked DMA
mapping on close() and failure to restart TX after the port is reopened.

Note that rx_dma_addr is generally NULL when
qcom_geni_serial_stop_tx_dma() is called as part of shutdown() (but
tx_dma_addr need not be).

Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA")
Cc: stable <stable@kernel.org>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sa8540p-ride
Link: https://lore.kernel.org/r/20230307164405.14218-3-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 11da05d8f8485..2aa3872e62835 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -596,7 +596,7 @@ static void qcom_geni_serial_stop_tx_dma(struct uart_port *uport)
 	if (!qcom_geni_serial_main_active(uport))
 		return;
 
-	if (port->rx_dma_addr) {
+	if (port->tx_dma_addr) {
 		geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr,
 				      port->tx_remaining);
 		port->tx_dma_addr = 0;

From 97820780b723197d1b472f2bd39fd8593b5d4edc Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 Mar 2023 17:44:04 +0100
Subject: [PATCH 114/411] serial: qcom-geni: fix mapping of empty DMA buffer

Make sure that there is data in the ring buffer before trying to set up
a zero-length DMA transfer.

This specifically fixes the following warning when unmapping the empty
buffer on the sc8280xp-crd:

   WARNING: CPU: 0 PID: 138 at drivers/iommu/dma-iommu.c:1046 iommu_dma_unmap_page+0xbc/0xd8
   ...
   Call trace:
    iommu_dma_unmap_page+0xbc/0xd8
    dma_unmap_page_attrs+0x30/0x1c8
    geni_se_tx_dma_unprep+0x28/0x38
    qcom_geni_serial_isr+0x358/0x75c

Fixes: 2aaa43c70778 ("tty: serial: qcom-geni-serial: add support for serial engine DMA")
Cc: stable <stable@kernel.org>
Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sa8540p-ride
Link: https://lore.kernel.org/r/20230307164405.14218-4-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 2aa3872e62835..9871225b2f9bb 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -631,6 +631,9 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
 	if (port->tx_dma_addr)
 		return;
 
+	if (uart_circ_empty(xmit))
+		return;
+
 	xmit_size = uart_circ_chars_pending(xmit);
 	if (xmit_size < WAKEUP_CHARS)
 		uart_write_wakeup(uport);

From b6a7bac184472b5b79286a71a61c2f16ea4e86ad Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 7 Mar 2023 17:44:05 +0100
Subject: [PATCH 115/411] serial: qcom-geni: drop bogus uart_write_wakeup()

Drop the bogus uart_write_wakeup() from when setting up a new DMA
transfer, which does not free up any more space in the ring buffer.

Any pending writers will be woken up when the transfer completes.

Cc: stable <stable@kernel.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Tested-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sa8540p-ride
Link: https://lore.kernel.org/r/20230307164405.14218-5-johan+linaro@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 9871225b2f9bb..28fbc927a5465 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -634,10 +634,6 @@ static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
 	if (uart_circ_empty(xmit))
 		return;
 
-	xmit_size = uart_circ_chars_pending(xmit);
-	if (xmit_size < WAKEUP_CHARS)
-		uart_write_wakeup(uport);
-
 	xmit_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
 
 	qcom_geni_serial_setup_tx(uport, xmit_size);

From 18365ebf23f3e713e5dd8e295c9a639295250f3c Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 6 Mar 2023 10:49:21 +0100
Subject: [PATCH 116/411] tty: vt: protect KD_FONT_OP_GET_TALL from unbound
 access

In ioctl(KD_FONT_OP_GET_TALL), userland tells through op->height which
vpitch should be used to copy over the font. In con_font_get, we were
not checking that it is within the maximum height value, and thus
userland could make the vc->vc_sw->con_font_get(vc, &font, vpitch);
call possibly overflow the allocated max_font_size bytes, and the
copy_to_user(op->data, font.data, c) call possibly read out of that
allocated buffer.

By checking vpitch against max_font_height, the max_font_size buffer
will always be large enough for the vc->vc_sw->con_font_get(vc, &font,
vpitch) call (since we already prevent loading a font larger than that),
and c = (font.width+7)/8 * vpitch * font.charcount will always remain
below max_font_size.

Fixes: 24d69384bcd3 ("VT: Add KD_FONT_OP_SET/GET_TALL operations")
Reported-by: syzbot+3af17071816b61e807ed@syzkaller.appspotmail.com
Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20230306094921.tik5ewne4ft6mfpo@begin
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index 57a5c23b51d47..3c2ea9c098f7c 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -4545,6 +4545,9 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op)
 	int c;
 	unsigned int vpitch = op->op == KD_FONT_OP_GET_TALL ? op->height : 32;
 
+	if (vpitch > max_font_height)
+		return -EINVAL;
+
 	if (op->data) {
 		font.data = kvmalloc(max_font_size, GFP_KERNEL);
 		if (!font.data)

From 38ed310c22e7a0fc978b1f8292136a4a4a8b3051 Mon Sep 17 00:00:00 2001
From: Roman Gushchin <roman.gushchin@linux.dev>
Date: Wed, 8 Mar 2023 14:26:02 -0800
Subject: [PATCH 117/411] firmware: xilinx: don't make a sleepable memory
 allocation from an atomic context

The following issue was discovered using lockdep:
[    6.691371] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:209
[    6.694602] in_atomic(): 1, irqs_disabled(): 128, non_block: 0, pid: 1, name: swapper/0
[    6.702431] 2 locks held by swapper/0/1:
[    6.706300]  #0: ffffff8800f6f188 (&dev->mutex){....}-{3:3}, at: __device_driver_lock+0x4c/0x90
[    6.714900]  #1: ffffffc009a2abb8 (enable_lock){....}-{2:2}, at: clk_enable_lock+0x4c/0x140
[    6.723156] irq event stamp: 304030
[    6.726596] hardirqs last  enabled at (304029): [<ffffffc008d17ee0>] _raw_spin_unlock_irqrestore+0xc0/0xd0
[    6.736142] hardirqs last disabled at (304030): [<ffffffc00876bc5c>] clk_enable_lock+0xfc/0x140
[    6.744742] softirqs last  enabled at (303958): [<ffffffc0080904f0>] _stext+0x4f0/0x894
[    6.752655] softirqs last disabled at (303951): [<ffffffc0080e53b8>] irq_exit+0x238/0x280
[    6.760744] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G     U            5.15.36 #2
[    6.768048] Hardware name: xlnx,zynqmp (DT)
[    6.772179] Call trace:
[    6.774584]  dump_backtrace+0x0/0x300
[    6.778197]  show_stack+0x18/0x30
[    6.781465]  dump_stack_lvl+0xb8/0xec
[    6.785077]  dump_stack+0x1c/0x38
[    6.788345]  ___might_sleep+0x1a8/0x2a0
[    6.792129]  __might_sleep+0x6c/0xd0
[    6.795655]  kmem_cache_alloc_trace+0x270/0x3d0
[    6.800127]  do_feature_check_call+0x100/0x220
[    6.804513]  zynqmp_pm_invoke_fn+0x8c/0xb0
[    6.808555]  zynqmp_pm_clock_getstate+0x90/0xe0
[    6.813027]  zynqmp_pll_is_enabled+0x8c/0x120
[    6.817327]  zynqmp_pll_enable+0x38/0xc0
[    6.821197]  clk_core_enable+0x144/0x400
[    6.825067]  clk_core_enable+0xd4/0x400
[    6.828851]  clk_core_enable+0xd4/0x400
[    6.832635]  clk_core_enable+0xd4/0x400
[    6.836419]  clk_core_enable+0xd4/0x400
[    6.840203]  clk_core_enable+0xd4/0x400
[    6.843987]  clk_core_enable+0xd4/0x400
[    6.847771]  clk_core_enable+0xd4/0x400
[    6.851555]  clk_core_enable_lock+0x24/0x50
[    6.855683]  clk_enable+0x24/0x40
[    6.858952]  fclk_probe+0x84/0xf0
[    6.862220]  platform_probe+0x8c/0x110
[    6.865918]  really_probe+0x110/0x5f0
[    6.869530]  __driver_probe_device+0xcc/0x210
[    6.873830]  driver_probe_device+0x64/0x140
[    6.877958]  __driver_attach+0x114/0x1f0
[    6.881828]  bus_for_each_dev+0xe8/0x160
[    6.885698]  driver_attach+0x34/0x50
[    6.889224]  bus_add_driver+0x228/0x300
[    6.893008]  driver_register+0xc0/0x1e0
[    6.896792]  __platform_driver_register+0x44/0x60
[    6.901436]  fclk_driver_init+0x1c/0x28
[    6.905220]  do_one_initcall+0x104/0x590
[    6.909091]  kernel_init_freeable+0x254/0x2bc
[    6.913390]  kernel_init+0x24/0x130
[    6.916831]  ret_from_fork+0x10/0x20

Fix it by passing the GFP_ATOMIC gfp flag for the corresponding
memory allocation.

Fixes: acfdd18591ea ("firmware: xilinx: Use hash-table for api feature check")
Cc: stable <stable@kernel.org>
Signed-off-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Amit Sunil Dhamne <amit.sunil.dhamne@xilinx.com>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: https://lore.kernel.org/r/20230308222602.123866-1-roman.gushchin@linux.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/xilinx/zynqmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/firmware/xilinx/zynqmp.c b/drivers/firmware/xilinx/zynqmp.c
index acd83d29c8667..ce86a18503054 100644
--- a/drivers/firmware/xilinx/zynqmp.c
+++ b/drivers/firmware/xilinx/zynqmp.c
@@ -206,7 +206,7 @@ static int do_feature_check_call(const u32 api_id)
 	}
 
 	/* Add new entry if not present */
-	feature_data = kmalloc(sizeof(*feature_data), GFP_KERNEL);
+	feature_data = kmalloc(sizeof(*feature_data), GFP_ATOMIC);
 	if (!feature_data)
 		return -ENOMEM;
 

From e921050022f1f12d5029d1487a7dfc46cde15523 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Date: Sun, 26 Feb 2023 18:01:36 +0300
Subject: [PATCH 118/411] Revert "riscv: mm: notify remote harts about mmu
 cache updates"

This reverts the remaining bits of commit 4bd1d80efb5a ("riscv: mm:
notify remote harts harts about mmu cache updates").

According to bug reports, suggested approach to fix stale TLB entries
is not sufficient. It needs to be replaced by a more robust solution.

Fixes: 4bd1d80efb5a ("riscv: mm: notify remote harts about mmu cache updates")
Reported-by: Zong Li <zong.li@sifive.com>
Reported-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Cc: stable@vger.kernel.org
Reviewed-by: Guo Ren <guoren@kernel.org>
Link: https://lore.kernel.org/r/20230226150137.1919750-2-geomatsi@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/mmu.h      |  2 --
 arch/riscv/include/asm/tlbflush.h | 18 ------------------
 arch/riscv/mm/context.c           | 10 ----------
 arch/riscv/mm/tlbflush.c          | 28 +++++++++++++++++-----------
 4 files changed, 17 insertions(+), 41 deletions(-)

diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 5ff1f19fd45c2..0099dc1161683 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,8 +19,6 @@ typedef struct {
 #ifdef CONFIG_SMP
 	/* A local icache flush is needed before user execution can resume. */
 	cpumask_t icache_stale_mask;
-	/* A local tlb flush is needed before user execution can resume. */
-	cpumask_t tlb_stale_mask;
 #endif
 } mm_context_t;
 
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 907b9efd39a87..801019381dea3 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -22,24 +22,6 @@ static inline void local_flush_tlb_page(unsigned long addr)
 {
 	ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
 }
-
-static inline void local_flush_tlb_all_asid(unsigned long asid)
-{
-	__asm__ __volatile__ ("sfence.vma x0, %0"
-			:
-			: "r" (asid)
-			: "memory");
-}
-
-static inline void local_flush_tlb_page_asid(unsigned long addr,
-		unsigned long asid)
-{
-	__asm__ __volatile__ ("sfence.vma %0, %1"
-			:
-			: "r" (addr), "r" (asid)
-			: "memory");
-}
-
 #else /* CONFIG_MMU */
 #define local_flush_tlb_all()			do { } while (0)
 #define local_flush_tlb_page(addr)		do { } while (0)
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 80ce9caba8d22..7acbfbd14557e 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -196,16 +196,6 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
 
 	if (need_flush_tlb)
 		local_flush_tlb_all();
-#ifdef CONFIG_SMP
-	else {
-		cpumask_t *mask = &mm->context.tlb_stale_mask;
-
-		if (cpumask_test_cpu(cpu, mask)) {
-			cpumask_clear_cpu(cpu, mask);
-			local_flush_tlb_all_asid(cntx & asid_mask);
-		}
-	}
-#endif
 }
 
 static void set_mm_noasid(struct mm_struct *mm)
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index ce7dfc81bb3fe..37ed760d007c3 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -5,7 +5,23 @@
 #include <linux/sched.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
-#include <asm/tlbflush.h>
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+	__asm__ __volatile__ ("sfence.vma x0, %0"
+			:
+			: "r" (asid)
+			: "memory");
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+		unsigned long asid)
+{
+	__asm__ __volatile__ ("sfence.vma %0, %1"
+			:
+			: "r" (addr), "r" (asid)
+			: "memory");
+}
 
 void flush_tlb_all(void)
 {
@@ -15,7 +31,6 @@ void flush_tlb_all(void)
 static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 				  unsigned long size, unsigned long stride)
 {
-	struct cpumask *pmask = &mm->context.tlb_stale_mask;
 	struct cpumask *cmask = mm_cpumask(mm);
 	unsigned int cpuid;
 	bool broadcast;
@@ -29,15 +44,6 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 	if (static_branch_unlikely(&use_asid_allocator)) {
 		unsigned long asid = atomic_long_read(&mm->context.id);
 
-		/*
-		 * TLB will be immediately flushed on harts concurrently
-		 * executing this MM context. TLB flush on other harts
-		 * is deferred until this MM context migrates there.
-		 */
-		cpumask_setall(pmask);
-		cpumask_clear_cpu(cpuid, pmask);
-		cpumask_andnot(pmask, pmask, cmask);
-
 		if (broadcast) {
 			sbi_remote_sfence_vma_asid(cmask, start, size, asid);
 		} else if (size <= stride) {

From 82dd33fde0268cc622d3d1ac64971f3f61634142 Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Sun, 26 Feb 2023 18:01:37 +0300
Subject: [PATCH 119/411] riscv: asid: Fixup stale TLB entry cause application
 crash

After use_asid_allocator is enabled, the userspace application will
crash by stale TLB entries. Because only using cpumask_clear_cpu without
local_flush_tlb_all couldn't guarantee CPU's TLB entries were fresh.
Then set_mm_asid would cause the user space application to get a stale
value by stale TLB entry, but set_mm_noasid is okay.

Here is the symptom of the bug:
unhandled signal 11 code 0x1 (coredump)
   0x0000003fd6d22524 <+4>:     auipc   s0,0x70
   0x0000003fd6d22528 <+8>:     ld      s0,-148(s0) # 0x3fd6d92490
=> 0x0000003fd6d2252c <+12>:    ld      a5,0(s0)
(gdb) i r s0
s0          0x8082ed1cc3198b21       0x8082ed1cc3198b21
(gdb) x /2x 0x3fd6d92490
0x3fd6d92490:   0xd80ac8a8      0x0000003f
The core dump file shows that register s0 is wrong, but the value in
memory is correct. Because 'ld s0, -148(s0)' used a stale mapping entry
in TLB and got a wrong result from an incorrect physical address.

When the task ran on CPU0, which loaded/speculative-loaded the value of
address(0x3fd6d92490), then the first version of the mapping entry was
PTWed into CPU0's TLB.
When the task switched from CPU0 to CPU1 (No local_tlb_flush_all here by
asid), it happened to write a value on the address (0x3fd6d92490). It
caused do_page_fault -> wp_page_copy -> ptep_clear_flush ->
ptep_get_and_clear & flush_tlb_page.
The flush_tlb_page used mm_cpumask(mm) to determine which CPUs need TLB
flush, but CPU0 had cleared the CPU0's mm_cpumask in the previous
switch_mm. So we only flushed the CPU1 TLB and set the second version
mapping of the PTE. When the task switched from CPU1 to CPU0 again, CPU0
still used a stale TLB mapping entry which contained a wrong target
physical address. It raised a bug when the task happened to read that
value.

   CPU0                               CPU1
   - switch 'task' in
   - read addr (Fill stale mapping
     entry into TLB)
   - switch 'task' out (no tlb_flush)
                                      - switch 'task' in (no tlb_flush)
                                      - write addr cause pagefault
                                        do_page_fault() (change to
                                        new addr mapping)
                                          wp_page_copy()
                                            ptep_clear_flush()
                                              ptep_get_and_clear()
                                              & flush_tlb_page()
                                        write new value into addr
                                      - switch 'task' out (no tlb_flush)
   - switch 'task' in (no tlb_flush)
   - read addr again (Use stale
     mapping entry in TLB)
     get wrong value from old phyical
     addr, BUG!

The solution is to keep all CPUs' footmarks of cpumask(mm) in switch_mm,
which could guarantee to invalidate all stale TLB entries during TLB
flush.

Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator")
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Tested-by: Zong Li <zong.li@sifive.com>
Tested-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Cc: Anup Patel <apatel@ventanamicro.com>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Cc: stable@vger.kernel.org
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20230226150137.1919750-3-geomatsi@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/context.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 7acbfbd14557e..0f784e3d307bb 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -205,12 +205,24 @@ static void set_mm_noasid(struct mm_struct *mm)
 	local_flush_tlb_all();
 }
 
-static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+static inline void set_mm(struct mm_struct *prev,
+			  struct mm_struct *next, unsigned int cpu)
 {
-	if (static_branch_unlikely(&use_asid_allocator))
-		set_mm_asid(mm, cpu);
-	else
-		set_mm_noasid(mm);
+	/*
+	 * The mm_cpumask indicates which harts' TLBs contain the virtual
+	 * address mapping of the mm. Compared to noasid, using asid
+	 * can't guarantee that stale TLB entries are invalidated because
+	 * the asid mechanism wouldn't flush TLB for every switch_mm for
+	 * performance. So when using asid, keep all CPUs footmarks in
+	 * cpumask() until mm reset.
+	 */
+	cpumask_set_cpu(cpu, mm_cpumask(next));
+	if (static_branch_unlikely(&use_asid_allocator)) {
+		set_mm_asid(next, cpu);
+	} else {
+		cpumask_clear_cpu(cpu, mm_cpumask(prev));
+		set_mm_noasid(next);
+	}
 }
 
 static int __init asids_init(void)
@@ -264,7 +276,8 @@ static int __init asids_init(void)
 }
 early_initcall(asids_init);
 #else
-static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+static inline void set_mm(struct mm_struct *prev,
+			  struct mm_struct *next, unsigned int cpu)
 {
 	/* Nothing to do here when there is no MMU */
 }
@@ -317,10 +330,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 */
 	cpu = smp_processor_id();
 
-	cpumask_clear_cpu(cpu, mm_cpumask(prev));
-	cpumask_set_cpu(cpu, mm_cpumask(next));
-
-	set_mm(next, cpu);
+	set_mm(prev, next, cpu);
 
 	flush_icache_deferred(next, cpu);
 }

From 5cf9d015be160e2d90d29ae74ef1364390e8fce8 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 8 Mar 2023 13:47:11 -0700
Subject: [PATCH 120/411] clk: Avoid invalid function names in CLK_OF_DECLARE()

After commit c28cd1f3433c ("clk: Mark a fwnode as initialized when using
CLK_OF_DECLARE() macro"), drivers/clk/mvebu/kirkwood.c fails to build:

 drivers/clk/mvebu/kirkwood.c:358:1: error: expected identifier or '('
 CLK_OF_DECLARE(98dx1135_clk, "marvell,mv98dx1135-core-clock",
 ^
 include/linux/clk-provider.h:1367:21: note: expanded from macro 'CLK_OF_DECLARE'
         static void __init name##_of_clk_init_declare(struct device_node *np) \
                            ^
 <scratch space>:124:1: note: expanded from here
 98dx1135_clk_of_clk_init_declare
 ^
 drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant
 include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE'
         OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare)
                                         ^
 <scratch space>:125:3: note: expanded from here
 98dx1135_clk_of_clk_init_declare
   ^
 drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant
 include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE'
         OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare)
                                         ^
 <scratch space>:125:3: note: expanded from here
 98dx1135_clk_of_clk_init_declare
   ^
 drivers/clk/mvebu/kirkwood.c:358:1: error: invalid digit 'd' in decimal constant
 include/linux/clk-provider.h:1372:34: note: expanded from macro 'CLK_OF_DECLARE'
         OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare)
                                         ^
 <scratch space>:125:3: note: expanded from here
 98dx1135_clk_of_clk_init_declare
   ^

C function names must start with either an alphabetic letter or an
underscore. To avoid generating invalid function names from clock names,
add two underscores to the beginning of the identifier.

Fixes: c28cd1f3433c ("clk: Mark a fwnode as initialized when using CLK_OF_DECLARE() macro")
Suggested-by: Saravana Kannan <saravanak@google.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20230308-clk_of_declare-fix-v1-1-317b741e2532@kernel.org
Reviewed-by: Saravana Kannan <saravanak@google.com>
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 include/linux/clk-provider.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index c9f5276006a09..6f3175f0678a8 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h
@@ -1364,12 +1364,12 @@ struct clk_hw_onecell_data {
 };
 
 #define CLK_OF_DECLARE(name, compat, fn) \
-	static void __init name##_of_clk_init_declare(struct device_node *np) \
+	static void __init __##name##_of_clk_init_declare(struct device_node *np) \
 	{								\
 		fn(np);							\
 		fwnode_dev_initialized(of_fwnode_handle(np), true);	\
 	}								\
-	OF_DECLARE_1(clk, name, compat, name##_of_clk_init_declare)
+	OF_DECLARE_1(clk, name, compat, __##name##_of_clk_init_declare)
 
 /*
  * Use this macro when you have a driver that requires two initialization

From 4b1a2c2a8e0ddcb89c5f6c5003bd9b53142f69e3 Mon Sep 17 00:00:00 2001
From: Lee Duncan <lduncan@suse.com>
Date: Wed, 28 Sep 2022 11:13:50 -0700
Subject: [PATCH 121/411] scsi: core: Add BLIST_NO_VPD_SIZE for some VDASD

Some storage, such as AIX VDASD (virtual storage) and IBM 2076 (front
end), fail as a result of commit c92a6b5d6335 ("scsi: core: Query VPD
size before getting full page").

That commit changed getting SCSI VPD pages so that we now read just
enough of the page to get the actual page size, then read the whole
page in a second read. The problem is that the above mentioned
hardware returns zero for the page size, because of a firmware
error. In such cases, until the firmware is fixed, this new blacklist
flag says to revert to the original method of reading the VPD pages,
i.e. try to read a whole buffer's worth on the first try.

[mkp: reworked somewhat]

Fixes: c92a6b5d6335 ("scsi: core: Query VPD size before getting full page")
Reported-by: Martin Wilck <mwilck@suse.com>
Suggested-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Lee Duncan <lduncan@suse.com>
Link: https://lore.kernel.org/r/20220928181350.9948-1-leeman.duncan@gmail.com
Tested-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c         | 3 +++
 drivers/scsi/scsi_devinfo.c | 3 ++-
 drivers/scsi/scsi_scan.c    | 3 +++
 include/scsi/scsi_device.h  | 2 ++
 include/scsi/scsi_devinfo.h | 6 +++---
 5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 7d2210a006f0d..5cce1ba70fc60 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -326,6 +326,9 @@ static int scsi_get_vpd_size(struct scsi_device *sdev, u8 page)
 	unsigned char vpd_header[SCSI_VPD_HEADER_SIZE] __aligned(4);
 	int result;
 
+	if (sdev->no_vpd_size)
+		return SCSI_DEFAULT_VPD_LEN;
+
 	/*
 	 * Fetch the VPD page header to find out how big the page
 	 * is. This is done to prevent problems on legacy devices
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index c7080454aea99..bc9d280417f6a 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -134,7 +134,7 @@ static struct {
 	{"3PARdata", "VV", NULL, BLIST_REPORTLUN2},
 	{"ADAPTEC", "AACRAID", NULL, BLIST_FORCELUN},
 	{"ADAPTEC", "Adaptec 5400S", NULL, BLIST_FORCELUN},
-	{"AIX", "VDASD", NULL, BLIST_TRY_VPD_PAGES},
+	{"AIX", "VDASD", NULL, BLIST_TRY_VPD_PAGES | BLIST_NO_VPD_SIZE},
 	{"AFT PRO", "-IX CF", "0.0>", BLIST_FORCELUN},
 	{"BELKIN", "USB 2 HS-CF", "1.95",  BLIST_FORCELUN | BLIST_INQUIRY_36},
 	{"BROWNIE", "1200U3P", NULL, BLIST_NOREPORTLUN},
@@ -188,6 +188,7 @@ static struct {
 	{"HPE", "OPEN-", "*", BLIST_REPORTLUN2 | BLIST_TRY_VPD_PAGES},
 	{"IBM", "AuSaV1S2", NULL, BLIST_FORCELUN},
 	{"IBM", "ProFibre 4000R", "*", BLIST_SPARSELUN | BLIST_LARGELUN},
+	{"IBM", "2076", NULL, BLIST_NO_VPD_SIZE},
 	{"IBM", "2105", NULL, BLIST_RETRY_HWERROR},
 	{"iomega", "jaz 1GB", "J.86", BLIST_NOTQ | BLIST_NOLUN},
 	{"IOMEGA", "ZIP", NULL, BLIST_NOTQ | BLIST_NOLUN},
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 4e842d79de317..d217be323cc69 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1057,6 +1057,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result,
 	else if (*bflags & BLIST_SKIP_VPD_PAGES)
 		sdev->skip_vpd_pages = 1;
 
+	if (*bflags & BLIST_NO_VPD_SIZE)
+		sdev->no_vpd_size = 1;
+
 	transport_configure_device(&sdev->sdev_gendev);
 
 	if (sdev->host->hostt->slave_configure) {
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index de310f21406c5..f10a008e5bfa1 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -145,6 +145,7 @@ struct scsi_device {
 	const char * model;		/* ... after scan; point to static string */
 	const char * rev;		/* ... "nullnullnullnull" before scan */
 
+#define SCSI_DEFAULT_VPD_LEN	255	/* default SCSI VPD page size (max) */
 	struct scsi_vpd __rcu *vpd_pg0;
 	struct scsi_vpd __rcu *vpd_pg83;
 	struct scsi_vpd __rcu *vpd_pg80;
@@ -215,6 +216,7 @@ struct scsi_device {
 					 * creation time */
 	unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */
 	unsigned silence_suspend:1;	/* Do not print runtime PM related messages */
+	unsigned no_vpd_size:1;		/* No VPD size reported in header */
 
 	unsigned int queue_stopped;	/* request queue is quiesced */
 	bool offline_already;		/* Device offline message logged */
diff --git a/include/scsi/scsi_devinfo.h b/include/scsi/scsi_devinfo.h
index 5d14adae21c78..6b548dc2c4965 100644
--- a/include/scsi/scsi_devinfo.h
+++ b/include/scsi/scsi_devinfo.h
@@ -32,7 +32,8 @@
 #define BLIST_IGN_MEDIA_CHANGE	((__force blist_flags_t)(1ULL << 11))
 /* do not do automatic start on add */
 #define BLIST_NOSTARTONADD	((__force blist_flags_t)(1ULL << 12))
-#define __BLIST_UNUSED_13	((__force blist_flags_t)(1ULL << 13))
+/* do not ask for VPD page size first on some broken targets */
+#define BLIST_NO_VPD_SIZE	((__force blist_flags_t)(1ULL << 13))
 #define __BLIST_UNUSED_14	((__force blist_flags_t)(1ULL << 14))
 #define __BLIST_UNUSED_15	((__force blist_flags_t)(1ULL << 15))
 #define __BLIST_UNUSED_16	((__force blist_flags_t)(1ULL << 16))
@@ -74,8 +75,7 @@
 #define __BLIST_HIGH_UNUSED (~(__BLIST_LAST_USED | \
 			       (__force blist_flags_t) \
 			       ((__force __u64)__BLIST_LAST_USED - 1ULL)))
-#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_13 | \
-			     __BLIST_UNUSED_14 | \
+#define __BLIST_UNUSED_MASK (__BLIST_UNUSED_14 | \
 			     __BLIST_UNUSED_15 | \
 			     __BLIST_UNUSED_16 | \
 			     __BLIST_UNUSED_24 | \

From be03df3d4bfe7e8866d4aa43d62e648ffe884f5f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 7 Mar 2023 13:44:28 -0800
Subject: [PATCH 122/411] scsi: core: Fix a procfs host directory removal
 regression

scsi_proc_hostdir_rm() decreases a reference counter and hence must only be
called once per host that is removed. This change does not require a
scsi_add_host_with_dma() change since scsi_add_host_with_dma() will return
0 (success) if scsi_proc_host_add() is called.

Fixes: fc663711b944 ("scsi: core: Remove the /proc/scsi/${proc_name} directory earlier")
Cc: John Garry <john.g.garry@oracle.com>
Reported-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/all/ed6b8027-a9d9-1b45-be8e-df4e8c6c4605@oracle.com/
Reported-by: syzbot+645a4616b87a2f10e398@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-scsi/000000000000890fab05f65342b6@google.com/
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20230307214428.3703498-1-bvanassche@acm.org
Tested-by: John Garry <john.g.garry@oracle.com>
Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f7f62e56afcae..9b6fbbe15d922 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -341,9 +341,6 @@ static void scsi_host_dev_release(struct device *dev)
 	struct Scsi_Host *shost = dev_to_shost(dev);
 	struct device *parent = dev->parent;
 
-	/* In case scsi_remove_host() has not been called. */
-	scsi_proc_hostdir_rm(shost->hostt);
-
 	/* Wait for functions invoked through call_rcu(&scmd->rcu, ...) */
 	rcu_barrier();
 

From c6001025d53ab56d7159cf313313c6b5bd250380 Mon Sep 17 00:00:00 2001
From: Asutosh Das <quic_asutoshd@quicinc.com>
Date: Wed, 8 Mar 2023 15:13:23 -0800
Subject: [PATCH 123/411] scsi: ufs: mcq: Use active_reqs to check busy in
 clock scaling

Multi Circular Queue doesn't use outstanding_reqs. However, the UFS clock
scaling functions use outstanding_reqs to determine if there are requests
pending. When MCQ is enabled, this check always returns false.

Hence use active_reqs to check if there are pending requests.

Fixes: eacb139b77ff ("scsi: ufs: core: mcq: Enable multi-circular queue")
Signed-off-by: Asutosh Das <quic_asutoshd@quicinc.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://lore.kernel.org/r/a24e0d646aac70eae0fc5e05fac0c58bb7e6e680.1678317160.git.quic_asutoshd@quicinc.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 05eac965ee275..37e178a9ac475 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -1500,7 +1500,7 @@ static int ufshcd_devfreq_get_dev_status(struct device *dev,
 	scaling->window_start_t = curr_t;
 	scaling->tot_busy_t = 0;
 
-	if (hba->outstanding_reqs) {
+	if (scaling->active_reqs) {
 		scaling->busy_start_t = curr_t;
 		scaling->is_busy_started = true;
 	} else {
@@ -2118,7 +2118,7 @@ static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba)
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->clk_scaling.active_reqs--;
-	if (!hba->outstanding_reqs && scaling->is_busy_started) {
+	if (!scaling->active_reqs && scaling->is_busy_started) {
 		scaling->tot_busy_t += ktime_to_us(ktime_sub(ktime_get(),
 					scaling->busy_start_t));
 		scaling->busy_start_t = 0;

From e0213434fe3e4a0d118923dc98d31e7ff1cd9e45 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Wed, 1 Mar 2023 20:00:52 -0500
Subject: [PATCH 124/411] tracing: Do not let histogram values have some
 modifiers

Histogram values can not be strings, stacktraces, graphs, symbols,
syscalls, or grouped in buckets or log. Give an error if a value is set to
do so.

Note, the histogram code was not prepared to handle these modifiers for
histograms and caused a bug.

Mark Rutland reported:

 # echo 'p:copy_to_user __arch_copy_to_user n=$arg2' >> /sys/kernel/tracing/kprobe_events
 # echo 'hist:keys=n:vals=hitcount.buckets=8:sort=hitcount' > /sys/kernel/tracing/events/kprobes/copy_to_user/trigger
 # cat /sys/kernel/tracing/events/kprobes/copy_to_user/hist
[  143.694628] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
[  143.695190] Mem abort info:
[  143.695362]   ESR = 0x0000000096000004
[  143.695604]   EC = 0x25: DABT (current EL), IL = 32 bits
[  143.695889]   SET = 0, FnV = 0
[  143.696077]   EA = 0, S1PTW = 0
[  143.696302]   FSC = 0x04: level 0 translation fault
[  143.702381] Data abort info:
[  143.702614]   ISV = 0, ISS = 0x00000004
[  143.702832]   CM = 0, WnR = 0
[  143.703087] user pgtable: 4k pages, 48-bit VAs, pgdp=00000000448f9000
[  143.703407] [0000000000000000] pgd=0000000000000000, p4d=0000000000000000
[  143.704137] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
[  143.704714] Modules linked in:
[  143.705273] CPU: 0 PID: 133 Comm: cat Not tainted 6.2.0-00003-g6fc512c10a7c #3
[  143.706138] Hardware name: linux,dummy-virt (DT)
[  143.706723] pstate: 80000005 (Nzcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[  143.707120] pc : hist_field_name.part.0+0x14/0x140
[  143.707504] lr : hist_field_name.part.0+0x104/0x140
[  143.707774] sp : ffff800008333a30
[  143.707952] x29: ffff800008333a30 x28: 0000000000000001 x27: 0000000000400cc0
[  143.708429] x26: ffffd7a653b20260 x25: 0000000000000000 x24: ffff10d303ee5800
[  143.708776] x23: ffffd7a6539b27b0 x22: ffff10d303fb8c00 x21: 0000000000000001
[  143.709127] x20: ffff10d303ec2000 x19: 0000000000000000 x18: 0000000000000000
[  143.709478] x17: 0000000000000000 x16: 0000000000000000 x15: 0000000000000000
[  143.709824] x14: 0000000000000000 x13: 203a6f666e692072 x12: 6567676972742023
[  143.710179] x11: 0a230a6d6172676f x10: 000000000000002c x9 : ffffd7a6521e018c
[  143.710584] x8 : 000000000000002c x7 : 7f7f7f7f7f7f7f7f x6 : 000000000000002c
[  143.710915] x5 : ffff10d303b0103e x4 : ffffd7a653b20261 x3 : 000000000000003d
[  143.711239] x2 : 0000000000020001 x1 : 0000000000000001 x0 : 0000000000000000
[  143.711746] Call trace:
[  143.712115]  hist_field_name.part.0+0x14/0x140
[  143.712642]  hist_field_name.part.0+0x104/0x140
[  143.712925]  hist_field_print+0x28/0x140
[  143.713125]  event_hist_trigger_print+0x174/0x4d0
[  143.713348]  hist_show+0xf8/0x980
[  143.713521]  seq_read_iter+0x1bc/0x4b0
[  143.713711]  seq_read+0x8c/0xc4
[  143.713876]  vfs_read+0xc8/0x2a4
[  143.714043]  ksys_read+0x70/0xfc
[  143.714218]  __arm64_sys_read+0x24/0x30
[  143.714400]  invoke_syscall+0x50/0x120
[  143.714587]  el0_svc_common.constprop.0+0x4c/0x100
[  143.714807]  do_el0_svc+0x44/0xd0
[  143.714970]  el0_svc+0x2c/0x84
[  143.715134]  el0t_64_sync_handler+0xbc/0x140
[  143.715334]  el0t_64_sync+0x190/0x194
[  143.715742] Code: a9bd7bfd 910003fd a90153f3 aa0003f3 (f9400000)
[  143.716510] ---[ end trace 0000000000000000 ]---
Segmentation fault

Link: https://lkml.kernel.org/r/20230302020810.559462599@goodmis.org

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Fixes: c6afad49d127f ("tracing: Add hist trigger 'sym' and 'sym-offset' modifiers")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 89877a18f9330..6e8ab726a7b5e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -4235,6 +4235,15 @@ static int __create_val_field(struct hist_trigger_data *hist_data,
 		goto out;
 	}
 
+	/* Some types cannot be a value */
+	if (hist_field->flags & (HIST_FIELD_FL_GRAPH | HIST_FIELD_FL_PERCENT |
+				 HIST_FIELD_FL_BUCKET | HIST_FIELD_FL_LOG2 |
+				 HIST_FIELD_FL_SYM | HIST_FIELD_FL_SYM_OFFSET |
+				 HIST_FIELD_FL_SYSCALL | HIST_FIELD_FL_STACKTRACE)) {
+		hist_err(file->tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(field_str));
+		ret = -EINVAL;
+	}
+
 	hist_data->fields[val_idx] = hist_field;
 
 	++hist_data->n_vals;

From 9f116f76fa8c04c81aef33ad870dbf9a158e5b70 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Wed, 1 Mar 2023 20:00:53 -0500
Subject: [PATCH 125/411] tracing: Check field value in hist_field_name()

The function hist_field_name() cannot handle being passed a NULL field
parameter. It should never be NULL, but due to a previous bug, NULL was
passed to the function and the kernel crashed due to a NULL dereference.
Mark Rutland reported this to me on IRC.

The bug was fixed, but to prevent future bugs from crashing the kernel,
check the field and add a WARN_ON() if it is NULL.

Link: https://lkml.kernel.org/r/20230302020810.762384440@goodmis.org

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Mark Rutland <mark.rutland@arm.com>
Fixes: c6afad49d127f ("tracing: Add hist trigger 'sym' and 'sym-offset' modifiers")
Tested-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 6e8ab726a7b5e..486cca3c2b754 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1331,6 +1331,9 @@ static const char *hist_field_name(struct hist_field *field,
 {
 	const char *field_name = "";
 
+	if (WARN_ON_ONCE(!field))
+		return field_name;
+
 	if (level > 1)
 		return field_name;
 

From ee92fa443358f4fc0017c1d0d325c27b37802504 Mon Sep 17 00:00:00 2001
From: Chen Zhongjin <chenzhongjin@huawei.com>
Date: Thu, 9 Mar 2023 16:02:30 +0800
Subject: [PATCH 126/411] ftrace: Fix invalid address access in lookup_rec()
 when index is 0

KASAN reported follow problem:

 BUG: KASAN: use-after-free in lookup_rec
 Read of size 8 at addr ffff000199270ff0 by task modprobe
 CPU: 2 Comm: modprobe
 Call trace:
  kasan_report
  __asan_load8
  lookup_rec
  ftrace_location
  arch_check_ftrace_location
  check_kprobe_address_safe
  register_kprobe

When checking pg->records[pg->index - 1].ip in lookup_rec(), it can get a
pg which is newly added to ftrace_pages_start in ftrace_process_locs().
Before the first pg->index++, index is 0 and accessing pg->records[-1].ip
will cause this problem.

Don't check the ip when pg->index is 0.

Link: https://lore.kernel.org/linux-trace-kernel/20230309080230.36064-1-chenzhongjin@huawei.com

Cc: stable@vger.kernel.org
Fixes: 9644302e3315 ("ftrace: Speed up search by skipping pages by address")
Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 750aa3f08b25a..a47f7d93e32d2 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1537,7 +1537,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
 	key.flags = end;	/* overload flags, as it is unsigned long */
 
 	for (pg = ftrace_pages_start; pg; pg = pg->next) {
-		if (end < pg->records[0].ip ||
+		if (pg->index == 0 ||
+		    end < pg->records[0].ip ||
 		    start >= (pg->records[pg->index - 1].ip + MCOUNT_INSN_SIZE))
 			continue;
 		rec = bsearch(&key, pg->records, pg->index,

From aa69f814920d85a2d4cfd5c294757c3d59d2fba6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 31 Jan 2023 10:36:30 +0100
Subject: [PATCH 127/411] ftrace,kcfi: Define ftrace_stub_graph conditionally

When CONFIG_FUNCTION_GRAPH_TRACER is disabled, __kcfi_typeid_ftrace_stub_graph
is missing, causing a link failure:

 ld.lld: error: undefined symbol: __kcfi_typeid_ftrace_stub_graph
 referenced by arch/x86/kernel/ftrace_64.o:(__cfi_ftrace_stub_graph) in archive vmlinux.a

Mark the reference to it as conditional on the same symbol, as
is done on arm64.

Link: https://lore.kernel.org/linux-trace-kernel/20230131093643.3850272-1-arnd@kernel.org

Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Fixes: 883bbbffa5a4 ("ftrace,kcfi: Separate ftrace_stub() and ftrace_stub_graph()")
See-also: 2598ac6ec493 ("arm64: ftrace: Define ftrace_stub_graph only with FUNCTION_GRAPH_TRACER")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace_64.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 1265ad519249c..fb4f1e01b64a2 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -136,10 +136,12 @@ SYM_TYPED_FUNC_START(ftrace_stub)
 	RET
 SYM_FUNC_END(ftrace_stub)
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
 SYM_TYPED_FUNC_START(ftrace_stub_graph)
 	CALL_DEPTH_ACCOUNT
 	RET
 SYM_FUNC_END(ftrace_stub_graph)
+#endif
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 

From bced3f7db95ff2e6ca29dc4d1c9751ab5e736a09 Mon Sep 17 00:00:00 2001
From: Breno Leitao <leitao@debian.org>
Date: Wed, 8 Mar 2023 11:07:45 -0800
Subject: [PATCH 128/411] tcp: tcp_make_synack() can be called from process
 context

tcp_rtx_synack() now could be called in process context as explained in
0a375c822497 ("tcp: tcp_rtx_synack() can be called from process
context").

tcp_rtx_synack() might call tcp_make_synack(), which will touch per-CPU
variables with preemption enabled. This causes the following BUG:

    BUG: using __this_cpu_add() in preemptible [00000000] code: ThriftIO1/5464
    caller is tcp_make_synack+0x841/0xac0
    Call Trace:
     <TASK>
     dump_stack_lvl+0x10d/0x1a0
     check_preemption_disabled+0x104/0x110
     tcp_make_synack+0x841/0xac0
     tcp_v6_send_synack+0x5c/0x450
     tcp_rtx_synack+0xeb/0x1f0
     inet_rtx_syn_ack+0x34/0x60
     tcp_check_req+0x3af/0x9e0
     tcp_rcv_state_process+0x59b/0x2030
     tcp_v6_do_rcv+0x5f5/0x700
     release_sock+0x3a/0xf0
     tcp_sendmsg+0x33/0x40
     ____sys_sendmsg+0x2f2/0x490
     __sys_sendmsg+0x184/0x230
     do_syscall_64+0x3d/0x90

Avoid calling __TCP_INC_STATS() with will touch per-cpu variables. Use
TCP_INC_STATS() which is safe to be called from context switch.

Fixes: 8336886f786f ("tcp: TCP Fast Open Server - support TFO listeners")
Signed-off-by: Breno Leitao <leitao@debian.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230308190745.780221-1-leitao@debian.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/tcp_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 71d01cf3c13eb..ba839e441450f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3605,7 +3605,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
 	th->window = htons(min(req->rsk_rcv_wnd, 65535U));
 	tcp_options_write(th, NULL, &opts);
 	th->doff = (tcp_header_size >> 2);
-	__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */

From aed8efddd39b3434c96718d39009285c52b1cafc Mon Sep 17 00:00:00 2001
From: Cindy Lu <lulu@redhat.com>
Date: Tue, 14 Feb 2023 16:09:24 +0800
Subject: [PATCH 129/411] vp_vdpa: fix the crash in hot unplug with vp_vdpa

While unplugging the vp_vdpa device, it triggers a kernel panic
The root cause is: vdpa_mgmtdev_unregister() will accesses modern
devices which will cause a use after free.
So need to change the sequence in vp_vdpa_remove

[  195.003359] BUG: unable to handle page fault for address: ff4e8beb80199014
[  195.004012] #PF: supervisor read access in kernel mode
[  195.004486] #PF: error_code(0x0000) - not-present page
[  195.004960] PGD 100000067 P4D 1001b6067 PUD 1001b7067 PMD 1001b8067 PTE 0
[  195.005578] Oops: 0000 1 PREEMPT SMP PTI
[  195.005968] CPU: 13 PID: 164 Comm: kworker/u56:10 Kdump: loaded Not tainted 5.14.0-252.el9.x86_64 #1
[  195.006792] Hardware name: Red Hat KVM/RHEL, BIOS edk2-20221207gitfff6d81270b5-2.el9 unknown
[  195.007556] Workqueue: kacpi_hotplug acpi_hotplug_work_fn
[  195.008059] RIP: 0010:ioread8+0x31/0x80
[  195.008418] Code: 77 28 48 81 ff 00 00 01 00 76 0b 89 fa ec 0f b6 c0 c3 cc cc cc cc 8b 15 ad 72 93 01 b8 ff 00 00 00 85 d2 75 0f c3 cc cc cc cc <8a> 07 0f b6 c0 c3 cc cc cc cc 83 ea 01 48 83 ec 08 48 89 fe 48 c7
[  195.010104] RSP: 0018:ff4e8beb8067bab8 EFLAGS: 00010292
[  195.010584] RAX: ffffffffc05834a0 RBX: ffffffffc05843c0 RCX: ff4e8beb8067bae0
[  195.011233] RDX: ff1bcbd580f88000 RSI: 0000000000000246 RDI: ff4e8beb80199014
[  195.011881] RBP: ff1bcbd587e39000 R08: ffffffff916fa2d0 R09: ff4e8beb8067ba68
[  195.012527] R10: 000000000000001c R11: 0000000000000000 R12: ff1bcbd5a3de9120
[  195.013179] R13: ffffffffc062d000 R14: 0000000000000080 R15: ff1bcbe402bc7805
[  195.013826] FS:  0000000000000000(0000) GS:ff1bcbe402740000(0000) knlGS:0000000000000000
[  195.014564] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  195.015093] CR2: ff4e8beb80199014 CR3: 0000000107dea002 CR4: 0000000000771ee0
[  195.015741] PKRU: 55555554
[  195.016001] Call Trace:
[  195.016233]  <TASK>
[  195.016434]  vp_modern_get_status+0x12/0x20
[  195.016823]  vp_vdpa_reset+0x1b/0x50 [vp_vdpa]
[  195.017238]  virtio_vdpa_reset+0x3c/0x48 [virtio_vdpa]
[  195.017709]  remove_vq_common+0x1f/0x3a0 [virtio_net]
[  195.018178]  virtnet_remove+0x5d/0x70 [virtio_net]
[  195.018618]  virtio_dev_remove+0x3d/0x90
[  195.018986]  device_release_driver_internal+0x1aa/0x230
[  195.019466]  bus_remove_device+0xd8/0x150
[  195.019841]  device_del+0x18b/0x3f0
[  195.020167]  ? kernfs_find_ns+0x35/0xd0
[  195.020526]  device_unregister+0x13/0x60
[  195.020894]  unregister_virtio_device+0x11/0x20
[  195.021311]  device_release_driver_internal+0x1aa/0x230
[  195.021790]  bus_remove_device+0xd8/0x150
[  195.022162]  device_del+0x18b/0x3f0
[  195.022487]  device_unregister+0x13/0x60
[  195.022852]  ? vdpa_dev_remove+0x30/0x30 [vdpa]
[  195.023270]  vp_vdpa_dev_del+0x12/0x20 [vp_vdpa]
[  195.023694]  vdpa_match_remove+0x2b/0x40 [vdpa]
[  195.024115]  bus_for_each_dev+0x78/0xc0
[  195.024471]  vdpa_mgmtdev_unregister+0x65/0x80 [vdpa]
[  195.024937]  vp_vdpa_remove+0x23/0x40 [vp_vdpa]
[  195.025353]  pci_device_remove+0x36/0xa0
[  195.025719]  device_release_driver_internal+0x1aa/0x230
[  195.026201]  pci_stop_bus_device+0x6c/0x90
[  195.026580]  pci_stop_and_remove_bus_device+0xe/0x20
[  195.027039]  disable_slot+0x49/0x90
[  195.027366]  acpiphp_disable_and_eject_slot+0x15/0x90
[  195.027832]  hotplug_event+0xea/0x210
[  195.028171]  ? hotplug_event+0x210/0x210
[  195.028535]  acpiphp_hotplug_notify+0x22/0x80
[  195.028942]  ? hotplug_event+0x210/0x210
[  195.029303]  acpi_device_hotplug+0x8a/0x1d0
[  195.029690]  acpi_hotplug_work_fn+0x1a/0x30
[  195.030077]  process_one_work+0x1e8/0x3c0
[  195.030451]  worker_thread+0x50/0x3b0
[  195.030791]  ? rescuer_thread+0x3a0/0x3a0
[  195.031165]  kthread+0xd9/0x100
[  195.031459]  ? kthread_complete_and_exit+0x20/0x20
[  195.031899]  ret_from_fork+0x22/0x30
[  195.032233]  </TASK>

Fixes: ffbda8e9df10 ("vdpa/vp_vdpa : add vdpa tool support in vp_vdpa")
Tested-by: Lei Yang <leiyang@redhat.com>
Cc: stable@vger.kernel.org
Signed-off-by: Cindy Lu <lulu@redhat.com>
Message-Id: <20230214080924.131462-1-lulu@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vdpa/virtio_pci/vp_vdpa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vdpa/virtio_pci/vp_vdpa.c b/drivers/vdpa/virtio_pci/vp_vdpa.c
index 8fe267ca3e76f..281287fae89f1 100644
--- a/drivers/vdpa/virtio_pci/vp_vdpa.c
+++ b/drivers/vdpa/virtio_pci/vp_vdpa.c
@@ -645,8 +645,8 @@ static void vp_vdpa_remove(struct pci_dev *pdev)
 	struct virtio_pci_modern_device *mdev = NULL;
 
 	mdev = vp_vdpa_mgtdev->mdev;
-	vp_modern_remove(mdev);
 	vdpa_mgmtdev_unregister(&vp_vdpa_mgtdev->mgtdev);
+	vp_modern_remove(mdev);
 	kfree(vp_vdpa_mgtdev->mgtdev.id_table);
 	kfree(mdev);
 	kfree(vp_vdpa_mgtdev);

From 09e65ee9059d76b89cb713795748805efd3f50c6 Mon Sep 17 00:00:00 2001
From: Si-Wei Liu <si-wei.liu@oracle.com>
Date: Tue, 14 Feb 2023 17:30:40 -0800
Subject: [PATCH 130/411] vdpa/mlx5: should not activate virtq object when
 suspended

Otherwise the virtqueue object to instate could point to invalid address
that was unmapped from the MTT:

  mlx5_core 0000:41:04.2: mlx5_cmd_out_err:782:(pid 8321):
  CREATE_GENERAL_OBJECT(0xa00) op_mod(0xd) failed, status
  bad parameter(0x3), syndrome (0x5fa1c), err(-22)

Fixes: cae15c2ed8e6 ("vdpa/mlx5: Implement susupend virtqueue callback")
Cc: Eli Cohen <elic@nvidia.com>
Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
Reviewed-by: Eli Cohen <elic@nvidia.com>

Message-Id: <1676424640-11673-1-git-send-email-si-wei.liu@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vdpa/mlx5/core/mlx5_vdpa.h | 1 +
 drivers/vdpa/mlx5/net/mlx5_vnet.c  | 6 +++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 058fbe28107e9..25fc4120b618d 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -96,6 +96,7 @@ struct mlx5_vdpa_dev {
 	struct mlx5_control_vq cvq;
 	struct workqueue_struct *wq;
 	unsigned int group2asid[MLX5_VDPA_NUMVQ_GROUPS];
+	bool suspended;
 };
 
 int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 3a0e721aef05f..520646ae7fa01 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2438,7 +2438,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
 	if (err)
 		goto err_mr;
 
-	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
+	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
 		goto err_mr;
 
 	restore_channels_info(ndev);
@@ -2606,6 +2606,7 @@ static int mlx5_vdpa_reset(struct vdpa_device *vdev)
 	clear_vqs_ready(ndev);
 	mlx5_vdpa_destroy_mr(&ndev->mvdev);
 	ndev->mvdev.status = 0;
+	ndev->mvdev.suspended = false;
 	ndev->cur_num_vqs = 0;
 	ndev->mvdev.cvq.received_desc = 0;
 	ndev->mvdev.cvq.completed_desc = 0;
@@ -2852,6 +2853,8 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
 	struct mlx5_vdpa_virtqueue *mvq;
 	int i;
 
+	mlx5_vdpa_info(mvdev, "suspending device\n");
+
 	down_write(&ndev->reslock);
 	ndev->nb_registered = false;
 	mlx5_notifier_unregister(mvdev->mdev, &ndev->nb);
@@ -2861,6 +2864,7 @@ static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
 		suspend_vq(ndev, mvq);
 	}
 	mlx5_vdpa_cvq_suspend(mvdev);
+	mvdev->suspended = true;
 	up_write(&ndev->reslock);
 	return 0;
 }

From 06be62083c5308c76a891ca975d66d832e2afc07 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 10 Mar 2023 09:48:45 +0000
Subject: [PATCH 131/411] nvmem: core: return -ENOENT if nvmem cell is not
 found

Prior to commit 5d8e6e6c10a3 ("nvmem: core: add an index parameter to
the cell") of_nvmem_cell_get() would return -ENOENT if the cell wasn't
found. Particularly, if of_property_match_string() returned -EINVAL,
that return code was passed as the index to of_parse_phandle(), which
then detected it as invalid and returned NULL. That led to an return
code of -ENOENT.

With the new code, the negative index will lead to an -EINVAL of
of_parse_phandle_with_optional_args() which pass straight to the
caller and break those who expect an -ENOENT.

Fix it by always returning -ENOENT.

Fixes: 5d8e6e6c10a3 ("nvmem: core: add an index parameter to the cell")
Reported-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/2143916.GUh0CODmnK@steina-w/
Signed-off-by: Michael Walle <michael@walle.cc>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20230310094845.139400-1-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c
index 174ef3574e07f..22024b830788f 100644
--- a/drivers/nvmem/core.c
+++ b/drivers/nvmem/core.c
@@ -1231,7 +1231,7 @@ struct nvmem_cell *of_nvmem_cell_get(struct device_node *np, const char *id)
 						  "#nvmem-cell-cells",
 						  index, &cell_spec);
 	if (ret)
-		return ERR_PTR(ret);
+		return ERR_PTR(-ENOENT);
 
 	if (cell_spec.args_count > 1)
 		return ERR_PTR(-EINVAL);

From f624bb6fad23df3270580b4fcef415c6e7bf7705 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 1 Mar 2023 12:09:29 +0200
Subject: [PATCH 132/411] wifi: nl80211: fix NULL-ptr deref in offchan check

If, e.g. in AP mode, the link was already created by userspace
but not activated yet, it has a chandef but the chandef isn't
valid and has no channel. Check for this and ignore this link.

Fixes: 7b0a0e3c3a88 ("wifi: cfg80211: do some rework towards MLO link APIs")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230301115906.71bd4803fbb9.Iee39c0f6c2d3a59a8227674dc55d52e38b1090cf@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 112b4bb009c80..51f6582eff7bc 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -8901,7 +8901,7 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev,
 		struct cfg80211_chan_def *chandef;
 
 		chandef = wdev_chandef(wdev, link_id);
-		if (!chandef)
+		if (!chandef || !chandef->chan)
 			continue;
 
 		/*

From b27f07c50a73e34eefb6b1030b235192b7ded850 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 24 Feb 2023 13:36:57 +0100
Subject: [PATCH 133/411] wifi: nl80211: fix puncturing bitmap policy

This was meant to be a u32, and while applying the patch
I tried to use policy validation for it. However, not only
did I copy/paste it to u8 instead of u32, but also used
the policy range erroneously. Fix both of these issues.

Fixes: d7c1a9a0ed18 ("wifi: nl80211: validate and configure puncturing bitmap")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 51f6582eff7bc..6869781283e28 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -462,6 +462,11 @@ nl80211_sta_wme_policy[NL80211_STA_WME_MAX + 1] = {
 	[NL80211_STA_WME_MAX_SP] = { .type = NLA_U8 },
 };
 
+static struct netlink_range_validation nl80211_punct_bitmap_range = {
+	.min = 0,
+	.max = 0xffff,
+};
+
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
@@ -805,7 +810,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN),
 	[NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
-	[NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_RANGE(NLA_U8, 0, 0xffff),
+	[NL80211_ATTR_PUNCT_BITMAP] =
+		NLA_POLICY_FULL_RANGE(NLA_U32, &nl80211_punct_bitmap_range),
 };
 
 /* policy for the key attributes */

From ce04abc3fcc62cd5640af981ebfd7c4dc3bded28 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 24 Feb 2023 10:52:19 +0100
Subject: [PATCH 134/411] wifi: mac80211: check basic rates validity

When userspace sets basic rates, it might send us some rates
list that's empty or consists of invalid values only. We're
currently ignoring invalid values and then may end up with a
rates bitmap that's empty, which later results in a warning.

Reject the call if there were no valid rates.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8eb3423008687..d3d861911ed65 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2611,6 +2611,17 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 	if (!sband)
 		return -EINVAL;
 
+	if (params->basic_rates) {
+		if (!ieee80211_parse_bitrates(link->conf->chandef.width,
+					      wiphy->bands[sband->band],
+					      params->basic_rates,
+					      params->basic_rates_len,
+					      &link->conf->basic_rates))
+			return -EINVAL;
+		changed |= BSS_CHANGED_BASIC_RATES;
+		ieee80211_check_rate_mask(link);
+	}
+
 	if (params->use_cts_prot >= 0) {
 		link->conf->use_cts_prot = params->use_cts_prot;
 		changed |= BSS_CHANGED_ERP_CTS_PROT;
@@ -2632,16 +2643,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 		changed |= BSS_CHANGED_ERP_SLOT;
 	}
 
-	if (params->basic_rates) {
-		ieee80211_parse_bitrates(link->conf->chandef.width,
-					 wiphy->bands[sband->band],
-					 params->basic_rates,
-					 params->basic_rates_len,
-					 &link->conf->basic_rates);
-		changed |= BSS_CHANGED_BASIC_RATES;
-		ieee80211_check_rate_mask(link);
-	}
-
 	if (params->ap_isolate >= 0) {
 		if (params->ap_isolate)
 			sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS;

From 96c069508377547f913e7265a80fffe9355de592 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 1 Mar 2023 12:09:33 +0200
Subject: [PATCH 135/411] wifi: cfg80211: fix MLO connection ownership

When disconnecting from an MLO connection we need the AP
MLD address, not an arbitrary BSSID. Fix the code to do
that.

Fixes: 9ecff10e82a5 ("wifi: nl80211: refactor BSS lookup in nl80211_associate()")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
Link: https://lore.kernel.org/r/20230301115906.4c1b3b18980e.I008f070c7f3b8e8bde9278101ef9e40706a82902@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6869781283e28..4f63059efd813 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -10799,8 +10799,7 @@ static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev,
 
 static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device *rdev,
 					      const u8 *ssid, int ssid_len,
-					      struct nlattr **attrs,
-					      const u8 **bssid_out)
+					      struct nlattr **attrs)
 {
 	struct ieee80211_channel *chan;
 	struct cfg80211_bss *bss;
@@ -10827,7 +10826,6 @@ static struct cfg80211_bss *nl80211_assoc_bss(struct cfg80211_registered_device
 	if (!bss)
 		return ERR_PTR(-ENOENT);
 
-	*bssid_out = bssid;
 	return bss;
 }
 
@@ -10837,7 +10835,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	struct net_device *dev = info->user_ptr[1];
 	struct cfg80211_assoc_request req = {};
 	struct nlattr **attrs = NULL;
-	const u8 *bssid, *ssid;
+	const u8 *ap_addr, *ssid;
 	unsigned int link_id;
 	int err, ssid_len;
 
@@ -10974,6 +10972,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 
 		req.ap_mld_addr = nla_data(info->attrs[NL80211_ATTR_MLD_ADDR]);
+		ap_addr = req.ap_mld_addr;
 
 		attrs = kzalloc(attrsize, GFP_KERNEL);
 		if (!attrs)
@@ -10999,8 +10998,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 				goto free;
 			}
 			req.links[link_id].bss =
-				nl80211_assoc_bss(rdev, ssid, ssid_len, attrs,
-						  &bssid);
+				nl80211_assoc_bss(rdev, ssid, ssid_len, attrs);
 			if (IS_ERR(req.links[link_id].bss)) {
 				err = PTR_ERR(req.links[link_id].bss);
 				req.links[link_id].bss = NULL;
@@ -11051,10 +11049,10 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		if (req.link_id >= 0)
 			return -EINVAL;
 
-		req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs,
-					    &bssid);
+		req.bss = nl80211_assoc_bss(rdev, ssid, ssid_len, info->attrs);
 		if (IS_ERR(req.bss))
 			return PTR_ERR(req.bss);
+		ap_addr = req.bss->bssid;
 	}
 
 	err = nl80211_crypto_settings(rdev, info, &req.crypto, 1);
@@ -11067,7 +11065,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 			dev->ieee80211_ptr->conn_owner_nlportid =
 				info->snd_portid;
 			memcpy(dev->ieee80211_ptr->disconnect_bssid,
-			       bssid, ETH_ALEN);
+			       ap_addr, ETH_ALEN);
 		}
 
 		wdev_unlock(dev->ieee80211_ptr);

From fe9ae05cfbe587dda724fcf537c00bc2f287da62 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 8 Mar 2023 11:50:12 +0100
Subject: [PATCH 136/411] fbdev: Fix incorrect page mapping clearance at
 fb_deferred_io_release()

The recent fix for the deferred I/O by the commit
  3efc61d95259 ("fbdev: Fix invalid page access after closing deferred I/O devices")
caused a regression when the same fb device is opened/closed while
it's being used.  It resulted in a frozen screen even if something
is redrawn there after the close.  The breakage is because the patch
was made under a wrong assumption of a single open; in the current
code, fb_deferred_io_release() cleans up the page mapping of the
pageref list and it calls cancel_delayed_work_sync() unconditionally,
where both are no correct behavior for multiple opens.

This patch adds a refcount for the opens of the device, and applies
the cleanup only when all files get closed.

As both fb_deferred_io_open() and _close() are called always in the
fb_info lock (mutex), it's safe to use the normal int for the
refcounting.

Also, a useless BUG_ON() is dropped.

Fixes: 3efc61d95259 ("fbdev: Fix invalid page access after closing deferred I/O devices")
Cc: <stable@vger.kernel.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Reviewed-by: Patrik Jakobsson <patrik.r.jakobsson@gmail.com>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Link: https://patchwork.freedesktop.org/patch/msgid/20230308105012.1845-1-tiwai@suse.de
---
 drivers/video/fbdev/core/fb_defio.c | 17 +++++++++++++----
 include/linux/fb.h                  |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/core/fb_defio.c b/drivers/video/fbdev/core/fb_defio.c
index 583cbcf094467..a3cf1f764f29b 100644
--- a/drivers/video/fbdev/core/fb_defio.c
+++ b/drivers/video/fbdev/core/fb_defio.c
@@ -309,17 +309,18 @@ void fb_deferred_io_open(struct fb_info *info,
 			 struct inode *inode,
 			 struct file *file)
 {
+	struct fb_deferred_io *fbdefio = info->fbdefio;
+
 	file->f_mapping->a_ops = &fb_deferred_io_aops;
+	fbdefio->open_count++;
 }
 EXPORT_SYMBOL_GPL(fb_deferred_io_open);
 
-void fb_deferred_io_release(struct fb_info *info)
+static void fb_deferred_io_lastclose(struct fb_info *info)
 {
-	struct fb_deferred_io *fbdefio = info->fbdefio;
 	struct page *page;
 	int i;
 
-	BUG_ON(!fbdefio);
 	cancel_delayed_work_sync(&info->deferred_work);
 
 	/* clear out the mapping that we setup */
@@ -328,13 +329,21 @@ void fb_deferred_io_release(struct fb_info *info)
 		page->mapping = NULL;
 	}
 }
+
+void fb_deferred_io_release(struct fb_info *info)
+{
+	struct fb_deferred_io *fbdefio = info->fbdefio;
+
+	if (!--fbdefio->open_count)
+		fb_deferred_io_lastclose(info);
+}
 EXPORT_SYMBOL_GPL(fb_deferred_io_release);
 
 void fb_deferred_io_cleanup(struct fb_info *info)
 {
 	struct fb_deferred_io *fbdefio = info->fbdefio;
 
-	fb_deferred_io_release(info);
+	fb_deferred_io_lastclose(info);
 
 	kvfree(info->pagerefs);
 	mutex_destroy(&fbdefio->lock);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 73eb1f85ea8e5..05e40fcc76964 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -212,6 +212,7 @@ struct fb_deferred_io {
 	/* delay between mkwrite and deferred handler */
 	unsigned long delay;
 	bool sort_pagereflist; /* sort pagelist by offset */
+	int open_count; /* number of opened files; protected by fb_info lock */
 	struct mutex lock; /* mutex that protects the pageref list */
 	struct list_head pagereflist; /* list of pagerefs for touched pages */
 	/* callback */

From 624c60f326c6e5a80b008e8a5c7feffe8c27dc72 Mon Sep 17 00:00:00 2001
From: Guillaume Tucker <guillaume.tucker@collabora.com>
Date: Tue, 9 Aug 2022 16:22:31 +0200
Subject: [PATCH 137/411] selftests: fix LLVM build for i386 and x86_64

Add missing cases for the i386 and x86_64 architectures when
determining the LLVM target for building kselftest.

Fixes: 795285ef2425 ("selftests: Fix clang cross compilation")
Signed-off-by: Guillaume Tucker <guillaume.tucker@collabora.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 tools/testing/selftests/lib.mk | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index f7900e75d2306..05400462c7799 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -10,12 +10,14 @@ endif
 CLANG_TARGET_FLAGS_arm          := arm-linux-gnueabi
 CLANG_TARGET_FLAGS_arm64        := aarch64-linux-gnu
 CLANG_TARGET_FLAGS_hexagon      := hexagon-linux-musl
+CLANG_TARGET_FLAGS_i386         := i386-linux-gnu
 CLANG_TARGET_FLAGS_m68k         := m68k-linux-gnu
 CLANG_TARGET_FLAGS_mips         := mipsel-linux-gnu
 CLANG_TARGET_FLAGS_powerpc      := powerpc64le-linux-gnu
 CLANG_TARGET_FLAGS_riscv        := riscv64-linux-gnu
 CLANG_TARGET_FLAGS_s390         := s390x-linux-gnu
 CLANG_TARGET_FLAGS_x86          := x86_64-linux-gnu
+CLANG_TARGET_FLAGS_x86_64       := x86_64-linux-gnu
 CLANG_TARGET_FLAGS              := $(CLANG_TARGET_FLAGS_$(ARCH))
 
 ifeq ($(CROSS_COMPILE),)

From 484b7059796e3bc1cb527caa61dfc60da649b4f6 Mon Sep 17 00:00:00 2001
From: Fedor Pchelkin <pchelkin@ispras.ru>
Date: Thu, 9 Mar 2023 19:50:50 +0300
Subject: [PATCH 138/411] nfc: pn533: initialize struct pn533_out_arg properly

struct pn533_out_arg used as a temporary context for out_urb is not
initialized properly. Its uninitialized 'phy' field can be dereferenced in
error cases inside pn533_out_complete() callback function. It causes the
following failure:

general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.2.0-rc3-next-20230110-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
RIP: 0010:pn533_out_complete.cold+0x15/0x44 drivers/nfc/pn533/usb.c:441
Call Trace:
 <IRQ>
 __usb_hcd_giveback_urb+0x2b6/0x5c0 drivers/usb/core/hcd.c:1671
 usb_hcd_giveback_urb+0x384/0x430 drivers/usb/core/hcd.c:1754
 dummy_timer+0x1203/0x32d0 drivers/usb/gadget/udc/dummy_hcd.c:1988
 call_timer_fn+0x1da/0x800 kernel/time/timer.c:1700
 expire_timers+0x234/0x330 kernel/time/timer.c:1751
 __run_timers kernel/time/timer.c:2022 [inline]
 __run_timers kernel/time/timer.c:1995 [inline]
 run_timer_softirq+0x326/0x910 kernel/time/timer.c:2035
 __do_softirq+0x1fb/0xaf6 kernel/softirq.c:571
 invoke_softirq kernel/softirq.c:445 [inline]
 __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650
 irq_exit_rcu+0x9/0x20 kernel/softirq.c:662
 sysvec_apic_timer_interrupt+0x97/0xc0 arch/x86/kernel/apic/apic.c:1107

Initialize the field with the pn533_usb_phy currently used.

Found by Linux Verification Center (linuxtesting.org) with Syzkaller.

Fixes: 9dab880d675b ("nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame()")
Reported-by: syzbot+1e608ba4217c96d1952f@syzkaller.appspotmail.com
Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230309165050.207390-1-pchelkin@ispras.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nfc/pn533/usb.c b/drivers/nfc/pn533/usb.c
index ed9c5e2cf3ad4..a187f0e0b0f7d 100644
--- a/drivers/nfc/pn533/usb.c
+++ b/drivers/nfc/pn533/usb.c
@@ -175,6 +175,7 @@ static int pn533_usb_send_frame(struct pn533 *dev,
 	print_hex_dump_debug("PN533 TX: ", DUMP_PREFIX_NONE, 16, 1,
 			     out->data, out->len, false);
 
+	arg.phy = phy;
 	init_completion(&arg.done);
 	cntx = phy->out_urb->context;
 	phy->out_urb->context = &arg;

From 25074a44ac4e5dae5b4a25dcb9bbfcbd00f15ae2 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Wed, 8 Mar 2023 10:49:33 +0800
Subject: [PATCH 139/411] virtio_net: reorder some funcs

The purpose of this is to facilitate the subsequent addition of new
functions without introducing a separate declaration.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 92 ++++++++++++++++++++--------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fb5e68ed3ec27..8b31a04052f2f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -545,6 +545,52 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	return skb;
 }
 
+static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
+{
+	unsigned int len;
+	unsigned int packets = 0;
+	unsigned int bytes = 0;
+	void *ptr;
+
+	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		if (likely(!is_xdp_frame(ptr))) {
+			struct sk_buff *skb = ptr;
+
+			pr_debug("Sent skb %p\n", skb);
+
+			bytes += skb->len;
+			napi_consume_skb(skb, in_napi);
+		} else {
+			struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+			bytes += xdp_get_frame_len(frame);
+			xdp_return_frame(frame);
+		}
+		packets++;
+	}
+
+	/* Avoid overhead when no packets have been processed
+	 * happens when called speculatively from start_xmit.
+	 */
+	if (!packets)
+		return;
+
+	u64_stats_update_begin(&sq->stats.syncp);
+	sq->stats.bytes += bytes;
+	sq->stats.packets += packets;
+	u64_stats_update_end(&sq->stats.syncp);
+}
+
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+		return false;
+	else if (q < vi->curr_queue_pairs)
+		return true;
+	else
+		return false;
+}
+
 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 				   struct send_queue *sq,
 				   struct xdp_frame *xdpf)
@@ -1714,52 +1760,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
 	return stats.packets;
 }
 
-static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
-{
-	unsigned int len;
-	unsigned int packets = 0;
-	unsigned int bytes = 0;
-	void *ptr;
-
-	while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		if (likely(!is_xdp_frame(ptr))) {
-			struct sk_buff *skb = ptr;
-
-			pr_debug("Sent skb %p\n", skb);
-
-			bytes += skb->len;
-			napi_consume_skb(skb, in_napi);
-		} else {
-			struct xdp_frame *frame = ptr_to_xdp(ptr);
-
-			bytes += xdp_get_frame_len(frame);
-			xdp_return_frame(frame);
-		}
-		packets++;
-	}
-
-	/* Avoid overhead when no packets have been processed
-	 * happens when called speculatively from start_xmit.
-	 */
-	if (!packets)
-		return;
-
-	u64_stats_update_begin(&sq->stats.syncp);
-	sq->stats.bytes += bytes;
-	sq->stats.packets += packets;
-	u64_stats_update_end(&sq->stats.syncp);
-}
-
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
-	if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
-		return false;
-	else if (q < vi->curr_queue_pairs)
-		return true;
-	else
-		return false;
-}
-
 static void virtnet_poll_cleantx(struct receive_queue *rq)
 {
 	struct virtnet_info *vi = rq->vq->vdev->priv;

From b8ef4809bc7faa22e63de921ef56de21ed191af0 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Wed, 8 Mar 2023 10:49:34 +0800
Subject: [PATCH 140/411] virtio_net: separate the logic of checking whether sq
 is full

Separate the logic of checking whether sq is full. The subsequent patch
will reuse this func.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 60 ++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 24 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8b31a04052f2f..46bbddaadb0d3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -591,6 +591,41 @@ static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
 		return false;
 }
 
+static void check_sq_full_and_disable(struct virtnet_info *vi,
+				      struct net_device *dev,
+				      struct send_queue *sq)
+{
+	bool use_napi = sq->napi.weight;
+	int qnum;
+
+	qnum = sq - vi->sq;
+
+	/* If running out of space, stop queue to avoid getting packets that we
+	 * are then unable to transmit.
+	 * An alternative would be to force queuing layer to requeue the skb by
+	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
+	 * returned in a normal path of operation: it means that driver is not
+	 * maintaining the TX queue stop/start state properly, and causes
+	 * the stack to do a non-trivial amount of useless work.
+	 * Since most packets only take 1 or 2 ring slots, stopping the queue
+	 * early means 16 slots are typically wasted.
+	 */
+	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+		netif_stop_subqueue(dev, qnum);
+		if (use_napi) {
+			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
+				virtqueue_napi_schedule(&sq->napi, sq->vq);
+		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+			/* More just got used, free them then recheck. */
+			free_old_xmit_skbs(sq, false);
+			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+				netif_start_subqueue(dev, qnum);
+				virtqueue_disable_cb(sq->vq);
+			}
+		}
+	}
+}
+
 static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
 				   struct send_queue *sq,
 				   struct xdp_frame *xdpf)
@@ -1989,30 +2024,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		nf_reset_ct(skb);
 	}
 
-	/* If running out of space, stop queue to avoid getting packets that we
-	 * are then unable to transmit.
-	 * An alternative would be to force queuing layer to requeue the skb by
-	 * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
-	 * returned in a normal path of operation: it means that driver is not
-	 * maintaining the TX queue stop/start state properly, and causes
-	 * the stack to do a non-trivial amount of useless work.
-	 * Since most packets only take 1 or 2 ring slots, stopping the queue
-	 * early means 16 slots are typically wasted.
-	 */
-	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
-		netif_stop_subqueue(dev, qnum);
-		if (use_napi) {
-			if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
-				virtqueue_napi_schedule(&sq->napi, sq->vq);
-		} else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
-			/* More just got used, free them then recheck. */
-			free_old_xmit_skbs(sq, false);
-			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
-				netif_start_subqueue(dev, qnum);
-				virtqueue_disable_cb(sq->vq);
-			}
-		}
-	}
+	check_sq_full_and_disable(vi, dev, sq);
 
 	if (kick || netif_xmit_stopped(txq)) {
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {

From cd1c604aa1d8c641f5edcb58b76352d4eba06ec1 Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Wed, 8 Mar 2023 10:49:35 +0800
Subject: [PATCH 141/411] virtio_net: add checking sq is full inside xdp xmit

If the queue of xdp xmit is not an independent queue, then when the xdp
xmit used all the desc, the xmit from the __dev_queue_xmit() may encounter
the following error.

net ens4: Unexpected TXQ (0) queue failure: -28

This patch adds a check whether sq is full in xdp xmit.

Fixes: 56434a01b12e ("virtio_net: add XDP_TX support")
Reported-by: Yichun Zhang <yichun@openresty.com>
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/virtio_net.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 46bbddaadb0d3..1a309cfb4976a 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -767,6 +767,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
 	}
 	ret = nxmit;
 
+	if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
+		check_sq_full_and_disable(vi, dev, sq);
+
 	if (flags & XDP_XMIT_FLUSH) {
 		if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
 			kicks = 1;

From 71582371a5ee09272b4b4b0a07fa6eb78c9d2f90 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Thu, 9 Mar 2023 12:49:11 +0100
Subject: [PATCH 142/411] MAINTAINERS: make my email address consistent

Use jiri@resnulli.us in all MAINTAINERS entries and fixup .mailmap
so all other addresses point to that one.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Link: https://lore.kernel.org/r/20230309114911.923460-1-jiri@resnulli.us
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .mailmap    | 3 +++
 MAINTAINERS | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/.mailmap b/.mailmap
index 66754ca6656d1..438d79c277172 100644
--- a/.mailmap
+++ b/.mailmap
@@ -210,6 +210,9 @@ Jens Axboe <axboe@suse.de>
 Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
 Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
 Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
+Jiri Pirko <jiri@resnulli.us> <jiri@nvidia.com>
+Jiri Pirko <jiri@resnulli.us> <jiri@mellanox.com>
+Jiri Pirko <jiri@resnulli.us> <jpirko@redhat.com>
 Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
 Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 8d5bc223f3053..d86c7807aa201 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5971,7 +5971,7 @@ F:	include/linux/dm-*.h
 F:	include/uapi/linux/dm-*.h
 
 DEVLINK
-M:	Jiri Pirko <jiri@nvidia.com>
+M:	Jiri Pirko <jiri@resnulli.us>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/devlink
@@ -15079,7 +15079,7 @@ F:	Documentation/hwmon/nzxt-smart2.rst
 F:	drivers/hwmon/nzxt-smart2.c
 
 OBJAGG
-M:	Jiri Pirko <jiri@nvidia.com>
+M:	Jiri Pirko <jiri@resnulli.us>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	include/linux/objagg.h
@@ -15853,7 +15853,7 @@ F:	drivers/video/logo/logo_parisc*
 F:	include/linux/hp_sdc.h
 
 PARMAN
-M:	Jiri Pirko <jiri@nvidia.com>
+M:	Jiri Pirko <jiri@resnulli.us>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	include/linux/parman.h

From 8ba572052a4b8fe5b205854d27e54e3486049b71 Mon Sep 17 00:00:00 2001
From: "Radu Pirea (OSS)" <radu-nicolae.pirea@oss.nxp.com>
Date: Thu, 9 Mar 2023 12:01:11 +0200
Subject: [PATCH 143/411] net: phy: nxp-c45-tja11xx: fix MII_BASIC_CONFIG_REV
 bit

According to the TJA1103 user manual, the bit for the reversed role in MII
or RMII modes is bit 4.

Cc: <stable@vger.kernel.org> # 5.15+
Fixes: b050f2f15e04 ("phy: nxp-c45: add driver for tja1103")
Signed-off-by: Radu Pirea (OSS) <radu-nicolae.pirea@oss.nxp.com>
Link: https://lore.kernel.org/r/20230309100111.1246214-1-radu-nicolae.pirea@oss.nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/nxp-c45-tja11xx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c
index 047c581457e34..5813b07242ce1 100644
--- a/drivers/net/phy/nxp-c45-tja11xx.c
+++ b/drivers/net/phy/nxp-c45-tja11xx.c
@@ -79,7 +79,7 @@
 #define SGMII_ABILITY			BIT(0)
 
 #define VEND1_MII_BASIC_CONFIG		0xAFC6
-#define MII_BASIC_CONFIG_REV		BIT(8)
+#define MII_BASIC_CONFIG_REV		BIT(4)
 #define MII_BASIC_CONFIG_SGMII		0x9
 #define MII_BASIC_CONFIG_RGMII		0x7
 #define MII_BASIC_CONFIG_RMII		0x5

From 59a0b022aa249e3f5735d93de0849341722c4754 Mon Sep 17 00:00:00 2001
From: Jianguo Wu <wujianguo@chinatelecom.cn>
Date: Thu, 9 Mar 2023 10:03:36 +0800
Subject: [PATCH 144/411] ipvlan: Make skb->skb_iif track skb->dev for l3s mode

For l3s mode, skb->dev is set to ipvlan interface in ipvlan_nf_input():
  skb->dev = addr->master->dev
but, skb->skb_iif remain unchanged, this will cause socket lookup failed
if a target socket is bound to a interface, like the following example:

  ip link add ipvlan0 link eth0 type ipvlan mode l3s
  ip addr add dev ipvlan0 192.168.124.111/24
  ip link set ipvlan0 up

  ping -c 1 -I ipvlan0 8.8.8.8
  100% packet loss

This is because there is no match sk in __raw_v4_lookup() as sk->sk_bound_dev_if != dif(skb->skb_iif).
Fix this by make skb->skb_iif track skb->dev in ipvlan_nf_input().

Fixes: c675e06a98a4 ("ipvlan: decouple l3s mode dependencies from other modes")
Signed-off-by: Jianguo Wu <wujianguo@chinatelecom.cn>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Link: https://lore.kernel.org/r/29865b1f-6db7-c07a-de89-949d3721ea30@163.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipvlan/ipvlan_l3s.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ipvlan/ipvlan_l3s.c b/drivers/net/ipvlan/ipvlan_l3s.c
index 943d26cbf39f5..71712ea25403d 100644
--- a/drivers/net/ipvlan/ipvlan_l3s.c
+++ b/drivers/net/ipvlan/ipvlan_l3s.c
@@ -101,6 +101,7 @@ static unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb,
 		goto out;
 
 	skb->dev = addr->master->dev;
+	skb->skb_iif = skb->dev->ifindex;
 	len = skb->len + ETH_HLEN;
 	ipvlan_count_rx(addr->master, len, true, false);
 out:

From 7e4f8a0c495413a50413e8c9f1032ce1bc633bae Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Thu, 9 Mar 2023 10:45:09 -0800
Subject: [PATCH 145/411] i40e: Fix kernel crash during reboot when adapter is
 in recovery mode

If the driver detects during probe that firmware is in recovery
mode then i40e_init_recovery_mode() is called and the rest of
probe function is skipped including pci_set_drvdata(). Subsequent
i40e_shutdown() called during shutdown/reboot dereferences NULL
pointer as pci_get_drvdata() returns NULL.

To fix call pci_set_drvdata() also during entering to recovery mode.

Reproducer:
1) Lets have i40e NIC with firmware in recovery mode
2) Run reboot

Result:
[  139.084698] i40e: Intel(R) Ethernet Connection XL710 Network Driver
[  139.090959] i40e: Copyright (c) 2013 - 2019 Intel Corporation.
[  139.108438] i40e 0000:02:00.0: Firmware recovery mode detected. Limiting functionality.
[  139.116439] i40e 0000:02:00.0: Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.
[  139.129499] i40e 0000:02:00.0: fw 8.3.64775 api 1.13 nvm 8.30 0x8000b78d 1.3106.0 [8086:1583] [15d9:084a]
[  139.215932] i40e 0000:02:00.0 enp2s0f0: renamed from eth0
[  139.223292] i40e 0000:02:00.1: Firmware recovery mode detected. Limiting functionality.
[  139.231292] i40e 0000:02:00.1: Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.
[  139.244406] i40e 0000:02:00.1: fw 8.3.64775 api 1.13 nvm 8.30 0x8000b78d 1.3106.0 [8086:1583] [15d9:084a]
[  139.329209] i40e 0000:02:00.1 enp2s0f1: renamed from eth0
...
[  156.311376] BUG: kernel NULL pointer dereference, address: 00000000000006c2
[  156.318330] #PF: supervisor write access in kernel mode
[  156.323546] #PF: error_code(0x0002) - not-present page
[  156.328679] PGD 0 P4D 0
[  156.331210] Oops: 0002 [#1] PREEMPT SMP NOPTI
[  156.335567] CPU: 26 PID: 15119 Comm: reboot Tainted: G            E      6.2.0+ #1
[  156.343126] Hardware name: Abacus electric, s.r.o. - servis@abacus.cz Super Server/H12SSW-iN, BIOS 2.4 04/13/2022
[  156.353369] RIP: 0010:i40e_shutdown+0x15/0x130 [i40e]
[  156.358430] Code: c1 fc ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 fd 53 48 8b 9f 48 01 00 00 <f0> 80 8b c2 06 00 00 04 f0 80 8b c0 06 00 00 08 48 8d bb 08 08 00
[  156.377168] RSP: 0018:ffffb223c8447d90 EFLAGS: 00010282
[  156.382384] RAX: ffffffffc073ee70 RBX: 0000000000000000 RCX: 0000000000000001
[  156.389510] RDX: 0000000080000001 RSI: 0000000000000246 RDI: ffff95db49988000
[  156.396634] RBP: ffff95db49988000 R08: ffffffffffffffff R09: ffffffff8bd17d40
[  156.403759] R10: 0000000000000001 R11: ffffffff8a5e3d28 R12: ffff95db49988000
[  156.410882] R13: ffffffff89a6fe17 R14: ffff95db49988150 R15: 0000000000000000
[  156.418007] FS:  00007fe7c0cc3980(0000) GS:ffff95ea8ee80000(0000) knlGS:0000000000000000
[  156.426083] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  156.431819] CR2: 00000000000006c2 CR3: 00000003092fc005 CR4: 0000000000770ee0
[  156.438944] PKRU: 55555554
[  156.441647] Call Trace:
[  156.444096]  <TASK>
[  156.446199]  pci_device_shutdown+0x38/0x60
[  156.450297]  device_shutdown+0x163/0x210
[  156.454215]  kernel_restart+0x12/0x70
[  156.457872]  __do_sys_reboot+0x1ab/0x230
[  156.461789]  ? vfs_writev+0xa6/0x1a0
[  156.465362]  ? __pfx_file_free_rcu+0x10/0x10
[  156.469635]  ? __call_rcu_common.constprop.85+0x109/0x5a0
[  156.475034]  do_syscall_64+0x3e/0x90
[  156.478611]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[  156.483658] RIP: 0033:0x7fe7bff37ab7

Fixes: 4ff0ee1af016 ("i40e: Introduce recovery mode support")
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Tested-by: Arpana Arland <arpanax.arland@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Link: https://lore.kernel.org/r/20230309184509.984639-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 467001db5070e..228cd502bb48a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -15525,6 +15525,7 @@ static int i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
 	int err;
 	int v_idx;
 
+	pci_set_drvdata(pf->pdev, pf);
 	pci_save_state(pf->pdev);
 
 	/* set up periodic task facility */

From 8f76a4f80fba8096a611b6b60c40a0f4cab3ddfb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:25 +0100
Subject: [PATCH 146/411] tools: ynl: fix render-max for flags definition

Properly manage render-max property for flags definition type
introducing mask value and setting it to (last_element << 1) - 1
instead of adding max value set to last_element + 1

Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/ynl-gen-c.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 1bcc5354d8000..d47376f19de72 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -1931,9 +1931,14 @@ def render_uapi(family, cw):
 
             if const.get('render-max', False):
                 cw.nl()
-                max_name = c_upper(name_pfx + 'max')
-                cw.p('__' + max_name + ',')
-                cw.p(max_name + ' = (__' + max_name + ' - 1)')
+                if const['type'] == 'flags':
+                    max_name = c_upper(name_pfx + 'mask')
+                    max_val = f' = {enum.get_mask()},'
+                    cw.p(max_name + max_val)
+                else:
+                    max_name = c_upper(name_pfx + 'max')
+                    cw.p('__' + max_name + ',')
+                    cw.p(max_name + ' = (__' + max_name + ' - 1)')
             cw.block_end(line=';')
             cw.nl()
         elif const['type'] == 'const':

From bf51d27704c963ea52c0843096e23c9f404b13af Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:26 +0100
Subject: [PATCH 147/411] tools: ynl: fix get_mask utility routine

Fix get_mask utility routine in order to take into account possible gaps
in the elements list.

Fixes: be5bea1cc0bf ("net: add basic C code generators for Netlink")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/nlspec.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index a34d088f67432..960a356e8225e 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -138,10 +138,8 @@ def has_doc(self):
 
     def get_mask(self):
         mask = 0
-        idx = self.yaml.get('value-start', 0)
-        for _ in self.entries.values():
-            mask |= 1 << idx
-            idx += 1
+        for e in self.entries.values():
+            mask += e.user_value()
         return mask
 
 
From f85949f98206b3b11d92d695cea4efda6a81f00e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:27 +0100
Subject: [PATCH 148/411] xdp: add xdp_set_features_flag utility routine

Introduce xdp_set_features_flag utility routine in order to update
dynamically xdp_features according to the dynamic hw configuration via
ethtool (e.g. changing number of hw rx/tx queues).
Add xdp_clear_features_flag() in order to clear all xdp_feature flag.

Reviewed-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/netdev.yaml |  1 +
 include/net/xdp.h                       | 11 +++++++++++
 include/uapi/linux/netdev.h             |  2 ++
 net/core/xdp.c                          | 26 ++++++++++++++++++-------
 tools/include/uapi/linux/netdev.h       |  2 ++
 5 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 24de747b53443..753e5914a8b7c 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -9,6 +9,7 @@ definitions:
   -
     type: flags
     name: xdp-act
+    render-max: true
     entries:
       -
         name: basic
diff --git a/include/net/xdp.h b/include/net/xdp.h
index d517bfac937b0..41c57b8b16714 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -428,12 +428,18 @@ MAX_XDP_METADATA_KFUNC,
 #ifdef CONFIG_NET
 u32 bpf_xdp_metadata_kfunc_id(int id);
 bool bpf_dev_bound_kfunc_id(u32 btf_id);
+void xdp_set_features_flag(struct net_device *dev, xdp_features_t val);
 void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg);
 void xdp_features_clear_redirect_target(struct net_device *dev);
 #else
 static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
 static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
 
+static inline void
+xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
+{
+}
+
 static inline void
 xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
 {
@@ -445,4 +451,9 @@ xdp_features_clear_redirect_target(struct net_device *dev)
 }
 #endif
 
+static inline void xdp_clear_features_flag(struct net_device *dev)
+{
+	xdp_set_features_flag(dev, 0);
+}
+
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index 8c4e3e536c042..ed134fbdfd32d 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -33,6 +33,8 @@ enum netdev_xdp_act {
 	NETDEV_XDP_ACT_HW_OFFLOAD = 16,
 	NETDEV_XDP_ACT_RX_SG = 32,
 	NETDEV_XDP_ACT_NDO_XMIT_SG = 64,
+
+	NETDEV_XDP_ACT_MASK = 127,
 };
 
 enum {
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 8c92fc5533177..87e654b7d06c1 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -774,20 +774,32 @@ static int __init xdp_metadata_init(void)
 }
 late_initcall(xdp_metadata_init);
 
-void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
 {
-	dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
-	if (support_sg)
-		dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+	val &= NETDEV_XDP_ACT_MASK;
+	if (dev->xdp_features == val)
+		return;
 
+	dev->xdp_features = val;
 	call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
 }
+EXPORT_SYMBOL_GPL(xdp_set_features_flag);
+
+void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+{
+	xdp_features_t val = (dev->xdp_features | NETDEV_XDP_ACT_NDO_XMIT);
+
+	if (support_sg)
+		val |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+	xdp_set_features_flag(dev, val);
+}
 EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);
 
 void xdp_features_clear_redirect_target(struct net_device *dev)
 {
-	dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT |
-			       NETDEV_XDP_ACT_NDO_XMIT_SG);
-	call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+	xdp_features_t val = dev->xdp_features;
+
+	val &= ~(NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_NDO_XMIT_SG);
+	xdp_set_features_flag(dev, val);
 }
 EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 8c4e3e536c042..ed134fbdfd32d 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -33,6 +33,8 @@ enum netdev_xdp_act {
 	NETDEV_XDP_ACT_HW_OFFLOAD = 16,
 	NETDEV_XDP_ACT_RX_SG = 32,
 	NETDEV_XDP_ACT_NDO_XMIT_SG = 64,
+
+	NETDEV_XDP_ACT_MASK = 127,
 };
 
 enum {

From 3c249fe4de1608a9af56563606d4f6eb3a64a47f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:28 +0100
Subject: [PATCH 149/411] net: thunderx: take into account xdp_features setting
 tx/rx queues

thunderx nic allows xdp just if enough hw queues are available for XDP.
Take into account queues configuration setting xdp_features.

Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 17 +++++++++++------
 .../net/ethernet/cavium/thunder/nicvf_main.c    |  4 +++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index e5c71f9078523..d8d71bf97983b 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -735,12 +735,17 @@ static int nicvf_set_channels(struct net_device *dev,
 	if (channel->tx_count > nic->max_queues)
 		return -EINVAL;
 
-	if (nic->xdp_prog &&
-	    ((channel->tx_count + channel->rx_count) > nic->max_queues)) {
-		netdev_err(nic->netdev,
-			   "XDP mode, RXQs + TXQs > Max %d\n",
-			   nic->max_queues);
-		return -EINVAL;
+	if (channel->tx_count + channel->rx_count > nic->max_queues) {
+		if (nic->xdp_prog) {
+			netdev_err(nic->netdev,
+				   "XDP mode, RXQs + TXQs > Max %d\n",
+				   nic->max_queues);
+			return -EINVAL;
+		}
+
+		xdp_clear_features_flag(nic->netdev);
+	} else if (!pass1_silicon(nic->pdev)) {
+		xdp_set_features_flag(dev, NETDEV_XDP_ACT_BASIC);
 	}
 
 	if (if_up)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 8b25313c7f6b8..eff350e0bc2a8 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -2218,7 +2218,9 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->netdev_ops = &nicvf_netdev_ops;
 	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
-	netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
+	if (!pass1_silicon(nic->pdev) &&
+	    nic->rx_queues + nic->tx_queues <= nic->max_queues)
+		netdev->xdp_features = NETDEV_XDP_ACT_BASIC;
 
 	/* MTU range: 64 - 9200 */
 	netdev->min_mtu = NIC_HW_MIN_FRS;

From 7aa6dc351b92abbfead9ebe25ce7a7ac0384ea6d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:29 +0100
Subject: [PATCH 150/411] net: ena: take into account xdp_features setting
 tx/rx queues

ena nic allows xdp just if enough hw queues are available for XDP.
Take into account queues configuration setting xdp_features.

Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Reviewed-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 15 ++++++++++++---
 drivers/net/ethernet/amazon/ena/ena_netdev.c  |  6 ++++--
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 8da79eedc057c..1d4f2f4d10f29 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -850,11 +850,20 @@ static int ena_set_channels(struct net_device *netdev,
 	struct ena_adapter *adapter = netdev_priv(netdev);
 	u32 count = channels->combined_count;
 	/* The check for max value is already done in ethtool */
-	if (count < ENA_MIN_NUM_IO_QUEUES ||
-	    (ena_xdp_present(adapter) &&
-	    !ena_xdp_legal_queue_count(adapter, count)))
+	if (count < ENA_MIN_NUM_IO_QUEUES)
 		return -EINVAL;
 
+	if (!ena_xdp_legal_queue_count(adapter, count)) {
+		if (ena_xdp_present(adapter))
+			return -EINVAL;
+
+		xdp_clear_features_flag(netdev);
+	} else {
+		xdp_set_features_flag(netdev,
+				      NETDEV_XDP_ACT_BASIC |
+				      NETDEV_XDP_ACT_REDIRECT);
+	}
+
 	return ena_update_queue_count(adapter, count);
 }
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index d3999db7c6a29..cbfe7f977270f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -4105,8 +4105,6 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter,
 	/* Set offload features */
 	ena_set_dev_offloads(feat, netdev);
 
-	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT;
-
 	adapter->max_mtu = feat->dev_attr.max_mtu;
 	netdev->max_mtu = adapter->max_mtu;
 	netdev->min_mtu = ENA_MIN_MTU;
@@ -4393,6 +4391,10 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ena_config_debug_area(adapter);
 
+	if (ena_xdp_legal_queue_count(adapter, adapter->num_io_queues))
+		netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
+				       NETDEV_XDP_ACT_REDIRECT;
+
 	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
 
 	netif_carrier_off(netdev);

From fccca038f3003daa8f28a5e5d97efe50f04b8d9d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:30 +0100
Subject: [PATCH 151/411] veth: take into account device reconfiguration for
 xdp_features flag

Take into account tx/rx queues reconfiguration setting device
xdp_features flag. Moreover consider NETIF_F_GRO flag in order to enable
ndo_xdp_xmit callback.

Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/veth.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 1bb54de7124d9..293dc3b2c84a6 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1257,6 +1257,26 @@ static int veth_enable_range_safe(struct net_device *dev, int start, int end)
 	return 0;
 }
 
+static void veth_set_xdp_features(struct net_device *dev)
+{
+	struct veth_priv *priv = netdev_priv(dev);
+	struct net_device *peer;
+
+	peer = rcu_dereference(priv->peer);
+	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
+		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
+				     NETDEV_XDP_ACT_REDIRECT |
+				     NETDEV_XDP_ACT_RX_SG;
+
+		if (priv->_xdp_prog || veth_gro_requested(dev))
+			val |= NETDEV_XDP_ACT_NDO_XMIT |
+			       NETDEV_XDP_ACT_NDO_XMIT_SG;
+		xdp_set_features_flag(dev, val);
+	} else {
+		xdp_clear_features_flag(dev);
+	}
+}
+
 static int veth_set_channels(struct net_device *dev,
 			     struct ethtool_channels *ch)
 {
@@ -1323,6 +1343,12 @@ static int veth_set_channels(struct net_device *dev,
 		if (peer)
 			netif_carrier_on(peer);
 	}
+
+	/* update XDP supported features */
+	veth_set_xdp_features(dev);
+	if (peer)
+		veth_set_xdp_features(peer);
+
 	return err;
 
 revert:
@@ -1489,7 +1515,10 @@ static int veth_set_features(struct net_device *dev,
 		err = veth_napi_enable(dev);
 		if (err)
 			return err;
+
+		xdp_features_set_redirect_target(dev, true);
 	} else {
+		xdp_features_clear_redirect_target(dev);
 		veth_napi_del(dev);
 	}
 	return 0;
@@ -1570,10 +1599,15 @@ static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 			peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
 			peer->max_mtu = max_mtu;
 		}
+
+		xdp_features_set_redirect_target(dev, true);
 	}
 
 	if (old_prog) {
 		if (!prog) {
+			if (!veth_gro_requested(dev))
+				xdp_features_clear_redirect_target(dev);
+
 			if (dev->flags & IFF_UP)
 				veth_disable_xdp(dev);
 
@@ -1686,10 +1720,6 @@ static void veth_setup(struct net_device *dev)
 	dev->hw_enc_features = VETH_FEATURES;
 	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
 	netif_set_tso_max_size(dev, GSO_MAX_SIZE);
-
-	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-			    NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
-			    NETDEV_XDP_ACT_NDO_XMIT_SG;
 }
 
 /*
@@ -1857,6 +1887,10 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 		goto err_queues;
 
 	veth_disable_gro(dev);
+	/* update XDP supported features */
+	veth_set_xdp_features(dev);
+	veth_set_xdp_features(peer);
+
 	return 0;
 
 err_queues:

From 4d5ab0ad964df178beba031b89429a601893ff61 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Mar 2023 13:25:31 +0100
Subject: [PATCH 152/411] net/mlx5e: take into account device reconfiguration
 for xdp_features flag

Take into account LRO and GRO configuration setting device xdp_features
flag. Consider channel rq_wq_type enabling rx scatter-gatter support in
xdp_features flag and disable NETDEV_XDP_ACT_NDO_XMIT_SG since it is not
supported yet by the driver.
Moreover always enable NETDEV_XDP_ACT_NDO_XMIT as the ndo_xdp_xmit
callback does not require to load a dummy xdp program on the NIC.

Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Co-developed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  1 +
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 10 ++++-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 37 +++++++++++++------
 .../net/ethernet/mellanox/mlx5/core/en_rep.c  |  3 ++
 4 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 88460b7796e55..4276c6eb68201 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1243,6 +1243,7 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
 void mlx5e_rx_dim_work(struct work_struct *work);
 void mlx5e_tx_dim_work(struct work_struct *work);
 
+void mlx5e_set_xdp_feature(struct net_device *netdev);
 netdev_features_t mlx5e_features_check(struct sk_buff *skb,
 				       struct net_device *netdev,
 				       netdev_features_t features);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 7708acc9b2ab3..79fd21ecb9cbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1985,6 +1985,7 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_params new_params;
+	int err;
 
 	if (enable) {
 		/* Checking the regular RQ here; mlx5e_validate_xsk_param called
@@ -2005,7 +2006,14 @@ static int set_pflag_rx_striding_rq(struct net_device *netdev, bool enable)
 	MLX5E_SET_PFLAG(&new_params, MLX5E_PFLAG_RX_STRIDING_RQ, enable);
 	mlx5e_set_rq_type(mdev, &new_params);
 
-	return mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+	err = mlx5e_safe_switch_params(priv, &new_params, NULL, NULL, true);
+	if (err)
+		return err;
+
+	/* update XDP supported features */
+	mlx5e_set_xdp_feature(netdev);
+
+	return 0;
 }
 
 static int set_pflag_rx_no_csum_complete(struct net_device *netdev, bool enable)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 76a9c5194a704..51b5f3cca5047 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4004,6 +4004,25 @@ static int mlx5e_handle_feature(struct net_device *netdev,
 	return 0;
 }
 
+void mlx5e_set_xdp_feature(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_params *params = &priv->channels.params;
+	xdp_features_t val;
+
+	if (params->packet_merge.type != MLX5E_PACKET_MERGE_NONE) {
+		xdp_clear_features_flag(netdev);
+		return;
+	}
+
+	val = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+	      NETDEV_XDP_ACT_XSK_ZEROCOPY |
+	      NETDEV_XDP_ACT_NDO_XMIT;
+	if (params->rq_wq_type == MLX5_WQ_TYPE_CYCLIC)
+		val |= NETDEV_XDP_ACT_RX_SG;
+	xdp_set_features_flag(netdev, val);
+}
+
 int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	netdev_features_t oper_features = features;
@@ -4030,6 +4049,9 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 		return -EINVAL;
 	}
 
+	/* update XDP supported features */
+	mlx5e_set_xdp_feature(netdev);
+
 	return 0;
 }
 
@@ -4761,13 +4783,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
-	if (reset) {
-		if (prog)
-			xdp_features_set_redirect_target(netdev, true);
-		else
-			xdp_features_clear_redirect_target(netdev);
-	}
-
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset)
 		goto unlock;
 
@@ -5163,13 +5178,10 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	netdev->features         |= NETIF_F_HIGHDMA;
 	netdev->features         |= NETIF_F_HW_VLAN_STAG_FILTER;
 
-	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-			       NETDEV_XDP_ACT_XSK_ZEROCOPY |
-			       NETDEV_XDP_ACT_RX_SG;
-
 	netdev->priv_flags       |= IFF_UNICAST_FLT;
 
 	netif_set_tso_max_size(netdev, GSO_MAX_SIZE);
+	mlx5e_set_xdp_feature(netdev);
 	mlx5e_set_netdev_dev_addr(netdev);
 	mlx5e_macsec_build_netdev(priv);
 	mlx5e_ipsec_build_netdev(priv);
@@ -5241,6 +5253,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
 		mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
 
 	mlx5e_health_create_reporters(priv);
+	/* update XDP supported features */
+	mlx5e_set_xdp_feature(netdev);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 9b92034430854..43fd12fb87b87 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -747,6 +747,9 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
 	/* RQ */
 	mlx5e_build_rq_params(mdev, params);
 
+	/* update XDP supported features */
+	mlx5e_set_xdp_feature(netdev);
+
 	/* CQ moderation params */
 	params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation);
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);

From 481e96fc1307eb52c0c449608e629921ecbbaf15 Mon Sep 17 00:00:00 2001
From: Matteo Croce <teknoraver@meta.com>
Date: Thu, 9 Mar 2023 13:25:32 +0100
Subject: [PATCH 153/411] mvpp2: take care of xdp_features when reconfiguring
 queues

XDP is supported only if enough queues are present, so when reconfiguring
the queues set xdp_features accordingly.

Fixes: 66c0e13ad236 ("drivers: net: turn on XDP features")
Suggested-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Matteo Croce <teknoraver@meta.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 9b4ecbe4f36d4..3ea00bc9b91ca 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4996,6 +4996,14 @@ static int mvpp2_bm_switch_buffers(struct mvpp2 *priv, bool percpu)
 
 	for (i = 0; i < priv->port_count; i++) {
 		port = priv->port_list[i];
+		if (percpu && port->ntxqs >= num_possible_cpus() * 2)
+			xdp_set_features_flag(port->dev,
+					      NETDEV_XDP_ACT_BASIC |
+					      NETDEV_XDP_ACT_REDIRECT |
+					      NETDEV_XDP_ACT_NDO_XMIT);
+		else
+			xdp_clear_features_flag(port->dev);
+
 		mvpp2_swf_bm_pool_init(port);
 		if (status[i])
 			mvpp2_open(port->dev);
@@ -6863,13 +6871,14 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 
 	if (!port->priv->percpu_pools)
 		mvpp2_set_hw_csum(port, port->pool_long->id);
+	else if (port->ntxqs >= num_possible_cpus() * 2)
+		dev->xdp_features = NETDEV_XDP_ACT_BASIC |
+				    NETDEV_XDP_ACT_REDIRECT |
+				    NETDEV_XDP_ACT_NDO_XMIT;
 
 	dev->vlan_features |= features;
 	netif_set_tso_max_segs(dev, MVPP2_MAX_TSO_SEGS);
 
-	dev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
-			    NETDEV_XDP_ACT_NDO_XMIT;
-
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
 	/* MTU range: 68 - 9704 */

From b7a679ba7c652587b85294f4953f33ac0b756d40 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 9 Mar 2023 15:49:57 +0100
Subject: [PATCH 154/411] mptcp: fix possible deadlock in subflow_error_report

Christoph reported a possible deadlock while the TCP stack
destroys an unaccepted subflow due to an incoming reset: the
MPTCP socket error path tries to acquire the msk-level socket
lock while TCP still owns the listener socket accept queue
spinlock, and the reverse dependency already exists in the
TCP stack.

Note that the above is actually a lockdep false positive, as
the chain involves two separate sockets. A different per-socket
lockdep key will address the issue, but such a change will be
quite invasive.

Instead, we can simply stop earlier the socket error handling
for orphaned or unaccepted subflows, breaking the critical
lockdep chain. Error handling in such a scenario is a no-op.

Reported-and-tested-by: Christoph Paasch <cpaasch@apple.com>
Fixes: 15cc10453398 ("mptcp: deliver ssk errors to msk")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/355
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/subflow.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 4ae1a7304cf0d..5070dc33675dd 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1432,6 +1432,13 @@ static void subflow_error_report(struct sock *ssk)
 {
 	struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
 
+	/* bail early if this is a no-op, so that we avoid introducing a
+	 * problematic lockdep dependency between TCP accept queue lock
+	 * and msk socket spinlock
+	 */
+	if (!sk->sk_socket)
+		return;
+
 	mptcp_data_lock(sk);
 	if (!sock_owned_by_user(sk))
 		__mptcp_error_report(sk);

From 3a236aef280ed5122b2d47087eb514d0921ae033 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 9 Mar 2023 15:49:58 +0100
Subject: [PATCH 155/411] mptcp: refactor passive socket initialization

After commit 30e51b923e43 ("mptcp: fix unreleased socket in accept queue")
unaccepted msk sockets go throu complete shutdown, we don't need anymore
to delay inserting the first subflow into the subflow lists.

The reference counting deserve some extra care, as __mptcp_close() is
unaware of the request socket linkage to the first subflow.

Please note that this is more a refactoring than a fix but because this
modification is needed to include other corrections, see the following
commits. Then a Fixes tag has been added here to help the stable team.

Fixes: 30e51b923e43 ("mptcp: fix unreleased socket in accept queue")
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Tested-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c | 17 -----------------
 net/mptcp/subflow.c  | 27 +++++++++++++++++++++------
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3ad9c46202fc6..447641d34c2c5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -825,7 +825,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 	if (sk->sk_socket && !ssk->sk_socket)
 		mptcp_sock_graft(ssk, sk->sk_socket);
 
-	mptcp_propagate_sndbuf((struct sock *)msk, ssk);
 	mptcp_sockopt_sync_locked(msk, ssk);
 	return true;
 }
@@ -3708,22 +3707,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 
 		lock_sock(newsk);
 
-		/* PM/worker can now acquire the first subflow socket
-		 * lock without racing with listener queue cleanup,
-		 * we can notify it, if needed.
-		 *
-		 * Even if remote has reset the initial subflow by now
-		 * the refcnt is still at least one.
-		 */
-		subflow = mptcp_subflow_ctx(msk->first);
-		list_add(&subflow->node, &msk->conn_list);
-		sock_hold(msk->first);
-		if (mptcp_is_fully_established(newsk))
-			mptcp_pm_fully_established(msk, msk->first, GFP_KERNEL);
-
-		mptcp_rcv_space_init(msk, msk->first);
-		mptcp_propagate_sndbuf(newsk, msk->first);
-
 		/* set ssk->sk_socket of accept()ed flows to mptcp socket.
 		 * This is needed so NOSPACE flag can be set from tcp stack.
 		 */
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5070dc33675dd..a631a5e6fc7b5 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -397,6 +397,12 @@ void mptcp_subflow_reset(struct sock *ssk)
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
 	struct sock *sk = subflow->conn;
 
+	/* mptcp_mp_fail_no_response() can reach here on an already closed
+	 * socket
+	 */
+	if (ssk->sk_state == TCP_CLOSE)
+		return;
+
 	/* must hold: tcp_done() could drop last reference on parent */
 	sock_hold(sk);
 
@@ -750,6 +756,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	struct mptcp_options_received mp_opt;
 	bool fallback, fallback_is_fatal;
 	struct sock *new_msk = NULL;
+	struct mptcp_sock *owner;
 	struct sock *child;
 
 	pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn);
@@ -824,6 +831,8 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 		ctx->setsockopt_seq = listener->setsockopt_seq;
 
 		if (ctx->mp_capable) {
+			owner = mptcp_sk(new_msk);
+
 			/* this can't race with mptcp_close(), as the msk is
 			 * not yet exposted to user-space
 			 */
@@ -832,14 +841,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			/* record the newly created socket as the first msk
 			 * subflow, but don't link it yet into conn_list
 			 */
-			WRITE_ONCE(mptcp_sk(new_msk)->first, child);
+			WRITE_ONCE(owner->first, child);
 
 			/* new mpc subflow takes ownership of the newly
 			 * created mptcp socket
 			 */
 			mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
-			mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
-			mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
+			mptcp_pm_new_connection(owner, child, 1);
+			mptcp_token_accept(subflow_req, owner);
 			ctx->conn = new_msk;
 			new_msk = NULL;
 
@@ -847,15 +856,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 			 * uses the correct data
 			 */
 			mptcp_copy_inaddrs(ctx->conn, child);
+			mptcp_propagate_sndbuf(ctx->conn, child);
+
+			mptcp_rcv_space_init(owner, child);
+			list_add(&ctx->node, &owner->conn_list);
+			sock_hold(child);
 
 			/* with OoO packets we can reach here without ingress
 			 * mpc option
 			 */
-			if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK)
+			if (mp_opt.suboptions & OPTION_MPTCP_MPC_ACK) {
 				mptcp_subflow_fully_established(ctx, &mp_opt);
+				mptcp_pm_fully_established(owner, child, GFP_ATOMIC);
+				ctx->pm_notified = 1;
+			}
 		} else if (ctx->mp_join) {
-			struct mptcp_sock *owner;
-
 			owner = subflow_req->msk;
 			if (!owner) {
 				subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT);

From b6985b9b82954caa53f862d6059d06c0526254f0 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 9 Mar 2023 15:49:59 +0100
Subject: [PATCH 156/411] mptcp: use the workqueue to destroy unaccepted
 sockets

Christoph reported a UaF at token lookup time after having
refactored the passive socket initialization part:

  BUG: KASAN: use-after-free in __token_bucket_busy+0x253/0x260
  Read of size 4 at addr ffff88810698d5b0 by task syz-executor653/3198

  CPU: 1 PID: 3198 Comm: syz-executor653 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   <TASK>
   dump_stack_lvl+0x6e/0x91
   print_report+0x16a/0x46f
   kasan_report+0xad/0x130
   __token_bucket_busy+0x253/0x260
   mptcp_token_new_connect+0x13d/0x490
   mptcp_connect+0x4ed/0x860
   __inet_stream_connect+0x80e/0xd90
   tcp_sendmsg_fastopen+0x3ce/0x710
   mptcp_sendmsg+0xff1/0x1a20
   inet_sendmsg+0x11d/0x140
   __sys_sendto+0x405/0x490
   __x64_sys_sendto+0xdc/0x1b0
   do_syscall_64+0x3b/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

We need to properly clean-up all the paired MPTCP-level
resources and be sure to release the msk last, even when
the unaccepted subflow is destroyed by the TCP internals
via inet_child_forget().

We can re-use the existing MPTCP_WORK_CLOSE_SUBFLOW infra,
explicitly checking that for the critical scenario: the
closed subflow is the MPC one, the msk is not accepted and
eventually going through full cleanup.

With such change, __mptcp_destroy_sock() is always called
on msk sockets, even on accepted ones. We don't need anymore
to transiently drop one sk reference at msk clone time.

Please note this commit depends on the parent one:

  mptcp: refactor passive socket initialization

Fixes: 58b09919626b ("mptcp: create msk early")
Cc: stable@vger.kernel.org
Reported-and-tested-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/347
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c | 40 ++++++++++++++++++++++++++++++----------
 net/mptcp/protocol.h |  5 ++++-
 net/mptcp/subflow.c  | 17 ++++++++++++-----
 3 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 447641d34c2c5..2a2093d61835c 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2342,7 +2342,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		goto out;
 	}
 
-	sock_orphan(ssk);
 	subflow->disposable = 1;
 
 	/* if ssk hit tcp_done(), tcp_cleanup_ulp() cleared the related ops
@@ -2350,7 +2349,20 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	 * reference owned by msk;
 	 */
 	if (!inet_csk(ssk)->icsk_ulp_ops) {
+		WARN_ON_ONCE(!sock_flag(ssk, SOCK_DEAD));
 		kfree_rcu(subflow, rcu);
+	} else if (msk->in_accept_queue && msk->first == ssk) {
+		/* if the first subflow moved to a close state, e.g. due to
+		 * incoming reset and we reach here before inet_child_forget()
+		 * the TCP stack could later try to close it via
+		 * inet_csk_listen_stop(), or deliver it to the user space via
+		 * accept().
+		 * We can't delete the subflow - or risk a double free - nor let
+		 * the msk survive - or will be leaked in the non accept scenario:
+		 * fallback and let TCP cope with the subflow cleanup.
+		 */
+		WARN_ON_ONCE(sock_flag(ssk, SOCK_DEAD));
+		mptcp_subflow_drop_ctx(ssk);
 	} else {
 		/* otherwise tcp will dispose of the ssk and subflow ctx */
 		if (ssk->sk_state == TCP_LISTEN) {
@@ -2398,9 +2410,10 @@ static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu)
 	return 0;
 }
 
-static void __mptcp_close_subflow(struct mptcp_sock *msk)
+static void __mptcp_close_subflow(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow, *tmp;
+	struct mptcp_sock *msk = mptcp_sk(sk);
 
 	might_sleep();
 
@@ -2414,7 +2427,15 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk)
 		if (!skb_queue_empty_lockless(&ssk->sk_receive_queue))
 			continue;
 
-		mptcp_close_ssk((struct sock *)msk, ssk, subflow);
+		mptcp_close_ssk(sk, ssk, subflow);
+	}
+
+	/* if the MPC subflow has been closed before the msk is accepted,
+	 * msk will never be accept-ed, close it now
+	 */
+	if (!msk->first && msk->in_accept_queue) {
+		sock_set_flag(sk, SOCK_DEAD);
+		inet_sk_state_store(sk, TCP_CLOSE);
 	}
 }
 
@@ -2623,6 +2644,9 @@ static void mptcp_worker(struct work_struct *work)
 	__mptcp_check_send_data_fin(sk);
 	mptcp_check_data_fin(sk);
 
+	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+		__mptcp_close_subflow(sk);
+
 	/* There is no point in keeping around an orphaned sk timedout or
 	 * closed, but we need the msk around to reply to incoming DATA_FIN,
 	 * even if it is orphaned and in FIN_WAIT2 state
@@ -2638,9 +2662,6 @@ static void mptcp_worker(struct work_struct *work)
 		}
 	}
 
-	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
-		__mptcp_close_subflow(msk);
-
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
 		__mptcp_retrans(sk);
 
@@ -3078,6 +3099,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	msk->local_key = subflow_req->local_key;
 	msk->token = subflow_req->token;
 	msk->subflow = NULL;
+	msk->in_accept_queue = 1;
 	WRITE_ONCE(msk->fully_established, false);
 	if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
 		WRITE_ONCE(msk->csum_enabled, true);
@@ -3095,8 +3117,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
 	security_inet_csk_clone(nsk, req);
 	bh_unlock_sock(nsk);
 
-	/* keep a single reference */
-	__sock_put(nsk);
+	/* note: the newly allocated socket refcount is 2 now */
 	return nsk;
 }
 
@@ -3152,8 +3173,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
 			goto out;
 		}
 
-		/* acquire the 2nd reference for the owning socket */
-		sock_hold(new_mptcp_sock);
 		newsk = new_mptcp_sock;
 		MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEPASSIVEACK);
 	} else {
@@ -3704,6 +3723,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 		struct sock *newsk = newsock->sk;
 
 		set_bit(SOCK_CUSTOM_SOCKOPT, &newsock->flags);
+		msk->in_accept_queue = 0;
 
 		lock_sock(newsk);
 
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 61fd8eabfca20..3a2db1b862dd7 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,8 @@ struct mptcp_sock {
 	u8		recvmsg_inq:1,
 			cork:1,
 			nodelay:1,
-			fastopening:1;
+			fastopening:1,
+			in_accept_queue:1;
 	int		connect_flags;
 	struct work_struct work;
 	struct sk_buff  *ooo_last_skb;
@@ -666,6 +667,8 @@ void mptcp_subflow_set_active(struct mptcp_subflow_context *subflow);
 
 bool mptcp_subflow_active(struct mptcp_subflow_context *subflow);
 
+void mptcp_subflow_drop_ctx(struct sock *ssk);
+
 static inline void mptcp_subflow_tcp_fallback(struct sock *sk,
 					      struct mptcp_subflow_context *ctx)
 {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index a631a5e6fc7b5..932a3e0eb22de 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -699,9 +699,10 @@ static bool subflow_hmac_valid(const struct request_sock *req,
 
 static void mptcp_force_close(struct sock *sk)
 {
-	/* the msk is not yet exposed to user-space */
+	/* the msk is not yet exposed to user-space, and refcount is 2 */
 	inet_sk_state_store(sk, TCP_CLOSE);
 	sk_common_release(sk);
+	sock_put(sk);
 }
 
 static void subflow_ulp_fallback(struct sock *sk,
@@ -717,7 +718,7 @@ static void subflow_ulp_fallback(struct sock *sk,
 	mptcp_subflow_ops_undo_override(sk);
 }
 
-static void subflow_drop_ctx(struct sock *ssk)
+void mptcp_subflow_drop_ctx(struct sock *ssk)
 {
 	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
 
@@ -823,7 +824,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 
 			if (new_msk)
 				mptcp_copy_inaddrs(new_msk, child);
-			subflow_drop_ctx(child);
+			mptcp_subflow_drop_ctx(child);
 			goto out;
 		}
 
@@ -914,7 +915,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 	return child;
 
 dispose_child:
-	subflow_drop_ctx(child);
+	mptcp_subflow_drop_ctx(child);
 	tcp_rsk(req)->drop_req = true;
 	inet_csk_prepare_for_destroy_sock(child);
 	tcp_done(child);
@@ -1866,7 +1867,6 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
 		struct sock *sk = (struct sock *)msk;
 		bool do_cancel_work;
 
-		sock_hold(sk);
 		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
 		next = msk->dl_next;
 		msk->first = NULL;
@@ -1954,6 +1954,13 @@ static void subflow_ulp_release(struct sock *ssk)
 		 * when the subflow is still unaccepted
 		 */
 		release = ctx->disposable || list_empty(&ctx->node);
+
+		/* inet_child_forget() does not call sk_state_change(),
+		 * explicitly trigger the socket close machinery
+		 */
+		if (!release && !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW,
+						  &mptcp_sk(sk)->flags))
+			mptcp_schedule_work(sk);
 		sock_put(sk);
 	}
 

From 0a3f4f1f9c27215e4ddcd312558342e57b93e518 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 9 Mar 2023 15:50:00 +0100
Subject: [PATCH 157/411] mptcp: fix UaF in listener shutdown

As reported by Christoph after having refactored the passive
socket initialization, the mptcp listener shutdown path is prone
to an UaF issue.

  BUG: KASAN: use-after-free in _raw_spin_lock_bh+0x73/0xe0
  Write of size 4 at addr ffff88810cb23098 by task syz-executor731/1266

  CPU: 1 PID: 1266 Comm: syz-executor731 Not tainted 6.2.0-rc59af4eaa31c1f6c00c8f1e448ed99a45c66340dd5 #6
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
  Call Trace:
   <TASK>
   dump_stack_lvl+0x6e/0x91
   print_report+0x16a/0x46f
   kasan_report+0xad/0x130
   kasan_check_range+0x14a/0x1a0
   _raw_spin_lock_bh+0x73/0xe0
   subflow_error_report+0x6d/0x110
   sk_error_report+0x3b/0x190
   tcp_disconnect+0x138c/0x1aa0
   inet_child_forget+0x6f/0x2e0
   inet_csk_listen_stop+0x209/0x1060
   __mptcp_close_ssk+0x52d/0x610
   mptcp_destroy_common+0x165/0x640
   mptcp_destroy+0x13/0x80
   __mptcp_destroy_sock+0xe7/0x270
   __mptcp_close+0x70e/0x9b0
   mptcp_close+0x2b/0x150
   inet_release+0xe9/0x1f0
   __sock_release+0xd2/0x280
   sock_close+0x15/0x20
   __fput+0x252/0xa20
   task_work_run+0x169/0x250
   exit_to_user_mode_prepare+0x113/0x120
   syscall_exit_to_user_mode+0x1d/0x40
   do_syscall_64+0x48/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

The msk grace period can legitly expire in between the last
reference count dropped in mptcp_subflow_queue_clean() and
the later eventual access in inet_csk_listen_stop()

After the previous patch we don't need anymore special-casing
msk listener socket cleanup: the mptcp worker will process each
of the unaccepted msk sockets.

Just drop the now unnecessary code.

Please note this commit depends on the two parent ones:

  mptcp: refactor passive socket initialization
  mptcp: use the workqueue to destroy unaccepted sockets

Fixes: 6aeed9045071 ("mptcp: fix race on unaccepted mptcp sockets")
Cc: stable@vger.kernel.org
Reported-and-tested-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/346
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/protocol.c |  7 ++---
 net/mptcp/protocol.h |  1 -
 net/mptcp/subflow.c  | 72 --------------------------------------------
 3 files changed, 2 insertions(+), 78 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2a2093d61835c..60b23b2716c40 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2365,12 +2365,9 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		mptcp_subflow_drop_ctx(ssk);
 	} else {
 		/* otherwise tcp will dispose of the ssk and subflow ctx */
-		if (ssk->sk_state == TCP_LISTEN) {
-			tcp_set_state(ssk, TCP_CLOSE);
-			mptcp_subflow_queue_clean(sk, ssk);
-			inet_csk_listen_stop(ssk);
+		if (ssk->sk_state == TCP_LISTEN)
 			mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
-		}
+
 		__tcp_close(ssk, 0);
 
 		/* close acquired an extra ref */
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 3a2db1b862dd7..339a6f0729898 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -629,7 +629,6 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		     struct mptcp_subflow_context *subflow);
 void __mptcp_subflow_send_ack(struct sock *ssk);
 void mptcp_subflow_reset(struct sock *ssk);
-void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
 void mptcp_sock_graft(struct sock *sk, struct socket *parent);
 struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
 bool __mptcp_close(struct sock *sk, long timeout);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 932a3e0eb22de..9c57575df84c4 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1826,78 +1826,6 @@ static void subflow_state_change(struct sock *sk)
 	}
 }
 
-void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
-{
-	struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
-	struct mptcp_sock *msk, *next, *head = NULL;
-	struct request_sock *req;
-
-	/* build a list of all unaccepted mptcp sockets */
-	spin_lock_bh(&queue->rskq_lock);
-	for (req = queue->rskq_accept_head; req; req = req->dl_next) {
-		struct mptcp_subflow_context *subflow;
-		struct sock *ssk = req->sk;
-		struct mptcp_sock *msk;
-
-		if (!sk_is_mptcp(ssk))
-			continue;
-
-		subflow = mptcp_subflow_ctx(ssk);
-		if (!subflow || !subflow->conn)
-			continue;
-
-		/* skip if already in list */
-		msk = mptcp_sk(subflow->conn);
-		if (msk->dl_next || msk == head)
-			continue;
-
-		msk->dl_next = head;
-		head = msk;
-	}
-	spin_unlock_bh(&queue->rskq_lock);
-	if (!head)
-		return;
-
-	/* can't acquire the msk socket lock under the subflow one,
-	 * or will cause ABBA deadlock
-	 */
-	release_sock(listener_ssk);
-
-	for (msk = head; msk; msk = next) {
-		struct sock *sk = (struct sock *)msk;
-		bool do_cancel_work;
-
-		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
-		next = msk->dl_next;
-		msk->first = NULL;
-		msk->dl_next = NULL;
-
-		do_cancel_work = __mptcp_close(sk, 0);
-		release_sock(sk);
-		if (do_cancel_work) {
-			/* lockdep will report a false positive ABBA deadlock
-			 * between cancel_work_sync and the listener socket.
-			 * The involved locks belong to different sockets WRT
-			 * the existing AB chain.
-			 * Using a per socket key is problematic as key
-			 * deregistration requires process context and must be
-			 * performed at socket disposal time, in atomic
-			 * context.
-			 * Just tell lockdep to consider the listener socket
-			 * released here.
-			 */
-			mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
-			mptcp_cancel_work(sk);
-			mutex_acquire(&listener_sk->sk_lock.dep_map,
-				      SINGLE_DEPTH_NESTING, 0, _RET_IP_);
-		}
-		sock_put(sk);
-	}
-
-	/* we are still under the listener msk socket lock */
-	lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
-}
-
 static int subflow_ulp_init(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);

From 840742b7ed0e123e47af9c5c3902746f3d6b64a2 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 9 Mar 2023 15:50:01 +0100
Subject: [PATCH 158/411] selftests: mptcp: userspace pm: fix printed values

In case of errors, the printed message had the expected and the seen
value inverted.

This patch simply correct the order: first the expected value, then the
one that has been seen.

Fixes: 10d4273411be ("selftests: mptcp: userspace: print error details if any")
Cc: stable@vger.kernel.org
Acked-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/mptcp/userspace_pm.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 66c5be25c13d0..48e52f995a98c 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -240,7 +240,7 @@ check_expected_one()
 	fi
 
 	stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \
-		"${var}" "${!var}" "${!exp}"
+		"${var}" "${!exp}" "${!var}"
 	return 1
 }
 

From 822467a48e938e661965d09df5fcac66f7291050 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 9 Mar 2023 15:50:02 +0100
Subject: [PATCH 159/411] mptcp: add ro_after_init for tcp{,v6}_prot_override

Add __ro_after_init labels for the variables tcp_prot_override and
tcpv6_prot_override, just like other variables adjacent to them, to
indicate that they are initialised from the init hooks and no writes
occur afterwards.

Fixes: b19bc2945b40 ("mptcp: implement delegated actions")
Cc: stable@vger.kernel.org
Fixes: 51fa7f8ebf0e ("mptcp: mark ops structures as ro_after_init")
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/subflow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9c57575df84c4..2aadc87333690 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -628,7 +628,7 @@ static struct request_sock_ops mptcp_subflow_v6_request_sock_ops __ro_after_init
 static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
 static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
 static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
-static struct proto tcpv6_prot_override;
+static struct proto tcpv6_prot_override __ro_after_init;
 
 static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
@@ -926,7 +926,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 }
 
 static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
-static struct proto tcp_prot_override;
+static struct proto tcp_prot_override __ro_after_init;
 
 enum mapping_status {
 	MAPPING_OK,

From 3ba14528684f528566fb7d956bfbfb958b591d86 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 9 Mar 2023 15:50:03 +0100
Subject: [PATCH 160/411] mptcp: avoid setting TCP_CLOSE state twice

tcp_set_state() is called from tcp_done() already.

There is then no need to first set the state to TCP_CLOSE, then call
tcp_done().

Fixes: d582484726c4 ("mptcp: fix fallback for MP_JOIN subflows")
Cc: stable@vger.kernel.org
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/362
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/subflow.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 2aadc87333690..a0041360ee9d9 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -406,7 +406,6 @@ void mptcp_subflow_reset(struct sock *ssk)
 	/* must hold: tcp_done() could drop last reference on parent */
 	sock_hold(sk);
 
-	tcp_set_state(ssk, TCP_CLOSE);
 	tcp_send_active_reset(ssk, GFP_ATOMIC);
 	tcp_done(ssk);
 	if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags) &&

From cee4034a3db1d30c3243dd51506a9d4ab1a849fa Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 9 Mar 2023 15:50:04 +0100
Subject: [PATCH 161/411] mptcp: fix lockdep false positive in
 mptcp_pm_nl_create_listen_socket()

Christoph reports a lockdep splat in the mptcp_subflow_create_socket()
error path, when such function is invoked by
mptcp_pm_nl_create_listen_socket().

Such code path acquires two separates, nested socket lock, with the
internal lock operation lacking the "nested" annotation. Adding that
in sock_release() for mptcp's sake only could be confusing.

Instead just add a new lockclass to the in-kernel msk socket,
re-initializing the lockdep infra after the socket creation.

Fixes: ad2171009d96 ("mptcp: fix locking for in-kernel listener creation")
Cc: stable@vger.kernel.org
Reported-by: Christoph Paasch <cpaasch@apple.com>
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/354
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Tested-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mptcp/pm_netlink.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 56628b52d1001..5c8dea49626c3 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -997,9 +997,13 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
 	return ret;
 }
 
+static struct lock_class_key mptcp_slock_keys[2];
+static struct lock_class_key mptcp_keys[2];
+
 static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
 					    struct mptcp_pm_addr_entry *entry)
 {
+	bool is_ipv6 = sk->sk_family == AF_INET6;
 	int addrlen = sizeof(struct sockaddr_in);
 	struct sockaddr_storage addr;
 	struct socket *ssock;
@@ -1016,6 +1020,18 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
 	if (!newsk)
 		return -EINVAL;
 
+	/* The subflow socket lock is acquired in a nested to the msk one
+	 * in several places, even by the TCP stack, and this msk is a kernel
+	 * socket: lockdep complains. Instead of propagating the _nested
+	 * modifiers in several places, re-init the lock class for the msk
+	 * socket to an mptcp specific one.
+	 */
+	sock_lock_init_class_and_name(newsk,
+				      is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
+				      &mptcp_slock_keys[is_ipv6],
+				      is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
+				      &mptcp_keys[is_ipv6]);
+
 	lock_sock(newsk);
 	ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
 	release_sock(newsk);

From 47053904e18282af4525a02e3e0f519f014fc7f9 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 24 Feb 2023 19:16:40 +0000
Subject: [PATCH 162/411] KVM: arm64: timers: Convert per-vcpu virtual offset
 to a global value

Having a per-vcpu virtual offset is a pain. It needs to be synchronized
on each update, and expands badly to a setup where different timers can
have different offsets, or have composite offsets (as with NV).

So let's start by replacing the use of the CNTVOFF_EL2 shadow register
(which we want to reclaim for NV anyway), and make the virtual timer
carry a pointer to a VM-wide offset.

This simplifies the code significantly. It also addresses two terrible bugs:

- The use of CNTVOFF_EL2 leads to some nice offset corruption
  when the sysreg gets reset, as reported by Joey.

- The kvm mutex is taken from a vcpu ioctl, which goes against
  the locking rules...

Reported-by: Joey Gouly <joey.gouly@arm.com>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230224173915.GA17407@e124191.cambridge.arm.com
Tested-by: Joey Gouly <joey.gouly@arm.com>
Link: https://lore.kernel.org/r/20230224191640.3396734-1-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
---
 arch/arm64/include/asm/kvm_host.h |  3 +++
 arch/arm64/kvm/arch_timer.c       | 45 +++++++------------------------
 arch/arm64/kvm/hypercalls.c       |  2 +-
 include/kvm/arm_arch_timer.h      | 15 +++++++++++
 4 files changed, 29 insertions(+), 36 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index a1892a8f60323..bcd774d74f349 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -193,6 +193,9 @@ struct kvm_arch {
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 
+	/* Timers */
+	struct arch_timer_vm_data timer_data;
+
 	/* Mandated version of PSCI */
 	u32 psci_version;
 
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 00610477ec7bd..e1af4301b913d 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -84,14 +84,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
 
 static u64 timer_get_offset(struct arch_timer_context *ctxt)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	if (ctxt->offset.vm_offset)
+		return *ctxt->offset.vm_offset;
 
-	switch(arch_timer_ctx_index(ctxt)) {
-	case TIMER_VTIMER:
-		return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
-	default:
-		return 0;
-	}
+	return 0;
 }
 
 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
@@ -128,15 +124,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
 
 static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
 {
-	struct kvm_vcpu *vcpu = ctxt->vcpu;
-
-	switch(arch_timer_ctx_index(ctxt)) {
-	case TIMER_VTIMER:
-		__vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset;
-		break;
-	default:
+	if (!ctxt->offset.vm_offset) {
 		WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
+		return;
 	}
+
+	WRITE_ONCE(*ctxt->offset.vm_offset, offset);
 }
 
 u64 kvm_phys_timer_read(void)
@@ -765,25 +758,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-/* Make the updates of cntvoff for all vtimer contexts atomic */
-static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
-{
-	unsigned long i;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_vcpu *tmp;
-
-	mutex_lock(&kvm->lock);
-	kvm_for_each_vcpu(i, tmp, kvm)
-		timer_set_offset(vcpu_vtimer(tmp), cntvoff);
-
-	/*
-	 * When called from the vcpu create path, the CPU being created is not
-	 * included in the loop above, so we just set it here as well.
-	 */
-	timer_set_offset(vcpu_vtimer(vcpu), cntvoff);
-	mutex_unlock(&kvm->lock);
-}
-
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -791,10 +765,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 	struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
 
 	vtimer->vcpu = vcpu;
+	vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
 	ptimer->vcpu = vcpu;
 
 	/* Synchronize cntvoff across all vtimers of a VM. */
-	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
+	timer_set_offset(vtimer, kvm_phys_timer_read());
 	timer_set_offset(ptimer, 0);
 
 	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
@@ -840,7 +815,7 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
 		break;
 	case KVM_REG_ARM_TIMER_CNT:
 		timer = vcpu_vtimer(vcpu);
-		update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
+		timer_set_offset(timer, kvm_phys_timer_read() - value);
 		break;
 	case KVM_REG_ARM_TIMER_CVAL:
 		timer = vcpu_vtimer(vcpu);
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 64c086c02c603..5da884e11337a 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -44,7 +44,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
 	feature = smccc_get_arg1(vcpu);
 	switch (feature) {
 	case KVM_PTP_VIRT_COUNTER:
-		cycles = systime_snapshot.cycles - vcpu_read_sys_reg(vcpu, CNTVOFF_EL2);
+		cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
 		break;
 	case KVM_PTP_PHYS_COUNTER:
 		cycles = systime_snapshot.cycles;
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index 71916de7c6c4d..c52a6e6839da9 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -23,6 +23,19 @@ enum kvm_arch_timer_regs {
 	TIMER_REG_CTL,
 };
 
+struct arch_timer_offset {
+	/*
+	 * If set, pointer to one of the offsets in the kvm's offset
+	 * structure. If NULL, assume a zero offset.
+	 */
+	u64	*vm_offset;
+};
+
+struct arch_timer_vm_data {
+	/* Offset applied to the virtual timer/counter */
+	u64	voffset;
+};
+
 struct arch_timer_context {
 	struct kvm_vcpu			*vcpu;
 
@@ -32,6 +45,8 @@ struct arch_timer_context {
 	/* Emulated Timer (may be unused) */
 	struct hrtimer			hrtimer;
 
+	/* Offset for this counter/timer */
+	struct arch_timer_offset	offset;
 	/*
 	 * We have multiple paths which can save/restore the timer state onto
 	 * the hardware, so we need some way of keeping track of where the

From cb090e64cf25602b9adaf32d5dfc9c8bec493cd1 Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz@163.com>
Date: Fri, 10 Mar 2023 16:40:07 +0800
Subject: [PATCH 163/411] hwmon: (xgene) Fix use after free bug in
 xgene_hwmon_remove due to race condition

In xgene_hwmon_probe, &ctx->workq is bound with xgene_hwmon_evt_work.
Then it will be started.

If we remove the driver which will call xgene_hwmon_remove to clean up,
there may be unfinished work.

The possible sequence is as follows:

Fix it by finishing the work before cleanup in xgene_hwmon_remove.

CPU0                  CPU1

                    |xgene_hwmon_evt_work
xgene_hwmon_remove   |
kfifo_free(&ctx->async_msg_fifo);|
                    |
                    |kfifo_out_spinlocked
                    |//use &ctx->async_msg_fifo
Fixes: 2ca492e22cb7 ("hwmon: (xgene) Fix crash when alarm occurs before driver probe")
Signed-off-by: Zheng Wang <zyytlz.wz@163.com>
Link: https://lore.kernel.org/r/20230310084007.1403388-1-zyytlz.wz@163.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/xgene-hwmon.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/xgene-hwmon.c b/drivers/hwmon/xgene-hwmon.c
index 5cde837bfd094..d1abea49f01be 100644
--- a/drivers/hwmon/xgene-hwmon.c
+++ b/drivers/hwmon/xgene-hwmon.c
@@ -761,6 +761,7 @@ static int xgene_hwmon_remove(struct platform_device *pdev)
 {
 	struct xgene_hwmon_dev *ctx = platform_get_drvdata(pdev);
 
+	cancel_work_sync(&ctx->workq);
 	hwmon_device_unregister(ctx->hwmon_dev);
 	kfifo_free(&ctx->async_msg_fifo);
 	if (acpi_disabled)

From c93f5e2ab53243b17febabb9422a697017d3d49a Mon Sep 17 00:00:00 2001
From: Marcus Folkesson <marcus.folkesson@gmail.com>
Date: Fri, 10 Mar 2023 08:50:35 +0100
Subject: [PATCH 164/411] hwmon: (ina3221) return prober error code

ret is set to 0 which do not indicate an error.
Return -EINVAL instead.

Fixes: a9e9dd9c6de5 ("hwmon: (ina3221) Read channel input source info from DT")
Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Link: https://lore.kernel.org/r/20230310075035.246083-1-marcus.folkesson@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ina3221.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/ina3221.c b/drivers/hwmon/ina3221.c
index e06186986444e..f3a4c5633b1ea 100644
--- a/drivers/hwmon/ina3221.c
+++ b/drivers/hwmon/ina3221.c
@@ -772,7 +772,7 @@ static int ina3221_probe_child_from_dt(struct device *dev,
 		return ret;
 	} else if (val > INA3221_CHANNEL3) {
 		dev_err(dev, "invalid reg %d of %pOFn\n", val, child);
-		return ret;
+		return -EINVAL;
 	}
 
 	input = &ina->inputs[val];

From 4783b9cb374af02d49740e00e2da19fd4ed6dec4 Mon Sep 17 00:00:00 2001
From: Yazen Ghannam <yazen.ghannam@amd.com>
Date: Wed, 1 Mar 2023 22:14:20 +0000
Subject: [PATCH 165/411] x86/mce: Make sure logged MCEs are processed after
 sysfs update

A recent change introduced a flag to queue up errors found during
boot-time polling. These errors will be processed during late init once
the MCE subsystem is fully set up.

A number of sysfs updates call mce_restart() which goes through a subset
of the CPU init flow. This includes polling MCA banks and logging any
errors found. Since the same function is used as boot-time polling,
errors will be queued. However, the system is now past late init, so the
errors will remain queued until another error is found and the workqueue
is triggered.

Call mce_schedule_work() at the end of mce_restart() so that queued
errors are processed.

Fixes: 3bff147b187d ("x86/mce: Defer processing of early errors")
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20230301221420.2203184-1-yazen.ghannam@amd.com
---
 arch/x86/kernel/cpu/mce/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 7832a69d170e7..2eec60f50057a 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -2355,6 +2355,7 @@ static void mce_restart(void)
 {
 	mce_timer_delete_all();
 	on_each_cpu(mce_cpu_restart, NULL, 1);
+	mce_schedule_work();
 }
 
 /* Toggle features for corrected errors */

From 8d655e65237643c48ada2c131b83679bf1105373 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Sun, 12 Mar 2023 09:03:12 -0700
Subject: [PATCH 166/411] hwmon: (ucd90320) Add minimum delay between bus
 accesses

When probing the ucd90320 access to some of the registers randomly fails.
Sometimes it NACKs a transfer, sometimes it returns just random data and
the PEC check fails.

Experimentation shows that this seems to be triggered by a register access
directly back to back with a previous register write. Experimentation also
shows that inserting a small delay after register writes makes the issue go
away.

Use a similar solution to what the max15301 driver does to solve the same
problem. Create a custom set of bus read and write functions that make sure
that the delay is added.

Fixes: a470f11c5ba2 ("hwmon: (pmbus/ucd9000) Add support for UCD90320 Power Sequencer")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Link: https://lore.kernel.org/r/20230312160312.2227405-1-lars@metafoo.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/ucd9000.c | 75 +++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/drivers/hwmon/pmbus/ucd9000.c b/drivers/hwmon/pmbus/ucd9000.c
index 75fc770c9e403..3daaf22378322 100644
--- a/drivers/hwmon/pmbus/ucd9000.c
+++ b/drivers/hwmon/pmbus/ucd9000.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
@@ -16,6 +17,7 @@
 #include <linux/i2c.h>
 #include <linux/pmbus.h>
 #include <linux/gpio/driver.h>
+#include <linux/timekeeping.h>
 #include "pmbus.h"
 
 enum chips { ucd9000, ucd90120, ucd90124, ucd90160, ucd90320, ucd9090,
@@ -65,6 +67,7 @@ struct ucd9000_data {
 	struct gpio_chip gpio;
 #endif
 	struct dentry *debugfs;
+	ktime_t write_time;
 };
 #define to_ucd9000_data(_info) container_of(_info, struct ucd9000_data, info)
 
@@ -73,6 +76,73 @@ struct ucd9000_debugfs_entry {
 	u8 index;
 };
 
+/*
+ * It has been observed that the UCD90320 randomly fails register access when
+ * doing another access right on the back of a register write. To mitigate this
+ * make sure that there is a minimum delay between a write access and the
+ * following access. The 250us is based on experimental data. At a delay of
+ * 200us the issue seems to go away. Add a bit of extra margin to allow for
+ * system to system differences.
+ */
+#define UCD90320_WAIT_DELAY_US 250
+
+static inline void ucd90320_wait(const struct ucd9000_data *data)
+{
+	s64 delta = ktime_us_delta(ktime_get(), data->write_time);
+
+	if (delta < UCD90320_WAIT_DELAY_US)
+		udelay(UCD90320_WAIT_DELAY_US - delta);
+}
+
+static int ucd90320_read_word_data(struct i2c_client *client, int page,
+				   int phase, int reg)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct ucd9000_data *data = to_ucd9000_data(info);
+
+	if (reg >= PMBUS_VIRT_BASE)
+		return -ENXIO;
+
+	ucd90320_wait(data);
+	return pmbus_read_word_data(client, page, phase, reg);
+}
+
+static int ucd90320_read_byte_data(struct i2c_client *client, int page, int reg)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct ucd9000_data *data = to_ucd9000_data(info);
+
+	ucd90320_wait(data);
+	return pmbus_read_byte_data(client, page, reg);
+}
+
+static int ucd90320_write_word_data(struct i2c_client *client, int page,
+				    int reg, u16 word)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct ucd9000_data *data = to_ucd9000_data(info);
+	int ret;
+
+	ucd90320_wait(data);
+	ret = pmbus_write_word_data(client, page, reg, word);
+	data->write_time = ktime_get();
+
+	return ret;
+}
+
+static int ucd90320_write_byte(struct i2c_client *client, int page, u8 value)
+{
+	const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+	struct ucd9000_data *data = to_ucd9000_data(info);
+	int ret;
+
+	ucd90320_wait(data);
+	ret = pmbus_write_byte(client, page, value);
+	data->write_time = ktime_get();
+
+	return ret;
+}
+
 static int ucd9000_get_fan_config(struct i2c_client *client, int fan)
 {
 	int fan_config = 0;
@@ -598,6 +668,11 @@ static int ucd9000_probe(struct i2c_client *client)
 		info->read_byte_data = ucd9000_read_byte_data;
 		info->func[0] |= PMBUS_HAVE_FAN12 | PMBUS_HAVE_STATUS_FAN12
 		  | PMBUS_HAVE_FAN34 | PMBUS_HAVE_STATUS_FAN34;
+	} else if (mid->driver_data == ucd90320) {
+		info->read_byte_data = ucd90320_read_byte_data;
+		info->read_word_data = ucd90320_read_word_data;
+		info->write_byte = ucd90320_write_byte;
+		info->write_word_data = ucd90320_write_word_data;
 	}
 
 	ucd9000_probe_gpio(client, mid, data);

From 00d85e81796b17a29a0e096c5a4735daa47adef8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Sun, 12 Mar 2023 20:37:23 +0100
Subject: [PATCH 167/411] hwmon: tmp512: drop of_match_ptr for ID table
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The driver will match mostly by DT table (even thought there is regular
ID table) so there is little benefit in of_match_ptr (this also allows
ACPI matching via PRP0001, even though it might not be relevant here).
This also fixes !CONFIG_OF error:

  drivers/hwmon/tmp513.c:610:34: error: ‘tmp51x_of_match’ defined but not used [-Werror=unused-const-variable=]

Fixes: 59dfa75e5d82 ("hwmon: Add driver for Texas Instruments TMP512/513 sensor chips.")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230312193723.478032-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/tmp513.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwmon/tmp513.c b/drivers/hwmon/tmp513.c
index 47bbe47e062fd..7d5f7441aceb1 100644
--- a/drivers/hwmon/tmp513.c
+++ b/drivers/hwmon/tmp513.c
@@ -758,7 +758,7 @@ static int tmp51x_probe(struct i2c_client *client)
 static struct i2c_driver tmp51x_driver = {
 	.driver = {
 		.name	= "tmp51x",
-		.of_match_table = of_match_ptr(tmp51x_of_match),
+		.of_match_table = tmp51x_of_match,
 	},
 	.probe_new	= tmp51x_probe,
 	.id_table	= tmp51x_id,

From 5a522150093a0eabae9470a70a37a6e436bfad08 Mon Sep 17 00:00:00 2001
From: Gautam Dawar <gautam.dawar@amd.com>
Date: Wed, 1 Mar 2023 22:02:01 +0530
Subject: [PATCH 168/411] vhost-vdpa: free iommu domain after last use during
 cleanup

Currently vhost_vdpa_cleanup() unmaps the DMA mappings by calling
`iommu_unmap(v->domain, map->start, map->size);`
from vhost_vdpa_general_unmap() when the parent vDPA driver doesn't
provide DMA config operations.

However, the IOMMU domain referred to by `v->domain` is freed in
vhost_vdpa_free_domain() before vhost_vdpa_cleanup() in
vhost_vdpa_release() which results in NULL pointer de-reference.
Accordingly, moving the call to vhost_vdpa_free_domain() in
vhost_vdpa_cleanup() would makes sense. This will also help
detaching the dma device in error handling of vhost_vdpa_alloc_domain().

This issue was observed on terminating QEMU with SIGQUIT.

Fixes: 037d4305569a ("vhost-vdpa: call vhost_vdpa_cleanup during the release")
Signed-off-by: Gautam Dawar <gautam.dawar@amd.com>
Message-Id: <20230301163203.29883-1-gautam.dawar@amd.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
---
 drivers/vhost/vdpa.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index dc12dbd5b43ba..7be9d9d8f01c8 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -1169,6 +1169,7 @@ static int vhost_vdpa_alloc_domain(struct vhost_vdpa *v)
 
 err_attach:
 	iommu_domain_free(v->domain);
+	v->domain = NULL;
 	return ret;
 }
 
@@ -1213,6 +1214,7 @@ static void vhost_vdpa_cleanup(struct vhost_vdpa *v)
 			vhost_vdpa_remove_as(v, asid);
 	}
 
+	vhost_vdpa_free_domain(v);
 	vhost_dev_cleanup(&v->vdev);
 	kfree(v->vdev.vqs);
 }
@@ -1285,7 +1287,6 @@ static int vhost_vdpa_release(struct inode *inode, struct file *filep)
 	vhost_vdpa_clean_irq(v);
 	vhost_vdpa_reset(v);
 	vhost_dev_stop(&v->vdev);
-	vhost_vdpa_free_domain(v);
 	vhost_vdpa_config_put(v);
 	vhost_vdpa_cleanup(v);
 	mutex_unlock(&d->mutex);

From b4cca6d48eb3fa6f0d9caba4329b1a2b0ff67a77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= <eperezma@redhat.com>
Date: Thu, 2 Mar 2023 19:18:57 +0100
Subject: [PATCH 169/411] vdpa_sim: set last_used_idx as last_avail_idx in
 vdpasim_queue_ready
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Starting from an used_idx different than 0 is needed in use cases like
virtual machine migration.  Not doing so and letting the caller set an
avail idx different than 0 causes destination device to try to use old
buffers that source driver already recover and are not available
anymore.

Since vdpa_sim does not support receive inflight descriptors as a
destination of a migration, let's set both avail_idx and used_idx the
same at vq start.  This is how vhost-user works in a
VHOST_SET_VRING_BASE call.

Although the simple fix is to set last_used_idx at vdpasim_set_vq_state,
it would be reset at vdpasim_queue_ready.  The last_avail_idx case is
fixed with commit 0e84f918fac8 ("vdpa_sim: not reset state in
vdpasim_queue_ready").  Since the only option is to make it equal to
last_avail_idx, adding the only change needed here.

This was discovered and tested live migrating the vdpa_sim_net device.

Fixes: 2c53d0f64c06 ("vdpasim: vDPA device simulator")
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Message-Id: <20230302181857.925374-1-eperezma@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c
index 6a0a658146269..eea23c630f7c0 100644
--- a/drivers/vdpa/vdpa_sim/vdpa_sim.c
+++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c
@@ -68,6 +68,17 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx)
 			  (uintptr_t)vq->device_addr);
 
 	vq->vring.last_avail_idx = last_avail_idx;
+
+	/*
+	 * Since vdpa_sim does not support receive inflight descriptors as a
+	 * destination of a migration, let's set both avail_idx and used_idx
+	 * the same at vq start.  This is how vhost-user works in a
+	 * VHOST_SET_VRING_BASE call.
+	 *
+	 * Although the simple fix is to set last_used_idx at
+	 * vdpasim_set_vq_state, it would be reset at vdpasim_queue_ready.
+	 */
+	vq->vring.last_used_idx = last_avail_idx;
 	vq->vring.notify = vdpasim_vq_notify;
 }
 

From ae43c20da2a77c508715a9c77845b4e87e6a1e25 Mon Sep 17 00:00:00 2001
From: Rong Tao <rongtao@cestc.cn>
Date: Thu, 9 Mar 2023 14:13:07 +0800
Subject: [PATCH 170/411] tools/virtio: Ignore virtio-trace/trace-agent

since commit 108fc82596e3("tools: Add guest trace agent as a user tool")
introduce virtio-trace/trace-agent, it should be ignored in the git tree.

Signed-off-by: Rong Tao <rongtao@cestc.cn>
Message-Id: <tencent_52B2BC2F47540A5FEB46E710BD0C8485B409@qq.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 tools/virtio/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 075588c4da081..9934d48d9a557 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -2,3 +2,4 @@
 *.d
 virtio_test
 vringh_test
+virtio-trace/trace-agent

From a52e5cdbe8016d4e3e6322fd93d71afddb9a5af9 Mon Sep 17 00:00:00 2001
From: Sven Schnelle <svens@linux.ibm.com>
Date: Tue, 7 Mar 2023 14:35:23 +0100
Subject: [PATCH 171/411] s390/ipl: add missing intersection check to
 ipl_report handling

The code which handles the ipl report is searching for a free location
in memory where it could copy the component and certificate entries to.
It checks for intersection between the sections required for the kernel
and the component/certificate data area, but fails to check whether
the data structures linking these data areas together intersect.

This might cause the iplreport copy code to overwrite the iplreport
itself. Fix this by adding two addtional intersection checks.

Cc: <stable@vger.kernel.org>
Fixes: 9641b8cc733f ("s390/ipl: read IPL report at early boot")
Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/boot/ipl_report.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
index 9b14045065b6e..74b5cd2648622 100644
--- a/arch/s390/boot/ipl_report.c
+++ b/arch/s390/boot/ipl_report.c
@@ -57,11 +57,19 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
 	if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size &&
 	    intersects(initrd_data.start, initrd_data.size, safe_addr, size))
 		safe_addr = initrd_data.start + initrd_data.size;
+	if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) {
+		safe_addr = (unsigned long)comps + comps->len;
+		goto repeat;
+	}
 	for_each_rb_entry(comp, comps)
 		if (intersects(safe_addr, size, comp->addr, comp->len)) {
 			safe_addr = comp->addr + comp->len;
 			goto repeat;
 		}
+	if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
+		safe_addr = (unsigned long)certs + certs->len;
+		goto repeat;
+	}
 	for_each_rb_entry(cert, certs)
 		if (intersects(safe_addr, size, cert->addr, cert->len)) {
 			safe_addr = cert->addr + cert->len;

From ab909509850b27fd39b8ba99e44cda39dbc3858c Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Mon, 6 Mar 2023 16:10:11 +0100
Subject: [PATCH 172/411] PCI: s390: Fix use-after-free of PCI resources with
 per-function hotplug

On s390 PCI functions may be hotplugged individually even when they
belong to a multi-function device. In particular on an SR-IOV device VFs
may be removed and later re-added.

In commit a50297cf8235 ("s390/pci: separate zbus creation from
scanning") it was missed however that struct pci_bus and struct
zpci_bus's resource list retained a reference to the PCI functions MMIO
resources even though those resources are released and freed on
hot-unplug. These stale resources may subsequently be claimed when the
PCI function re-appears resulting in use-after-free.

One idea of fixing this use-after-free in s390 specific code that was
investigated was to simply keep resources around from the moment a PCI
function first appeared until the whole virtual PCI bus created for
a multi-function device disappears. The problem with this however is
that due to the requirement of artificial MMIO addreesses (address
cookies) extra logic is then needed to keep the address cookies
compatible on re-plug. At the same time the MMIO resources semantically
belong to the PCI function so tying their lifecycle to the function
seems more logical.

Instead a simpler approach is to remove the resources of an individually
hot-unplugged PCI function from the PCI bus's resource list while
keeping the resources of other PCI functions on the PCI bus untouched.

This is done by introducing pci_bus_remove_resource() to remove an
individual resource. Similarly the resource also needs to be removed
from the struct zpci_bus's resource list. It turns out however, that
there is really no need to add the MMIO resources to the struct
zpci_bus's resource list at all and instead we can simply use the
zpci_bar_struct's resource pointer directly.

Fixes: a50297cf8235 ("s390/pci: separate zbus creation from scanning")
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/20230306151014.60913-2-schnelle@linux.ibm.com
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/pci/pci.c     | 16 ++++++++++------
 arch/s390/pci/pci_bus.c | 12 +++++-------
 arch/s390/pci/pci_bus.h |  3 +--
 drivers/pci/bus.c       | 21 +++++++++++++++++++++
 include/linux/pci.h     |  1 +
 5 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index ef38b1514c77a..e16afacc8fd1b 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -544,8 +544,7 @@ static struct resource *__alloc_res(struct zpci_dev *zdev, unsigned long start,
 	return r;
 }
 
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
-			     struct list_head *resources)
+int zpci_setup_bus_resources(struct zpci_dev *zdev)
 {
 	unsigned long addr, size, flags;
 	struct resource *res;
@@ -581,7 +580,6 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
 			return -ENOMEM;
 		}
 		zdev->bars[i].res = res;
-		pci_add_resource(resources, res);
 	}
 	zdev->has_resources = 1;
 
@@ -590,17 +588,23 @@ int zpci_setup_bus_resources(struct zpci_dev *zdev,
 
 static void zpci_cleanup_bus_resources(struct zpci_dev *zdev)
 {
+	struct resource *res;
 	int i;
 
+	pci_lock_rescan_remove();
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-		if (!zdev->bars[i].size || !zdev->bars[i].res)
+		res = zdev->bars[i].res;
+		if (!res)
 			continue;
 
+		release_resource(res);
+		pci_bus_remove_resource(zdev->zbus->bus, res);
 		zpci_free_iomap(zdev, zdev->bars[i].map_idx);
-		release_resource(zdev->bars[i].res);
-		kfree(zdev->bars[i].res);
+		zdev->bars[i].res = NULL;
+		kfree(res);
 	}
 	zdev->has_resources = 0;
+	pci_unlock_rescan_remove();
 }
 
 int pcibios_device_add(struct pci_dev *pdev)
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 6a8da1b742ae5..a99926af2b69a 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -41,9 +41,7 @@ static int zpci_nb_devices;
  */
 static int zpci_bus_prepare_device(struct zpci_dev *zdev)
 {
-	struct resource_entry *window, *n;
-	struct resource *res;
-	int rc;
+	int rc, i;
 
 	if (!zdev_enabled(zdev)) {
 		rc = zpci_enable_device(zdev);
@@ -57,10 +55,10 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
 	}
 
 	if (!zdev->has_resources) {
-		zpci_setup_bus_resources(zdev, &zdev->zbus->resources);
-		resource_list_for_each_entry_safe(window, n, &zdev->zbus->resources) {
-			res = window->res;
-			pci_bus_add_resource(zdev->zbus->bus, res, 0);
+		zpci_setup_bus_resources(zdev);
+		for (i = 0; i < PCI_STD_NUM_BARS; i++) {
+			if (zdev->bars[i].res)
+				pci_bus_add_resource(zdev->zbus->bus, zdev->bars[i].res, 0);
 		}
 	}
 
diff --git a/arch/s390/pci/pci_bus.h b/arch/s390/pci/pci_bus.h
index e96c9860e0644..af9f0ac79a1b1 100644
--- a/arch/s390/pci/pci_bus.h
+++ b/arch/s390/pci/pci_bus.h
@@ -30,8 +30,7 @@ static inline void zpci_zdev_get(struct zpci_dev *zdev)
 
 int zpci_alloc_domain(int domain);
 void zpci_free_domain(int domain);
-int zpci_setup_bus_resources(struct zpci_dev *zdev,
-			     struct list_head *resources);
+int zpci_setup_bus_resources(struct zpci_dev *zdev);
 
 static inline struct zpci_dev *zdev_from_bus(struct pci_bus *bus,
 					     unsigned int devfn)
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 83ae838ceb5f0..549c4bd5caeca 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -76,6 +76,27 @@ struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n)
 }
 EXPORT_SYMBOL_GPL(pci_bus_resource_n);
 
+void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res)
+{
+	struct pci_bus_resource *bus_res, *tmp;
+	int i;
+
+	for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) {
+		if (bus->resource[i] == res) {
+			bus->resource[i] = NULL;
+			return;
+		}
+	}
+
+	list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) {
+		if (bus_res->res == res) {
+			list_del(&bus_res->list);
+			kfree(bus_res);
+			return;
+		}
+	}
+}
+
 void pci_bus_remove_resources(struct pci_bus *bus)
 {
 	int i;
diff --git a/include/linux/pci.h b/include/linux/pci.h
index fafd8020c6d7f..b50e5c79f7e32 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1438,6 +1438,7 @@ void pci_bus_add_resource(struct pci_bus *bus, struct resource *res,
 			  unsigned int flags);
 struct resource *pci_bus_resource_n(const struct pci_bus *bus, int n);
 void pci_bus_remove_resources(struct pci_bus *bus);
+void pci_bus_remove_resource(struct pci_bus *bus, struct resource *res);
 int devm_request_pci_bus_resources(struct device *dev,
 				   struct list_head *resources);
 

From d7a0bdbf17276b757d2b89f5351bbee9ecf58fe6 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Fri, 10 Mar 2023 14:19:55 +0100
Subject: [PATCH 173/411] s390: update defconfigs

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
---
 arch/s390/configs/debug_defconfig    | 13 ++++---------
 arch/s390/configs/defconfig          | 12 +++---------
 arch/s390/configs/zfcpdump_defconfig |  2 +-
 3 files changed, 8 insertions(+), 19 deletions(-)

diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 3c68fe49042c2..4ccf66d29fc24 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -23,7 +23,6 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_RDMA=y
 CONFIG_CGROUP_FREEZER=y
@@ -90,7 +89,6 @@ CONFIG_MINIX_SUBPARTITION=y
 CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
-CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
 CONFIG_ZSWAP=y
 CONFIG_ZSMALLOC_STAT=y
@@ -298,7 +296,6 @@ CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -340,7 +337,6 @@ CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_PRIO=m
@@ -351,7 +347,6 @@ CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
 CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_DRR=m
 CONFIG_NET_SCH_MQPRIO=m
@@ -363,14 +358,11 @@ CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_ETS=m
 CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
 CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_CGROUP=y
 CONFIG_NET_CLS_BPF=m
@@ -584,7 +576,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
@@ -828,6 +820,7 @@ CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_WQ_WATCHDOG=y
 CONFIG_TEST_LOCKUP=m
+CONFIG_DEBUG_PREEMPT=y
 CONFIG_PROVE_LOCKING=y
 CONFIG_LOCK_STAT=y
 CONFIG_DEBUG_ATOMIC_SLEEP=y
@@ -843,6 +836,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300
 # CONFIG_RCU_TRACE is not set
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
 CONFIG_IRQSOFF_TRACER=y
@@ -857,6 +851,7 @@ CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
 CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
 CONFIG_DEBUG_ENTRY=y
 CONFIG_CIO_INJECT=y
 CONFIG_KUNIT=m
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 9ab91632f74cc..693297a2e8973 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -21,7 +21,6 @@ CONFIG_NUMA_BALANCING=y
 CONFIG_MEMCG=y
 CONFIG_BLK_CGROUP=y
 CONFIG_CFS_BANDWIDTH=y
-CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_RDMA=y
 CONFIG_CGROUP_FREEZER=y
@@ -85,7 +84,6 @@ CONFIG_MINIX_SUBPARTITION=y
 CONFIG_SOLARIS_X86_PARTITION=y
 CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
-CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
 CONFIG_ZSWAP=y
 CONFIG_ZSMALLOC_STAT=y
@@ -289,7 +287,6 @@ CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IP_NF_NAT=m
 CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
-CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
 CONFIG_IP_NF_TARGET_TTL=m
 CONFIG_IP_NF_RAW=m
@@ -330,7 +327,6 @@ CONFIG_BRIDGE_MRP=y
 CONFIG_VLAN_8021Q=m
 CONFIG_VLAN_8021Q_GVRP=y
 CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_CBQ=m
 CONFIG_NET_SCH_HTB=m
 CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_PRIO=m
@@ -341,7 +337,6 @@ CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_TEQL=m
 CONFIG_NET_SCH_TBF=m
 CONFIG_NET_SCH_GRED=m
-CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_NETEM=m
 CONFIG_NET_SCH_DRR=m
 CONFIG_NET_SCH_MQPRIO=m
@@ -353,14 +348,11 @@ CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_ETS=m
 CONFIG_NET_CLS_BASIC=m
-CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_CLS_ROUTE4=m
 CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
 CONFIG_CLS_U32_MARK=y
-CONFIG_NET_CLS_RSVP=m
-CONFIG_NET_CLS_RSVP6=m
 CONFIG_NET_CLS_FLOW=m
 CONFIG_NET_CLS_CGROUP=y
 CONFIG_NET_CLS_BPF=m
@@ -573,7 +565,7 @@ CONFIG_DIAG288_WATCHDOG=m
 CONFIG_FB=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
 # CONFIG_USB_SUPPORT is not set
 CONFIG_INFINIBAND=m
 CONFIG_INFINIBAND_USER_ACCESS=m
@@ -795,6 +787,7 @@ CONFIG_RCU_REF_SCALE_TEST=m
 CONFIG_RCU_CPU_STALL_TIMEOUT=60
 CONFIG_LATENCYTOP=y
 CONFIG_BOOTTIME_TRACING=y
+CONFIG_FPROBE=y
 CONFIG_FUNCTION_PROFILER=y
 CONFIG_STACK_TRACER=y
 CONFIG_SCHED_TRACER=y
@@ -805,6 +798,7 @@ CONFIG_SAMPLES=y
 CONFIG_SAMPLE_TRACE_PRINTK=m
 CONFIG_SAMPLE_FTRACE_DIRECT=m
 CONFIG_SAMPLE_FTRACE_DIRECT_MULTI=m
+CONFIG_SAMPLE_FTRACE_OPS=m
 CONFIG_KUNIT=m
 CONFIG_KUNIT_DEBUGFS=y
 CONFIG_LKDTM=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index a9c0c81d1de99..33a232bb68af9 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -58,7 +58,7 @@ CONFIG_ZFCP=y
 # CONFIG_VMCP is not set
 # CONFIG_MONWRITER is not set
 # CONFIG_S390_VMUR is not set
-# CONFIG_HID is not set
+# CONFIG_HID_SUPPORT is not set
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_VHOST_MENU is not set
 # CONFIG_IOMMU_SUPPORT is not set

From ee892ea83d99610fa33bea612de058e0955eec3a Mon Sep 17 00:00:00 2001
From: Ashutosh Dixit <ashutosh.dixit@intel.com>
Date: Fri, 3 Feb 2023 07:53:09 -0800
Subject: [PATCH 174/411] drm/i915/hwmon: Enable PL1 power limit

Previous documentation suggested that PL1 power limit is always
enabled. However we now find this not to be the case on some
platforms (such as ATSM). Therefore enable PL1 power limit during hwmon
initialization.

Bspec: 51864

v2: Add Bspec reference (Gwan-gyeong)
v3: Add Fixes tag

Fixes: 99f55efb79114 ("drm/i915/hwmon: Power PL1 limit and TDP setting")
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230203155309.1042297-1-ashutosh.dixit@intel.com
(cherry picked from commit 0349c41b05968befaffa5fbb7e73d0ee6004f610)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_hwmon.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpu/drm/i915/i915_hwmon.c b/drivers/gpu/drm/i915/i915_hwmon.c
index 1225bc432f0d5..4683a5b96eff1 100644
--- a/drivers/gpu/drm/i915/i915_hwmon.c
+++ b/drivers/gpu/drm/i915/i915_hwmon.c
@@ -687,6 +687,11 @@ hwm_get_preregistration_info(struct drm_i915_private *i915)
 		for_each_gt(gt, i915, i)
 			hwm_energy(&hwmon->ddat_gt[i], &energy);
 	}
+
+	/* Enable PL1 power limit */
+	if (i915_mmio_reg_valid(hwmon->rg.pkg_rapl_limit))
+		hwm_locked_with_pm_intel_uncore_rmw(ddat, hwmon->rg.pkg_rapl_limit,
+						    PKG_PWR_LIM_1_EN, PKG_PWR_LIM_1_EN);
 }
 
 void i915_hwmon_register(struct drm_i915_private *i915)

From 897f453c106380e57600c19a0a0485ceb4f3b0d9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 20 Feb 2023 17:17:30 +0200
Subject: [PATCH 175/411] drm/i915: Fix audio ELD handling for DP MST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I forgot to call intel_audio_compute_config() on DP MST, which
means ELD doesn't get populated and passed to the audio driver.

References: https://gitlab.freedesktop.org/drm/intel/-/issues/8097
Fixes: 5d986635e296 ("drm/i915/audio: Precompute the ELD")
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230220151731.6852-1-ville.syrjala@linux.intel.com
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
(cherry picked from commit 518b761a7b0e2bb2fac2518f041c71b461adf761)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_dp_mst.c | 25 +++++++++++++--------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index 054a009e800d7..2106b3de225a0 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -265,6 +265,19 @@ static int intel_dp_mst_update_slots(struct intel_encoder *encoder,
 	return 0;
 }
 
+static bool intel_dp_mst_has_audio(const struct drm_connector_state *conn_state)
+{
+	const struct intel_digital_connector_state *intel_conn_state =
+		to_intel_digital_connector_state(conn_state);
+	struct intel_connector *connector =
+		to_intel_connector(conn_state->connector);
+
+	if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
+		return connector->port->has_audio;
+	else
+		return intel_conn_state->force_audio == HDMI_AUDIO_ON;
+}
+
 static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 				       struct intel_crtc_state *pipe_config,
 				       struct drm_connector_state *conn_state)
@@ -272,10 +285,6 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
 	struct intel_dp *intel_dp = &intel_mst->primary->dp;
-	struct intel_connector *connector =
-		to_intel_connector(conn_state->connector);
-	struct intel_digital_connector_state *intel_conn_state =
-		to_intel_digital_connector_state(conn_state);
 	const struct drm_display_mode *adjusted_mode =
 		&pipe_config->hw.adjusted_mode;
 	struct link_config_limits limits;
@@ -287,11 +296,9 @@ static int intel_dp_mst_compute_config(struct intel_encoder *encoder,
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
 	pipe_config->has_pch_encoder = false;
 
-	if (intel_conn_state->force_audio == HDMI_AUDIO_AUTO)
-		pipe_config->has_audio = connector->port->has_audio;
-	else
-		pipe_config->has_audio =
-			intel_conn_state->force_audio == HDMI_AUDIO_ON;
+	pipe_config->has_audio =
+		intel_dp_mst_has_audio(conn_state) &&
+		intel_audio_compute_config(encoder, pipe_config, conn_state);
 
 	/*
 	 * for MST we always configure max link bw - the spec doesn't

From 71c602103c74b277bef3d20a308874a33ec8326d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jouni=20H=C3=B6gander?= <jouni.hogander@intel.com>
Date: Tue, 21 Feb 2023 10:53:04 +0200
Subject: [PATCH 176/411] drm/i915/psr: Use calculated io and fast wake lines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently we are using hardcoded 7 for io and fast wake lines.

According to Bspec io and fast wake times are both 42us for
DISPLAY_VER >= 12 and 50us and 32us for older platforms.

Calculate line counts for these and configure them into PSR2_CTL
accordingly

Use 45 us for the fast wake calculation as 42 seems to be too
tight based on testing.

Bspec: 49274, 4289

Cc: Mika Kahola <mika.kahola@intel.com>
Cc: José Roberto de Souza <jose.souza@intel.com>
Fixes: 64cf40a125ff ("drm/i915/psr: Program default IO buffer Wake and Fast Wake")
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/7725
Signed-off-by: Jouni Högander <jouni.hogander@intel.com>
Reviewed-by: Stanislav Lisovskiy <stanislav.lisovskiy@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230221085304.3382297-1-jouni.hogander@intel.com
(cherry picked from commit cb42e8ede5b475c096e473b86c356b1158b4bc3b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 .../drm/i915/display/intel_display_types.h    |  2 +
 drivers/gpu/drm/i915/display/intel_psr.c      | 78 +++++++++++++++----
 2 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 54c517ca9632f..582234f0c49ac 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -1631,6 +1631,8 @@ struct intel_psr {
 	bool psr2_sel_fetch_cff_enabled;
 	bool req_psr2_sdp_prior_scanline;
 	u8 sink_sync_latency;
+	u8 io_wake_lines;
+	u8 fast_wake_lines;
 	ktime_t last_entry_attempt;
 	ktime_t last_exit;
 	bool sink_not_reliable;
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 7a72e15e68369..9f1a0bebae240 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -542,6 +542,14 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 	val |= EDP_PSR2_FRAME_BEFORE_SU(max_t(u8, intel_dp->psr.sink_sync_latency + 1, 2));
 	val |= intel_psr2_get_tp_time(intel_dp);
 
+	if (DISPLAY_VER(dev_priv) >= 12) {
+		if (intel_dp->psr.io_wake_lines < 9 &&
+		    intel_dp->psr.fast_wake_lines < 9)
+			val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
+		else
+			val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_3;
+	}
+
 	/* Wa_22012278275:adl-p */
 	if (IS_ADLP_DISPLAY_STEP(dev_priv, STEP_A0, STEP_E0)) {
 		static const u8 map[] = {
@@ -558,31 +566,21 @@ static void hsw_activate_psr2(struct intel_dp *intel_dp)
 		 * Still using the default IO_BUFFER_WAKE and FAST_WAKE, see
 		 * comments bellow for more information
 		 */
-		u32 tmp, lines = 7;
-
-		val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
+		u32 tmp;
 
-		tmp = map[lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES];
+		tmp = map[intel_dp->psr.io_wake_lines - TGL_EDP_PSR2_IO_BUFFER_WAKE_MIN_LINES];
 		tmp = tmp << TGL_EDP_PSR2_IO_BUFFER_WAKE_SHIFT;
 		val |= tmp;
 
-		tmp = map[lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES];
+		tmp = map[intel_dp->psr.fast_wake_lines - TGL_EDP_PSR2_FAST_WAKE_MIN_LINES];
 		tmp = tmp << TGL_EDP_PSR2_FAST_WAKE_MIN_SHIFT;
 		val |= tmp;
 	} else if (DISPLAY_VER(dev_priv) >= 12) {
-		/*
-		 * TODO: 7 lines of IO_BUFFER_WAKE and FAST_WAKE are default
-		 * values from BSpec. In order to setting an optimal power
-		 * consumption, lower than 4k resolution mode needs to decrease
-		 * IO_BUFFER_WAKE and FAST_WAKE. And higher than 4K resolution
-		 * mode needs to increase IO_BUFFER_WAKE and FAST_WAKE.
-		 */
-		val |= TGL_EDP_PSR2_BLOCK_COUNT_NUM_2;
-		val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(7);
-		val |= TGL_EDP_PSR2_FAST_WAKE(7);
+		val |= TGL_EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines);
+		val |= TGL_EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines);
 	} else if (DISPLAY_VER(dev_priv) >= 9) {
-		val |= EDP_PSR2_IO_BUFFER_WAKE(7);
-		val |= EDP_PSR2_FAST_WAKE(7);
+		val |= EDP_PSR2_IO_BUFFER_WAKE(intel_dp->psr.io_wake_lines);
+		val |= EDP_PSR2_FAST_WAKE(intel_dp->psr.fast_wake_lines);
 	}
 
 	if (intel_dp->psr.req_psr2_sdp_prior_scanline)
@@ -842,6 +840,46 @@ static bool _compute_psr2_sdp_prior_scanline_indication(struct intel_dp *intel_d
 	return true;
 }
 
+static bool _compute_psr2_wake_times(struct intel_dp *intel_dp,
+				     struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+	int io_wake_lines, io_wake_time, fast_wake_lines, fast_wake_time;
+	u8 max_wake_lines;
+
+	if (DISPLAY_VER(i915) >= 12) {
+		io_wake_time = 42;
+		/*
+		 * According to Bspec it's 42us, but based on testing
+		 * it is not enough -> use 45 us.
+		 */
+		fast_wake_time = 45;
+		max_wake_lines = 12;
+	} else {
+		io_wake_time = 50;
+		fast_wake_time = 32;
+		max_wake_lines = 8;
+	}
+
+	io_wake_lines = intel_usecs_to_scanlines(
+		&crtc_state->uapi.adjusted_mode, io_wake_time);
+	fast_wake_lines = intel_usecs_to_scanlines(
+		&crtc_state->uapi.adjusted_mode, fast_wake_time);
+
+	if (io_wake_lines > max_wake_lines ||
+	    fast_wake_lines > max_wake_lines)
+		return false;
+
+	if (i915->params.psr_safest_params)
+		io_wake_lines = fast_wake_lines = max_wake_lines;
+
+	/* According to Bspec lower limit should be set as 7 lines. */
+	intel_dp->psr.io_wake_lines = max(io_wake_lines, 7);
+	intel_dp->psr.fast_wake_lines = max(fast_wake_lines, 7);
+
+	return true;
+}
+
 static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 				    struct intel_crtc_state *crtc_state)
 {
@@ -936,6 +974,12 @@ static bool intel_psr2_config_valid(struct intel_dp *intel_dp,
 		return false;
 	}
 
+	if (!_compute_psr2_wake_times(intel_dp, crtc_state)) {
+		drm_dbg_kms(&dev_priv->drm,
+			    "PSR2 not enabled, Unable to use long enough wake times\n");
+		return false;
+	}
+
 	if (HAS_PSR2_SEL_FETCH(dev_priv)) {
 		if (!intel_psr2_sel_fetch_config_valid(intel_dp, crtc_state) &&
 		    !HAS_PSR_HW_TRACKING(dev_priv)) {

From 46bc23dcd94569270d02c4c1f7e62ae01ebd53bb Mon Sep 17 00:00:00 2001
From: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Date: Thu, 23 Feb 2023 10:06:19 +0530
Subject: [PATCH 177/411] drm/i915/dg2: Add HDMI pixel clock frequencies 267.30
 and 319.89 MHz

Add snps phy table values for HDMI pixel clocks 267.30 MHz and
319.89 MHz. Values are based on the Bspec algorithm for
PLL programming for HDMI.

Cc: stable@vger.kernel.org
Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8008
Signed-off-by: Ankit Nautiyal <ankit.k.nautiyal@intel.com>
Reviewed-by: Uma Shankar <uma.shankar@intel.com>
Signed-off-by: Uma Shankar <uma.shankar@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230223043619.3941382-1-ankit.k.nautiyal@intel.com
(cherry picked from commit d46746b8b13cbd377ffc733e465d25800459a31b)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/display/intel_snps_phy.c | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index c65c771f5c461..1cfb94b5cedbd 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -1419,6 +1419,36 @@ static const struct intel_mpllb_state dg2_hdmi_262750 = {
 		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
 };
 
+static const struct intel_mpllb_state dg2_hdmi_267300 = {
+	.clock = 267300,
+	.ref_control =
+		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
+	.mpllb_cp =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 7) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
+	.mpllb_div =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 3),
+	.mpllb_div2 =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 74) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
+	.mpllb_fracn1 =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
+	.mpllb_fracn2 =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 30146) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 36699),
+	.mpllb_sscen =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
+};
+
 static const struct intel_mpllb_state dg2_hdmi_268500 = {
 	.clock = 268500,
 	.ref_control =
@@ -1509,6 +1539,36 @@ static const struct intel_mpllb_state dg2_hdmi_241500 = {
 		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
 };
 
+static const struct intel_mpllb_state dg2_hdmi_319890 = {
+	.clock = 319890,
+	.ref_control =
+		REG_FIELD_PREP(SNPS_PHY_REF_CONTROL_REF_RANGE, 3),
+	.mpllb_cp =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT, 6) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP, 14) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_INT_GS, 64) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_CP_PROP_GS, 124),
+	.mpllb_div =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_DIV5_CLK_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_TX_CLK_DIV, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_PMIX_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_V2I, 2) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FREQ_VCO, 2),
+	.mpllb_div2 =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_REF_CLK_DIV, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_MULTIPLIER, 94) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_HDMI_DIV, 1),
+	.mpllb_fracn1 =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_CGG_UPDATE_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_EN, 1) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_DEN, 65535),
+	.mpllb_fracn2 =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_QUOT, 64094) |
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_FRACN_REM, 13631),
+	.mpllb_sscen =
+		REG_FIELD_PREP(SNPS_PHY_MPLLB_SSC_UP_SPREAD, 1),
+};
+
 static const struct intel_mpllb_state dg2_hdmi_497750 = {
 	.clock = 497750,
 	.ref_control =
@@ -1696,8 +1756,10 @@ static const struct intel_mpllb_state * const dg2_hdmi_tables[] = {
 	&dg2_hdmi_209800,
 	&dg2_hdmi_241500,
 	&dg2_hdmi_262750,
+	&dg2_hdmi_267300,
 	&dg2_hdmi_268500,
 	&dg2_hdmi_296703,
+	&dg2_hdmi_319890,
 	&dg2_hdmi_497750,
 	&dg2_hdmi_592000,
 	&dg2_hdmi_593407,

From 193c41926d152761764894f46e23b53c00186a82 Mon Sep 17 00:00:00 2001
From: Andrea Righi <andrea.righi@canonical.com>
Date: Mon, 20 Feb 2023 18:18:58 +0100
Subject: [PATCH 178/411] drm/i915/sseu: fix max_subslices
 array-index-out-of-bounds access

It seems that commit bc3c5e0809ae ("drm/i915/sseu: Don't try to store EU
mask internally in UAPI format") exposed a potential out-of-bounds
access, reported by UBSAN as following on a laptop with a gen 11 i915
card:

  UBSAN: array-index-out-of-bounds in drivers/gpu/drm/i915/gt/intel_sseu.c:65:27
  index 6 is out of range for type 'u16 [6]'
  CPU: 2 PID: 165 Comm: systemd-udevd Not tainted 6.2.0-9-generic #9-Ubuntu
  Hardware name: Dell Inc. XPS 13 9300/077Y9N, BIOS 1.11.0 03/22/2022
  Call Trace:
   <TASK>
   show_stack+0x4e/0x61
   dump_stack_lvl+0x4a/0x6f
   dump_stack+0x10/0x18
   ubsan_epilogue+0x9/0x3a
   __ubsan_handle_out_of_bounds.cold+0x42/0x47
   gen11_compute_sseu_info+0x121/0x130 [i915]
   intel_sseu_info_init+0x15d/0x2b0 [i915]
   intel_gt_init_mmio+0x23/0x40 [i915]
   i915_driver_mmio_probe+0x129/0x400 [i915]
   ? intel_gt_probe_all+0x91/0x2e0 [i915]
   i915_driver_probe+0xe1/0x3f0 [i915]
   ? drm_privacy_screen_get+0x16d/0x190 [drm]
   ? acpi_dev_found+0x64/0x80
   i915_pci_probe+0xac/0x1b0 [i915]
   ...

According to the definition of sseu_dev_info, eu_mask->hsw is limited to
a maximum of GEN_MAX_SS_PER_HSW_SLICE (6) sub-slices, but
gen11_sseu_info_init() can potentially set 8 sub-slices, in the
!IS_JSL_EHL(gt->i915) case.

Fix this by reserving up to 8 slots for max_subslices in the eu_mask
struct.

Reported-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
Signed-off-by: Andrea Righi <andrea.righi@canonical.com>
Fixes: bc3c5e0809ae ("drm/i915/sseu: Don't try to store EU mask internally in UAPI format")
Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
Signed-off-by: Matt Roper <matthew.d.roper@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230220171858.131416-1-andrea.righi@canonical.com
(cherry picked from commit 3cba09a6ac86ea1d456909626eb2685596c07822)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_sseu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index aa87d3832d60d..d7e8c374f153e 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -27,7 +27,7 @@ struct drm_printer;
  * is only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the
  * I915_MAX_SS_FUSE_BITS value below).
  */
-#define GEN_MAX_SS_PER_HSW_SLICE	6
+#define GEN_MAX_SS_PER_HSW_SLICE	8
 
 /*
  * Maximum number of 32-bit registers used by hardware to express the

From e0e6b416b25ee14716f3549e0cbec1011b193809 Mon Sep 17 00:00:00 2001
From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Date: Thu, 2 Mar 2023 13:08:20 +0100
Subject: [PATCH 179/411] drm/i915/active: Fix misuse of non-idle barriers as
 fence trackers

Users reported oopses on list corruptions when using i915 perf with a
number of concurrently running graphics applications.  Root cause analysis
pointed at an issue in barrier processing code -- a race among perf open /
close replacing active barriers with perf requests on kernel context and
concurrent barrier preallocate / acquire operations performed during user
context first pin / last unpin.

When adding a request to a composite tracker, we try to reuse an existing
fence tracker, already allocated and registered with that composite.  The
tracker we obtain may already track another fence, may be an idle barrier,
or an active barrier.

If the tracker we get occurs a non-idle barrier then we try to delete that
barrier from a list of barrier tasks it belongs to.  However, while doing
that we don't respect return value from a function that performs the
barrier deletion.  Should the deletion ever fail, we would end up reusing
the tracker still registered as a barrier task.  Since the same structure
field is reused with both fence callback lists and barrier tasks list,
list corruptions would likely occur.

Barriers are now deleted from a barrier tasks list by temporarily removing
the list content, traversing that content with skip over the node to be
deleted, then populating the list back with the modified content.  Should
that intentionally racy concurrent deletion attempts be not serialized,
one or more of those may fail because of the list being temporary empty.

Related code that ignores the results of barrier deletion was initially
introduced in v5.4 by commit d8af05ff38ae ("drm/i915: Allow sharing the
idle-barrier from other kernel requests").  However, all users of the
barrier deletion routine were apparently serialized at that time, then the
issue didn't exhibit itself.  Results of git bisect with help of a newly
developed igt@gem_barrier_race@remote-request IGT test indicate that list
corruptions might start to appear after commit 311770173fac ("drm/i915/gt:
Schedule request retirement when timeline idles"), introduced in v5.5.

Respect results of barrier deletion attempts -- mark the barrier as idle
only if successfully deleted from the list.  Then, before proceeding with
setting our fence as the one currently tracked, make sure that the tracker
we've got is not a non-idle barrier.  If that check fails then don't use
that tracker but go back and try to acquire a new, usable one.

v3: use unlikely() to document what outcome we expect (Andi),
  - fix bad grammar in commit description.
v2: no code changes,
  - blame commit 311770173fac ("drm/i915/gt: Schedule request retirement
    when timeline idles"), v5.5, not commit d8af05ff38ae ("drm/i915: Allow
    sharing the idle-barrier from other kernel requests"), v5.4,
  - reword commit description.

Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/6333
Fixes: 311770173fac ("drm/i915/gt: Schedule request retirement when timeline idles")
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: stable@vger.kernel.org # v5.5
Cc: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230302120820.48740-1-janusz.krzysztofik@linux.intel.com
(cherry picked from commit 506006055769b10d1b2b4e22f636f3b45e0e9fc7)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
---
 drivers/gpu/drm/i915/i915_active.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 7412abf166a8c..a9fea115f2d26 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -422,12 +422,12 @@ replace_barrier(struct i915_active *ref, struct i915_active_fence *active)
 	 * we can use it to substitute for the pending idle-barrer
 	 * request that we want to emit on the kernel_context.
 	 */
-	__active_del_barrier(ref, node_from_active(active));
-	return true;
+	return __active_del_barrier(ref, node_from_active(active));
 }
 
 int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 {
+	u64 idx = i915_request_timeline(rq)->fence_context;
 	struct dma_fence *fence = &rq->fence;
 	struct i915_active_fence *active;
 	int err;
@@ -437,16 +437,19 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
 	if (err)
 		return err;
 
-	active = active_instance(ref, i915_request_timeline(rq)->fence_context);
-	if (!active) {
-		err = -ENOMEM;
-		goto out;
-	}
+	do {
+		active = active_instance(ref, idx);
+		if (!active) {
+			err = -ENOMEM;
+			goto out;
+		}
+
+		if (replace_barrier(ref, active)) {
+			RCU_INIT_POINTER(active->fence, NULL);
+			atomic_dec(&ref->count);
+		}
+	} while (unlikely(is_barrier(active)));
 
-	if (replace_barrier(ref, active)) {
-		RCU_INIT_POINTER(active->fence, NULL);
-		atomic_dec(&ref->count);
-	}
 	if (!__i915_active_fence_set(active, fence))
 		__i915_active_acquire(ref);
 

From d6fd48eff7506bb866a54e40369df8899f2078a9 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 15 Feb 2023 11:01:42 +0100
Subject: [PATCH 180/411] virt/coco/sev-guest: Check SEV_SNP attribute at probe
 time

No need to check it on every ioctl. And yes, this is a common SEV driver
but it does only SNP-specific operations currently. This can be
revisited later, when more use cases appear.

No functional changes.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20230307192449.24732-3-bp@alien8.de
---
 arch/x86/kernel/sev.c                   | 3 ---
 drivers/virt/coco/sev-guest/sev-guest.c | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 679026a640efd..c644c34372e8c 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2183,9 +2183,6 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 	struct ghcb *ghcb;
 	int ret;
 
-	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
-		return -ENODEV;
-
 	if (!fw_err)
 		return -EINVAL;
 
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 7b4e9009f3355..ed5d6ae1a1447 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -703,6 +703,9 @@ static int __init sev_guest_probe(struct platform_device *pdev)
 	void __iomem *mapping;
 	int ret;
 
+	if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+		return -ENODEV;
+
 	if (!dev->platform_data)
 		return -ENODEV;
 

From 970ab823743fb54b42002ec76c51481f67436444 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 15 Feb 2023 11:39:41 +0100
Subject: [PATCH 181/411] virt/coco/sev-guest: Simplify extended guest request
 handling

Return a specific error code - -ENOSPC - to signal the too small cert
data buffer instead of checking exit code and exitinfo2.

While at it, hoist the *fw_err assignment in snp_issue_guest_request()
so that a proper error value is returned to the callers.

  [ Tom: check override_err instead of err. ]

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230307192449.24732-4-bp@alien8.de
---
 arch/x86/kernel/sev.c                   | 11 +++---
 drivers/virt/coco/sev-guest/sev-guest.c | 48 +++++++++++++------------
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c644c34372e8c..6a3e1425ba175 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2209,15 +2209,16 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 	if (ret)
 		goto e_put;
 
+	*fw_err = ghcb->save.sw_exit_info_2;
 	if (ghcb->save.sw_exit_info_2) {
 		/* Number of expected pages are returned in RBX */
 		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
-		    ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN)
+		    ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN) {
 			input->data_npages = ghcb_get_rbx(ghcb);
-
-		*fw_err = ghcb->save.sw_exit_info_2;
-
-		ret = -EIO;
+			ret = -ENOSPC;
+		} else {
+			ret = -EIO;
+		}
 	}
 
 e_put:
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index ed5d6ae1a1447..e61db0b15b7a6 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -322,7 +322,8 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 				u8 type, void *req_buf, size_t req_sz, void *resp_buf,
 				u32 resp_sz, __u64 *fw_err)
 {
-	unsigned long err;
+	unsigned long err, override_err = 0;
+	unsigned int override_npages = 0;
 	u64 seqno;
 	int rc;
 
@@ -338,6 +339,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	if (rc)
 		return rc;
 
+retry_request:
 	/*
 	 * Call firmware to process the request. In this function the encrypted
 	 * message enters shared memory with the host. So after this call the
@@ -346,17 +348,24 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	 */
 	rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err);
 
-	/*
-	 * If the extended guest request fails due to having too small of a
-	 * certificate data buffer, retry the same guest request without the
-	 * extended data request in order to increment the sequence number
-	 * and thus avoid IV reuse.
-	 */
-	if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
-	    err == SNP_GUEST_REQ_INVALID_LEN) {
-		const unsigned int certs_npages = snp_dev->input.data_npages;
+	switch (rc) {
+	case -ENOSPC:
+		/*
+		 * If the extended guest request fails due to having too
+		 * small of a certificate data buffer, retry the same
+		 * guest request without the extended data request in
+		 * order to increment the sequence number and thus avoid
+		 * IV reuse.
+		 */
+		override_npages = snp_dev->input.data_npages;
+		exit_code	= SVM_VMGEXIT_GUEST_REQUEST;
 
-		exit_code = SVM_VMGEXIT_GUEST_REQUEST;
+		/*
+		 * Override the error to inform callers the given extended
+		 * request buffer size was too small and give the caller the
+		 * required buffer size.
+		 */
+		override_err	= SNP_GUEST_REQ_INVALID_LEN;
 
 		/*
 		 * If this call to the firmware succeeds, the sequence number can
@@ -366,15 +375,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 		 * of the VMPCK and the error code being propagated back to the
 		 * user as an ioctl() return code.
 		 */
-		rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err);
-
-		/*
-		 * Override the error to inform callers the given extended
-		 * request buffer size was too small and give the caller the
-		 * required buffer size.
-		 */
-		err = SNP_GUEST_REQ_INVALID_LEN;
-		snp_dev->input.data_npages = certs_npages;
+		goto retry_request;
 	}
 
 	/*
@@ -386,7 +387,10 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	snp_inc_msg_seqno(snp_dev);
 
 	if (fw_err)
-		*fw_err = err;
+		*fw_err = override_err ?: err;
+
+	if (override_npages)
+		snp_dev->input.data_npages = override_npages;
 
 	/*
 	 * If an extended guest request was issued and the supplied certificate
@@ -394,7 +398,7 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	 * prevent IV reuse. If the standard request was successful, return -EIO
 	 * back to the caller as would have originally been returned.
 	 */
-	if (!rc && err == SNP_GUEST_REQ_INVALID_LEN)
+	if (!rc && override_err == SNP_GUEST_REQ_INVALID_LEN)
 		return -EIO;
 
 	if (rc) {

From c5a338274bdb894f088767bea856be344d0ccaef Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 15 Feb 2023 11:43:43 +0100
Subject: [PATCH 182/411] virt/coco/sev-guest: Remove the disable_vmpck label
 in handle_guest_request()

Call the function directly instead.

No functional changes.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20230307192449.24732-5-bp@alien8.de
---
 drivers/virt/coco/sev-guest/sev-guest.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index e61db0b15b7a6..a51bd4afd5ab7 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -405,7 +405,8 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 		dev_alert(snp_dev->dev,
 			  "Detected error from ASP request. rc: %d, fw_err: %llu\n",
 			  rc, *fw_err);
-		goto disable_vmpck;
+		snp_disable_vmpck(snp_dev);
+		return rc;
 	}
 
 	rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
@@ -413,14 +414,11 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 		dev_alert(snp_dev->dev,
 			  "Detected unexpected decode failure from ASP. rc: %d\n",
 			  rc);
-		goto disable_vmpck;
+		snp_disable_vmpck(snp_dev);
+		return rc;
 	}
 
 	return 0;
-
-disable_vmpck:
-	snp_disable_vmpck(snp_dev);
-	return rc;
 }
 
 static int get_report(struct snp_guest_dev *snp_dev, struct snp_guest_request_ioctl *arg)

From 0fdb6cc7c89cb5e0cbc45dbdbafb8e3fb92ddc95 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Tue, 7 Mar 2023 09:19:19 -0600
Subject: [PATCH 183/411] virt/coco/sev-guest: Carve out the request issuing
 logic into a helper

This makes the code flow a lot easier to follow.

No functional changes.

  [ Tom: touchups. ]

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20230307192449.24732-6-bp@alien8.de
---
 drivers/virt/coco/sev-guest/sev-guest.c | 44 +++++++++++++++----------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index a51bd4afd5ab7..07dafe22b27a7 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -318,27 +318,12 @@ static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8
 	return __enc_payload(snp_dev, req, payload, sz);
 }
 
-static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver,
-				u8 type, void *req_buf, size_t req_sz, void *resp_buf,
-				u32 resp_sz, __u64 *fw_err)
+static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, __u64 *fw_err)
 {
 	unsigned long err, override_err = 0;
 	unsigned int override_npages = 0;
-	u64 seqno;
 	int rc;
 
-	/* Get message sequence and verify that its a non-zero */
-	seqno = snp_get_msg_seqno(snp_dev);
-	if (!seqno)
-		return -EIO;
-
-	memset(snp_dev->response, 0, sizeof(struct snp_guest_msg));
-
-	/* Encrypt the userspace provided payload */
-	rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz);
-	if (rc)
-		return rc;
-
 retry_request:
 	/*
 	 * Call firmware to process the request. In this function the encrypted
@@ -347,7 +332,6 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	 * prevent reuse of the IV.
 	 */
 	rc = snp_issue_guest_request(exit_code, &snp_dev->input, &err);
-
 	switch (rc) {
 	case -ENOSPC:
 		/*
@@ -401,7 +385,33 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 	if (!rc && override_err == SNP_GUEST_REQ_INVALID_LEN)
 		return -EIO;
 
+	return rc;
+}
+
+static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, int msg_ver,
+				u8 type, void *req_buf, size_t req_sz, void *resp_buf,
+				u32 resp_sz, __u64 *fw_err)
+{
+	u64 seqno;
+	int rc;
+
+	/* Get message sequence and verify that its a non-zero */
+	seqno = snp_get_msg_seqno(snp_dev);
+	if (!seqno)
+		return -EIO;
+
+	memset(snp_dev->response, 0, sizeof(struct snp_guest_msg));
+
+	/* Encrypt the userspace provided payload */
+	rc = enc_payload(snp_dev, seqno, msg_ver, type, req_buf, req_sz);
+	if (rc)
+		return rc;
+
+	rc = __handle_guest_request(snp_dev, exit_code, fw_err);
 	if (rc) {
+		if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN)
+			return rc;
+
 		dev_alert(snp_dev->dev,
 			  "Detected error from ASP request. rc: %d, fw_err: %llu\n",
 			  rc, *fw_err);

From 2cde14b187b458b7dcf99360fe4435dae1645a1f Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Date: Wed, 1 Mar 2023 17:25:08 +0100
Subject: [PATCH 184/411] accel: Build sub-directories based on config options

When accel drivers are disabled do not process into
sub-directories and create built-in archives:

  AR      drivers/accel/habanalabs/built-in.a
  AR      drivers/accel/ivpu/built-in.a

Fixes: 35b137630f08 ("accel/ivpu: Introduce a new DRM driver for Intel VPU")
Signed-off-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230301162508.3963484-1-stanislaw.gruszka@linux.intel.com
(cherry picked from commit dd61bbd0d1fba48cd9464e047a7f90b70a463e39)
Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
---
 drivers/accel/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/accel/Makefile b/drivers/accel/Makefile
index 07aa77aed1c8d..f22fd44d586b2 100644
--- a/drivers/accel/Makefile
+++ b/drivers/accel/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 
-obj-y	+= habanalabs/
-obj-y	+= ivpu/
+obj-$(CONFIG_DRM_ACCEL_HABANALABS)	+= habanalabs/
+obj-$(CONFIG_DRM_ACCEL_IVPU)		+= ivpu/

From d25bae7dc7b0668cb2a1325c64eb32d5fea4e5a9 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Wed, 15 Feb 2023 11:54:59 +0100
Subject: [PATCH 185/411] virt/coco/sev-guest: Do some code style cleanups

Remove unnecessary linebreaks, make the code more compact.

No functional changes.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20230307192449.24732-7-bp@alien8.de
---
 drivers/virt/coco/sev-guest/sev-guest.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 07dafe22b27a7..81a53c31ff46e 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -412,18 +412,14 @@ static int handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, in
 		if (rc == -EIO && *fw_err == SNP_GUEST_REQ_INVALID_LEN)
 			return rc;
 
-		dev_alert(snp_dev->dev,
-			  "Detected error from ASP request. rc: %d, fw_err: %llu\n",
-			  rc, *fw_err);
+		dev_alert(snp_dev->dev, "Detected error from ASP request. rc: %d, fw_err: %llu\n", rc, *fw_err);
 		snp_disable_vmpck(snp_dev);
 		return rc;
 	}
 
 	rc = verify_and_dec_payload(snp_dev, resp_buf, resp_sz);
 	if (rc) {
-		dev_alert(snp_dev->dev,
-			  "Detected unexpected decode failure from ASP. rc: %d\n",
-			  rc);
+		dev_alert(snp_dev->dev, "Detected unexpected decode failure from ASP. rc: %d\n", rc);
 		snp_disable_vmpck(snp_dev);
 		return rc;
 	}

From fa4ae42cc60a7dea30e8f2db444b808d80862345 Mon Sep 17 00:00:00 2001
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Thu, 16 Feb 2023 10:50:11 +0100
Subject: [PATCH 186/411] virt/coco/sev-guest: Convert the sw_exit_info_2
 checking to a switch-case

snp_issue_guest_request() checks the value returned by the hypervisor in
sw_exit_info_2 and returns a different error depending on it.

Convert those checks into a switch-case to make it more readable when
more error values are going to be checked in the future.

No functional changes.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Link: https://lore.kernel.org/r/20230307192449.24732-8-bp@alien8.de
---
 arch/x86/kernel/sev.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 6a3e1425ba175..d67884fb38c1c 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2210,15 +2210,21 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 		goto e_put;
 
 	*fw_err = ghcb->save.sw_exit_info_2;
-	if (ghcb->save.sw_exit_info_2) {
+	switch (*fw_err) {
+	case 0:
+		break;
+
+	case SNP_GUEST_REQ_INVALID_LEN:
 		/* Number of expected pages are returned in RBX */
-		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST &&
-		    ghcb->save.sw_exit_info_2 == SNP_GUEST_REQ_INVALID_LEN) {
+		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
 			input->data_npages = ghcb_get_rbx(ghcb);
 			ret = -ENOSPC;
-		} else {
-			ret = -EIO;
+			break;
 		}
+		fallthrough;
+	default:
+		ret = -EIO;
+		break;
 	}
 
 e_put:

From 72f7754dcf31c87c92c0c353dcf747814cc5ce10 Mon Sep 17 00:00:00 2001
From: Dionna Glaze <dionnaglaze@google.com>
Date: Thu, 16 Feb 2023 11:08:02 +0100
Subject: [PATCH 187/411] virt/coco/sev-guest: Add throttling awareness

A potentially malicious SEV guest can constantly hammer the hypervisor
using this driver to send down requests and thus prevent or at least
considerably hinder other guests from issuing requests to the secure
processor which is a shared platform resource.

Therefore, the host is permitted and encouraged to throttle such guest
requests.

Add the capability to handle the case when the hypervisor throttles
excessive numbers of requests issued by the guest. Otherwise, the VM
platform communication key will be disabled, preventing the guest from
attesting itself.

Realistically speaking, a well-behaved guest should not even care about
throttling. During its lifetime, it would end up issuing a handful of
requests which the hardware can easily handle.

This is more to address the case of a malicious guest. Such guest should
get throttled and if its VMPCK gets disabled, then that's its own
wrongdoing and perhaps that guest even deserves it.

To the implementation: the hypervisor signals with SNP_GUEST_REQ_ERR_BUSY
that the guest requests should be throttled. That error code is returned
in the upper 32-bit half of exitinfo2 and this is part of the GHCB spec
v2.

So the guest is given a throttling period of 1 minute in which it
retries the request every 2 seconds. This is a good default but if it
turns out to not pan out in practice, it can be tweaked later.

For safety, since the encryption algorithm in GHCBv2 is AES_GCM, control
must remain in the kernel to complete the request with the current
sequence number. Returning without finishing the request allows the
guest to make another request but with different message contents. This
is IV reuse, and breaks cryptographic protections.

  [ bp:
    - Rewrite commit message and do a simplified version.
    - The stable tags are supposed to denote that a cleanup should go
      upfront before backporting this so that any future fixes to this
      can preserve the sanity of the backporter(s). ]

Fixes: d5af44dde546 ("x86/sev: Provide support for SNP guest request NAEs")
Signed-off-by: Dionna Glaze <dionnaglaze@google.com>
Co-developed-by: Borislav Petkov (AMD) <bp@alien8.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: <stable@kernel.org> # d6fd48eff750 ("virt/coco/sev-guest: Check SEV_SNP attribute at probe time")
Cc: <stable@kernel.org> # 970ab823743f (" virt/coco/sev-guest: Simplify extended guest request handling")
Cc: <stable@kernel.org> # c5a338274bdb ("virt/coco/sev-guest: Remove the disable_vmpck label in handle_guest_request()")
Cc: <stable@kernel.org> # 0fdb6cc7c89c ("virt/coco/sev-guest: Carve out the request issuing logic into a helper")
Cc: <stable@kernel.org> # d25bae7dc7b0 ("virt/coco/sev-guest: Do some code style cleanups")
Cc: <stable@kernel.org> # fa4ae42cc60a ("virt/coco/sev-guest: Convert the sw_exit_info_2 checking to a switch-case")
Link: https://lore.kernel.org/r/20230214164638.1189804-2-dionnaglaze@google.com
---
 arch/x86/include/asm/sev-common.h       |  3 ++-
 arch/x86/kernel/sev.c                   |  4 ++++
 drivers/virt/coco/sev-guest/sev-guest.c | 19 ++++++++++++++++++-
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h
index b8357d6ecd47e..b63be696b776a 100644
--- a/arch/x86/include/asm/sev-common.h
+++ b/arch/x86/include/asm/sev-common.h
@@ -128,8 +128,9 @@ struct snp_psc_desc {
 	struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY];
 } __packed;
 
-/* Guest message request error code */
+/* Guest message request error codes */
 #define SNP_GUEST_REQ_INVALID_LEN	BIT_ULL(32)
+#define SNP_GUEST_REQ_ERR_BUSY		BIT_ULL(33)
 
 #define GHCB_MSR_TERM_REQ		0x100
 #define GHCB_MSR_TERM_REASON_SET_POS	12
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index d67884fb38c1c..3f664ab277c49 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -2214,6 +2214,10 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, unsigned
 	case 0:
 		break;
 
+	case SNP_GUEST_REQ_ERR_BUSY:
+		ret = -EAGAIN;
+		break;
+
 	case SNP_GUEST_REQ_INVALID_LEN:
 		/* Number of expected pages are returned in RBX */
 		if (exit_code == SVM_VMGEXIT_EXT_GUEST_REQUEST) {
diff --git a/drivers/virt/coco/sev-guest/sev-guest.c b/drivers/virt/coco/sev-guest/sev-guest.c
index 81a53c31ff46e..46f1a8d558b0b 100644
--- a/drivers/virt/coco/sev-guest/sev-guest.c
+++ b/drivers/virt/coco/sev-guest/sev-guest.c
@@ -31,6 +31,9 @@
 #define AAD_LEN		48
 #define MSG_HDR_VER	1
 
+#define SNP_REQ_MAX_RETRY_DURATION	(60*HZ)
+#define SNP_REQ_RETRY_DELAY		(2*HZ)
+
 struct snp_guest_crypto {
 	struct crypto_aead *tfm;
 	u8 *iv, *authtag;
@@ -320,7 +323,8 @@ static int enc_payload(struct snp_guest_dev *snp_dev, u64 seqno, int version, u8
 
 static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code, __u64 *fw_err)
 {
-	unsigned long err, override_err = 0;
+	unsigned long err = 0xff, override_err = 0;
+	unsigned long req_start = jiffies;
 	unsigned int override_npages = 0;
 	int rc;
 
@@ -360,6 +364,19 @@ static int __handle_guest_request(struct snp_guest_dev *snp_dev, u64 exit_code,
 		 * user as an ioctl() return code.
 		 */
 		goto retry_request;
+
+	/*
+	 * The host may return SNP_GUEST_REQ_ERR_EBUSY if the request has been
+	 * throttled. Retry in the driver to avoid returning and reusing the
+	 * message sequence number on a different message.
+	 */
+	case -EAGAIN:
+		if (jiffies - req_start > SNP_REQ_MAX_RETRY_DURATION) {
+			rc = -ETIMEDOUT;
+			break;
+		}
+		schedule_timeout_killable(SNP_REQ_RETRY_DELAY);
+		goto retry_request;
 	}
 
 	/*

From 3e453522593d74a87cf68a38e14aa36ebca1dbcd Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Wed, 22 Feb 2023 11:59:16 +0800
Subject: [PATCH 188/411] md: Free resources in __md_stop

If md_run() fails after ->active_io is initialized, then percpu_ref_exit
is called in error path. However, later md_free_disk will call
percpu_ref_exit again which leads to a panic because of null pointer
dereference. It can also trigger this bug when resources are initialized
but are freed in error path, then will be freed again in md_free_disk.

BUG: kernel NULL pointer dereference, address: 0000000000000038
Oops: 0000 [#1] PREEMPT SMP
Workqueue: md_misc mddev_delayed_delete
RIP: 0010:free_percpu+0x110/0x630
Call Trace:
 <TASK>
 __percpu_ref_exit+0x44/0x70
 percpu_ref_exit+0x16/0x90
 md_free_disk+0x2f/0x80
 disk_release+0x101/0x180
 device_release+0x84/0x110
 kobject_put+0x12a/0x380
 kobject_put+0x160/0x380
 mddev_delayed_delete+0x19/0x30
 process_one_work+0x269/0x680
 worker_thread+0x266/0x640
 kthread+0x151/0x1b0
 ret_from_fork+0x1f/0x30

For creating raid device, md raid calls do_md_run->md_run, dm raid calls
md_run. We alloc those memory in md_run. For stopping raid device, md raid
calls do_md_stop->__md_stop, dm raid calls md_stop->__md_stop. So we can
free those memory resources in __md_stop.

Fixes: 72adae23a72c ("md: Change active_io to percpu")
Reported-and-tested-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 927a43db5dfbb..f5480778e2f7c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6256,6 +6256,11 @@ static void __md_stop(struct mddev *mddev)
 		mddev->to_remove = &md_redundancy_group;
 	module_put(pers->owner);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+	percpu_ref_exit(&mddev->writes_pending);
+	percpu_ref_exit(&mddev->active_io);
+	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->sync_set);
 }
 
 void md_stop(struct mddev *mddev)
@@ -6265,10 +6270,6 @@ void md_stop(struct mddev *mddev)
 	 */
 	__md_stop_writes(mddev);
 	__md_stop(mddev);
-	percpu_ref_exit(&mddev->writes_pending);
-	percpu_ref_exit(&mddev->active_io);
-	bioset_exit(&mddev->bio_set);
-	bioset_exit(&mddev->sync_set);
 }
 
 EXPORT_SYMBOL_GPL(md_stop);
@@ -7839,11 +7840,6 @@ static void md_free_disk(struct gendisk *disk)
 {
 	struct mddev *mddev = disk->private_data;
 
-	percpu_ref_exit(&mddev->writes_pending);
-	percpu_ref_exit(&mddev->active_io);
-	bioset_exit(&mddev->bio_set);
-	bioset_exit(&mddev->sync_set);
-
 	mddev_free(mddev);
 }
 

From ee06a3ef7e3cddb62b90ac40aa661d3c12f7cabc Mon Sep 17 00:00:00 2001
From: Jurica Vukadin <jura@vukad.in>
Date: Tue, 7 Mar 2023 20:40:39 +0100
Subject: [PATCH 189/411] kconfig: Update config changed flag before calling
 callback

Prior to commit 5ee546594025 ("kconfig: change sym_change_count to a
boolean flag"), the conf_updated flag was set to the new value *before*
calling the callback. xconfig's save action depends on this behaviour,
because xconfig calls conf_get_changed() directly from the callback and
now sees the old value, thus never enabling the save button or the
shortcut.

Restore the previous behaviour.

Fixes: 5ee546594025 ("kconfig: change sym_change_count to a boolean flag")
Signed-off-by: Jurica Vukadin <jura@vukad.in>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/confdata.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/kconfig/confdata.c b/scripts/kconfig/confdata.c
index b7c9f1dd5e422..992575f1e9769 100644
--- a/scripts/kconfig/confdata.c
+++ b/scripts/kconfig/confdata.c
@@ -1226,10 +1226,12 @@ static void (*conf_changed_callback)(void);
 
 void conf_set_changed(bool val)
 {
-	if (conf_changed_callback && conf_changed != val)
-		conf_changed_callback();
+	bool changed = conf_changed != val;
 
 	conf_changed = val;
+
+	if (conf_changed_callback && changed)
+		conf_changed_callback();
 }
 
 bool conf_get_changed(void)

From 90ae93d8affc1061cd87ca8ddd9a838c7d31a158 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:36 +0100
Subject: [PATCH 190/411] interconnect: qcom: rpm: fix registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 62feb14ee8a3 ("interconnect: qcom: Consolidate interconnect RPM support")
Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
Cc: stable@vger.kernel.org	# 5.7
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Jun Nie <jun.nie@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-9-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpm.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index 91778cfcbc65d..4180a06681b2b 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -503,7 +503,6 @@ int qnoc_probe(struct platform_device *pdev)
 	}
 
 	provider = &qp->provider;
-	INIT_LIST_HEAD(&provider->nodes);
 	provider->dev = dev;
 	provider->set = qcom_icc_set;
 	provider->pre_aggregate = qcom_icc_pre_bw_aggregate;
@@ -511,12 +510,7 @@ int qnoc_probe(struct platform_device *pdev)
 	provider->xlate_extended = qcom_icc_xlate_extended;
 	provider->data = data;
 
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(dev, "error adding interconnect provider: %d\n", ret);
-		clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-		return ret;
-	}
+	icc_provider_init(provider);
 
 	for (i = 0; i < num_nodes; i++) {
 		size_t j;
@@ -524,7 +518,7 @@ int qnoc_probe(struct platform_device *pdev)
 		node = icc_node_create(qnodes[i]->id);
 		if (IS_ERR(node)) {
 			ret = PTR_ERR(node);
-			goto err;
+			goto err_remove_nodes;
 		}
 
 		node->name = qnodes[i]->name;
@@ -538,20 +532,26 @@ int qnoc_probe(struct platform_device *pdev)
 	}
 	data->num_nodes = num_nodes;
 
+	ret = icc_provider_register(provider);
+	if (ret)
+		goto err_remove_nodes;
+
 	platform_set_drvdata(pdev, qp);
 
 	/* Populate child NoC devices if any */
 	if (of_get_child_count(dev->of_node) > 0) {
 		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
 		if (ret)
-			goto err;
+			goto err_deregister_provider;
 	}
 
 	return 0;
-err:
+
+err_deregister_provider:
+	icc_provider_deregister(provider);
+err_remove_nodes:
 	icc_nodes_remove(provider);
 	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-	icc_provider_del(provider);
 
 	return ret;
 }
@@ -561,9 +561,9 @@ int qnoc_remove(struct platform_device *pdev)
 {
 	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
 
+	icc_provider_deregister(&qp->provider);
 	icc_nodes_remove(&qp->provider);
 	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-	icc_provider_del(&qp->provider);
 
 	return 0;
 }

From 6570d1d46eeade82965ccc4a3ab7d778898ef4bf Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:37 +0100
Subject: [PATCH 191/411] interconnect: qcom: rpmh: fix probe child-node error
 handling

Make sure to clean up and release resources properly also in case probe
fails when populating child devices.

Fixes: 57eb14779dfd ("interconnect: qcom: icc-rpmh: Support child NoC device probe")
Cc: stable@vger.kernel.org      # 6.0
Cc: Luca Weiss <luca.weiss@fairphone.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-10-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpmh.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index fd17291c61eb9..5168bbf3d92fc 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -235,8 +235,11 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, qp);
 
 	/* Populate child NoC devices if any */
-	if (of_get_child_count(dev->of_node) > 0)
-		return of_platform_populate(dev->of_node, NULL, NULL, dev);
+	if (of_get_child_count(dev->of_node) > 0) {
+		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
+		if (ret)
+			goto err;
+	}
 
 	return 0;
 err:

From 74240a5bebd48d8b843c6d0f1acfaa722a5abeb7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:38 +0100
Subject: [PATCH 192/411] interconnect: qcom: rpmh: fix registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 976daac4a1c5 ("interconnect: qcom: Consolidate interconnect RPMh support")
Cc: stable@vger.kernel.org      # 5.7
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-11-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpmh.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpmh.c b/drivers/interconnect/qcom/icc-rpmh.c
index 5168bbf3d92fc..fdb5e58e408b4 100644
--- a/drivers/interconnect/qcom/icc-rpmh.c
+++ b/drivers/interconnect/qcom/icc-rpmh.c
@@ -192,9 +192,10 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev)
 	provider->pre_aggregate = qcom_icc_pre_aggregate;
 	provider->aggregate = qcom_icc_aggregate;
 	provider->xlate_extended = qcom_icc_xlate_extended;
-	INIT_LIST_HEAD(&provider->nodes);
 	provider->data = data;
 
+	icc_provider_init(provider);
+
 	qp->dev = dev;
 	qp->bcms = desc->bcms;
 	qp->num_bcms = desc->num_bcms;
@@ -203,10 +204,6 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev)
 	if (IS_ERR(qp->voter))
 		return PTR_ERR(qp->voter);
 
-	ret = icc_provider_add(provider);
-	if (ret)
-		return ret;
-
 	for (i = 0; i < qp->num_bcms; i++)
 		qcom_icc_bcm_init(qp->bcms[i], dev);
 
@@ -218,7 +215,7 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev)
 		node = icc_node_create(qn->id);
 		if (IS_ERR(node)) {
 			ret = PTR_ERR(node);
-			goto err;
+			goto err_remove_nodes;
 		}
 
 		node->name = qn->name;
@@ -232,19 +229,27 @@ int qcom_icc_rpmh_probe(struct platform_device *pdev)
 	}
 
 	data->num_nodes = num_nodes;
+
+	ret = icc_provider_register(provider);
+	if (ret)
+		goto err_remove_nodes;
+
 	platform_set_drvdata(pdev, qp);
 
 	/* Populate child NoC devices if any */
 	if (of_get_child_count(dev->of_node) > 0) {
 		ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
 		if (ret)
-			goto err;
+			goto err_deregister_provider;
 	}
 
 	return 0;
-err:
+
+err_deregister_provider:
+	icc_provider_deregister(provider);
+err_remove_nodes:
 	icc_nodes_remove(provider);
-	icc_provider_del(provider);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(qcom_icc_rpmh_probe);
@@ -253,8 +258,8 @@ int qcom_icc_rpmh_remove(struct platform_device *pdev)
 {
 	struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
 
+	icc_provider_deregister(&qp->provider);
 	icc_nodes_remove(&qp->provider);
-	icc_provider_del(&qp->provider);
 
 	return 0;
 }

From bfe7bcd2b9f5215de2144f097f39971180e7ea54 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:39 +0100
Subject: [PATCH 193/411] interconnect: qcom: msm8974: fix registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 4e60a9568dc6 ("interconnect: qcom: add msm8974 driver")
Cc: stable@vger.kernel.org      # 5.5
Reviewed-by: Brian Masney <bmasney@redhat.com>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-12-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/msm8974.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/interconnect/qcom/msm8974.c b/drivers/interconnect/qcom/msm8974.c
index 5ea192f1141dc..1828deaca4432 100644
--- a/drivers/interconnect/qcom/msm8974.c
+++ b/drivers/interconnect/qcom/msm8974.c
@@ -692,7 +692,6 @@ static int msm8974_icc_probe(struct platform_device *pdev)
 		return ret;
 
 	provider = &qp->provider;
-	INIT_LIST_HEAD(&provider->nodes);
 	provider->dev = dev;
 	provider->set = msm8974_icc_set;
 	provider->aggregate = icc_std_aggregate;
@@ -700,11 +699,7 @@ static int msm8974_icc_probe(struct platform_device *pdev)
 	provider->data = data;
 	provider->get_bw = msm8974_get_bw;
 
-	ret = icc_provider_add(provider);
-	if (ret) {
-		dev_err(dev, "error adding interconnect provider: %d\n", ret);
-		goto err_disable_clks;
-	}
+	icc_provider_init(provider);
 
 	for (i = 0; i < num_nodes; i++) {
 		size_t j;
@@ -712,7 +707,7 @@ static int msm8974_icc_probe(struct platform_device *pdev)
 		node = icc_node_create(qnodes[i]->id);
 		if (IS_ERR(node)) {
 			ret = PTR_ERR(node);
-			goto err_del_icc;
+			goto err_remove_nodes;
 		}
 
 		node->name = qnodes[i]->name;
@@ -729,15 +724,16 @@ static int msm8974_icc_probe(struct platform_device *pdev)
 	}
 	data->num_nodes = num_nodes;
 
+	ret = icc_provider_register(provider);
+	if (ret)
+		goto err_remove_nodes;
+
 	platform_set_drvdata(pdev, qp);
 
 	return 0;
 
-err_del_icc:
+err_remove_nodes:
 	icc_nodes_remove(provider);
-	icc_provider_del(provider);
-
-err_disable_clks:
 	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
 
 	return ret;
@@ -747,9 +743,9 @@ static int msm8974_icc_remove(struct platform_device *pdev)
 {
 	struct msm8974_icc_provider *qp = platform_get_drvdata(pdev);
 
+	icc_provider_deregister(&qp->provider);
 	icc_nodes_remove(&qp->provider);
 	clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-	icc_provider_del(&qp->provider);
 
 	return 0;
 }

From 3aab264875bf3c915ea2517fae1eec213e0b4987 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:42 +0100
Subject: [PATCH 194/411] interconnect: exynos: fix node leak in probe PM QoS
 error path

Make sure to add the newly allocated interconnect node to the provider
before adding the PM QoS request so that the node is freed on errors.

Fixes: 2f95b9d5cf0b ("interconnect: Add generic interconnect driver for Exynos SoCs")
Cc: stable@vger.kernel.org      # 5.11
Cc: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-15-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/samsung/exynos.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c
index 6559d8cf80687..e706658994821 100644
--- a/drivers/interconnect/samsung/exynos.c
+++ b/drivers/interconnect/samsung/exynos.c
@@ -149,6 +149,9 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
 				 &priv->bus_clk_ratio))
 		priv->bus_clk_ratio = EXYNOS_ICC_DEFAULT_BUS_CLK_RATIO;
 
+	icc_node->data = priv;
+	icc_node_add(icc_node, provider);
+
 	/*
 	 * Register a PM QoS request for the parent (devfreq) device.
 	 */
@@ -157,9 +160,6 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
 	if (ret < 0)
 		goto err_node_del;
 
-	icc_node->data = priv;
-	icc_node_add(icc_node, provider);
-
 	icc_parent_node = exynos_icc_get_parent(bus_dev->of_node);
 	if (IS_ERR(icc_parent_node)) {
 		ret = PTR_ERR(icc_parent_node);

From c9e46ca612cfbb0cf890f7ae7389b742e90efe64 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:43 +0100
Subject: [PATCH 195/411] interconnect: exynos: fix registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to trigger a NULL-pointer
deference when either a NULL pointer or not fully initialised node is
returned from exynos_generic_icc_xlate().

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 2f95b9d5cf0b ("interconnect: Add generic interconnect driver for Exynos SoCs")
Cc: stable@vger.kernel.org      # 5.11
Cc: Sylwester Nawrocki <s.nawrocki@samsung.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-16-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/samsung/exynos.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c
index e706658994821..72e42603823b9 100644
--- a/drivers/interconnect/samsung/exynos.c
+++ b/drivers/interconnect/samsung/exynos.c
@@ -98,12 +98,13 @@ static int exynos_generic_icc_remove(struct platform_device *pdev)
 	struct exynos_icc_priv *priv = platform_get_drvdata(pdev);
 	struct icc_node *parent_node, *node = priv->node;
 
+	icc_provider_deregister(&priv->provider);
+
 	parent_node = exynos_icc_get_parent(priv->dev->parent->of_node);
 	if (parent_node && !IS_ERR(parent_node))
 		icc_link_destroy(node, parent_node);
 
 	icc_nodes_remove(&priv->provider);
-	icc_provider_del(&priv->provider);
 
 	return 0;
 }
@@ -132,15 +133,11 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
 	provider->inter_set = true;
 	provider->data = priv;
 
-	ret = icc_provider_add(provider);
-	if (ret < 0)
-		return ret;
+	icc_provider_init(provider);
 
 	icc_node = icc_node_create(pdev->id);
-	if (IS_ERR(icc_node)) {
-		ret = PTR_ERR(icc_node);
-		goto err_prov_del;
-	}
+	if (IS_ERR(icc_node))
+		return PTR_ERR(icc_node);
 
 	priv->node = icc_node;
 	icc_node->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "%pOFn",
@@ -171,14 +168,17 @@ static int exynos_generic_icc_probe(struct platform_device *pdev)
 			goto err_pmqos_del;
 	}
 
+	ret = icc_provider_register(provider);
+	if (ret < 0)
+		goto err_pmqos_del;
+
 	return 0;
 
 err_pmqos_del:
 	dev_pm_qos_remove_request(&priv->qos_req);
 err_node_del:
 	icc_nodes_remove(provider);
-err_prov_del:
-	icc_provider_del(provider);
+
 	return ret;
 }
 

From 859ad5f177efa59f6b8a2fac20561ca5cb13c89f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:44 +0100
Subject: [PATCH 196/411] interconnect: exynos: drop redundant link destroy

There is no longer any need to explicitly destroy node links as this is
now done when the node is destroyed as part of icc_nodes_remove().

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-17-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/samsung/exynos.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/interconnect/samsung/exynos.c b/drivers/interconnect/samsung/exynos.c
index 72e42603823b9..ebf09bbf725bd 100644
--- a/drivers/interconnect/samsung/exynos.c
+++ b/drivers/interconnect/samsung/exynos.c
@@ -96,14 +96,8 @@ static struct icc_node *exynos_generic_icc_xlate(struct of_phandle_args *spec,
 static int exynos_generic_icc_remove(struct platform_device *pdev)
 {
 	struct exynos_icc_priv *priv = platform_get_drvdata(pdev);
-	struct icc_node *parent_node, *node = priv->node;
 
 	icc_provider_deregister(&priv->provider);
-
-	parent_node = exynos_icc_get_parent(priv->dev->parent->of_node);
-	if (parent_node && !IS_ERR(parent_node))
-		icc_link_destroy(node, parent_node);
-
 	icc_nodes_remove(&priv->provider);
 
 	return 0;

From 5553055c62683ce339f9ef5fb2a26c8331485d68 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:45 +0100
Subject: [PATCH 197/411] memory: tegra: fix interconnect registration race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 06f079816d4c ("memory: tegra-mc: Add interconnect framework")
Cc: stable@vger.kernel.org      # 5.11
Cc: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-18-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/memory/tegra/mc.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 592907546ee64..5cd28619ea9fb 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -794,16 +794,12 @@ static int tegra_mc_interconnect_setup(struct tegra_mc *mc)
 	mc->provider.aggregate = mc->soc->icc_ops->aggregate;
 	mc->provider.xlate_extended = mc->soc->icc_ops->xlate_extended;
 
-	err = icc_provider_add(&mc->provider);
-	if (err)
-		return err;
+	icc_provider_init(&mc->provider);
 
 	/* create Memory Controller node */
 	node = icc_node_create(TEGRA_ICC_MC);
-	if (IS_ERR(node)) {
-		err = PTR_ERR(node);
-		goto del_provider;
-	}
+	if (IS_ERR(node))
+		return PTR_ERR(node);
 
 	node->name = "Memory Controller";
 	icc_node_add(node, &mc->provider);
@@ -830,12 +826,14 @@ static int tegra_mc_interconnect_setup(struct tegra_mc *mc)
 			goto remove_nodes;
 	}
 
+	err = icc_provider_register(&mc->provider);
+	if (err)
+		goto remove_nodes;
+
 	return 0;
 
 remove_nodes:
 	icc_nodes_remove(&mc->provider);
-del_provider:
-	icc_provider_del(&mc->provider);
 
 	return err;
 }

From abd9f1b49cf25eebeaba193c7707355be3f48dae Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:46 +0100
Subject: [PATCH 198/411] memory: tegra124-emc: fix interconnect registration
 race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: 380def2d4cf2 ("memory: tegra124: Support interconnect framework")
Cc: stable@vger.kernel.org      # 5.12
Cc: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-19-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/memory/tegra/tegra124-emc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/memory/tegra/tegra124-emc.c b/drivers/memory/tegra/tegra124-emc.c
index 85bc936c02f94..00ed2b6a0d1b2 100644
--- a/drivers/memory/tegra/tegra124-emc.c
+++ b/drivers/memory/tegra/tegra124-emc.c
@@ -1351,15 +1351,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
 	emc->provider.aggregate = soc->icc_ops->aggregate;
 	emc->provider.xlate_extended = emc_of_icc_xlate_extended;
 
-	err = icc_provider_add(&emc->provider);
-	if (err)
-		goto err_msg;
+	icc_provider_init(&emc->provider);
 
 	/* create External Memory Controller node */
 	node = icc_node_create(TEGRA_ICC_EMC);
 	if (IS_ERR(node)) {
 		err = PTR_ERR(node);
-		goto del_provider;
+		goto err_msg;
 	}
 
 	node->name = "External Memory Controller";
@@ -1380,12 +1378,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
 	node->name = "External Memory (DRAM)";
 	icc_node_add(node, &emc->provider);
 
+	err = icc_provider_register(&emc->provider);
+	if (err)
+		goto remove_nodes;
+
 	return 0;
 
 remove_nodes:
 	icc_nodes_remove(&emc->provider);
-del_provider:
-	icc_provider_del(&emc->provider);
 err_msg:
 	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
 

From c5587f61ec050f7e9ebb3e2da29d12af63e833d3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:47 +0100
Subject: [PATCH 199/411] memory: tegra20-emc: fix interconnect registration
 race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: d5ef16ba5fbe ("memory: tegra20: Support interconnect framework")
Cc: stable@vger.kernel.org      # 5.11
Cc: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-20-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/memory/tegra/tegra20-emc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
index bd4e37b6552de..fd595c851a278 100644
--- a/drivers/memory/tegra/tegra20-emc.c
+++ b/drivers/memory/tegra/tegra20-emc.c
@@ -1021,15 +1021,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
 	emc->provider.aggregate = soc->icc_ops->aggregate;
 	emc->provider.xlate_extended = emc_of_icc_xlate_extended;
 
-	err = icc_provider_add(&emc->provider);
-	if (err)
-		goto err_msg;
+	icc_provider_init(&emc->provider);
 
 	/* create External Memory Controller node */
 	node = icc_node_create(TEGRA_ICC_EMC);
 	if (IS_ERR(node)) {
 		err = PTR_ERR(node);
-		goto del_provider;
+		goto err_msg;
 	}
 
 	node->name = "External Memory Controller";
@@ -1050,12 +1048,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
 	node->name = "External Memory (DRAM)";
 	icc_node_add(node, &emc->provider);
 
+	err = icc_provider_register(&emc->provider);
+	if (err)
+		goto remove_nodes;
+
 	return 0;
 
 remove_nodes:
 	icc_nodes_remove(&emc->provider);
-del_provider:
-	icc_provider_del(&emc->provider);
 err_msg:
 	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
 

From 9db481c909dd6312ccfbdc7e343b50e41c727483 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 6 Mar 2023 08:56:48 +0100
Subject: [PATCH 200/411] memory: tegra30-emc: fix interconnect registration
 race

The current interconnect provider registration interface is inherently
racy as nodes are not added until the after adding the provider. This
can specifically cause racing DT lookups to fail.

Switch to using the new API where the provider is not registered until
after it has been fully initialised.

Fixes: d5ef16ba5fbe ("memory: tegra20: Support interconnect framework")
Cc: stable@vger.kernel.org      # 5.11
Cc: Dmitry Osipenko <digetx@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20230306075651.2449-21-johan+linaro@kernel.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/memory/tegra/tegra30-emc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/memory/tegra/tegra30-emc.c b/drivers/memory/tegra/tegra30-emc.c
index 77706e9bc5433..c91e9b7e2e019 100644
--- a/drivers/memory/tegra/tegra30-emc.c
+++ b/drivers/memory/tegra/tegra30-emc.c
@@ -1533,15 +1533,13 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
 	emc->provider.aggregate = soc->icc_ops->aggregate;
 	emc->provider.xlate_extended = emc_of_icc_xlate_extended;
 
-	err = icc_provider_add(&emc->provider);
-	if (err)
-		goto err_msg;
+	icc_provider_init(&emc->provider);
 
 	/* create External Memory Controller node */
 	node = icc_node_create(TEGRA_ICC_EMC);
 	if (IS_ERR(node)) {
 		err = PTR_ERR(node);
-		goto del_provider;
+		goto err_msg;
 	}
 
 	node->name = "External Memory Controller";
@@ -1562,12 +1560,14 @@ static int tegra_emc_interconnect_init(struct tegra_emc *emc)
 	node->name = "External Memory (DRAM)";
 	icc_node_add(node, &emc->provider);
 
+	err = icc_provider_register(&emc->provider);
+	if (err)
+		goto remove_nodes;
+
 	return 0;
 
 remove_nodes:
 	icc_nodes_remove(&emc->provider);
-del_provider:
-	icc_provider_del(&emc->provider);
 err_msg:
 	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
 

From 3bc57292278a0b6ac4656cad94c14f2453344b57 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 6 Mar 2023 09:36:25 +1100
Subject: [PATCH 201/411] md: avoid signed overflow in slot_store()

slot_store() uses kstrtouint() to get a slot number, but stores the
result in an "int" variable (by casting a pointer).
This can result in a negative slot number if the unsigned int value is
very large.

A negative number means that the slot is empty, but setting a negative
slot number this way will not remove the device from the array.  I don't
think this is a serious problem, but it could cause confusion and it is
best to fix it.

Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index f5480778e2f7c..39e49e5d71823 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3128,6 +3128,9 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
 		err = kstrtouint(buf, 10, (unsigned int *)&slot);
 		if (err < 0)
 			return err;
+		if (slot < 0)
+			/* overflow */
+			return -ENOSPC;
 	}
 	if (rdev->mddev->pers && slot == -1) {
 		/* Setting 'slot' on an active array requires also

From 131db499162274858bdbd7b5323a639da4aab86c Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vadfed@meta.com>
Date: Fri, 10 Mar 2023 07:13:56 -0800
Subject: [PATCH 202/411] bnxt_en: reset PHC frequency in free-running mode

When using a PHC in shared between multiple hosts, the previous
frequency value may not be reset and could lead to host being unable to
compensate the offset with timecounter adjustments. To avoid such state
reset the hardware frequency of PHC to zero on init. Some refactoring is
needed to make code readable.

Fixes: 85036aee1938 ("bnxt_en: Add a non-real time mode to access NIC clock")
Signed-off-by: Vadim Fedorenko <vadfed@meta.com>
Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
Link: https://lore.kernel.org/r/20230310151356.678059-1-vadfed@meta.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  6 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  2 +
 drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c | 56 ++++++++++---------
 3 files changed, 35 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 808236dc898b8..e2e2c986c82b7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6990,11 +6990,9 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 		if (flags & FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED)
 			bp->fw_cap |= BNXT_FW_CAP_DCBX_AGENT;
 	}
-	if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST)) {
+	if (BNXT_PF(bp) && (flags & FUNC_QCFG_RESP_FLAGS_MULTI_HOST))
 		bp->flags |= BNXT_FLAG_MULTI_HOST;
-		if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
-			bp->fw_cap &= ~BNXT_FW_CAP_PTP_RTC;
-	}
+
 	if (flags & FUNC_QCFG_RESP_FLAGS_RING_MONITOR_ENABLED)
 		bp->fw_cap |= BNXT_FW_CAP_RING_MONITOR;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index dcb09fbe40078..c0628ac1b798a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -2000,6 +2000,8 @@ struct bnxt {
 	u32			fw_dbg_cap;
 
 #define BNXT_NEW_RM(bp)		((bp)->fw_cap & BNXT_FW_CAP_NEW_RM)
+#define BNXT_PTP_USE_RTC(bp)	(!BNXT_MH(bp) && \
+				 ((bp)->fw_cap & BNXT_FW_CAP_PTP_RTC))
 	u32			hwrm_spec_code;
 	u16			hwrm_cmd_seq;
 	u16                     hwrm_cmd_kong_seq;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
index 4ec8bba18cdd2..a3a3978a4d1c2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c
@@ -63,7 +63,7 @@ static int bnxt_ptp_settime(struct ptp_clock_info *ptp_info,
 						ptp_info);
 	u64 ns = timespec64_to_ns(ts);
 
-	if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+	if (BNXT_PTP_USE_RTC(ptp->bp))
 		return bnxt_ptp_cfg_settime(ptp->bp, ns);
 
 	spin_lock_bh(&ptp->ptp_lock);
@@ -196,7 +196,7 @@ static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
 	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
 						ptp_info);
 
-	if (ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)
+	if (BNXT_PTP_USE_RTC(ptp->bp))
 		return bnxt_ptp_adjphc(ptp, delta);
 
 	spin_lock_bh(&ptp->ptp_lock);
@@ -205,34 +205,39 @@ static int bnxt_ptp_adjtime(struct ptp_clock_info *ptp_info, s64 delta)
 	return 0;
 }
 
+static int bnxt_ptp_adjfine_rtc(struct bnxt *bp, long scaled_ppm)
+{
+	s32 ppb = scaled_ppm_to_ppb(scaled_ppm);
+	struct hwrm_port_mac_cfg_input *req;
+	int rc;
+
+	rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
+	if (rc)
+		return rc;
+
+	req->ptp_freq_adj_ppb = cpu_to_le32(ppb);
+	req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
+	rc = hwrm_req_send(bp, req);
+	if (rc)
+		netdev_err(bp->dev,
+			   "ptp adjfine failed. rc = %d\n", rc);
+	return rc;
+}
+
 static int bnxt_ptp_adjfine(struct ptp_clock_info *ptp_info, long scaled_ppm)
 {
 	struct bnxt_ptp_cfg *ptp = container_of(ptp_info, struct bnxt_ptp_cfg,
 						ptp_info);
-	struct hwrm_port_mac_cfg_input *req;
 	struct bnxt *bp = ptp->bp;
-	int rc = 0;
 
-	if (!(ptp->bp->fw_cap & BNXT_FW_CAP_PTP_RTC)) {
-		spin_lock_bh(&ptp->ptp_lock);
-		timecounter_read(&ptp->tc);
-		ptp->cc.mult = adjust_by_scaled_ppm(ptp->cmult, scaled_ppm);
-		spin_unlock_bh(&ptp->ptp_lock);
-	} else {
-		s32 ppb = scaled_ppm_to_ppb(scaled_ppm);
-
-		rc = hwrm_req_init(bp, req, HWRM_PORT_MAC_CFG);
-		if (rc)
-			return rc;
+	if (BNXT_PTP_USE_RTC(bp))
+		return bnxt_ptp_adjfine_rtc(bp, scaled_ppm);
 
-		req->ptp_freq_adj_ppb = cpu_to_le32(ppb);
-		req->enables = cpu_to_le32(PORT_MAC_CFG_REQ_ENABLES_PTP_FREQ_ADJ_PPB);
-		rc = hwrm_req_send(ptp->bp, req);
-		if (rc)
-			netdev_err(ptp->bp->dev,
-				   "ptp adjfine failed. rc = %d\n", rc);
-	}
-	return rc;
+	spin_lock_bh(&ptp->ptp_lock);
+	timecounter_read(&ptp->tc);
+	ptp->cc.mult = adjust_by_scaled_ppm(ptp->cmult, scaled_ppm);
+	spin_unlock_bh(&ptp->ptp_lock);
+	return 0;
 }
 
 void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2)
@@ -879,7 +884,7 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg)
 	u64 ns;
 	int rc;
 
-	if (!bp->ptp_cfg || !(bp->fw_cap & BNXT_FW_CAP_PTP_RTC))
+	if (!bp->ptp_cfg || !BNXT_PTP_USE_RTC(bp))
 		return -ENODEV;
 
 	if (!phc_cfg) {
@@ -932,13 +937,14 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg)
 	atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS);
 	spin_lock_init(&ptp->ptp_lock);
 
-	if (bp->fw_cap & BNXT_FW_CAP_PTP_RTC) {
+	if (BNXT_PTP_USE_RTC(bp)) {
 		bnxt_ptp_timecounter_init(bp, false);
 		rc = bnxt_ptp_init_rtc(bp, phc_cfg);
 		if (rc)
 			goto out;
 	} else {
 		bnxt_ptp_timecounter_init(bp, true);
+		bnxt_ptp_adjfine_rtc(bp, 0);
 	}
 
 	ptp->ptp_info = bnxt_ptp_caps;

From 22a825c541d775c1dbe7b2402786025acad6727b Mon Sep 17 00:00:00 2001
From: "D. Wythe" <alibuda@linux.alibaba.com>
Date: Wed, 8 Mar 2023 16:17:12 +0800
Subject: [PATCH 203/411] net/smc: fix NULL sndbuf_desc in smc_cdc_tx_handler()

When performing a stress test on SMC-R by rmmod mlx5_ib driver
during the wrk/nginx test, we found that there is a probability
of triggering a panic while terminating all link groups.

This issue dues to the race between smc_smcr_terminate_all()
and smc_buf_create().

			smc_smcr_terminate_all

smc_buf_create
/* init */
conn->sndbuf_desc = NULL;
...

			__smc_lgr_terminate
				smc_conn_kill
					smc_close_abort
						smc_cdc_get_slot_and_msg_send

			__softirqentry_text_start
				smc_wr_tx_process_cqe
					smc_cdc_tx_handler
						READ(conn->sndbuf_desc->len);
						/* panic dues to NULL sndbuf_desc */

conn->sndbuf_desc = xxx;

This patch tries to fix the issue by always to check the sndbuf_desc
before send any cdc msg, to make sure that no null pointer is
seen during cqe processing.

Fixes: 0b29ec643613 ("net/smc: immediate termination for SMCR link groups")
Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
Link: https://lore.kernel.org/r/1678263432-17329-1-git-send-email-alibuda@linux.alibaba.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/smc/smc_cdc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 53f63bfbaf5f9..89105e95b4523 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -114,6 +114,9 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 	union smc_host_cursor cfed;
 	int rc;
 
+	if (unlikely(!READ_ONCE(conn->sndbuf_desc)))
+		return -ENOBUFS;
+
 	smc_cdc_add_pending_send(conn, pend);
 
 	conn->tx_cdc_seq++;

From 1a9dc5610ef89d807acdcfbff93a558f341a44da Mon Sep 17 00:00:00 2001
From: Daniil Tatianin <d-tatianin@yandex-team.ru>
Date: Thu, 9 Mar 2023 23:15:56 +0300
Subject: [PATCH 204/411] qed/qed_dev: guard against a possible division by
 zero

Previously we would divide total_left_rate by zero if num_vports
happened to be 1 because non_requested_count is calculated as
num_vports - req_count. Guard against this by validating num_vports at
the beginning and returning an error otherwise.

Found by Linux Verification Center (linuxtesting.org) with the SVACE
static analysis tool.

Fixes: bcd197c81f63 ("qed: Add vport WFQ configuration APIs")
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/20230309201556.191392-1-d-tatianin@yandex-team.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index d61cd32ec3b65..86a93cac26470 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -5083,6 +5083,11 @@ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn,
 
 	num_vports = p_hwfn->qm_info.num_vports;
 
+	if (num_vports < 2) {
+		DP_NOTICE(p_hwfn, "Unexpected num_vports: %d\n", num_vports);
+		return -EINVAL;
+	}
+
 	/* Accounting for the vports which are configured for WFQ explicitly */
 	for (i = 0; i < num_vports; i++) {
 		u32 tmp_speed;

From feb03fd11c5616f3a47e4714d2f9917d0f1a2edd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= <arinc.unal@arinc9.com>
Date: Fri, 10 Mar 2023 10:33:37 +0300
Subject: [PATCH 205/411] net: dsa: mt7530: remove now incorrect comment
 regarding port 5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove now incorrect comment regarding port 5 as GMAC5. This is supposed to
be supported since commit 38f790a80560 ("net: dsa: mt7530: Add support for
port 5") under mt7530_setup_port5().

Fixes: 38f790a80560 ("net: dsa: mt7530: Add support for port 5")
Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Link: https://lore.kernel.org/r/20230310073338.5836-1-arinc.unal@arinc9.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mt7530.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index a508402c4ecbf..b1a79460df0e7 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2201,7 +2201,7 @@ mt7530_setup(struct dsa_switch *ds)
 
 	mt7530_pll_setup(priv);
 
-	/* Enable Port 6 only; P5 as GMAC5 which currently is not supported */
+	/* Enable port 6 */
 	val = mt7530_read(priv, MT7530_MHWTRAP);
 	val &= ~MHWTRAP_P6_DIS & ~MHWTRAP_PHY_ACCESS;
 	val |= MHWTRAP_MANUAL;

From 0b086d76e7b011772b0ac214c6e5fd5816eff2df Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ar=C4=B1n=C3=A7=20=C3=9CNAL?= <arinc.unal@arinc9.com>
Date: Fri, 10 Mar 2023 10:33:38 +0300
Subject: [PATCH 206/411] net: dsa: mt7530: set PLL frequency and trgmii only
 when trgmii is used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As my testing on the MCM MT7530 switch on MT7621 SoC shows, setting the PLL
frequency does not affect MII modes other than trgmii on port 5 and port 6.
So the assumption is that the operation here called "setting the PLL
frequency" actually sets the frequency of the TRGMII TX clock.

Make it so that it and the rest of the trgmii setup run only when the
trgmii mode is used.

Tested rgmii and trgmii modes of port 6 on MCM MT7530 on MT7621AT Unielec
U7621-06 and standalone MT7530 on MT7623NI Bananapi BPI-R2.

Fixes: b8f126a8d543 ("net-next: dsa: add dsa support for Mediatek MT7530 switch")
Tested-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Link: https://lore.kernel.org/r/20230310073338.5836-2-arinc.unal@arinc9.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mt7530.c | 62 ++++++++++++++++++++--------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index b1a79460df0e7..c2d81b7a429dc 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -430,8 +430,6 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
 	switch (interface) {
 	case PHY_INTERFACE_MODE_RGMII:
 		trgint = 0;
-		/* PLL frequency: 125MHz */
-		ncpo1 = 0x0c80;
 		break;
 	case PHY_INTERFACE_MODE_TRGMII:
 		trgint = 1;
@@ -462,38 +460,40 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
 	mt7530_rmw(priv, MT7530_P6ECR, P6_INTF_MODE_MASK,
 		   P6_INTF_MODE(trgint));
 
-	/* Lower Tx Driving for TRGMII path */
-	for (i = 0 ; i < NUM_TRGMII_CTRL ; i++)
-		mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
-			     TD_DM_DRVP(8) | TD_DM_DRVN(8));
-
-	/* Disable MT7530 core and TRGMII Tx clocks */
-	core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
-		   REG_GSWCK_EN | REG_TRGMIICK_EN);
-
-	/* Setup the MT7530 TRGMII Tx Clock */
-	core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
-	core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
-	core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
-	core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
-	core_write(priv, CORE_PLL_GROUP4,
-		   RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN |
-		   RG_SYSPLL_BIAS_LPF_EN);
-	core_write(priv, CORE_PLL_GROUP2,
-		   RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN |
-		   RG_SYSPLL_POSDIV(1));
-	core_write(priv, CORE_PLL_GROUP7,
-		   RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
-		   RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
-
-	/* Enable MT7530 core and TRGMII Tx clocks */
-	core_set(priv, CORE_TRGMII_GSW_CLK_CG,
-		 REG_GSWCK_EN | REG_TRGMIICK_EN);
-
-	if (!trgint)
+	if (trgint) {
+		/* Lower Tx Driving for TRGMII path */
+		for (i = 0 ; i < NUM_TRGMII_CTRL ; i++)
+			mt7530_write(priv, MT7530_TRGMII_TD_ODT(i),
+				     TD_DM_DRVP(8) | TD_DM_DRVN(8));
+
+		/* Disable MT7530 core and TRGMII Tx clocks */
+		core_clear(priv, CORE_TRGMII_GSW_CLK_CG,
+			   REG_GSWCK_EN | REG_TRGMIICK_EN);
+
+		/* Setup the MT7530 TRGMII Tx Clock */
+		core_write(priv, CORE_PLL_GROUP5, RG_LCDDS_PCW_NCPO1(ncpo1));
+		core_write(priv, CORE_PLL_GROUP6, RG_LCDDS_PCW_NCPO0(0));
+		core_write(priv, CORE_PLL_GROUP10, RG_LCDDS_SSC_DELTA(ssc_delta));
+		core_write(priv, CORE_PLL_GROUP11, RG_LCDDS_SSC_DELTA1(ssc_delta));
+		core_write(priv, CORE_PLL_GROUP4,
+			   RG_SYSPLL_DDSFBK_EN | RG_SYSPLL_BIAS_EN |
+			   RG_SYSPLL_BIAS_LPF_EN);
+		core_write(priv, CORE_PLL_GROUP2,
+			   RG_SYSPLL_EN_NORMAL | RG_SYSPLL_VODEN |
+			   RG_SYSPLL_POSDIV(1));
+		core_write(priv, CORE_PLL_GROUP7,
+			   RG_LCDDS_PCW_NCPO_CHG | RG_LCCDS_C(3) |
+			   RG_LCDDS_PWDB | RG_LCDDS_ISO_EN);
+
+		/* Enable MT7530 core and TRGMII Tx clocks */
+		core_set(priv, CORE_TRGMII_GSW_CLK_CG,
+			 REG_GSWCK_EN | REG_TRGMIICK_EN);
+	} else {
 		for (i = 0 ; i < NUM_TRGMII_CTRL; i++)
 			mt7530_rmw(priv, MT7530_TRGMII_RD(i),
 				   RD_TAP_MASK, RD_TAP(16));
+	}
+
 	return 0;
 }
 

From 512dd354718b98c60d4ff6017ff8c9f66c10d03f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 10 Mar 2023 13:37:09 -0600
Subject: [PATCH 207/411] net: ipa: fix a surprising number of bad offsets

A recent commit eliminated a hack that adjusted the offset used for
many GSI registers.  It became possible because we now specify all
GSI register offsets explicitly for every version of IPA.

Unfortunately, a large number of register offsets were *not* updated
as they should have been in that commit.  For IPA v4.5+, the offset
for every GSI register *except* the two inter-EE interrupt masking
registers were supposed to have been reduced by 0xd000.

Tested-by: Luca Weiss <luca.weiss@fairphone.com>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> # SM8350-HDK
Fixes: 59b12b1d27f3 ("net: ipa: kill gsi->virt_raw")
Signed-off-by: Alex Elder <elder@linaro.org>
Link: https://lore.kernel.org/r/20230310193709.1477102-1-elder@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/reg/gsi_reg-v4.5.c | 56 +++++++++++++++---------------
 drivers/net/ipa/reg/gsi_reg-v4.9.c | 44 +++++++++++------------
 2 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ipa/reg/gsi_reg-v4.5.c b/drivers/net/ipa/reg/gsi_reg-v4.5.c
index 648b51b88d4e8..2900e5c3ff888 100644
--- a/drivers/net/ipa/reg/gsi_reg-v4.5.c
+++ b/drivers/net/ipa/reg/gsi_reg-v4.5.c
@@ -137,17 +137,17 @@ REG_STRIDE(EV_CH_E_SCRATCH_1, ev_ch_e_scratch_1,
 	   0x0001004c + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(CH_C_DOORBELL_0, ch_c_doorbell_0,
-	   0x0001e000 + 0x4000 * GSI_EE_AP, 0x08);
+	   0x00011000 + 0x4000 * GSI_EE_AP, 0x08);
 
 REG_STRIDE(EV_CH_E_DOORBELL_0, ev_ch_e_doorbell_0,
-	   0x0001e100 + 0x4000 * GSI_EE_AP, 0x08);
+	   0x00011100 + 0x4000 * GSI_EE_AP, 0x08);
 
 static const u32 reg_gsi_status_fmask[] = {
 	[ENABLED]					= BIT(0),
 						/* Bits 1-31 reserved */
 };
 
-REG_FIELDS(GSI_STATUS, gsi_status, 0x0001f000 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(GSI_STATUS, gsi_status, 0x00012000 + 0x4000 * GSI_EE_AP);
 
 static const u32 reg_ch_cmd_fmask[] = {
 	[CH_CHID]					= GENMASK(7, 0),
@@ -155,7 +155,7 @@ static const u32 reg_ch_cmd_fmask[] = {
 	[CH_OPCODE]					= GENMASK(31, 24),
 };
 
-REG_FIELDS(CH_CMD, ch_cmd, 0x0001f008 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(CH_CMD, ch_cmd, 0x00012008 + 0x4000 * GSI_EE_AP);
 
 static const u32 reg_ev_ch_cmd_fmask[] = {
 	[EV_CHID]					= GENMASK(7, 0),
@@ -163,7 +163,7 @@ static const u32 reg_ev_ch_cmd_fmask[] = {
 	[EV_OPCODE]					= GENMASK(31, 24),
 };
 
-REG_FIELDS(EV_CH_CMD, ev_ch_cmd, 0x0001f010 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(EV_CH_CMD, ev_ch_cmd, 0x00012010 + 0x4000 * GSI_EE_AP);
 
 static const u32 reg_generic_cmd_fmask[] = {
 	[GENERIC_OPCODE]				= GENMASK(4, 0),
@@ -172,7 +172,7 @@ static const u32 reg_generic_cmd_fmask[] = {
 						/* Bits 14-31 reserved */
 };
 
-REG_FIELDS(GENERIC_CMD, generic_cmd, 0x0001f018 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(GENERIC_CMD, generic_cmd, 0x00012018 + 0x4000 * GSI_EE_AP);
 
 static const u32 reg_hw_param_2_fmask[] = {
 	[IRAM_SIZE]					= GENMASK(2, 0),
@@ -188,58 +188,58 @@ static const u32 reg_hw_param_2_fmask[] = {
 	[GSI_USE_INTER_EE]				= BIT(31),
 };
 
-REG_FIELDS(HW_PARAM_2, hw_param_2, 0x0001f040 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(HW_PARAM_2, hw_param_2, 0x00012040 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_TYPE_IRQ, cntxt_type_irq, 0x0001f080 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_TYPE_IRQ, cntxt_type_irq, 0x00012080 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_TYPE_IRQ_MSK, cntxt_type_irq_msk, 0x0001f088 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_TYPE_IRQ_MSK, cntxt_type_irq_msk, 0x00012088 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_SRC_CH_IRQ, cntxt_src_ch_irq, 0x0001f090 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_SRC_CH_IRQ, cntxt_src_ch_irq, 0x00012090 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_SRC_EV_CH_IRQ, cntxt_src_ev_ch_irq, 0x0001f094 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_SRC_EV_CH_IRQ, cntxt_src_ev_ch_irq, 0x00012094 + 0x4000 * GSI_EE_AP);
 
 REG(CNTXT_SRC_CH_IRQ_MSK, cntxt_src_ch_irq_msk,
-    0x0001f098 + 0x4000 * GSI_EE_AP);
+    0x00012098 + 0x4000 * GSI_EE_AP);
 
 REG(CNTXT_SRC_EV_CH_IRQ_MSK, cntxt_src_ev_ch_irq_msk,
-    0x0001f09c + 0x4000 * GSI_EE_AP);
+    0x0001209c + 0x4000 * GSI_EE_AP);
 
 REG(CNTXT_SRC_CH_IRQ_CLR, cntxt_src_ch_irq_clr,
-    0x0001f0a0 + 0x4000 * GSI_EE_AP);
+    0x000120a0 + 0x4000 * GSI_EE_AP);
 
 REG(CNTXT_SRC_EV_CH_IRQ_CLR, cntxt_src_ev_ch_irq_clr,
-    0x0001f0a4 + 0x4000 * GSI_EE_AP);
+    0x000120a4 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_SRC_IEOB_IRQ, cntxt_src_ieob_irq, 0x0001f0b0 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_SRC_IEOB_IRQ, cntxt_src_ieob_irq, 0x000120b0 + 0x4000 * GSI_EE_AP);
 
 REG(CNTXT_SRC_IEOB_IRQ_MSK, cntxt_src_ieob_irq_msk,
-    0x0001f0b8 + 0x4000 * GSI_EE_AP);
+    0x000120b8 + 0x4000 * GSI_EE_AP);
 
 REG(CNTXT_SRC_IEOB_IRQ_CLR, cntxt_src_ieob_irq_clr,
-    0x0001f0c0 + 0x4000 * GSI_EE_AP);
+    0x000120c0 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_GLOB_IRQ_STTS, cntxt_glob_irq_stts, 0x0001f100 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_GLOB_IRQ_STTS, cntxt_glob_irq_stts, 0x00012100 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_GLOB_IRQ_EN, cntxt_glob_irq_en, 0x0001f108 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_GLOB_IRQ_EN, cntxt_glob_irq_en, 0x00012108 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_GLOB_IRQ_CLR, cntxt_glob_irq_clr, 0x0001f110 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_GLOB_IRQ_CLR, cntxt_glob_irq_clr, 0x00012110 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_GSI_IRQ_STTS, cntxt_gsi_irq_stts, 0x0001f118 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_GSI_IRQ_STTS, cntxt_gsi_irq_stts, 0x00012118 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_GSI_IRQ_EN, cntxt_gsi_irq_en, 0x0001f120 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_GSI_IRQ_EN, cntxt_gsi_irq_en, 0x00012120 + 0x4000 * GSI_EE_AP);
 
-REG(CNTXT_GSI_IRQ_CLR, cntxt_gsi_irq_clr, 0x0001f128 + 0x4000 * GSI_EE_AP);
+REG(CNTXT_GSI_IRQ_CLR, cntxt_gsi_irq_clr, 0x00012128 + 0x4000 * GSI_EE_AP);
 
 static const u32 reg_cntxt_intset_fmask[] = {
 	[INTYPE]					= BIT(0)
 						/* Bits 1-31 reserved */
 };
 
-REG_FIELDS(CNTXT_INTSET, cntxt_intset, 0x0001f180 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(CNTXT_INTSET, cntxt_intset, 0x00012180 + 0x4000 * GSI_EE_AP);
 
-REG_FIELDS(ERROR_LOG, error_log, 0x0001f200 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(ERROR_LOG, error_log, 0x00012200 + 0x4000 * GSI_EE_AP);
 
-REG(ERROR_LOG_CLR, error_log_clr, 0x0001f210 + 0x4000 * GSI_EE_AP);
+REG(ERROR_LOG_CLR, error_log_clr, 0x00012210 + 0x4000 * GSI_EE_AP);
 
 static const u32 reg_cntxt_scratch_0_fmask[] = {
 	[INTER_EE_RESULT]				= GENMASK(2, 0),
@@ -248,7 +248,7 @@ static const u32 reg_cntxt_scratch_0_fmask[] = {
 						/* Bits 8-31 reserved */
 };
 
-REG_FIELDS(CNTXT_SCRATCH_0, cntxt_scratch_0, 0x0001f400 + 0x4000 * GSI_EE_AP);
+REG_FIELDS(CNTXT_SCRATCH_0, cntxt_scratch_0, 0x00012400 + 0x4000 * GSI_EE_AP);
 
 static const struct reg *reg_array[] = {
 	[INTER_EE_SRC_CH_IRQ_MSK]	= &reg_inter_ee_src_ch_irq_msk,
diff --git a/drivers/net/ipa/reg/gsi_reg-v4.9.c b/drivers/net/ipa/reg/gsi_reg-v4.9.c
index 4bf45d264d6b9..8b5d95425a766 100644
--- a/drivers/net/ipa/reg/gsi_reg-v4.9.c
+++ b/drivers/net/ipa/reg/gsi_reg-v4.9.c
@@ -27,7 +27,7 @@ static const u32 reg_ch_c_cntxt_0_fmask[] = {
 };
 
 REG_STRIDE_FIELDS(CH_C_CNTXT_0, ch_c_cntxt_0,
-		  0x0001c000 + 0x4000 * GSI_EE_AP, 0x80);
+		  0x0000f000 + 0x4000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ch_c_cntxt_1_fmask[] = {
 	[CH_R_LENGTH]					= GENMASK(19, 0),
@@ -35,11 +35,11 @@ static const u32 reg_ch_c_cntxt_1_fmask[] = {
 };
 
 REG_STRIDE_FIELDS(CH_C_CNTXT_1, ch_c_cntxt_1,
-		  0x0001c004 + 0x4000 * GSI_EE_AP, 0x80);
+		  0x0000f004 + 0x4000 * GSI_EE_AP, 0x80);
 
-REG_STRIDE(CH_C_CNTXT_2, ch_c_cntxt_2, 0x0001c008 + 0x4000 * GSI_EE_AP, 0x80);
+REG_STRIDE(CH_C_CNTXT_2, ch_c_cntxt_2, 0x0000f008 + 0x4000 * GSI_EE_AP, 0x80);
 
-REG_STRIDE(CH_C_CNTXT_3, ch_c_cntxt_3, 0x0001c00c + 0x4000 * GSI_EE_AP, 0x80);
+REG_STRIDE(CH_C_CNTXT_3, ch_c_cntxt_3, 0x0000f00c + 0x4000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ch_c_qos_fmask[] = {
 	[WRR_WEIGHT]					= GENMASK(3, 0),
@@ -53,7 +53,7 @@ static const u32 reg_ch_c_qos_fmask[] = {
 						/* Bits 25-31 reserved */
 };
 
-REG_STRIDE_FIELDS(CH_C_QOS, ch_c_qos, 0x0001c05c + 0x4000 * GSI_EE_AP, 0x80);
+REG_STRIDE_FIELDS(CH_C_QOS, ch_c_qos, 0x0000f05c + 0x4000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_error_log_fmask[] = {
 	[ERR_ARG3]					= GENMASK(3, 0),
@@ -67,16 +67,16 @@ static const u32 reg_error_log_fmask[] = {
 };
 
 REG_STRIDE(CH_C_SCRATCH_0, ch_c_scratch_0,
-	   0x0001c060 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0000f060 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(CH_C_SCRATCH_1, ch_c_scratch_1,
-	   0x0001c064 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0000f064 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(CH_C_SCRATCH_2, ch_c_scratch_2,
-	   0x0001c068 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0000f068 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(CH_C_SCRATCH_3, ch_c_scratch_3,
-	   0x0001c06c + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0000f06c + 0x4000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ev_ch_e_cntxt_0_fmask[] = {
 	[EV_CHTYPE]					= GENMASK(3, 0),
@@ -89,23 +89,23 @@ static const u32 reg_ev_ch_e_cntxt_0_fmask[] = {
 };
 
 REG_STRIDE_FIELDS(EV_CH_E_CNTXT_0, ev_ch_e_cntxt_0,
-		  0x0001d000 + 0x4000 * GSI_EE_AP, 0x80);
+		  0x00010000 + 0x4000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ev_ch_e_cntxt_1_fmask[] = {
 	[R_LENGTH]					= GENMASK(15, 0),
 };
 
 REG_STRIDE_FIELDS(EV_CH_E_CNTXT_1, ev_ch_e_cntxt_1,
-		  0x0001d004 + 0x4000 * GSI_EE_AP, 0x80);
+		  0x00010004 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_2, ev_ch_e_cntxt_2,
-	   0x0001d008 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010008 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_3, ev_ch_e_cntxt_3,
-	   0x0001d00c + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0001000c + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_4, ev_ch_e_cntxt_4,
-	   0x0001d010 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010010 + 0x4000 * GSI_EE_AP, 0x80);
 
 static const u32 reg_ev_ch_e_cntxt_8_fmask[] = {
 	[EV_MODT]					= GENMASK(15, 0),
@@ -114,28 +114,28 @@ static const u32 reg_ev_ch_e_cntxt_8_fmask[] = {
 };
 
 REG_STRIDE_FIELDS(EV_CH_E_CNTXT_8, ev_ch_e_cntxt_8,
-		  0x0001d020 + 0x4000 * GSI_EE_AP, 0x80);
+		  0x00010020 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_9, ev_ch_e_cntxt_9,
-	   0x0001d024 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010024 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_10, ev_ch_e_cntxt_10,
-	   0x0001d028 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010028 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_11, ev_ch_e_cntxt_11,
-	   0x0001d02c + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0001002c + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_12, ev_ch_e_cntxt_12,
-	   0x0001d030 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010030 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_CNTXT_13, ev_ch_e_cntxt_13,
-	   0x0001d034 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010034 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_SCRATCH_0, ev_ch_e_scratch_0,
-	   0x0001d048 + 0x4000 * GSI_EE_AP, 0x80);
+	   0x00010048 + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(EV_CH_E_SCRATCH_1, ev_ch_e_scratch_1,
-	   0x0001d04c + 0x4000 * GSI_EE_AP, 0x80);
+	   0x0001004c + 0x4000 * GSI_EE_AP, 0x80);
 
 REG_STRIDE(CH_C_DOORBELL_0, ch_c_doorbell_0,
 	   0x00011000 + 0x4000 * GSI_EE_AP, 0x08);

From 6175b70df95ed3b8fe177cb20b9b00b1f9008cc4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 22 Feb 2023 23:01:15 -0800
Subject: [PATCH 208/411] powerpc/pseries: RTAS work area requires
 GENERIC_ALLOCATOR

The RTAS work area allocator uses code that is built by
GENERIC_ALLOCATOR, so the PSERIES Kconfig should select the
required Kconfig symbol to fix multiple build errors.

powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o: in function `.rtas_work_area_allocator_init':
rtas-work-area.c:(.init.text+0x288): undefined reference to `.gen_pool_create'
powerpc64-linux-ld: rtas-work-area.c:(.init.text+0x2dc): undefined reference to `.gen_pool_set_algo'
powerpc64-linux-ld: rtas-work-area.c:(.init.text+0x310): undefined reference to `.gen_pool_add_owner'
powerpc64-linux-ld: rtas-work-area.c:(.init.text+0x43c): undefined reference to `.gen_pool_destroy'
powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o:(.toc+0x0): undefined reference to `gen_pool_first_fit_order_align'
powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o: in function `.__rtas_work_area_alloc':
rtas-work-area.c:(.ref.text+0x14c): undefined reference to `.gen_pool_alloc_algo_owner'
powerpc64-linux-ld: rtas-work-area.c:(.ref.text+0x238): undefined reference to `.gen_pool_alloc_algo_owner'
powerpc64-linux-ld: arch/powerpc/platforms/pseries/rtas-work-area.o: in function `.rtas_work_area_free':
rtas-work-area.c:(.ref.text+0x44c): undefined reference to `.gen_pool_free_owner'

Fixes: 43033bc62d34 ("powerpc/pseries: add RTAS work area allocator")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20230223070116.660-2-rdunlap@infradead.org
---
 arch/powerpc/platforms/pseries/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index b481c5c8bae11..21b22bf16ce66 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -7,6 +7,7 @@ config PPC_PSERIES
 	select OF_DYNAMIC
 	select FORCE_PCI
 	select PCI_MSI
+	select GENERIC_ALLOCATOR
 	select PPC_XICS
 	select PPC_XIVE_SPAPR
 	select PPC_ICP_NATIVE

From 112e66017bff7f2837030f34c2bc19501e9212d5 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 10 Mar 2023 11:10:56 -0500
Subject: [PATCH 209/411] KVM: nVMX: add missing consistency checks for CR0 and
 CR4

The effective values of the guest CR0 and CR4 registers may differ from
those included in the VMCS12.  In particular, disabling EPT forces
CR4.PAE=1 and disabling unrestricted guest mode forces CR0.PG=CR0.PE=1.

Therefore, checks on these bits cannot be delegated to the processor
and must be performed by KVM.

Reported-by: Reima ISHII <ishiir@g.ecc.u-tokyo.ac.jp>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7c4f5ca405c75..fd7af786b7248 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3022,7 +3022,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 					struct vmcs12 *vmcs12,
 					enum vm_entry_failure_code *entry_failure_code)
 {
-	bool ia32e;
+	bool ia32e = !!(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE);
 
 	*entry_failure_code = ENTRY_FAIL_DEFAULT;
 
@@ -3048,6 +3048,13 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 					   vmcs12->guest_ia32_perf_global_ctrl)))
 		return -EINVAL;
 
+	if (CC((vmcs12->guest_cr0 & (X86_CR0_PG | X86_CR0_PE)) == X86_CR0_PG))
+		return -EINVAL;
+
+	if (CC(ia32e && !(vmcs12->guest_cr4 & X86_CR4_PAE)) ||
+	    CC(ia32e && !(vmcs12->guest_cr0 & X86_CR0_PG)))
+		return -EINVAL;
+
 	/*
 	 * If the load IA32_EFER VM-entry control is 1, the following checks
 	 * are performed on the field for the IA32_EFER MSR:
@@ -3059,7 +3066,6 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
 	 */
 	if (to_vmx(vcpu)->nested.nested_run_pending &&
 	    (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
-		ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
 		if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
 		    CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
 		    CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&

From 77900bffed14894476969042718a9ca05eb1f477 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 14 Mar 2023 07:40:56 -0400
Subject: [PATCH 210/411] KVM: nVMX: remove unnecessary #ifdef

nested_vmx_check_controls() has already run by the time KVM checks host state,
so the "host address space size" exit control can only be set on x86-64 hosts.
Simplify the condition at the cost of adding some dead code to 32-bit kernels.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/nested.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index fd7af786b7248..1bc2b80273c97 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2903,7 +2903,7 @@ static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
 static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 				       struct vmcs12 *vmcs12)
 {
-	bool ia32e;
+	bool ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
 
 	if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
 	    CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
@@ -2923,12 +2923,6 @@ static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
 					   vmcs12->host_ia32_perf_global_ctrl)))
 		return -EINVAL;
 
-#ifdef CONFIG_X86_64
-	ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
-#else
-	ia32e = false;
-#endif
-
 	if (ia32e) {
 		if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
 			return -EINVAL;

From 06e18547286068be803ba28c6d1c377e94f8745b Mon Sep 17 00:00:00 2001
From: Rong Tao <rongtao@cestc.cn>
Date: Wed, 21 Dec 2022 20:24:32 +0800
Subject: [PATCH 211/411] KVM: VMX: Fix indentation coding style issue

Code indentation should use tabs where possible.

Signed-off-by: Rong Tao <rongtao@cestc.cn>
Message-Id: <tencent_31E6ACADCB6915E157CF5113C41803212107@qq.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bcac3efcde412..d2d6e1b6c7882 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -874,7 +874,7 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
 	 */
 	if (is_guest_mode(vcpu))
 		eb |= get_vmcs12(vcpu)->exception_bitmap;
-        else {
+	else {
 		int mask = 0, match = 0;
 
 		if (enable_ept && (eb & (1u << PF_VECTOR))) {
@@ -1282,7 +1282,7 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
 		}
 	}
 
-    	if (vmx->nested.need_vmcs12_to_shadow_sync)
+	if (vmx->nested.need_vmcs12_to_shadow_sync)
 		nested_sync_vmcs12_to_shadow(vcpu);
 
 	if (vmx->guest_state_loaded)
@@ -5049,10 +5049,10 @@ static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	if (to_vmx(vcpu)->nested.nested_run_pending)
 		return -EBUSY;
 
-       /*
-        * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
-        * e.g. if the IRQ arrived asynchronously after checking nested events.
-        */
+	/*
+	 * An IRQ must not be injected into L2 if it's supposed to VM-Exit,
+	 * e.g. if the IRQ arrived asynchronously after checking nested events.
+	 */
 	if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
 		return -EBUSY;
 

From 53293cb81be6db06306666d51e85778725934be5 Mon Sep 17 00:00:00 2001
From: Rong Tao <rongtao@cestc.cn>
Date: Wed, 21 Dec 2022 20:28:49 +0800
Subject: [PATCH 212/411] KVM: VMX: Use tabs instead of spaces for indentation

Code indentation should use tabs where possible and miss a '*'.

Signed-off-by: Rong Tao <rongtao@cestc.cn>
Message-Id: <tencent_A492CB3F9592578451154442830EA1B02C07@qq.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmenter.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index f550540ed54ee..631fd7da2bc36 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -262,7 +262,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	 * eIBRS has its own protection against poisoned RSB, so it doesn't
 	 * need the RSB filling sequence.  But it does need to be enabled, and a
 	 * single call to retire, before the first unbalanced RET.
-         */
+	 */
 
 	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
 			   X86_FEATURE_RSB_VMEXIT_LITE
@@ -311,7 +311,7 @@ SYM_FUNC_END(vmx_do_nmi_irqoff)
  * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
  * @field:	VMCS field encoding that failed
  * @fault:	%true if the VMREAD faulted, %false if it failed
-
+ *
  * Save and restore volatile registers across a call to vmread_error().  Note,
  * all parameters are passed on the stack.
  */

From 3dc40cf89b9b763910cb0540a35f9d66a067475d Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 14 Mar 2023 08:35:16 -0400
Subject: [PATCH 213/411] selftests: KVM: skip hugetlb tests if huge pages are
 not available

Right now, if KVM memory stress tests are run with hugetlb sources but hugetlb is
not available (either in the kernel or because /proc/sys/vm/nr_hugepages is 0)
the test will fail with a memory allocation error.

This makes it impossible to add tests that default to hugetlb-backed memory,
because on a machine with a default configuration they will fail.  Therefore,
check HugePages_Total as well and, if zero, direct the user to enable hugepages
in procfs.  Furthermore, return KSFT_SKIP whenever hugetlb is not available.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/test_util.c | 25 +++++++++++++--------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 5c22fa4c2825e..b772193f6c186 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -165,26 +165,33 @@ size_t get_trans_hugepagesz(void)
 size_t get_def_hugetlb_pagesz(void)
 {
 	char buf[64];
-	const char *tag = "Hugepagesize:";
+	const char *hugepagesize = "Hugepagesize:";
+	const char *hugepages_total = "HugePages_Total:";
 	FILE *f;
 
 	f = fopen("/proc/meminfo", "r");
 	TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo");
 
 	while (fgets(buf, sizeof(buf), f) != NULL) {
-		if (strstr(buf, tag) == buf) {
+		if (strstr(buf, hugepages_total) == buf) {
+			unsigned long long total = strtoull(buf + strlen(hugepages_total), NULL, 10);
+			if (!total) {
+				fprintf(stderr, "HUGETLB is not enabled in /proc/sys/vm/nr_hugepages\n");
+				exit(KSFT_SKIP);
+			}
+		}
+		if (strstr(buf, hugepagesize) == buf) {
 			fclose(f);
-			return strtoull(buf + strlen(tag), NULL, 10) << 10;
+			return strtoull(buf + strlen(hugepagesize), NULL, 10) << 10;
 		}
 	}
 
-	if (feof(f))
-		TEST_FAIL("HUGETLB is not configured in host kernel");
-	else
-		TEST_FAIL("Error in reading /proc/meminfo");
+	if (feof(f)) {
+		fprintf(stderr, "HUGETLB is not configured in host kernel");
+		exit(KSFT_SKIP);
+	}
 
-	fclose(f);
-	return 0;
+	TEST_FAIL("Error in reading /proc/meminfo");
 }
 
 #define ANON_FLAGS	(MAP_PRIVATE | MAP_ANONYMOUS)

From 3ec7a1b2743c07c45f4a0c508114f6cb410ddef3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 7 Feb 2023 00:21:54 +0000
Subject: [PATCH 214/411] KVM: SVM: Fix a benign off-by-one bug in AVIC
 physical table mask

Define the "physical table max index mask" as bits 8:0, not 9:0.  x2AVIC
currently supports a max of 512 entries, i.e. the max index is 511, and
the inputs to GENMASK_ULL() are inclusive.  The bug is benign as bit 9 is
reserved and never set by KVM, i.e. KVM is just clearing bits that are
guaranteed to be zero.

Note, as of this writing, APM "Rev. 3.39-October 2022" incorrectly states
that bits 11:8 are reserved in Table B-1. VMCB Layout, Control Area.  I.e.
that table wasn't updated when x2AVIC support was added.

Opportunistically fix the comment for the max AVIC ID to align with the
code, and clean up comment formatting too.

Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
Cc: stable@vger.kernel.org
Cc: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Message-Id: <20230207002156.521736-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/svm.h | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index cb1ee53ad3b18..770dcf75eaa97 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -261,20 +261,22 @@ enum avic_ipi_failure_cause {
 	AVIC_IPI_FAILURE_INVALID_BACKING_PAGE,
 };
 
-#define AVIC_PHYSICAL_MAX_INDEX_MASK	GENMASK_ULL(9, 0)
+#define AVIC_PHYSICAL_MAX_INDEX_MASK	GENMASK_ULL(8, 0)
 
 /*
- * For AVIC, the max index allowed for physical APIC ID
- * table is 0xff (255).
+ * For AVIC, the max index allowed for physical APIC ID table is 0xfe (254), as
+ * 0xff is a broadcast to all CPUs, i.e. can't be targeted individually.
  */
 #define AVIC_MAX_PHYSICAL_ID		0XFEULL
 
 /*
- * For x2AVIC, the max index allowed for physical APIC ID
- * table is 0x1ff (511).
+ * For x2AVIC, the max index allowed for physical APIC ID table is 0x1ff (511).
  */
 #define X2AVIC_MAX_PHYSICAL_ID		0x1FFUL
 
+static_assert((AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == AVIC_MAX_PHYSICAL_ID);
+static_assert((X2AVIC_MAX_PHYSICAL_ID & AVIC_PHYSICAL_MAX_INDEX_MASK) == X2AVIC_MAX_PHYSICAL_ID);
+
 #define AVIC_HPA_MASK	~((0xFFFULL << 52) | 0xFFF)
 #define VMCB_AVIC_APIC_BAR_MASK		0xFFFFFFFFFF000ULL
 

From 5999715922c5a3ede5d8fe2a6b17aba58a157d41 Mon Sep 17 00:00:00 2001
From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Date: Tue, 7 Feb 2023 00:21:55 +0000
Subject: [PATCH 215/411] KVM: SVM: Modify AVIC GATag to support max number of
 512 vCPUs

Define AVIC_VCPU_ID_MASK based on AVIC_PHYSICAL_MAX_INDEX, i.e. the mask
that effectively controls the largest guest physical APIC ID supported by
x2AVIC, instead of hardcoding the number of bits to 8 (and the number of
VM bits to 24).

The AVIC GATag is programmed into the AMD IOMMU IRTE to provide a
reference back to KVM in case the IOMMU cannot inject an interrupt into a
non-running vCPU.  In such a case, the IOMMU notifies software by creating
a GALog entry with the corresponded GATag, and KVM then uses the GATag to
find the correct VM+vCPU to kick.  Dropping bit 8 from the GATag results
in kicking the wrong vCPU when targeting vCPUs with x2APIC ID > 255.

Fixes: 4d1d7942e36a ("KVM: SVM: Introduce logic to (de)activate x2AVIC mode")
Cc: stable@vger.kernel.org
Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Message-Id: <20230207002156.521736-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/avic.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index ca684979e90d6..326341a221536 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -27,19 +27,29 @@
 #include "irq.h"
 #include "svm.h"
 
-/* AVIC GATAG is encoded using VM and VCPU IDs */
-#define AVIC_VCPU_ID_BITS		8
-#define AVIC_VCPU_ID_MASK		((1 << AVIC_VCPU_ID_BITS) - 1)
+/*
+ * Encode the arbitrary VM ID and the vCPU's default APIC ID, i.e the vCPU ID,
+ * into the GATag so that KVM can retrieve the correct vCPU from a GALog entry
+ * if an interrupt can't be delivered, e.g. because the vCPU isn't running.
+ *
+ * For the vCPU ID, use however many bits are currently allowed for the max
+ * guest physical APIC ID (limited by the size of the physical ID table), and
+ * use whatever bits remain to assign arbitrary AVIC IDs to VMs.  Note, the
+ * size of the GATag is defined by hardware (32 bits), but is an opaque value
+ * as far as hardware is concerned.
+ */
+#define AVIC_VCPU_ID_MASK		AVIC_PHYSICAL_MAX_INDEX_MASK
 
-#define AVIC_VM_ID_BITS			24
-#define AVIC_VM_ID_NR			(1 << AVIC_VM_ID_BITS)
-#define AVIC_VM_ID_MASK			((1 << AVIC_VM_ID_BITS) - 1)
+#define AVIC_VM_ID_SHIFT		HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
+#define AVIC_VM_ID_MASK			(GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
 
-#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VCPU_ID_BITS) | \
+#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
 						(y & AVIC_VCPU_ID_MASK))
-#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VCPU_ID_BITS) & AVIC_VM_ID_MASK)
+#define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
 #define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
 
+static_assert(AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
+
 static bool force_avic;
 module_param_unsafe(force_avic, bool, 0444);
 

From c281794eaa5c635c0edf39c6a9a3b0171000e92f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 7 Feb 2023 00:21:56 +0000
Subject: [PATCH 216/411] KVM: SVM: WARN if GATag generation drops VM or vCPU
 ID information

WARN if generating a GATag given a VM ID and vCPU ID doesn't yield the
same IDs when pulling the IDs back out of the tag.  Don't bother adding
error handling to callers, this is very much a paranoid sanity check as
KVM fully controls the VM ID and is supposed to reject too-big vCPU IDs.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Tested-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Message-Id: <20230207002156.521736-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/avic.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 326341a221536..cfc8ab7730250 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -43,12 +43,21 @@
 #define AVIC_VM_ID_SHIFT		HWEIGHT32(AVIC_PHYSICAL_MAX_INDEX_MASK)
 #define AVIC_VM_ID_MASK			(GENMASK(31, AVIC_VM_ID_SHIFT) >> AVIC_VM_ID_SHIFT)
 
-#define AVIC_GATAG(x, y)		(((x & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
-						(y & AVIC_VCPU_ID_MASK))
 #define AVIC_GATAG_TO_VMID(x)		((x >> AVIC_VM_ID_SHIFT) & AVIC_VM_ID_MASK)
 #define AVIC_GATAG_TO_VCPUID(x)		(x & AVIC_VCPU_ID_MASK)
 
-static_assert(AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
+#define __AVIC_GATAG(vm_id, vcpu_id)	((((vm_id) & AVIC_VM_ID_MASK) << AVIC_VM_ID_SHIFT) | \
+					 ((vcpu_id) & AVIC_VCPU_ID_MASK))
+#define AVIC_GATAG(vm_id, vcpu_id)					\
+({									\
+	u32 ga_tag = __AVIC_GATAG(vm_id, vcpu_id);			\
+									\
+	WARN_ON_ONCE(AVIC_GATAG_TO_VCPUID(ga_tag) != (vcpu_id));	\
+	WARN_ON_ONCE(AVIC_GATAG_TO_VMID(ga_tag) != (vm_id));		\
+	ga_tag;								\
+})
+
+static_assert(__AVIC_GATAG(AVIC_VM_ID_MASK, AVIC_VCPU_ID_MASK) == -1u);
 
 static bool force_avic;
 module_param_unsafe(force_avic, bool, 0444);

From 4009e0bb7b83d967a14e108d271f003b378f9af9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 4 Feb 2023 02:41:48 +0000
Subject: [PATCH 217/411] KVM: selftests: Move the guts of kvm_hypercall() to a
 separate macro

Extract the guts of kvm_hypercall() to a macro so that Xen hypercalls,
which have a different register ABI, can reuse the VMCALL vs. VMMCALL
logic.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230204024151.1373296-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/lib/x86_64/processor.c      | 29 +++++++++++--------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index ae1e573d94ce7..ff901cb47ffca 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1139,21 +1139,26 @@ const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
 	return NULL;
 }
 
+#define X86_HYPERCALL(inputs...)					\
+({									\
+	uint64_t r;							\
+									\
+	asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"		\
+		     "jnz 1f\n\t"					\
+		     "vmcall\n\t"					\
+		     "jmp 2f\n\t"					\
+		     "1: vmmcall\n\t"					\
+		     "2:"						\
+		     : "=a"(r)						\
+		     : [use_vmmcall] "r" (host_cpu_is_amd), inputs);	\
+									\
+	r;								\
+})
+
 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 		       uint64_t a3)
 {
-	uint64_t r;
-
-	asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t"
-		     "jnz 1f\n\t"
-		     "vmcall\n\t"
-		     "jmp 2f\n\t"
-		     "1: vmmcall\n\t"
-		     "2:"
-		     : "=a"(r)
-		     : "a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3),
-		       [use_vmmcall] "r" (host_cpu_is_amd));
-	return r;
+	return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
 }
 
 const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)

From c0c76d99939cb4ac28cbc5ce542cff2b9e1e1b02 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 4 Feb 2023 02:41:49 +0000
Subject: [PATCH 218/411] KVM: selftests: Add helpers to make Xen-style
 VMCALL/VMMCALL hypercalls

Add wrappers to do hypercalls using VMCALL/VMMCALL and Xen's register ABI
(as opposed to full Xen-style hypercalls through a hypervisor provided
page).  Using the common helpers dedups a pile of code, and uses the
native hypercall instruction when running on AMD.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230204024151.1373296-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h  |  2 +
 .../selftests/kvm/lib/x86_64/processor.c      | 10 +++
 .../selftests/kvm/x86_64/xen_shinfo_test.c    | 63 +++----------------
 3 files changed, 21 insertions(+), 54 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 53ffa43c90db1..90387ddcb2a9d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -1063,6 +1063,8 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
 
 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 		       uint64_t a3);
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
 
 void __vm_xsave_require_permission(int bit, const char *name);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index ff901cb47ffca..c39a4353ba194 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1161,6 +1161,16 @@ uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 	return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
 }
 
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+	return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+	GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
+
 const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
 {
 	static struct kvm_cpuid2 *cpuid;
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 5a3bf8f614176..827b8e126f2f7 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -225,15 +225,8 @@ static void guest_code(void)
 
 	/* Our turn. Deliver event channel (to ourselves) with
 	 * EVTCHNOP_send hypercall. */
-	unsigned long rax;
 	struct evtchn_send s = { .port = 127 };
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_event_channel_op),
-			      "D" (EVTCHNOP_send),
-			      "S" (&s));
-
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
 
 	guest_wait_for_irq();
 
@@ -242,24 +235,15 @@ static void guest_code(void)
 	/* Deliver "outbound" event channel to an eventfd which
 	 * happens to be one of our own irqfds. */
 	s.port = 197;
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_event_channel_op),
-			      "D" (EVTCHNOP_send),
-			      "S" (&s));
-
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
 
 	guest_wait_for_irq();
 
 	GUEST_SYNC(13);
 
 	/* Set a timer 100ms in the future. */
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_set_timer_op),
-			      "D" (rs->state_entry_time + 100000000));
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_set_timer_op,
+		      rs->state_entry_time + 100000000, NULL);
 
 	GUEST_SYNC(14);
 
@@ -281,37 +265,19 @@ static void guest_code(void)
 		.timeout = 0,
 	};
 
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_sched_op),
-			      "D" (SCHEDOP_poll),
-			      "S" (&p));
-
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
 	GUEST_SYNC(17);
 
 	/* Poll for an unset port and wait for the timeout. */
 	p.timeout = 100000000;
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_sched_op),
-			      "D" (SCHEDOP_poll),
-			      "S" (&p));
-
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
 	GUEST_SYNC(18);
 
 	/* A timer will wake the masked port we're waiting on, while we poll */
 	p.timeout = 0;
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_sched_op),
-			      "D" (SCHEDOP_poll),
-			      "S" (&p));
-
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
 	GUEST_SYNC(19);
 
@@ -319,13 +285,7 @@ static void guest_code(void)
 	 * actual interrupt, while we're polling on a different port. */
 	ports[0]++;
 	p.timeout = 0;
-	__asm__ __volatile__ ("vmcall" :
-			      "=a" (rax) :
-			      "a" (__HYPERVISOR_sched_op),
-			      "D" (SCHEDOP_poll),
-			      "S" (&p));
-
-	GUEST_ASSERT(rax == 0);
+	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
 	guest_wait_for_irq();
 
@@ -360,12 +320,7 @@ static void guest_code(void)
 	 * timer IRQ is dropped due to an invalid event channel.
 	 */
 	for (i = 0; i < 100 && !guest_saw_irq; i++)
-		asm volatile("vmcall"
-			     : "=a" (rax)
-			     : "a" (__HYPERVISOR_sched_op),
-			       "D" (SCHEDOP_poll),
-			       "S" (&p)
-			     : "memory");
+		__xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
 	/*
 	 * Re-send the timer IRQ if it was (likely) dropped due to the timer

From e7062a98d0b3e0b42089f4c5da633a1ce41b807f Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 4 Feb 2023 02:41:50 +0000
Subject: [PATCH 219/411] KVM: selftests: Use enum for test numbers in
 xen_shinfo_test

The xen_shinfo_test started off with very few iterations, and the numbers
we used in GUEST_SYNC() were precisely mapped to the RUNSTATE_xxx values
anyway to start with.

It has since grown quite a few more tests, and it's kind of awful to be
handling them all as bare numbers. Especially when I want to add a new
test in the middle. Define an enum for the test stages, and use it both
in the guest code and the host switch statement.

No functional change, if I can count to 24.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230204024151.1373296-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/x86_64/xen_shinfo_test.c    | 133 +++++++++++-------
 1 file changed, 82 insertions(+), 51 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 827b8e126f2f7..1a3d2d51fef01 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -41,6 +41,36 @@
 #define EVTCHN_TEST2 66
 #define EVTCHN_TIMER 13
 
+enum {
+	TEST_INJECT_VECTOR = 0,
+	TEST_RUNSTATE_runnable,
+	TEST_RUNSTATE_blocked,
+	TEST_RUNSTATE_offline,
+	TEST_RUNSTATE_ADJUST,
+	TEST_RUNSTATE_DATA,
+	TEST_STEAL_TIME,
+	TEST_EVTCHN_MASKED,
+	TEST_EVTCHN_UNMASKED,
+	TEST_EVTCHN_SLOWPATH,
+	TEST_EVTCHN_SEND_IOCTL,
+	TEST_EVTCHN_HCALL,
+	TEST_EVTCHN_HCALL_EVENTFD,
+	TEST_TIMER_SETUP,
+	TEST_TIMER_WAIT,
+	TEST_TIMER_RESTORE,
+	TEST_POLL_READY,
+	TEST_POLL_TIMEOUT,
+	TEST_POLL_MASKED,
+	TEST_POLL_WAKE,
+	TEST_TIMER_PAST,
+	TEST_LOCKING_SEND_RACE,
+	TEST_LOCKING_POLL_RACE,
+	TEST_LOCKING_POLL_TIMEOUT,
+	TEST_DONE,
+
+	TEST_GUEST_SAW_IRQ,
+};
+
 #define XEN_HYPERCALL_MSR	0x40000000
 
 #define MIN_STEAL_TIME		50000
@@ -144,7 +174,7 @@ static void evtchn_handler(struct ex_regs *regs)
 	vi->evtchn_pending_sel = 0;
 	guest_saw_irq = true;
 
-	GUEST_SYNC(0x20);
+	GUEST_SYNC(TEST_GUEST_SAW_IRQ);
 }
 
 static void guest_wait_for_irq(void)
@@ -165,41 +195,41 @@ static void guest_code(void)
 	);
 
 	/* Trigger an interrupt injection */
-	GUEST_SYNC(0);
+	GUEST_SYNC(TEST_INJECT_VECTOR);
 
 	guest_wait_for_irq();
 
 	/* Test having the host set runstates manually */
-	GUEST_SYNC(RUNSTATE_runnable);
+	GUEST_SYNC(TEST_RUNSTATE_runnable);
 	GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
 	GUEST_ASSERT(rs->state == 0);
 
-	GUEST_SYNC(RUNSTATE_blocked);
+	GUEST_SYNC(TEST_RUNSTATE_blocked);
 	GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
 	GUEST_ASSERT(rs->state == 0);
 
-	GUEST_SYNC(RUNSTATE_offline);
+	GUEST_SYNC(TEST_RUNSTATE_offline);
 	GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
 	GUEST_ASSERT(rs->state == 0);
 
 	/* Test runstate time adjust */
-	GUEST_SYNC(4);
+	GUEST_SYNC(TEST_RUNSTATE_ADJUST);
 	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
 	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
 
 	/* Test runstate time set */
-	GUEST_SYNC(5);
+	GUEST_SYNC(TEST_RUNSTATE_DATA);
 	GUEST_ASSERT(rs->state_entry_time >= 0x8000);
 	GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
 	GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
 	GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
 
 	/* sched_yield() should result in some 'runnable' time */
-	GUEST_SYNC(6);
+	GUEST_SYNC(TEST_STEAL_TIME);
 	GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
 
 	/* Attempt to deliver a *masked* interrupt */
-	GUEST_SYNC(7);
+	GUEST_SYNC(TEST_EVTCHN_MASKED);
 
 	/* Wait until we see the bit set */
 	struct shared_info *si = (void *)SHINFO_VADDR;
@@ -207,21 +237,21 @@ static void guest_code(void)
 		__asm__ __volatile__ ("rep nop" : : : "memory");
 
 	/* Now deliver an *unmasked* interrupt */
-	GUEST_SYNC(8);
+	GUEST_SYNC(TEST_EVTCHN_UNMASKED);
 
 	guest_wait_for_irq();
 
 	/* Change memslots and deliver an interrupt */
-	GUEST_SYNC(9);
+	GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
 
 	guest_wait_for_irq();
 
 	/* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
-	GUEST_SYNC(10);
+	GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
 
 	guest_wait_for_irq();
 
-	GUEST_SYNC(11);
+	GUEST_SYNC(TEST_EVTCHN_HCALL);
 
 	/* Our turn. Deliver event channel (to ourselves) with
 	 * EVTCHNOP_send hypercall. */
@@ -230,7 +260,7 @@ static void guest_code(void)
 
 	guest_wait_for_irq();
 
-	GUEST_SYNC(12);
+	GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
 
 	/* Deliver "outbound" event channel to an eventfd which
 	 * happens to be one of our own irqfds. */
@@ -239,23 +269,23 @@ static void guest_code(void)
 
 	guest_wait_for_irq();
 
-	GUEST_SYNC(13);
+	GUEST_SYNC(TEST_TIMER_SETUP);
 
 	/* Set a timer 100ms in the future. */
 	xen_hypercall(__HYPERVISOR_set_timer_op,
 		      rs->state_entry_time + 100000000, NULL);
 
-	GUEST_SYNC(14);
+	GUEST_SYNC(TEST_TIMER_WAIT);
 
 	/* Now wait for the timer */
 	guest_wait_for_irq();
 
-	GUEST_SYNC(15);
+	GUEST_SYNC(TEST_TIMER_RESTORE);
 
 	/* The host has 'restored' the timer. Just wait for it. */
 	guest_wait_for_irq();
 
-	GUEST_SYNC(16);
+	GUEST_SYNC(TEST_POLL_READY);
 
 	/* Poll for an event channel port which is already set */
 	u32 ports[1] = { EVTCHN_TIMER };
@@ -267,19 +297,19 @@ static void guest_code(void)
 
 	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
-	GUEST_SYNC(17);
+	GUEST_SYNC(TEST_POLL_TIMEOUT);
 
 	/* Poll for an unset port and wait for the timeout. */
 	p.timeout = 100000000;
 	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
-	GUEST_SYNC(18);
+	GUEST_SYNC(TEST_POLL_MASKED);
 
 	/* A timer will wake the masked port we're waiting on, while we poll */
 	p.timeout = 0;
 	xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
 
-	GUEST_SYNC(19);
+	GUEST_SYNC(TEST_POLL_WAKE);
 
 	/* A timer wake an *unmasked* port which should wake us with an
 	 * actual interrupt, while we're polling on a different port. */
@@ -289,17 +319,17 @@ static void guest_code(void)
 
 	guest_wait_for_irq();
 
-	GUEST_SYNC(20);
+	GUEST_SYNC(TEST_TIMER_PAST);
 
 	/* Timer should have fired already */
 	guest_wait_for_irq();
 
-	GUEST_SYNC(21);
+	GUEST_SYNC(TEST_LOCKING_SEND_RACE);
 	/* Racing host ioctls */
 
 	guest_wait_for_irq();
 
-	GUEST_SYNC(22);
+	GUEST_SYNC(TEST_LOCKING_POLL_RACE);
 	/* Racing vmcall against host ioctl */
 
 	ports[0] = 0;
@@ -327,12 +357,12 @@ static void guest_code(void)
 	 * expiring while the event channel was invalid.
 	 */
 	if (!guest_saw_irq) {
-		GUEST_SYNC(23);
+		GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
 		goto wait_for_timer;
 	}
 	guest_saw_irq = false;
 
-	GUEST_SYNC(24);
+	GUEST_SYNC(TEST_DONE);
 }
 
 static int cmp_timespec(struct timespec *a, struct timespec *b)
@@ -602,25 +632,26 @@ int main(int argc, char *argv[])
 					    "runstate times don't add up");
 
 			switch (uc.args[1]) {
-			case 0:
+			case TEST_INJECT_VECTOR:
 				if (verbose)
 					printf("Delivering evtchn upcall\n");
 				evtchn_irq_expected = true;
 				vinfo->evtchn_upcall_pending = 1;
 				break;
 
-			case RUNSTATE_runnable...RUNSTATE_offline:
+			case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
 				TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
 				if (!do_runstate_tests)
 					goto done;
 				if (verbose)
 					printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
-				rst.u.runstate.state = uc.args[1];
+				rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+					TEST_RUNSTATE_runnable;
 				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
 				break;
 
-			case 4:
+			case TEST_RUNSTATE_ADJUST:
 				if (verbose)
 					printf("Testing RUNSTATE_ADJUST\n");
 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
@@ -635,7 +666,7 @@ int main(int argc, char *argv[])
 				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
 				break;
 
-			case 5:
+			case TEST_RUNSTATE_DATA:
 				if (verbose)
 					printf("Testing RUNSTATE_DATA\n");
 				rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
@@ -647,7 +678,7 @@ int main(int argc, char *argv[])
 				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
 				break;
 
-			case 6:
+			case TEST_STEAL_TIME:
 				if (verbose)
 					printf("Testing steal time\n");
 				/* Yield until scheduler delay exceeds target */
@@ -657,7 +688,7 @@ int main(int argc, char *argv[])
 				} while (get_run_delay() < rundelay);
 				break;
 
-			case 7:
+			case TEST_EVTCHN_MASKED:
 				if (!do_eventfd_tests)
 					goto done;
 				if (verbose)
@@ -667,7 +698,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 8:
+			case TEST_EVTCHN_UNMASKED:
 				if (verbose)
 					printf("Testing unmasked event channel\n");
 				/* Unmask that, but deliver the other one */
@@ -678,7 +709,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 9:
+			case TEST_EVTCHN_SLOWPATH:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				shinfo->evtchn_pending[1] = 0;
@@ -691,7 +722,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 10:
+			case TEST_EVTCHN_SEND_IOCTL:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				if (!do_evtchn_tests)
@@ -711,7 +742,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 11:
+			case TEST_EVTCHN_HCALL:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				shinfo->evtchn_pending[1] = 0;
@@ -722,7 +753,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 12:
+			case TEST_EVTCHN_HCALL_EVENTFD:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				shinfo->evtchn_pending[0] = 0;
@@ -733,7 +764,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 13:
+			case TEST_TIMER_SETUP:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				shinfo->evtchn_pending[1] = 0;
@@ -742,7 +773,7 @@ int main(int argc, char *argv[])
 					printf("Testing guest oneshot timer\n");
 				break;
 
-			case 14:
+			case TEST_TIMER_WAIT:
 				memset(&tmr, 0, sizeof(tmr));
 				tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
 				vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
@@ -756,7 +787,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 15:
+			case TEST_TIMER_RESTORE:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				shinfo->evtchn_pending[0] = 0;
@@ -770,7 +801,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 16:
+			case TEST_POLL_READY:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 
@@ -780,14 +811,14 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 17:
+			case TEST_POLL_TIMEOUT:
 				if (verbose)
 					printf("Testing SCHEDOP_poll timeout\n");
 				shinfo->evtchn_pending[0] = 0;
 				alarm(1);
 				break;
 
-			case 18:
+			case TEST_POLL_MASKED:
 				if (verbose)
 					printf("Testing SCHEDOP_poll wake on masked event\n");
 
@@ -796,7 +827,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 19:
+			case TEST_POLL_WAKE:
 				shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
 				if (verbose)
 					printf("Testing SCHEDOP_poll wake on unmasked event\n");
@@ -813,7 +844,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 20:
+			case TEST_TIMER_PAST:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				/* Read timer and check it is no longer pending */
@@ -830,7 +861,7 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
-			case 21:
+			case TEST_LOCKING_SEND_RACE:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 				alarm(0);
@@ -852,7 +883,7 @@ int main(int argc, char *argv[])
 					__vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
 				break;
 
-			case 22:
+			case TEST_LOCKING_POLL_RACE:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 
@@ -867,7 +898,7 @@ int main(int argc, char *argv[])
 				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
 				break;
 
-			case 23:
+			case TEST_LOCKING_POLL_TIMEOUT:
 				/*
 				 * Optional and possibly repeated sync point.
 				 * Injecting the timer IRQ may fail if the
@@ -889,7 +920,7 @@ int main(int argc, char *argv[])
 							 SHINFO_RACE_TIMEOUT * 1000000000ULL;
 				vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
 				break;
-			case 24:
+			case TEST_DONE:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");
 
@@ -900,7 +931,7 @@ int main(int argc, char *argv[])
 				TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
 				goto done;
 
-			case 0x20:
+			case TEST_GUEST_SAW_IRQ:
 				TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
 				evtchn_irq_expected = false;
 				break;

From e6239a4ec5c51e4d5ee4d1604f741f490c32054c Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 4 Feb 2023 02:41:51 +0000
Subject: [PATCH 220/411] KVM: selftests: Add EVTCHNOP_send slow path test to
 xen_shinfo_test

When kvm_xen_evtchn_send() takes the slow path because the shinfo GPC
needs to be revalidated, it used to violate the SRCU vs. kvm->lock
locking rules and potentially cause a deadlock.

Now that lockdep is learning to catch such things, make sure that code
path is exercised by the selftest.

Link: https://lore.kernel.org/all/20230113124606.10221-2-dwmw2@infradead.org
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230204024151.1373296-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/x86_64/xen_shinfo_test.c    | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 1a3d2d51fef01..d42701dfbd83a 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -26,6 +26,9 @@
 #define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (3 * PAGE_SIZE))
 #define DUMMY_REGION_SLOT	11
 
+#define DUMMY_REGION_GPA_2	(SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2	12
+
 #define SHINFO_ADDR	(SHINFO_REGION_GPA)
 #define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)
 #define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
@@ -54,6 +57,7 @@ enum {
 	TEST_EVTCHN_SLOWPATH,
 	TEST_EVTCHN_SEND_IOCTL,
 	TEST_EVTCHN_HCALL,
+	TEST_EVTCHN_HCALL_SLOWPATH,
 	TEST_EVTCHN_HCALL_EVENTFD,
 	TEST_TIMER_SETUP,
 	TEST_TIMER_WAIT,
@@ -260,6 +264,16 @@ static void guest_code(void)
 
 	guest_wait_for_irq();
 
+	GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+	/*
+	 * Same again, but this time the host has messed with memslots so it
+	 * should take the slow path in kvm_xen_set_evtchn().
+	 */
+	xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+	guest_wait_for_irq();
+
 	GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
 
 	/* Deliver "outbound" event channel to an eventfd which
@@ -753,6 +767,19 @@ int main(int argc, char *argv[])
 				alarm(1);
 				break;
 
+			case TEST_EVTCHN_HCALL_SLOWPATH:
+				TEST_ASSERT(!evtchn_irq_expected,
+					    "Expected event channel IRQ but it didn't happen");
+				shinfo->evtchn_pending[0] = 0;
+
+				if (verbose)
+					printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+				vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+							    DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+				evtchn_irq_expected = true;
+				alarm(1);
+				break;
+
 			case TEST_EVTCHN_HCALL_EVENTFD:
 				TEST_ASSERT(!evtchn_irq_expected,
 					    "Expected event channel IRQ but it didn't happen");

From c96f57b08012805da323c6bdf929bab1b88d250c Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Fri, 3 Feb 2023 17:45:44 -0800
Subject: [PATCH 221/411] KVM: selftests: Make vCPU exit reason test assertion
 common

Make TEST_ASSERT_KVM_EXIT_REASON() macro and replace all exit reason
test assert statements with it.

No functional changes intended.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Message-Id: <20230204014547.583711-2-vipinsh@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/aarch64/psci_test.c |  4 +--
 .../testing/selftests/kvm/include/test_util.h |  8 ++++++
 .../kvm/lib/s390x/diag318_test_handler.c      |  3 +--
 .../selftests/kvm/s390x/sync_regs_test.c      | 15 +++--------
 .../selftests/kvm/set_memory_region_test.c    |  6 +----
 tools/testing/selftests/kvm/x86_64/amx_test.c |  8 +-----
 .../kvm/x86_64/cr4_cpuid_sync_test.c          |  8 +-----
 .../testing/selftests/kvm/x86_64/debug_regs.c |  2 +-
 .../selftests/kvm/x86_64/flds_emulation.h     |  5 +---
 .../selftests/kvm/x86_64/hyperv_clock.c       |  7 +-----
 .../selftests/kvm/x86_64/hyperv_evmcs.c       |  8 +-----
 .../selftests/kvm/x86_64/hyperv_features.c    | 14 ++---------
 .../testing/selftests/kvm/x86_64/hyperv_ipi.c |  6 +----
 .../selftests/kvm/x86_64/hyperv_svm_test.c    |  7 +-----
 .../selftests/kvm/x86_64/hyperv_tlb_flush.c   | 14 ++---------
 .../selftests/kvm/x86_64/kvm_clock_test.c     |  5 +---
 .../selftests/kvm/x86_64/kvm_pv_test.c        |  5 +---
 .../selftests/kvm/x86_64/monitor_mwait_test.c |  9 +------
 .../kvm/x86_64/nested_exceptions_test.c       |  5 +---
 .../selftests/kvm/x86_64/platform_info_test.c | 14 +++--------
 .../kvm/x86_64/pmu_event_filter_test.c        |  6 +----
 tools/testing/selftests/kvm/x86_64/smm_test.c |  9 +------
 .../testing/selftests/kvm/x86_64/state_test.c |  8 +-----
 .../selftests/kvm/x86_64/svm_int_ctl_test.c   |  8 +-----
 .../kvm/x86_64/svm_nested_shutdown_test.c     |  7 +-----
 .../kvm/x86_64/svm_nested_soft_inject_test.c  |  6 +----
 .../selftests/kvm/x86_64/svm_vmcall_test.c    |  6 +----
 .../selftests/kvm/x86_64/sync_regs_test.c     | 25 ++++---------------
 .../kvm/x86_64/triple_fault_event_test.c      |  9 ++-----
 .../selftests/kvm/x86_64/tsc_scaling_sync.c   |  6 +----
 .../kvm/x86_64/ucna_injection_test.c          | 22 +++-------------
 .../selftests/kvm/x86_64/userspace_io_test.c  |  6 +----
 .../kvm/x86_64/userspace_msr_exit_test.c      | 22 +++-------------
 .../kvm/x86_64/vmx_apic_access_test.c         | 11 ++------
 .../kvm/x86_64/vmx_close_while_nested_test.c  |  5 +---
 .../selftests/kvm/x86_64/vmx_dirty_log_test.c |  7 +-----
 .../vmx_exception_with_invalid_guest_state.c  |  4 +--
 .../x86_64/vmx_invalid_nested_guest_state.c   |  4 +--
 .../kvm/x86_64/vmx_nested_tsc_scaling_test.c  |  6 +----
 .../kvm/x86_64/vmx_preemption_timer_test.c    |  8 +-----
 .../kvm/x86_64/vmx_tsc_adjust_test.c          |  6 +----
 .../selftests/kvm/x86_64/xapic_ipi_test.c     |  6 +----
 .../selftests/kvm/x86_64/xen_shinfo_test.c    |  7 +-----
 .../selftests/kvm/x86_64/xen_vmcall_test.c    |  5 +---
 44 files changed, 69 insertions(+), 293 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index cfa36f3879484..9b004905d1d31 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -180,9 +180,7 @@ static void host_test_system_suspend(void)
 
 	enter_guest(source);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
-		    "Unhandled exit reason: %u (%s)",
-		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
 	TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
 		    "Unhandled system event: %u (expected: %u)",
 		    run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 80d6416f3012e..a13663557e2b0 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -63,6 +63,14 @@ void test_assert(bool exp, const char *exp_str,
 		    #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
 } while (0)
 
+#define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do {		\
+	__u32 exit_reason = (vcpu)->run->exit_reason;			\
+									\
+	TEST_ASSERT(exit_reason == (expected),				\
+		    "Unexpected exit reason: %u (%s)",			\
+		    exit_reason, exit_reason_str(exit_reason));		\
+} while (0)
+
 #define TEST_FAIL(fmt, ...) do { \
 	TEST_ASSERT(false, fmt, ##__VA_ARGS__); \
 	__builtin_unreachable(); \
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
index cdb7daeed5fd9..2c432fa164f19 100644
--- a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
+++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
@@ -35,8 +35,7 @@ static uint64_t diag318_handler(void)
 	vcpu_run(vcpu);
 	run = vcpu->run;
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
-		    "DIAGNOSE 0x0318 instruction was not intercepted");
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
 	TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
 		    "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
 	TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 2ddde41c44ba9..636a70ddac1ea 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -126,10 +126,7 @@ void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
-		    "Unexpected exit reason: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
 	TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
 		    (run->s390_sieic.ipa >> 8) == 0x83 &&
 		    (run->s390_sieic.ipb >> 16) == 0x501,
@@ -165,10 +162,7 @@ void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
 
 	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
-		    "Unexpected exit reason: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
 	TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
 		    "r11 sync regs value incorrect 0x%llx.",
 		    run->s.regs.gprs[11]);
@@ -200,10 +194,7 @@ void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
 	run->s.regs.diag318 = 0x4B1D;
 	rv = _vcpu_run(vcpu);
 	TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
-		    "Unexpected exit reason: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
 	TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
 		    "r11 sync regs value incorrect 0x%llx.",
 		    run->s.regs.gprs[11]);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 2ef1d1b72ce43..a849ce23ca975 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -308,7 +308,6 @@ static void test_delete_memory_region(void)
 static void test_zero_memory_regions(void)
 {
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	struct kvm_vm *vm;
 
 	pr_info("Testing KVM_RUN with zero added memory regions\n");
@@ -318,10 +317,7 @@ static void test_zero_memory_regions(void)
 
 	vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul);
 	vcpu_run(vcpu);
-
-	run = vcpu->run;
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
-		    "Unexpected exit_reason = %u\n", run->exit_reason);
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
 
 	kvm_vm_free(vm);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index bd72c6eb3b670..b646cdb5055ad 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -241,7 +241,6 @@ int main(int argc, char *argv[])
 	struct kvm_regs regs1, regs2;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct kvm_x86_state *state;
 	int xsave_restore_size;
 	vm_vaddr_t amx_cfg, tiledata, xsavedata;
@@ -268,7 +267,6 @@ int main(int argc, char *argv[])
 		    "KVM should enumerate max XSAVE size when XSAVE is supported");
 	xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
 
-	run = vcpu->run;
 	vcpu_regs_get(vcpu, &regs1);
 
 	/* Register #NM handler */
@@ -291,10 +289,7 @@ int main(int argc, char *argv[])
 
 	for (stage = 1; ; stage++) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
@@ -350,7 +345,6 @@ int main(int argc, char *argv[])
 		/* Restore state in a new VM.  */
 		vcpu = vm_recreate_with_one_vcpu(vm);
 		vcpu_load_state(vcpu, state);
-		run = vcpu->run;
 		kvm_x86_state_cleanup(state);
 
 		memset(&regs2, 0, sizeof(regs2));
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 1027a671c7d3a..624dc725e14dc 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -50,7 +50,6 @@ static void guest_code(void)
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	struct kvm_vm *vm;
 	struct kvm_sregs sregs;
 	struct ucall uc;
@@ -58,15 +57,10 @@ int main(int argc, char *argv[])
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	run = vcpu->run;
 
 	while (1) {
 		vcpu_run(vcpu);
-
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Unexpected exit reason: %u (%s),\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 7ef99c3359a0d..f6b295e0b2d2b 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -204,7 +204,7 @@ int main(void)
 	vcpu_guest_debug_set(vcpu, &debug);
 
 	vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	cmd = get_ucall(vcpu, &uc);
 	TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
 
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
index e43a7df25f2c5..0a1573d52882b 100644
--- a/tools/testing/selftests/kvm/x86_64/flds_emulation.h
+++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
@@ -24,10 +24,7 @@ static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
 	uint8_t *insn_bytes;
 	uint64_t flags;
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
-		    "Unexpected exit reason: %u (%s)",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
 
 	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
 		    "Unexpected suberror: %u",
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
index 2ee0af0d449ef..f25749eaa6a84 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -207,13 +207,11 @@ int main(void)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct ucall uc;
 	vm_vaddr_t tsc_page_gva;
 	int stage;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
-	run = vcpu->run;
 
 	vcpu_set_hv_cpuid(vcpu);
 
@@ -227,10 +225,7 @@ int main(void)
 
 	for (stage = 1;; stage++) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
index af29e5776d403..7bde0c4dfdbd1 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -237,7 +237,6 @@ int main(int argc, char *argv[])
 
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct ucall uc;
 	int stage;
 
@@ -266,13 +265,8 @@ int main(int argc, char *argv[])
 	pr_info("Running L1 which uses EVMCS to run L2\n");
 
 	for (stage = 1;; stage++) {
-		run = vcpu->run;
-
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index c5e3b39edd079..78606de9385da 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -122,7 +122,6 @@ static void guest_test_msrs_access(void)
 {
 	struct kvm_cpuid2 *prev_cpuid = NULL;
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0;
@@ -151,8 +150,6 @@ static void guest_test_msrs_access(void)
 		vm_init_descriptor_tables(vm);
 		vcpu_init_descriptor_tables(vcpu);
 
-		run = vcpu->run;
-
 		/* TODO: Make this entire test easier to maintain. */
 		if (stage >= 21)
 			vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
@@ -494,9 +491,7 @@ static void guest_test_msrs_access(void)
 			 msr->idx, msr->write ? "write" : "read");
 
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "unexpected exit reason: %u (%s)",
-			    run->exit_reason, exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
@@ -518,7 +513,6 @@ static void guest_test_hcalls_access(void)
 {
 	struct kvm_cpuid2 *prev_cpuid = NULL;
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	struct kvm_vm *vm;
 	struct ucall uc;
 	int stage = 0;
@@ -550,8 +544,6 @@ static void guest_test_hcalls_access(void)
 			vcpu_init_cpuid(vcpu, prev_cpuid);
 		}
 
-		run = vcpu->run;
-
 		switch (stage) {
 		case 0:
 			vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
@@ -669,9 +661,7 @@ static void guest_test_hcalls_access(void)
 		pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
 
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "unexpected exit reason: %u (%s)",
-			    run->exit_reason, exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
index 0cbb0e646ef8d..6feb5ddb031da 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -243,7 +243,6 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu[3];
-	unsigned int exit_reason;
 	vm_vaddr_t hcall_page;
 	pthread_t threads[2];
 	int stage = 1, r;
@@ -283,10 +282,7 @@ int main(int argc, char *argv[])
 	while (true) {
 		vcpu_run(vcpu[0]);
 
-		exit_reason = vcpu[0]->run->exit_reason;
-		TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-			    "unexpected exit reason: %u (%s)",
-			    exit_reason, exit_reason_str(exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu[0], &uc)) {
 		case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 68a7d354ea070..e446d76d1c0c3 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -156,7 +156,6 @@ int main(int argc, char *argv[])
 	vm_vaddr_t hcall_page;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct ucall uc;
 	int stage;
 
@@ -165,7 +164,6 @@ int main(int argc, char *argv[])
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	vcpu_set_hv_cpuid(vcpu);
-	run = vcpu->run;
 	vcpu_alloc_svm(vm, &nested_gva);
 	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
 
@@ -177,10 +175,7 @@ int main(int argc, char *argv[])
 
 	for (stage = 1;; stage++) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
index 68f97ff720a74..4758b6ef5618e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -542,18 +542,13 @@ static void *vcpu_thread(void *arg)
 	struct ucall uc;
 	int old;
 	int r;
-	unsigned int exit_reason;
 
 	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
 	TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
 		    vcpu->id, r);
 
 	vcpu_run(vcpu);
-	exit_reason = vcpu->run->exit_reason;
-
-	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
-		    vcpu->id, exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	switch (get_ucall(vcpu, &uc)) {
 	case UCALL_ABORT:
@@ -587,7 +582,6 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu[3];
-	unsigned int exit_reason;
 	pthread_t threads[2];
 	vm_vaddr_t test_data_page, gva;
 	vm_paddr_t gpa;
@@ -657,11 +651,7 @@ int main(int argc, char *argv[])
 
 	while (true) {
 		vcpu_run(vcpu[0]);
-		exit_reason = vcpu[0]->run->exit_reason;
-
-		TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-			    "unexpected exit reason: %u (%s)",
-			    exit_reason, exit_reason_str(exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu[0], &uc)) {
 		case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
index 813ce282cf561..1778704360a66 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -105,7 +105,6 @@ static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
 static void enter_guest(struct kvm_vcpu *vcpu)
 {
 	struct kvm_clock_data start, end;
-	struct kvm_run *run = vcpu->run;
 	struct kvm_vm *vm = vcpu->vm;
 	struct ucall uc;
 	int i;
@@ -118,9 +117,7 @@ static void enter_guest(struct kvm_vcpu *vcpu)
 		vcpu_run(vcpu);
 		vm_ioctl(vm, KVM_GET_CLOCK, &end);
 
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "unexpected exit reason: %u (%s)",
-			    run->exit_reason, exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 619655c1a1f39..f774a9e62858f 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -111,14 +111,11 @@ static void pr_hcall(struct ucall *uc)
 
 static void enter_guest(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
 	while (true) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "unexpected exit reason: %u (%s)",
-			    run->exit_reason, exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_PR_MSR:
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
index 016070cad36ed..72812644d7f5e 100644
--- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -64,7 +64,6 @@ int main(int argc, char *argv[])
 {
 	uint64_t disabled_quirks;
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	struct kvm_vm *vm;
 	struct ucall uc;
 	int testcase;
@@ -74,18 +73,12 @@ int main(int argc, char *argv[])
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
 
-	run = vcpu->run;
-
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vcpu);
 
 	while (1) {
 		vcpu_run(vcpu);
-
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Unexpected exit reason: %u (%s),\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
index ac33835f78f45..6502aa23c2f84 100644
--- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -166,12 +166,9 @@ static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
 
 static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
 {
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	switch (get_ucall(vcpu, &uc)) {
 	case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 310a104d94f06..c9a07963d68aa 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -36,15 +36,12 @@ static void guest_code(void)
 
 static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
 	vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
 	vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			"Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
-			run->exit_reason,
-			exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
 	get_ucall(vcpu, &uc);
 	TEST_ASSERT(uc.cmd == UCALL_SYNC,
 			"Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
@@ -56,14 +53,9 @@ static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
 
 static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
-
 	vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
 	vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-			"Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
-			run->exit_reason,
-			exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
 }
 
 int main(int argc, char *argv[])
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index bad7ef8c5b92e..2feef25ba6913 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -151,14 +151,10 @@ static void amd_guest_code(void)
  */
 static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
 	vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	get_ucall(vcpu, &uc);
 	TEST_ASSERT(uc.cmd == UCALL_SYNC,
 		    "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index cb38a478e1f62..e18b86666e1fc 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -133,7 +133,6 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu *vcpu;
 	struct kvm_regs regs;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct kvm_x86_state *state;
 	int stage, stage_reported;
 
@@ -142,8 +141,6 @@ int main(int argc, char *argv[])
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-	run = vcpu->run;
-
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
 				    SMRAM_MEMSLOT, SMRAM_PAGES, 0);
 	TEST_ASSERT(vm_phy_pages_alloc(vm, SMRAM_PAGES, SMRAM_GPA, SMRAM_MEMSLOT)
@@ -169,10 +166,7 @@ int main(int argc, char *argv[])
 
 	for (stage = 1;; stage++) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		memset(&regs, 0, sizeof(regs));
 		vcpu_regs_get(vcpu, &regs);
@@ -208,7 +202,6 @@ int main(int argc, char *argv[])
 
 		vcpu = vm_recreate_with_one_vcpu(vm);
 		vcpu_load_state(vcpu, state);
-		run = vcpu->run;
 		kvm_x86_state_cleanup(state);
 	}
 
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index ea578971fb9f9..4c4925a8ab452 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -158,14 +158,12 @@ int main(int argc, char *argv[])
 	struct kvm_regs regs1, regs2;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct kvm_x86_state *state;
 	struct ucall uc;
 	int stage;
 
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	run = vcpu->run;
 
 	vcpu_regs_get(vcpu, &regs1);
 
@@ -183,10 +181,7 @@ int main(int argc, char *argv[])
 
 	for (stage = 1;; stage++) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
@@ -214,7 +209,6 @@ int main(int argc, char *argv[])
 		/* Restore state in a new VM.  */
 		vcpu = vm_recreate_with_one_vcpu(vm);
 		vcpu_load_state(vcpu, state);
-		run = vcpu->run;
 		kvm_x86_state_cleanup(state);
 
 		memset(&regs2, 0, sizeof(regs2));
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
index 4a07ba227b995..32bef39bec217 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -85,7 +85,6 @@ static void l1_guest_code(struct svm_test_data *svm)
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	vm_vaddr_t svm_gva;
 	struct kvm_vm *vm;
 	struct ucall uc;
@@ -103,13 +102,8 @@ int main(int argc, char *argv[])
 	vcpu_alloc_svm(vm, &svm_gva);
 	vcpu_args_set(vcpu, 1, svm_gva);
 
-	run = vcpu->run;
-
 	vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	switch (get_ucall(vcpu, &uc)) {
 	case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
index e73fcdef47bbe..d6fcdcc3af314 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
@@ -42,7 +42,6 @@ static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
-	struct kvm_run *run;
 	vm_vaddr_t svm_gva;
 	struct kvm_vm *vm;
 
@@ -55,13 +54,9 @@ int main(int argc, char *argv[])
 	vcpu_alloc_svm(vm, &svm_gva);
 
 	vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
-	run = vcpu->run;
 
 	vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-		    "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
 
 	kvm_vm_free(vm);
 }
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
index b34980d45648a..4e2479716da6e 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -176,16 +176,12 @@ static void run_test(bool is_nmi)
 	memset(&debug, 0, sizeof(debug));
 	vcpu_guest_debug_set(vcpu, &debug);
 
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
 	alarm(2);
 	vcpu_run(vcpu);
 	alarm(0);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	switch (get_ucall(vcpu, &uc)) {
 	case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index c3ac45df74832..8a62cca28cfbb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -47,14 +47,10 @@ int main(int argc, char *argv[])
 	vcpu_args_set(vcpu, 1, svm_gva);
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d2f9b5bdfab20..2da89fdc2471a 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -132,10 +132,7 @@ int main(int argc, char *argv[])
 	/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	rv = _vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	vcpu_regs_get(vcpu, &regs);
 	compare_regs(&regs, &run->s.regs.regs);
@@ -154,10 +151,7 @@ int main(int argc, char *argv[])
 	run->kvm_valid_regs = TEST_SYNC_FIELDS;
 	run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
 	rv = _vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
 		    "rbx sync regs value incorrect 0x%llx.",
 		    run->s.regs.regs.rbx);
@@ -181,10 +175,7 @@ int main(int argc, char *argv[])
 	run->kvm_dirty_regs = 0;
 	run->s.regs.regs.rbx = 0xDEADBEEF;
 	rv = _vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
 		    "rbx sync regs value incorrect 0x%llx.",
 		    run->s.regs.regs.rbx);
@@ -199,10 +190,7 @@ int main(int argc, char *argv[])
 	regs.rbx = 0xBAC0;
 	vcpu_regs_set(vcpu, &regs);
 	rv = _vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
 		    "rbx sync regs value incorrect 0x%llx.",
 		    run->s.regs.regs.rbx);
@@ -219,10 +207,7 @@ int main(int argc, char *argv[])
 	run->kvm_dirty_regs = TEST_SYNC_FIELDS;
 	run->s.regs.regs.rbx = 0xBBBB;
 	rv = _vcpu_run(vcpu);
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
 		    "rbx sync regs value incorrect 0x%llx.",
 		    run->s.regs.regs.rbx);
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
index ead5d878a71c4..56306a19144a7 100644
--- a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
+++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
@@ -89,9 +89,7 @@ int main(void)
 	run = vcpu->run;
 	vcpu_run(vcpu);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Expected KVM_EXIT_IO, got: %u (%s)\n",
-		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
 		    "Expected IN from port %d from L2, got port %d",
 		    ARBITRARY_IO_PORT, run->io.port);
@@ -111,10 +109,7 @@ int main(void)
 
 
 	if (has_svm) {
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-			    "Got exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
 	} else {
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_DONE:
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
index 47139aab74084..5b669818e39aa 100644
--- a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -64,14 +64,10 @@ static void *run_vcpu(void *_cpu_nr)
 	pthread_spin_unlock(&create_lock);
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu->run;
                 struct ucall uc;
 
 		vcpu_run(vcpu);
-                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-                            "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-                            run->exit_reason,
-                            exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
                 case UCALL_DONE:
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
index a897c7fd8abe4..85f34ca7e49e5 100644
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -137,15 +137,11 @@ static void guest_gp_handler(struct ex_regs *regs)
 
 static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
 {
-	unsigned int exit_reason;
 	struct ucall uc;
 
 	vcpu_run(vcpu);
 
-	exit_reason = vcpu->run->exit_reason;
-	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-		    "exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
-		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
 		    "Expect UCALL_SYNC\n");
 	TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
@@ -182,7 +178,6 @@ static void *run_ucna_injection(void *arg)
 	struct ucall uc;
 	int old;
 	int r;
-	unsigned int exit_reason;
 
 	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
 	TEST_ASSERT(r == 0,
@@ -191,10 +186,7 @@ static void *run_ucna_injection(void *arg)
 
 	vcpu_run(params->vcpu);
 
-	exit_reason = params->vcpu->run->exit_reason;
-	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
-		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
 		    "Expect UCALL_SYNC\n");
 	TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
@@ -204,10 +196,7 @@ static void *run_ucna_injection(void *arg)
 	inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
 	vcpu_run(params->vcpu);
 
-	exit_reason = params->vcpu->run->exit_reason;
-	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
-		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
 	TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
 		    "Expect UCALL_SYNC\n");
 	TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
@@ -217,10 +206,7 @@ static void *run_ucna_injection(void *arg)
 	inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
 	vcpu_run(params->vcpu);
 
-	exit_reason = params->vcpu->run->exit_reason;
-	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-		    "unexpected exit reason %u-%s, expected KVM_EXIT_IO",
-		    exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
 	if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
 		TEST_ASSERT(false, "vCPU assertion failure: %s.\n",
 			    (const char *)uc.args[0]);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
index 91076c9787b41..0cb51fa42773b 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -63,11 +63,7 @@ int main(int argc, char *argv[])
 
 	while (1) {
 		vcpu_run(vcpu);
-
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Unexpected exit reason: %u (%s),\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		if (get_ucall(vcpu, &uc))
 			break;
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index 25fa55344a10e..3533dc2fbfeeb 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -410,10 +410,7 @@ static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
 
 	check_for_guest_assert(vcpu);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_RDMSR,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
 	TEST_ASSERT(run->msr.index == msr_index,
 			"Unexpected msr (0x%04x), expected 0x%04x",
 			run->msr.index, msr_index);
@@ -445,10 +442,7 @@ static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
 
 	check_for_guest_assert(vcpu);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_X86_WRMSR,
-		    "Unexpected exit reason: %u (%s),\n",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
 	TEST_ASSERT(run->msr.index == msr_index,
 			"Unexpected msr (0x%04x), expected 0x%04x",
 			run->msr.index, msr_index);
@@ -472,15 +466,11 @@ static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
 
 static void process_ucall_done(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc;
 
 	check_for_guest_assert(vcpu);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s)",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
 		    "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
@@ -489,15 +479,11 @@ static void process_ucall_done(struct kvm_vcpu *vcpu)
 
 static uint64_t process_ucall(struct kvm_vcpu *vcpu)
 {
-	struct kvm_run *run = vcpu->run;
 	struct ucall uc = {};
 
 	check_for_guest_assert(vcpu);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s)",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	switch (get_ucall(vcpu, &uc)) {
 	case UCALL_SYNC:
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
index 5abecf06329ea..2bed5fb3a0d6e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -96,21 +96,14 @@ int main(int argc, char *argv[])
 
 		vcpu_run(vcpu);
 		if (apic_access_addr == high_gpa) {
-			TEST_ASSERT(run->exit_reason ==
-				    KVM_EXIT_INTERNAL_ERROR,
-				    "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n",
-				    run->exit_reason,
-				    exit_reason_str(run->exit_reason));
+			TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
 			TEST_ASSERT(run->internal.suberror ==
 				    KVM_INTERNAL_ERROR_EMULATION,
 				    "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
 				    run->internal.suberror);
 			break;
 		}
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index d79651b027402..dad988351493e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -64,10 +64,7 @@ int main(int argc, char *argv[])
 		struct ucall uc;
 
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		if (run->io.port == PORT_L0_EXIT)
 			break;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index f0456fb031b1e..e4ad5fef52ffc 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -73,7 +73,6 @@ int main(int argc, char *argv[])
 
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct ucall uc;
 	bool done = false;
 
@@ -84,7 +83,6 @@ int main(int argc, char *argv[])
 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
 	vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_args_set(vcpu, 1, vmx_pages_gva);
-	run = vcpu->run;
 
 	/* Add an extra memory slot for testing dirty logging */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -117,10 +115,7 @@ int main(int argc, char *argv[])
 	while (!done) {
 		memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Unexpected exit reason: %u (%s),\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
index ccdfa5dc1a4dc..be0bdb8c6f78c 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -26,9 +26,7 @@ static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
 
 	vcpu_run(vcpu);
 
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
-		    "Expected KVM_EXIT_INTERNAL_ERROR, got %d (%s)\n",
-		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
 	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
 		    "Expected emulation failure, got %d\n",
 		    run->emulation_failure.suberror);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
index 6bfb4bb471ca2..a100ee5f00093 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
@@ -74,9 +74,7 @@ int main(int argc, char *argv[])
 	 * The first exit to L0 userspace should be an I/O access from L2.
 	 * Running L1 should launch L2 without triggering an exit to userspace.
 	 */
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Expected KVM_EXIT_IO, got: %u (%s)\n",
-		    run->exit_reason, exit_reason_str(run->exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
 		    "Expected IN from port %d from L2, got port %d",
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
index 465a9434d61c2..d427eb146bc58 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -183,14 +183,10 @@ int main(int argc, char *argv[])
 	vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index 0efdc05969a56..affc328001584 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -157,7 +157,6 @@ int main(int argc, char *argv[])
 
 	struct kvm_regs regs1, regs2;
 	struct kvm_vm *vm;
-	struct kvm_run *run;
 	struct kvm_vcpu *vcpu;
 	struct kvm_x86_state *state;
 	struct ucall uc;
@@ -173,7 +172,6 @@ int main(int argc, char *argv[])
 
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	run = vcpu->run;
 
 	vcpu_regs_get(vcpu, &regs1);
 
@@ -182,10 +180,7 @@ int main(int argc, char *argv[])
 
 	for (stage = 1;; stage++) {
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Stage %d: unexpected exit reason: %u (%s),\n",
-			    stage, run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
@@ -237,7 +232,6 @@ int main(int argc, char *argv[])
 		/* Restore state in a new VM.  */
 		vcpu = vm_recreate_with_one_vcpu(vm);
 		vcpu_load_state(vcpu, state);
-		run = vcpu->run;
 		kvm_x86_state_cleanup(state);
 
 		memset(&regs2, 0, sizeof(regs2));
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index ff8ecdf32ae07..2ceb5c78c4427 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -131,14 +131,10 @@ int main(int argc, char *argv[])
 	vcpu_args_set(vcpu, 1, vmx_pages_gva);
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
 		vcpu_run(vcpu);
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 3d272d7f961ef..67ac2a3292efd 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -198,7 +198,6 @@ static void *vcpu_thread(void *arg)
 	struct ucall uc;
 	int old;
 	int r;
-	unsigned int exit_reason;
 
 	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
 	TEST_ASSERT(r == 0,
@@ -207,11 +206,8 @@ static void *vcpu_thread(void *arg)
 
 	fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
 	vcpu_run(vcpu);
-	exit_reason = vcpu->run->exit_reason;
 
-	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
-		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
-		    vcpu->id, exit_reason, exit_reason_str(exit_reason));
+	TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 	if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
 		TEST_ASSERT(false,
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index d42701dfbd83a..05898ad9f4d99 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -622,15 +622,10 @@ int main(int argc, char *argv[])
 	bool evtchn_irq_expected = false;
 
 	for (;;) {
-		volatile struct kvm_run *run = vcpu->run;
 		struct ucall uc;
 
 		vcpu_run(vcpu);
-
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
index 88914d48c65ed..c94cde3b523f2 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -122,10 +122,7 @@ int main(int argc, char *argv[])
 			continue;
 		}
 
-		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-			    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
-			    run->exit_reason,
-			    exit_reason_str(run->exit_reason));
+		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_ABORT:

From 6f974494b8077bb1a2a10fe33f62c143f246f102 Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Fri, 3 Feb 2023 17:45:45 -0800
Subject: [PATCH 222/411] KVM: selftests: Print expected and actual exit reason
 in KVM exit reason assert

Print what KVM exit reason a test was expecting and what it actually
got int TEST_ASSERT_KVM_EXIT_REASON().

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Message-Id: <20230204014547.583711-3-vipinsh@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/test_util.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index a13663557e2b0..a6e9f215ce700 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -67,7 +67,8 @@ void test_assert(bool exp, const char *exp_str,
 	__u32 exit_reason = (vcpu)->run->exit_reason;			\
 									\
 	TEST_ASSERT(exit_reason == (expected),				\
-		    "Unexpected exit reason: %u (%s)",			\
+		    "Wanted KVM exit reason: %u (%s), got: %u (%s)",    \
+		    (expected), exit_reason_str((expected)),		\
 		    exit_reason, exit_reason_str(exit_reason));		\
 } while (0)
 

From 1b3d660e5d7b8b408a2b0988de65672199ebfaf2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 3 Feb 2023 17:45:46 -0800
Subject: [PATCH 223/411] KVM: selftests: Add macro to generate KVM exit reason
 strings

Add and use a macro to generate the KVM exit reason strings array
instead of relying on developers to correctly copy+paste+edit each
string.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20230204014547.583711-4-vipinsh@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 54 +++++++++++-----------
 1 file changed, 28 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 3ea24a5f4c43e..942f092a8e7d3 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1815,38 +1815,40 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 		vcpu_dump(stream, vcpu, indent + 2);
 }
 
+#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}
+
 /* Known KVM exit reasons */
 static struct exit_reason {
 	unsigned int reason;
 	const char *name;
 } exit_reasons_known[] = {
-	{KVM_EXIT_UNKNOWN, "UNKNOWN"},
-	{KVM_EXIT_EXCEPTION, "EXCEPTION"},
-	{KVM_EXIT_IO, "IO"},
-	{KVM_EXIT_HYPERCALL, "HYPERCALL"},
-	{KVM_EXIT_DEBUG, "DEBUG"},
-	{KVM_EXIT_HLT, "HLT"},
-	{KVM_EXIT_MMIO, "MMIO"},
-	{KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
-	{KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
-	{KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
-	{KVM_EXIT_INTR, "INTR"},
-	{KVM_EXIT_SET_TPR, "SET_TPR"},
-	{KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
-	{KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
-	{KVM_EXIT_S390_RESET, "S390_RESET"},
-	{KVM_EXIT_DCR, "DCR"},
-	{KVM_EXIT_NMI, "NMI"},
-	{KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
-	{KVM_EXIT_OSI, "OSI"},
-	{KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
-	{KVM_EXIT_DIRTY_RING_FULL, "DIRTY_RING_FULL"},
-	{KVM_EXIT_X86_RDMSR, "RDMSR"},
-	{KVM_EXIT_X86_WRMSR, "WRMSR"},
-	{KVM_EXIT_XEN, "XEN"},
-	{KVM_EXIT_HYPERV, "HYPERV"},
+	KVM_EXIT_STRING(UNKNOWN),
+	KVM_EXIT_STRING(EXCEPTION),
+	KVM_EXIT_STRING(IO),
+	KVM_EXIT_STRING(HYPERCALL),
+	KVM_EXIT_STRING(DEBUG),
+	KVM_EXIT_STRING(HLT),
+	KVM_EXIT_STRING(MMIO),
+	KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
+	KVM_EXIT_STRING(SHUTDOWN),
+	KVM_EXIT_STRING(FAIL_ENTRY),
+	KVM_EXIT_STRING(INTR),
+	KVM_EXIT_STRING(SET_TPR),
+	KVM_EXIT_STRING(TPR_ACCESS),
+	KVM_EXIT_STRING(S390_SIEIC),
+	KVM_EXIT_STRING(S390_RESET),
+	KVM_EXIT_STRING(DCR),
+	KVM_EXIT_STRING(NMI),
+	KVM_EXIT_STRING(INTERNAL_ERROR),
+	KVM_EXIT_STRING(OSI),
+	KVM_EXIT_STRING(PAPR_HCALL),
+	KVM_EXIT_STRING(DIRTY_RING_FULL),
+	KVM_EXIT_STRING(X86_RDMSR),
+	KVM_EXIT_STRING(X86_WRMSR),
+	KVM_EXIT_STRING(XEN),
+	KVM_EXIT_STRING(HYPERV),
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
-	{KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+	KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
 #endif
 };
 

From f3e707413dbe3920a972d0c2b51175180e7de36b Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Fri, 3 Feb 2023 17:45:47 -0800
Subject: [PATCH 224/411] KVM: selftests: Sync KVM exit reasons in selftests

Add missing KVM_EXIT_* reasons in KVM selftests from
include/uapi/linux/kvm.h

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Message-Id: <20230204014547.583711-5-vipinsh@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 942f092a8e7d3..8ec20ac33de02 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1842,11 +1842,24 @@ static struct exit_reason {
 	KVM_EXIT_STRING(INTERNAL_ERROR),
 	KVM_EXIT_STRING(OSI),
 	KVM_EXIT_STRING(PAPR_HCALL),
-	KVM_EXIT_STRING(DIRTY_RING_FULL),
+	KVM_EXIT_STRING(S390_UCONTROL),
+	KVM_EXIT_STRING(WATCHDOG),
+	KVM_EXIT_STRING(S390_TSCH),
+	KVM_EXIT_STRING(EPR),
+	KVM_EXIT_STRING(SYSTEM_EVENT),
+	KVM_EXIT_STRING(S390_STSI),
+	KVM_EXIT_STRING(IOAPIC_EOI),
+	KVM_EXIT_STRING(HYPERV),
+	KVM_EXIT_STRING(ARM_NISV),
 	KVM_EXIT_STRING(X86_RDMSR),
 	KVM_EXIT_STRING(X86_WRMSR),
+	KVM_EXIT_STRING(DIRTY_RING_FULL),
+	KVM_EXIT_STRING(AP_RESET_HOLD),
+	KVM_EXIT_STRING(X86_BUS_LOCK),
 	KVM_EXIT_STRING(XEN),
-	KVM_EXIT_STRING(HYPERV),
+	KVM_EXIT_STRING(RISCV_SBI),
+	KVM_EXIT_STRING(RISCV_CSR),
+	KVM_EXIT_STRING(NOTIFY),
 #ifdef KVM_EXIT_MEMORY_NOT_PRESENT
 	KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
 #endif

From 934ef33ee75c3846f605f18b65048acd147e3918 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Mon, 13 Mar 2023 15:45:48 +0100
Subject: [PATCH 225/411] x86/PVH: obtain VGA console info in Dom0

A new platform-op was added to Xen to allow obtaining the same VGA
console information PV Dom0 is handed. Invoke the new function and have
the output data processed by xen_init_vga().

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Juergen Gross <jgross@suse.com>

Link: https://lore.kernel.org/r/8f315e92-7bda-c124-71cc-478ab9c5e610@suse.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/xen/Makefile            |  2 +-
 arch/x86/xen/enlighten_pv.c      |  3 ++-
 arch/x86/xen/enlighten_pvh.c     | 13 +++++++++++++
 arch/x86/xen/vga.c               |  5 ++---
 arch/x86/xen/xen-ops.h           |  7 ++++---
 include/xen/interface/platform.h |  3 +++
 6 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 3c5b52fbe4a7f..a9ec8c9f5c5dd 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -45,6 +45,6 @@ obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= spinlock.o
 
 obj-$(CONFIG_XEN_DEBUG_FS)	+= debugfs.o
 
-obj-$(CONFIG_XEN_PV_DOM0)	+= vga.o
+obj-$(CONFIG_XEN_DOM0)		+= vga.o
 
 obj-$(CONFIG_XEN_EFI)		+= efi.o
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5b13796628770..68f5f5d209dfa 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1389,7 +1389,8 @@ asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
 
 		x86_platform.set_legacy_features =
 				xen_dom0_set_legacy_features;
-		xen_init_vga(info, xen_start_info->console.dom0.info_size);
+		xen_init_vga(info, xen_start_info->console.dom0.info_size,
+			     &boot_params.screen_info);
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
 
diff --git a/arch/x86/xen/enlighten_pvh.c b/arch/x86/xen/enlighten_pvh.c
index bcae606bbc5cf..1da44aca896c6 100644
--- a/arch/x86/xen/enlighten_pvh.c
+++ b/arch/x86/xen/enlighten_pvh.c
@@ -43,6 +43,19 @@ void __init xen_pvh_init(struct boot_params *boot_params)
 	x86_init.oem.banner = xen_banner;
 
 	xen_efi_init(boot_params);
+
+	if (xen_initial_domain()) {
+		struct xen_platform_op op = {
+			.cmd = XENPF_get_dom0_console,
+		};
+		long ret = HYPERVISOR_platform_op(&op);
+
+		if (ret > 0)
+			xen_init_vga(&op.u.dom0_console,
+				     min(ret * sizeof(char),
+					 sizeof(op.u.dom0_console)),
+				     &boot_params->screen_info);
+	}
 }
 
 void __init mem_map_via_hcall(struct boot_params *boot_params_p)
diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 14ea32e734d59..d97adab8420f4 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -9,10 +9,9 @@
 
 #include "xen-ops.h"
 
-void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
+void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size,
+			 struct screen_info *screen_info)
 {
-	struct screen_info *screen_info = &boot_params.screen_info;
-
 	/* This is drawn from a dump from vgacon:startup in
 	 * standard Linux. */
 	screen_info->orig_video_mode = 3;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 9a8bb972193d8..a10903785a338 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -108,11 +108,12 @@ static inline void xen_uninit_lock_cpu(int cpu)
 
 struct dom0_vga_console_info;
 
-#ifdef CONFIG_XEN_PV_DOM0
-void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size);
+#ifdef CONFIG_XEN_DOM0
+void __init xen_init_vga(const struct dom0_vga_console_info *, size_t size,
+			 struct screen_info *);
 #else
 static inline void __init xen_init_vga(const struct dom0_vga_console_info *info,
-				       size_t size)
+				       size_t size, struct screen_info *si)
 {
 }
 #endif
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 655d92e803e14..79a443c65ea93 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -483,6 +483,8 @@ struct xenpf_symdata {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_symdata);
 
+#define XENPF_get_dom0_console 64
+
 struct xen_platform_op {
 	uint32_t cmd;
 	uint32_t interface_version; /* XENPF_INTERFACE_VERSION */
@@ -506,6 +508,7 @@ struct xen_platform_op {
 		struct xenpf_mem_hotadd        mem_add;
 		struct xenpf_core_parking      core_parking;
 		struct xenpf_symdata           symdata;
+		struct dom0_vga_console_info   dom0_console;
 		uint8_t                        pad[128];
 	} u;
 };

From b4ee9606378bb9520c94d8b96f0305c3696f5c29 Mon Sep 17 00:00:00 2001
From: Xiaogang Chen <Xiaogang.Chen@amd.com>
Date: Wed, 1 Mar 2023 10:21:06 -0600
Subject: [PATCH 226/411] drm/amdkfd: Fix BO offset for multi-VMA page
 migration

svm_migrate_ram_to_vram migrates a prange from sys ram to vram. The prange may
cross multiple vma. Need remember current dst vram offset in the TTM resource for
each migration.

v2: squash in warning fix (Alex)

Signed-off-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index de8ce72344fc5..391da6acb3e5a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -289,7 +289,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 			 struct migrate_vma *migrate, struct dma_fence **mfence,
-			 dma_addr_t *scratch)
+			 dma_addr_t *scratch, uint64_t ttm_res_offset)
 {
 	uint64_t npages = migrate->npages;
 	struct device *dev = adev->dev;
@@ -299,8 +299,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 	uint64_t i, j;
 	int r;
 
-	pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
-		 prange->last);
+	pr_debug("svms 0x%p [0x%lx 0x%lx 0x%llx]\n", prange->svms, prange->start,
+		 prange->last, ttm_res_offset);
 
 	src = scratch;
 	dst = (uint64_t *)(scratch + npages);
@@ -311,7 +311,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 		goto out;
 	}
 
-	amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+	amdgpu_res_first(prange->ttm_res, ttm_res_offset,
 			 npages << PAGE_SHIFT, &cursor);
 	for (i = j = 0; i < npages; i++) {
 		struct page *spage;
@@ -398,7 +398,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 static long
 svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 			struct vm_area_struct *vma, uint64_t start,
-			uint64_t end, uint32_t trigger)
+			uint64_t end, uint32_t trigger, uint64_t ttm_res_offset)
 {
 	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
 	uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -451,7 +451,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 	else
 		pr_debug("0x%lx pages migrated\n", cpages);
 
-	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+	r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, ttm_res_offset);
 	migrate_vma_pages(&migrate);
 
 	pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -499,6 +499,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	unsigned long addr, start, end;
 	struct vm_area_struct *vma;
 	struct amdgpu_device *adev;
+	uint64_t ttm_res_offset;
 	unsigned long cpages = 0;
 	long r = 0;
 
@@ -519,6 +520,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
+	ttm_res_offset = prange->offset << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
 		unsigned long next;
@@ -528,13 +530,14 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 			break;
 
 		next = min(vma->vm_end, end);
-		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
+		r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, ttm_res_offset);
 		if (r < 0) {
 			pr_debug("failed %ld to migrate\n", r);
 			break;
 		} else {
 			cpages += r;
 		}
+		ttm_res_offset += next - addr;
 		addr = next;
 	}
 

From 8eeddc0d4200762063e1c66b9cc63afa7b24ebf0 Mon Sep 17 00:00:00 2001
From: Xiaogang Chen <xiaogang.chen@amd.com>
Date: Thu, 9 Mar 2023 17:44:55 -0600
Subject: [PATCH 227/411] drm/amdkfd: Get prange->offset after
 svm_range_vram_node_new

During miration to vram prange->offset is valid after vram buffer is located,
either use old one or allocate a new one. Move svm_range_vram_node_new before
migrate for each vma to get valid prange->offset.

v2: squash in warning fix

Fixes: b4ee9606378b ("drm/amdkfd: Fix BO offset for multi-VMA page migration")
Signed-off-by: Xiaogang Chen <Xiaogang.Chen@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 391da6acb3e5a..54933903bcb8a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -305,12 +305,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 	src = scratch;
 	dst = (uint64_t *)(scratch + npages);
 
-	r = svm_range_vram_node_new(adev, prange, true);
-	if (r) {
-		dev_dbg(adev->dev, "fail %d to alloc vram\n", r);
-		goto out;
-	}
-
 	amdgpu_res_first(prange->ttm_res, ttm_res_offset,
 			 npages << PAGE_SHIFT, &cursor);
 	for (i = j = 0; i < npages; i++) {
@@ -391,7 +385,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
 		migrate->dst[i + 3] = 0;
 	}
 #endif
-out:
+
 	return r;
 }
 
@@ -520,6 +514,12 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
+
+	r = svm_range_vram_node_new(adev, prange, true);
+	if (r) {
+		dev_dbg(adev->dev, "fail %ld to alloc vram\n", r);
+		return r;
+	}
 	ttm_res_offset = prange->offset << PAGE_SHIFT;
 
 	for (addr = start; addr < end;) {
@@ -543,6 +543,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 
 	if (cpages)
 		prange->actual_loc = best_loc;
+	else
+		svm_range_vram_node_free(prange);
 
 	return r < 0 ? r : 0;
 }

From b2ca5c5d416b4e72d1e9d0293fc720e2d525fd42 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Tue, 7 Mar 2023 16:19:02 -0800
Subject: [PATCH 228/411] drm/amdkfd: fix a potential double free in
 pqm_create_queue

Set *q to NULL on errors, otherwise pqm_create_queue would free it
again.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 5137476ec18e6..4236539d9f932 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -218,8 +218,8 @@ static int init_user_queue(struct process_queue_manager *pqm,
 	return 0;
 
 cleanup:
-	if (dev->shared_resources.enable_mes)
-		uninit_queue(*q);
+	uninit_queue(*q);
+	*q = NULL;
 	return retval;
 }
 

From ab9bdb1213b4b40942af6a383f555d0c14874c1b Mon Sep 17 00:00:00 2001
From: Tim Huang <tim.huang@amd.com>
Date: Wed, 1 Mar 2023 10:53:03 +0800
Subject: [PATCH 229/411] drm/amd/pm: bump SMU 13.0.4 driver_if header version

Align the SMU driver interface version with PMFW to
suppress the version mismatch message on driver loading.

Signed-off-by: Tim Huang <tim.huang@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 6.1.x
---
 .../drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h    | 4 ++--
 drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h                  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
index f77401709d83c..2162ecd1057d1 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_4.h
@@ -27,7 +27,7 @@
 // *** IMPORTANT ***
 // SMU TEAM: Always increment the interface version if
 // any structure is changed in this file
-#define PMFW_DRIVER_IF_VERSION 7
+#define PMFW_DRIVER_IF_VERSION 8
 
 typedef struct {
   int32_t value;
@@ -198,7 +198,7 @@ typedef struct {
   uint16_t SkinTemp;
   uint16_t DeviceState;
   uint16_t CurTemp;                     //[centi-Celsius]
-  uint16_t spare2;
+  uint16_t FilterAlphaValue;
 
   uint16_t AverageGfxclkFrequency;
   uint16_t AverageFclkFrequency;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 1c0ae2cb757b8..f085cb97a6206 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -29,7 +29,7 @@
 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x37
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x08
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x37

From a9386ee9681585794dbab95d4ce6826f73d19af6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?B=C5=82a=C5=BCej=20Szczygie=C5=82?= <mumei6102@gmail.com>
Date: Sun, 5 Mar 2023 00:44:31 +0100
Subject: [PATCH 230/411] drm/amd/pm: Fix sienna cichlid incorrect OD volage
 after resume
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Always setup overdrive tables after resume. Preserve only some
user-defined settings in user_overdrive_table if they're set.

Copy restored user_overdrive_table into od_table to get correct
values.

On cold boot, BTC was triggered and GfxVfCurve was calibrated. We
got VfCurve settings (a). On resuming back, BTC will be triggered
again and GfxVfCurve will be recalibrated. VfCurve settings (b)
got may be different from those of cold boot.  So if we reuse
those VfCurve settings (a) got on cold boot on suspend, we can
run into discrepencies.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1897
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2276
Reviewed-by: Evan Quan <evan.quan@amd.com>
Signed-off-by: Błażej Szczygieł <mumei6102@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 .../amd/pm/swsmu/smu11/sienna_cichlid_ppt.c   | 43 ++++++++++++++-----
 1 file changed, 33 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 697e98a0a20ab..75f18681e984c 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -2143,16 +2143,9 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu)
 		(OverDriveTable_t *)smu->smu_table.boot_overdrive_table;
 	OverDriveTable_t *user_od_table =
 		(OverDriveTable_t *)smu->smu_table.user_overdrive_table;
+	OverDriveTable_t user_od_table_bak;
 	int ret = 0;
 
-	/*
-	 * For S3/S4/Runpm resume, no need to setup those overdrive tables again as
-	 *   - either they already have the default OD settings got during cold bootup
-	 *   - or they have some user customized OD settings which cannot be overwritten
-	 */
-	if (smu->adev->in_suspend)
-		return 0;
-
 	ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE,
 				   0, (void *)boot_od_table, false);
 	if (ret) {
@@ -2163,7 +2156,23 @@ static int sienna_cichlid_set_default_od_settings(struct smu_context *smu)
 	sienna_cichlid_dump_od_table(smu, boot_od_table);
 
 	memcpy(od_table, boot_od_table, sizeof(OverDriveTable_t));
-	memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
+
+	/*
+	 * For S3/S4/Runpm resume, we need to setup those overdrive tables again,
+	 * but we have to preserve user defined values in "user_od_table".
+	 */
+	if (!smu->adev->in_suspend) {
+		memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
+		smu->user_dpm_profile.user_od = false;
+	} else if (smu->user_dpm_profile.user_od) {
+		memcpy(&user_od_table_bak, user_od_table, sizeof(OverDriveTable_t));
+		memcpy(user_od_table, boot_od_table, sizeof(OverDriveTable_t));
+		user_od_table->GfxclkFmin = user_od_table_bak.GfxclkFmin;
+		user_od_table->GfxclkFmax = user_od_table_bak.GfxclkFmax;
+		user_od_table->UclkFmin = user_od_table_bak.UclkFmin;
+		user_od_table->UclkFmax = user_od_table_bak.UclkFmax;
+		user_od_table->VddGfxOffset = user_od_table_bak.VddGfxOffset;
+	}
 
 	return 0;
 }
@@ -2373,6 +2382,20 @@ static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu,
 	return ret;
 }
 
+static int sienna_cichlid_restore_user_od_settings(struct smu_context *smu)
+{
+	struct smu_table_context *table_context = &smu->smu_table;
+	OverDriveTable_t *od_table = table_context->overdrive_table;
+	OverDriveTable_t *user_od_table = table_context->user_overdrive_table;
+	int res;
+
+	res = smu_v11_0_restore_user_od_settings(smu);
+	if (res == 0)
+		memcpy(od_table, user_od_table, sizeof(OverDriveTable_t));
+
+	return res;
+}
+
 static int sienna_cichlid_run_btc(struct smu_context *smu)
 {
 	int res;
@@ -4400,7 +4423,7 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
 	.set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range,
 	.set_default_od_settings = sienna_cichlid_set_default_od_settings,
 	.od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table,
-	.restore_user_od_settings = smu_v11_0_restore_user_od_settings,
+	.restore_user_od_settings = sienna_cichlid_restore_user_od_settings,
 	.run_btc = sienna_cichlid_run_btc,
 	.set_power_source = smu_v11_0_set_power_source,
 	.get_pp_feature_mask = smu_cmn_get_pp_feature_mask,

From d71e38df3b730a17ab6b25cabb2ccfe8a7f04385 Mon Sep 17 00:00:00 2001
From: Jane Jian <Jane.Jian@amd.com>
Date: Tue, 28 Feb 2023 18:48:41 +0800
Subject: [PATCH 231/411] drm/amdgpu/vcn: custom video info caps for sriov

for sriov, we added a new flag to indicate av1 support,
this will override the original caps info.

Signed-off-by: Jane Jian <Jane.Jian@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |   4 +
 drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h |   3 +-
 drivers/gpu/drm/amd/amdgpu/soc21.c          | 103 ++++++++++++++++++--
 3 files changed, 99 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index b9e9480448afe..4f7bab52282ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -124,6 +124,8 @@ enum AMDGIM_FEATURE_FLAG {
 	AMDGIM_FEATURE_PP_ONE_VF = (1 << 4),
 	/* Indirect Reg Access enabled */
 	AMDGIM_FEATURE_INDIRECT_REG_ACCESS = (1 << 5),
+	/* AV1 Support MODE*/
+	AMDGIM_FEATURE_AV1_SUPPORT = (1 << 6),
 };
 
 enum AMDGIM_REG_ACCESS_FLAG {
@@ -322,6 +324,8 @@ static inline bool is_virtual_machine(void)
 	((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug)
 #define amdgpu_sriov_is_normal(adev) \
 	((!amdgpu_in_reset(adev)) && (!adev->virt.tdr_debug))
+#define amdgpu_sriov_is_av1_support(adev) \
+	((adev)->virt.gim_feature & AMDGIM_FEATURE_AV1_SUPPORT)
 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
 void amdgpu_virt_init_setting(struct amdgpu_device *adev);
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 6c97148ca0ed3..24d42d24e6a01 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -93,7 +93,8 @@ union amd_sriov_msg_feature_flags {
 		uint32_t mm_bw_management  : 1;
 		uint32_t pp_one_vf_mode	   : 1;
 		uint32_t reg_indirect_acc  : 1;
-		uint32_t reserved	   : 26;
+		uint32_t av1_support       : 1;
+		uint32_t reserved	   : 25;
 	} flags;
 	uint32_t all;
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 061793d390ccc..c82b3a7ea5f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -102,6 +102,59 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_decode_vcn1 =
 	.codec_array = vcn_4_0_0_video_codecs_decode_array_vcn1,
 };
 
+/* SRIOV SOC21, not const since data is controlled by host */
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn0[] = {
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_encode_array_vcn1[] = {
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn0 = {
+	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+	.codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = {
+	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+	.codec_array = sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = {
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = {
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn0 = {
+	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0),
+	.codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+};
+
+static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_decode_vcn1 = {
+	.codec_count = ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1),
+	.codec_array = sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+};
+
 static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
 				 const struct amdgpu_video_codecs **codecs)
 {
@@ -112,16 +165,31 @@ static int soc21_query_video_codecs(struct amdgpu_device *adev, bool encode,
 	case IP_VERSION(4, 0, 0):
 	case IP_VERSION(4, 0, 2):
 	case IP_VERSION(4, 0, 4):
-		if (adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) {
-			if (encode)
-				*codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
-			else
-				*codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
+		if (amdgpu_sriov_vf(adev)) {
+			if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+			!amdgpu_sriov_is_av1_support(adev)) {
+				if (encode)
+					*codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn1;
+				else
+					*codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn1;
+			} else {
+				if (encode)
+					*codecs = &sriov_vcn_4_0_0_video_codecs_encode_vcn0;
+				else
+					*codecs = &sriov_vcn_4_0_0_video_codecs_decode_vcn0;
+			}
 		} else {
-			if (encode)
-				*codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
-			else
-				*codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+			if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)) {
+				if (encode)
+					*codecs = &vcn_4_0_0_video_codecs_encode_vcn1;
+				else
+					*codecs = &vcn_4_0_0_video_codecs_decode_vcn1;
+			} else {
+				if (encode)
+					*codecs = &vcn_4_0_0_video_codecs_encode_vcn0;
+				else
+					*codecs = &vcn_4_0_0_video_codecs_decode_vcn0;
+			}
 		}
 		return 0;
 	default:
@@ -730,8 +798,23 @@ static int soc21_common_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (amdgpu_sriov_vf(adev))
+	if (amdgpu_sriov_vf(adev)) {
 		xgpu_nv_mailbox_get_irq(adev);
+		if ((adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0) ||
+		!amdgpu_sriov_is_av1_support(adev)) {
+			amdgpu_virt_update_sriov_video_codec(adev,
+							     sriov_vcn_4_0_0_video_codecs_encode_array_vcn1,
+							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn1),
+							     sriov_vcn_4_0_0_video_codecs_decode_array_vcn1,
+							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn1));
+		} else {
+			amdgpu_virt_update_sriov_video_codec(adev,
+							     sriov_vcn_4_0_0_video_codecs_encode_array_vcn0,
+							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_encode_array_vcn0),
+							     sriov_vcn_4_0_0_video_codecs_decode_array_vcn0,
+							     ARRAY_SIZE(sriov_vcn_4_0_0_video_codecs_decode_array_vcn0));
+		}
+	}
 
 	return 0;
 }

From 9da050b0d9e04439d225a2ec3044af70cdfb3933 Mon Sep 17 00:00:00 2001
From: Chia-I Wu <olvaffe@gmail.com>
Date: Wed, 8 Mar 2023 13:37:24 -0800
Subject: [PATCH 232/411] drm/amdkfd: fix potential kgd_mem UAFs

kgd_mem pointers returned by kfd_process_device_translate_handle are
only guaranteed to be valid while p->mutex is held. As soon as the mutex
is unlocked, another thread can free the BO.

Signed-off-by: Chia-I Wu <olvaffe@gmail.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index a0e30f21e12e7..de310ed367ca1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1312,14 +1312,14 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 		args->n_success = i+1;
 	}
 
-	mutex_unlock(&p->mutex);
-
 	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->adev, (struct kgd_mem *) mem, true);
 	if (err) {
 		pr_debug("Sync memory failed, wait interrupted by user signal\n");
 		goto sync_memory_failed;
 	}
 
+	mutex_unlock(&p->mutex);
+
 	/* Flush TLBs after waiting for the page table updates to complete */
 	for (i = 0; i < args->n_devices; i++) {
 		peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1335,9 +1335,9 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
 bind_process_to_device_failed:
 get_mem_obj_from_handle_failed:
 map_memory_to_gpu_failed:
+sync_memory_failed:
 	mutex_unlock(&p->mutex);
 copy_from_user_failed:
-sync_memory_failed:
 	kfree(devices_arr);
 
 	return err;
@@ -1351,6 +1351,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 	void *mem;
 	long err = 0;
 	uint32_t *devices_arr = NULL, i;
+	bool flush_tlb;
 
 	if (!args->n_devices) {
 		pr_debug("Device IDs array empty\n");
@@ -1403,16 +1404,19 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 		}
 		args->n_success = i+1;
 	}
-	mutex_unlock(&p->mutex);
 
-	if (kfd_flush_tlb_after_unmap(pdd->dev)) {
+	flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev);
+	if (flush_tlb) {
 		err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
 				(struct kgd_mem *) mem, true);
 		if (err) {
 			pr_debug("Sync memory failed, wait interrupted by user signal\n");
 			goto sync_memory_failed;
 		}
+	}
+	mutex_unlock(&p->mutex);
 
+	if (flush_tlb) {
 		/* Flush TLBs after waiting for the page table updates to complete */
 		for (i = 0; i < args->n_devices; i++) {
 			peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
@@ -1428,9 +1432,9 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
 bind_process_to_device_failed:
 get_mem_obj_from_handle_failed:
 unmap_memory_from_gpu_failed:
+sync_memory_failed:
 	mutex_unlock(&p->mutex);
 copy_from_user_failed:
-sync_memory_failed:
 	kfree(devices_arr);
 	return err;
 }

From 728cefa53a36ba378ed4a7f31a0c08289687d824 Mon Sep 17 00:00:00 2001
From: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Date: Fri, 17 Feb 2023 16:08:21 -0500
Subject: [PATCH 233/411] drm/amd/display: Fix HDCP failing to enable after
 suspend

[Why]
On resume some displays are not ready for HDCP, so they will fail if we
start the hdcp authentintication too soon.

Add a delay so that the displays can be ready before we start.

NOTE: Previoulsy this delay was set to 3 seconds but it was causing
issues with compliance, 2 seconds should enough for compliance and the
s3 resume case.

[How]
Change the Delay to 2 seconds.

Reviewed-by: Aurabindo Pillai <Aurabindo.Pillai@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Bhawanpreet Lakha <Bhawanpreet.Lakha@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 8e572f07ec476..4abfd2c9679f4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -561,7 +561,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
 	link->dp.mst_enabled = config->mst_enabled;
 	link->dp.usb4_enabled = config->usb4_enabled;
 	display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
-	link->adjust.auth_delay = 0;
+	link->adjust.auth_delay = 2;
 	link->adjust.hdcp1.disable = 0;
 	conn_state = aconnector->base.state;
 

From 3fadda5de8073e2cb65744803a6941736411d55b Mon Sep 17 00:00:00 2001
From: Guchun Chen <guchun.chen@amd.com>
Date: Thu, 9 Mar 2023 10:02:45 +0800
Subject: [PATCH 234/411] drm/amdgpu: move poll enabled/disable into non DC
 path

Some amd asics having reliable hotplug support don't call
drm_kms_helper_poll_init in driver init sequence. However,
due to the unified suspend/resume path for all asics, because
the output_poll_work->func is not set for these asics, a warning
arrives when suspending.

[   90.656049]  <TASK>
[   90.656050]  ? console_unlock+0x4d/0x100
[   90.656053]  ? __irq_work_queue_local+0x27/0x60
[   90.656056]  ? irq_work_queue+0x2b/0x50
[   90.656057]  ? __wake_up_klogd+0x40/0x60
[   90.656059]  __cancel_work_timer+0xed/0x180
[   90.656061]  drm_kms_helper_poll_disable.cold+0x1f/0x2c [drm_kms_helper]
[   90.656072]  amdgpu_device_suspend+0x81/0x170 [amdgpu]
[   90.656180]  amdgpu_pmops_runtime_suspend+0xb5/0x1b0 [amdgpu]
[   90.656269]  pci_pm_runtime_suspend+0x61/0x1b0

drm_kms_helper_poll_enable/disable is valid when poll_init is called in
amdgpu code, which is only used in non DC path. So move such codes into
non-DC path code to get rid of such warnings.

v1: introduce use_kms_poll flag in amdgpu as the poll stuff check
v2: use dc_enabled as the flag to simply code
v3: move code into non DC path instead of relying on any flag

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2411
Fixes: a4e771729a51 ("drm/probe_helper: sort out poll_running vs poll_enabled")
Reported-by: Bert Karwatzki <spasswolf@web.de>
Suggested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Suggested-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Guchun Chen <guchun.chen@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c  | 4 ----
 drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 4 ++++
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c4a4e2fe66814..da5b0258a237b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4145,8 +4145,6 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D3))
 		DRM_WARN("smart shift update failed\n");
 
-	drm_kms_helper_poll_disable(dev);
-
 	if (fbcon)
 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
 
@@ -4243,8 +4241,6 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 	if (fbcon)
 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
 
-	drm_kms_helper_poll_enable(dev);
-
 	amdgpu_ras_resume(adev);
 
 	if (adev->mode_info.num_crtc) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 503f89a766c37..d60fe7eb5579a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -1618,6 +1618,8 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
 	struct drm_connector_list_iter iter;
 	int r;
 
+	drm_kms_helper_poll_disable(dev);
+
 	/* turn off display hw */
 	drm_modeset_lock_all(dev);
 	drm_connector_list_iter_begin(dev, &iter);
@@ -1694,6 +1696,8 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev)
 
 	drm_modeset_unlock_all(dev);
 
+	drm_kms_helper_poll_enable(dev);
+
 	return 0;
 }
 

From 751281c55579f0cb0e56c9797d4663f689909681 Mon Sep 17 00:00:00 2001
From: Benjamin Cheng <ben@bcheng.me>
Date: Sun, 12 Mar 2023 20:47:39 -0400
Subject: [PATCH 235/411] drm/amd/display: Write to correct dirty_rect

When FB_DAMAGE_CLIPS are provided in a non-MPO scenario, the loop does
not use the counter i. This causes the fill_dc_dity_rect() to always
fill dirty_rects[0], causing graphical artifacts when a damage clip
aware DRM client sends more than 1 damage clip.

Instead, use the flip_addrs->dirty_rect_count which is incremented by
fill_dc_dirty_rect() on a successful fill.

Fixes: 30ebe41582d1 ("drm/amd/display: add FB_DAMAGE_CLIPS support")
Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2453
Signed-off-by: Benjamin Cheng <ben@bcheng.me>
Signed-off-by: Hamza Mahfooz <hamza.mahfooz@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org # 6.1.x
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 009ef917dad47..32abbafd43fa4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -5105,9 +5105,9 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 
 		for (; flip_addrs->dirty_rect_count < num_clips; clips++)
 			fill_dc_dirty_rect(new_plane_state->plane,
-					   &dirty_rects[i], clips->x1,
-					   clips->y1, clips->x2 - clips->x1,
-					   clips->y2 - clips->y1,
+					   &dirty_rects[flip_addrs->dirty_rect_count],
+					   clips->x1, clips->y1,
+					   clips->x2 - clips->x1, clips->y2 - clips->y1,
 					   &flip_addrs->dirty_rect_count,
 					   false);
 		return;

From 45aa07fa832412f1de99194f37fd847915d7e0f6 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@amd.com>
Date: Wed, 8 Mar 2023 22:45:59 -0500
Subject: [PATCH 236/411] drm/amdgpu/nv: fix codec array for SR_IOV

Copy paste error.

Fixes: 384334120b66 ("drm/amdgpu/nv: don't expose AV1 if VCN0 is harvested")
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=4454
Cc: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Acked-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/nv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 855d390c41de1..22e25ca285f80 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -1055,8 +1055,8 @@ static int nv_common_late_init(void *handle)
 			amdgpu_virt_update_sriov_video_codec(adev,
 							     sriov_sc_video_codecs_encode_array,
 							     ARRAY_SIZE(sriov_sc_video_codecs_encode_array),
-							     sriov_sc_video_codecs_decode_array_vcn1,
-							     ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn1));
+							     sriov_sc_video_codecs_decode_array_vcn0,
+							     ARRAY_SIZE(sriov_sc_video_codecs_decode_array_vcn0));
 		}
 	}
 

From 542a56e8eb4467ae654eefab31ff194569db39cd Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Date: Sun, 12 Mar 2023 13:51:00 -0300
Subject: [PATCH 237/411] drm/amdgpu/vcn: Disable indirect SRAM on Vangogh
 broken BIOSes

The VCN firmware loading path enables the indirect SRAM mode if it's
advertised as supported. We might have some cases of FW issues that
prevents this mode to working properly though, ending-up in a failed
probe. An example below, observed in the Steam Deck:

[...]
[drm] failed to load ucode VCN0_RAM(0x3A)
[drm] psp gfx command LOAD_IP_FW(0x6) failed and response status is (0xFFFF0000)
amdgpu 0000:04:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring vcn_dec_0 test failed (-110)
[drm:amdgpu_device_init.cold [amdgpu]] *ERROR* hw_init of IP block <vcn_v3_0> failed -110
amdgpu 0000:04:00.0: amdgpu: amdgpu_device_ip_init failed
amdgpu 0000:04:00.0: amdgpu: Fatal error during GPU init
[...]

Disabling the VCN block circumvents this, but it's a very invasive
workaround that turns off the entire feature. So, let's add a quirk
on VCN loading that checks for known problematic BIOSes on Vangogh,
so we can proactively disable the indirect SRAM mode and allow the
HW proper probe and VCN IP block to work fine.

Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/2385
Fixes: 82132ecc5432 ("drm/amdgpu: enable Vangogh VCN indirect sram mode")
Cc: stable@vger.kernel.org
Cc: James Zhu <James.Zhu@amd.com>
Cc: Leo Liu <leo.liu@amd.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 25217b05c0ea8..e7974de8b035d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -26,6 +26,7 @@
 
 #include <linux/firmware.h>
 #include <linux/module.h>
+#include <linux/dmi.h>
 #include <linux/pci.h>
 #include <linux/debugfs.h>
 #include <drm/drm_drv.h>
@@ -114,6 +115,24 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
 		adev->vcn.indirect_sram = true;
 
+	/*
+	 * Some Steam Deck's BIOS versions are incompatible with the
+	 * indirect SRAM mode, leading to amdgpu being unable to get
+	 * properly probed (and even potentially crashing the kernel).
+	 * Hence, check for these versions here - notice this is
+	 * restricted to Vangogh (Deck's APU).
+	 */
+	if (adev->ip_versions[UVD_HWIP][0] == IP_VERSION(3, 0, 2)) {
+		const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION);
+
+		if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) ||
+		     !strncmp("F7A0114", bios_ver, 7))) {
+			adev->vcn.indirect_sram = false;
+			dev_info(adev->dev,
+				"Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver);
+		}
+	}
+
 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
 	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
 

From 34e0a279a993debaff03158fc2fbf6a00c093643 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 13 Mar 2023 10:30:02 +0100
Subject: [PATCH 238/411] block: do not reverse request order when flushing
 plug list

Commit 26fed4ac4eab ("block: flush plug based on hardware and software
queue order") changed flushing of plug list to submit requests one
device at a time. However while doing that it also started using
list_add_tail() instead of list_add() used previously thus effectively
submitting requests in reverse order. Also when forming a rq_list with
remaining requests (in case two or more devices are used), we
effectively reverse the ordering of the plug list for each device we
process. Submitting requests in reverse order has negative impact on
performance for rotational disks (when BFQ is not in use). We observe
10-25% regression in random 4k write throughput, as well as ~20%
regression in MariaDB OLTP benchmark on rotational storage on btrfs
filesystem.

Fix the problem by preserving ordering of the plug list when inserting
requests into the queuelist as well as by appending to requeue_list
instead of prepending to it.

Fixes: 26fed4ac4eab ("block: flush plug based on hardware and software queue order")
Signed-off-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20230313093002.11756-1-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 5 +++--
 include/linux/blk-mq.h | 6 ++++++
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index d0cb2ef18fe21..cf1a39adf9a5b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2725,6 +2725,7 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
 	struct blk_mq_hw_ctx *this_hctx = NULL;
 	struct blk_mq_ctx *this_ctx = NULL;
 	struct request *requeue_list = NULL;
+	struct request **requeue_lastp = &requeue_list;
 	unsigned int depth = 0;
 	LIST_HEAD(list);
 
@@ -2735,10 +2736,10 @@ static void blk_mq_dispatch_plug_list(struct blk_plug *plug, bool from_sched)
 			this_hctx = rq->mq_hctx;
 			this_ctx = rq->mq_ctx;
 		} else if (this_hctx != rq->mq_hctx || this_ctx != rq->mq_ctx) {
-			rq_list_add(&requeue_list, rq);
+			rq_list_add_tail(&requeue_lastp, rq);
 			continue;
 		}
-		list_add_tail(&rq->queuelist, &list);
+		list_add(&rq->queuelist, &list);
 		depth++;
 	} while (!rq_list_empty(plug->mq_list));
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index dd5ce1137f04a..de0b0c3e7395a 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -228,6 +228,12 @@ static inline unsigned short req_get_ioprio(struct request *req)
 	*(listptr) = rq;				\
 } while (0)
 
+#define rq_list_add_tail(lastpptr, rq)	do {		\
+	(rq)->rq_next = NULL;				\
+	**(lastpptr) = rq;				\
+	*(lastpptr) = &rq->rq_next;			\
+} while (0)
+
 #define rq_list_pop(listptr)				\
 ({							\
 	struct request *__req = NULL;			\

From 28e8cabe80f3e6e3c98121576eda898eeb20f1b1 Mon Sep 17 00:00:00 2001
From: Kristian Overskeid <koverskeid@gmail.com>
Date: Tue, 7 Mar 2023 14:32:29 +0100
Subject: [PATCH 239/411] net: hsr: Don't log netdev_err message on unknown prp
 dst node

If no frames has been exchanged with a node for HSR_NODE_FORGET_TIME, the
node will be deleted from the node_db list. If a frame is sent to the node
after it is deleted, a netdev_err message for each slave interface is
produced. This should not happen with dan nodes because of supervision
frames, but can happen often with san nodes, which clutters the kernel
log. Since the hsr protocol does not support sans, this is only relevant
for the prp protocol.

Signed-off-by: Kristian Overskeid <koverskeid@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_framereg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 00db74d96583d..865eda39d6014 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
 	node_dst = find_node_by_addr_A(&port->hsr->node_db,
 				       eth_hdr(skb)->h_dest);
 	if (!node_dst) {
-		if (net_ratelimit())
+		if (net_ratelimit() && port->hsr->prot_version != PRP_V1)
 			netdev_err(skb->dev, "%s: Unknown node\n", __func__);
 		return;
 	}

From 9026c0bf233db53b86f74f4c620715e94eb32a09 Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 13 Mar 2023 00:49:24 +0000
Subject: [PATCH 240/411] ALSA: asihpi: check pao in control_message()

control_message() might be called with pao = NULL.
Here indicates control_message() as sample.

(B)	static void control_message(struct hpi_adapter_obj *pao, ...)
	{                                                   ^^^
		struct hpi_hw_obj *phw = pao->priv;
		...                      ^^^
	}

(A)	void _HPI_6205(struct hpi_adapter_obj *pao, ...)
	{                                      ^^^
		...
		case HPI_OBJ_CONTROL:
(B)			control_message(pao, phm, phr);
			break;          ^^^
		...
	}

	void HPI_6205(...)
	{
		...
(A)		_HPI_6205(NULL, phm, phr);
		...       ^^^^
	}

Therefore, We will get too many warning via cppcheck, like below

	sound/pci/asihpi/hpi6205.c:238:27: warning: Possible null pointer dereference: pao [nullPointer]
		 struct hpi_hw_obj *phw = pao->priv;
		                          ^
	sound/pci/asihpi/hpi6205.c:433:13: note: Calling function '_HPI_6205', 1st argument 'NULL' value is 0
		  _HPI_6205(NULL, phm, phr);
		            ^
	sound/pci/asihpi/hpi6205.c:401:20: note: Calling function 'control_message', 1st argument 'pao' value is 0
	   control_message(pao, phm, phr);
	                   ^
Set phr->error like many functions doing, and don't call _HPI_6205()
with NULL.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87ttypeaqz.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/asihpi/hpi6205.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sound/pci/asihpi/hpi6205.c b/sound/pci/asihpi/hpi6205.c
index 27e11b5f70b97..c7d7eff86727f 100644
--- a/sound/pci/asihpi/hpi6205.c
+++ b/sound/pci/asihpi/hpi6205.c
@@ -430,7 +430,7 @@ void HPI_6205(struct hpi_message *phm, struct hpi_response *phr)
 		pao = hpi_find_adapter(phm->adapter_index);
 	} else {
 		/* subsys messages don't address an adapter */
-		_HPI_6205(NULL, phm, phr);
+		phr->error = HPI_ERROR_INVALID_OBJ_INDEX;
 		return;
 	}
 

From 98e5eb110095ec77cb6d775051d181edbf9cd3cf Mon Sep 17 00:00:00 2001
From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Date: Mon, 13 Mar 2023 00:50:28 +0000
Subject: [PATCH 241/411] ALSA: hda/ca0132: fixup buffer overrun at
 tuning_ctl_set()

tuning_ctl_set() might have buffer overrun at (X) if it didn't break
from loop by matching (A).

	static int tuning_ctl_set(...)
	{
		for (i = 0; i < TUNING_CTLS_COUNT; i++)
(A)			if (nid == ca0132_tuning_ctls[i].nid)
				break;

		snd_hda_power_up(...);
(X)		dspio_set_param(..., ca0132_tuning_ctls[i].mid, ...);
		snd_hda_power_down(...);                ^

		return 1;
	}

We will get below error by cppcheck

	sound/pci/hda/patch_ca0132.c:4229:2: note: After for loop, i has value 12
	 for (i = 0; i < TUNING_CTLS_COUNT; i++)
	 ^
	sound/pci/hda/patch_ca0132.c:4234:43: note: Array index out of bounds
	 dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20,
	                                           ^
This patch cares non match case.

Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Link: https://lore.kernel.org/r/87sfe9eap7.wl-kuninori.morimoto.gx@renesas.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/patch_ca0132.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index acde4cd58785e..099722ebaed83 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -4228,8 +4228,10 @@ static int tuning_ctl_set(struct hda_codec *codec, hda_nid_t nid,
 
 	for (i = 0; i < TUNING_CTLS_COUNT; i++)
 		if (nid == ca0132_tuning_ctls[i].nid)
-			break;
+			goto found;
 
+	return -EINVAL;
+found:
 	snd_hda_power_up(codec);
 	dspio_set_param(codec, ca0132_tuning_ctls[i].mid, 0x20,
 			ca0132_tuning_ctls[i].req,

From 42da2c00b91486980a724c05c29818a7e60a067e Mon Sep 17 00:00:00 2001
From: Xujun Leng <lengxujun2007@126.com>
Date: Sun, 12 Mar 2023 15:14:23 +0800
Subject: [PATCH 242/411] docs: process: typo fix

In the second paragraph of section "Respond to review comments", there is
a spelling mistake: "aganst" should be "against".

Signed-off-by: Xujun Leng <lengxujun2007@126.com>
Link: https://lore.kernel.org/r/20230312071423.3042-1-lengxujun2007@126.com
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/process/submitting-patches.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst
index eac7167dce83d..69ce64e03c70f 100644
--- a/Documentation/process/submitting-patches.rst
+++ b/Documentation/process/submitting-patches.rst
@@ -320,7 +320,7 @@ for their time.  Code review is a tiring and time-consuming process, and
 reviewers sometimes get grumpy.  Even in that case, though, respond
 politely and address the problems they have pointed out.  When sending a next
 version, add a ``patch changelog`` to the cover letter or to individual patches
-explaining difference aganst previous submission (see
+explaining difference against previous submission (see
 :ref:`the_canonical_patch_format`).
 
 See Documentation/process/email-clients.rst for recommendations on email

From d7ba3657d5162bd551e5c653f67f941c94a7dc0a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@nvidia.com>
Date: Fri, 10 Mar 2023 10:58:57 +0100
Subject: [PATCH 243/411] docs: vfio: fix header path

The text points to a different header file, fix by changing
the path to "uapi".

Signed-off-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Link: https://lore.kernel.org/r/20230310095857.985814-1-jiri@resnulli.us
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/driver-api/vfio.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/driver-api/vfio.rst b/Documentation/driver-api/vfio.rst
index 50b690f7f6639..68abc089d6ddd 100644
--- a/Documentation/driver-api/vfio.rst
+++ b/Documentation/driver-api/vfio.rst
@@ -242,7 +242,7 @@ group and can access them as follows::
 VFIO User API
 -------------------------------------------------------------------------------
 
-Please see include/linux/vfio.h for complete API documentation.
+Please see include/uapi/linux/vfio.h for complete API documentation.
 
 VFIO bus driver API
 -------------------------------------------------------------------------------

From 044b14b51d78a549664efac3a5a4c9edd1211b79 Mon Sep 17 00:00:00 2001
From: Nikita Romanyuk <ufh8945@gmail.com>
Date: Sat, 25 Feb 2023 10:12:28 +0300
Subject: [PATCH 244/411] drivers: video: logo: fix code style issues in
 pnmtologo.c

Signed-off-by: Nikita Romanyuk <ufh8945@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/logo/pnmtologo.c | 667 +++++++++++++++++----------------
 1 file changed, 334 insertions(+), 333 deletions(-)

diff --git a/drivers/video/logo/pnmtologo.c b/drivers/video/logo/pnmtologo.c
index 4718d7895f0b4..78cb95f3cfce6 100644
--- a/drivers/video/logo/pnmtologo.c
+++ b/drivers/video/logo/pnmtologo.c
@@ -1,4 +1,3 @@
-
 /*
  *  Convert a logo in ASCII PNM format to C source suitable for inclusion in
  *  the Linux kernel
@@ -34,37 +33,37 @@ static FILE *out;
 #define LINUX_LOGO_GRAY256	4	/* 256 levels grayscale */
 
 static const char *logo_types[LINUX_LOGO_GRAY256+1] = {
-    [LINUX_LOGO_MONO] = "LINUX_LOGO_MONO",
-    [LINUX_LOGO_VGA16] = "LINUX_LOGO_VGA16",
-    [LINUX_LOGO_CLUT224] = "LINUX_LOGO_CLUT224",
-    [LINUX_LOGO_GRAY256] = "LINUX_LOGO_GRAY256"
+	[LINUX_LOGO_MONO] = "LINUX_LOGO_MONO",
+	[LINUX_LOGO_VGA16] = "LINUX_LOGO_VGA16",
+	[LINUX_LOGO_CLUT224] = "LINUX_LOGO_CLUT224",
+	[LINUX_LOGO_GRAY256] = "LINUX_LOGO_GRAY256"
 };
 
 #define MAX_LINUX_LOGO_COLORS	224
 
 struct color {
-    unsigned char red;
-    unsigned char green;
-    unsigned char blue;
+	unsigned char red;
+	unsigned char green;
+	unsigned char blue;
 };
 
 static const struct color clut_vga16[16] = {
-    { 0x00, 0x00, 0x00 },
-    { 0x00, 0x00, 0xaa },
-    { 0x00, 0xaa, 0x00 },
-    { 0x00, 0xaa, 0xaa },
-    { 0xaa, 0x00, 0x00 },
-    { 0xaa, 0x00, 0xaa },
-    { 0xaa, 0x55, 0x00 },
-    { 0xaa, 0xaa, 0xaa },
-    { 0x55, 0x55, 0x55 },
-    { 0x55, 0x55, 0xff },
-    { 0x55, 0xff, 0x55 },
-    { 0x55, 0xff, 0xff },
-    { 0xff, 0x55, 0x55 },
-    { 0xff, 0x55, 0xff },
-    { 0xff, 0xff, 0x55 },
-    { 0xff, 0xff, 0xff },
+	{ 0x00, 0x00, 0x00 },
+	{ 0x00, 0x00, 0xaa },
+	{ 0x00, 0xaa, 0x00 },
+	{ 0x00, 0xaa, 0xaa },
+	{ 0xaa, 0x00, 0x00 },
+	{ 0xaa, 0x00, 0xaa },
+	{ 0xaa, 0x55, 0x00 },
+	{ 0xaa, 0xaa, 0xaa },
+	{ 0x55, 0x55, 0x55 },
+	{ 0x55, 0x55, 0xff },
+	{ 0x55, 0xff, 0x55 },
+	{ 0x55, 0xff, 0xff },
+	{ 0xff, 0x55, 0x55 },
+	{ 0xff, 0x55, 0xff },
+	{ 0xff, 0xff, 0x55 },
+	{ 0xff, 0xff, 0xff },
 };
 
 
@@ -77,438 +76,440 @@ static unsigned int logo_clutsize;
 static int is_plain_pbm = 0;
 
 static void die(const char *fmt, ...)
-    __attribute__ ((noreturn)) __attribute ((format (printf, 1, 2)));
-static void usage(void) __attribute ((noreturn));
+__attribute__((noreturn)) __attribute((format (printf, 1, 2)));
+static void usage(void) __attribute((noreturn));
 
 
 static unsigned int get_number(FILE *fp)
 {
-    int c, val;
-
-    /* Skip leading whitespace */
-    do {
-	c = fgetc(fp);
-	if (c == EOF)
-	    die("%s: end of file\n", filename);
-	if (c == '#') {
-	    /* Ignore comments 'till end of line */
-	    do {
+	int c, val;
+
+	/* Skip leading whitespace */
+	do {
+		c = fgetc(fp);
+		if (c == EOF)
+			die("%s: end of file\n", filename);
+		if (c == '#') {
+			/* Ignore comments 'till end of line */
+			do {
+				c = fgetc(fp);
+				if (c == EOF)
+					die("%s: end of file\n", filename);
+			} while (c != '\n');
+		}
+	} while (isspace(c));
+
+	/* Parse decimal number */
+	val = 0;
+	while (isdigit(c)) {
+		val = 10*val+c-'0';
+		/* some PBM are 'broken'; GiMP for example exports a PBM without space
+		 * between the digits. This is Ok cause we know a PBM can only have a '1'
+		 * or a '0' for the digit.
+		 */
+		if (is_plain_pbm)
+			break;
 		c = fgetc(fp);
 		if (c == EOF)
-		    die("%s: end of file\n", filename);
-	    } while (c != '\n');
+			die("%s: end of file\n", filename);
 	}
-    } while (isspace(c));
-
-    /* Parse decimal number */
-    val = 0;
-    while (isdigit(c)) {
-	val = 10*val+c-'0';
-	/* some PBM are 'broken'; GiMP for example exports a PBM without space
-	 * between the digits. This is Ok cause we know a PBM can only have a '1'
-	 * or a '0' for the digit. */
-	if (is_plain_pbm)
-		break;
-	c = fgetc(fp);
-	if (c == EOF)
-	    die("%s: end of file\n", filename);
-    }
-    return val;
+	return val;
 }
 
 static unsigned int get_number255(FILE *fp, unsigned int maxval)
 {
-    unsigned int val = get_number(fp);
-    return (255*val+maxval/2)/maxval;
+	unsigned int val = get_number(fp);
+
+	return (255*val+maxval/2)/maxval;
 }
 
 static void read_image(void)
 {
-    FILE *fp;
-    unsigned int i, j;
-    int magic;
-    unsigned int maxval;
-
-    /* open image file */
-    fp = fopen(filename, "r");
-    if (!fp)
-	die("Cannot open file %s: %s\n", filename, strerror(errno));
-
-    /* check file type and read file header */
-    magic = fgetc(fp);
-    if (magic != 'P')
-	die("%s is not a PNM file\n", filename);
-    magic = fgetc(fp);
-    switch (magic) {
+	FILE *fp;
+	unsigned int i, j;
+	int magic;
+	unsigned int maxval;
+
+	/* open image file */
+	fp = fopen(filename, "r");
+	if (!fp)
+		die("Cannot open file %s: %s\n", filename, strerror(errno));
+
+	/* check file type and read file header */
+	magic = fgetc(fp);
+	if (magic != 'P')
+		die("%s is not a PNM file\n", filename);
+	magic = fgetc(fp);
+	switch (magic) {
 	case '1':
 	case '2':
 	case '3':
-	    /* Plain PBM/PGM/PPM */
-	    break;
+		/* Plain PBM/PGM/PPM */
+		break;
 
 	case '4':
 	case '5':
 	case '6':
-	    /* Binary PBM/PGM/PPM */
-	    die("%s: Binary PNM is not supported\n"
+		/* Binary PBM/PGM/PPM */
+		die("%s: Binary PNM is not supported\n"
 		"Use pnmnoraw(1) to convert it to ASCII PNM\n", filename);
 
 	default:
-	    die("%s is not a PNM file\n", filename);
-    }
-    logo_width = get_number(fp);
-    logo_height = get_number(fp);
-
-    /* allocate image data */
-    logo_data = (struct color **)malloc(logo_height*sizeof(struct color *));
-    if (!logo_data)
-	die("%s\n", strerror(errno));
-    for (i = 0; i < logo_height; i++) {
-	logo_data[i] = malloc(logo_width*sizeof(struct color));
+		die("%s is not a PNM file\n", filename);
+	}
+	logo_width = get_number(fp);
+	logo_height = get_number(fp);
+
+	/* allocate image data */
+	logo_data = (struct color **)malloc(logo_height*sizeof(struct color *));
+	if (!logo_data)
+		die("%s\n", strerror(errno));
+	for (i = 0; i < logo_height; i++) {
+		logo_data[i] = malloc(logo_width*sizeof(struct color));
 	if (!logo_data[i])
-	    die("%s\n", strerror(errno));
-    }
+		die("%s\n", strerror(errno));
+	}
 
-    /* read image data */
-    switch (magic) {
+	/* read image data */
+	switch (magic) {
 	case '1':
-	    /* Plain PBM */
-	    is_plain_pbm = 1;
-	    for (i = 0; i < logo_height; i++)
-		for (j = 0; j < logo_width; j++)
-		    logo_data[i][j].red = logo_data[i][j].green =
-			logo_data[i][j].blue = 255*(1-get_number(fp));
-	    break;
+		/* Plain PBM */
+		is_plain_pbm = 1;
+		for (i = 0; i < logo_height; i++)
+			for (j = 0; j < logo_width; j++)
+				logo_data[i][j].red = logo_data[i][j].green =
+					logo_data[i][j].blue = 255*(1-get_number(fp));
+		break;
 
 	case '2':
-	    /* Plain PGM */
-	    maxval = get_number(fp);
-	    for (i = 0; i < logo_height; i++)
-		for (j = 0; j < logo_width; j++)
-		    logo_data[i][j].red = logo_data[i][j].green =
-			logo_data[i][j].blue = get_number255(fp, maxval);
-	    break;
+		/* Plain PGM */
+		maxval = get_number(fp);
+		for (i = 0; i < logo_height; i++)
+			for (j = 0; j < logo_width; j++)
+				logo_data[i][j].red = logo_data[i][j].green =
+					logo_data[i][j].blue = get_number255(fp, maxval);
+		break;
 
 	case '3':
-	    /* Plain PPM */
-	    maxval = get_number(fp);
-	    for (i = 0; i < logo_height; i++)
-		for (j = 0; j < logo_width; j++) {
-		    logo_data[i][j].red = get_number255(fp, maxval);
-		    logo_data[i][j].green = get_number255(fp, maxval);
-		    logo_data[i][j].blue = get_number255(fp, maxval);
-		}
-	    break;
-    }
+		/* Plain PPM */
+		maxval = get_number(fp);
+		for (i = 0; i < logo_height; i++)
+			for (j = 0; j < logo_width; j++) {
+				logo_data[i][j].red = get_number255(fp, maxval);
+				logo_data[i][j].green = get_number255(fp, maxval);
+				logo_data[i][j].blue = get_number255(fp, maxval);
+			}
+		break;
+	}
 
-    /* close file */
-    fclose(fp);
+	/* close file */
+	fclose(fp);
 }
 
 static inline int is_black(struct color c)
 {
-    return c.red == 0 && c.green == 0 && c.blue == 0;
+	return c.red == 0 && c.green == 0 && c.blue == 0;
 }
 
 static inline int is_white(struct color c)
 {
-    return c.red == 255 && c.green == 255 && c.blue == 255;
+	return c.red == 255 && c.green == 255 && c.blue == 255;
 }
 
 static inline int is_gray(struct color c)
 {
-    return c.red == c.green && c.red == c.blue;
+	return c.red == c.green && c.red == c.blue;
 }
 
 static inline int is_equal(struct color c1, struct color c2)
 {
-    return c1.red == c2.red && c1.green == c2.green && c1.blue == c2.blue;
+	return c1.red == c2.red && c1.green == c2.green && c1.blue == c2.blue;
 }
 
 static void write_header(void)
 {
-    /* open logo file */
-    if (outputname) {
-	out = fopen(outputname, "w");
-	if (!out)
-	    die("Cannot create file %s: %s\n", outputname, strerror(errno));
-    } else {
-	out = stdout;
-    }
-
-    fputs("/*\n", out);
-    fputs(" *  DO NOT EDIT THIS FILE!\n", out);
-    fputs(" *\n", out);
-    fprintf(out, " *  It was automatically generated from %s\n", filename);
-    fputs(" *\n", out);
-    fprintf(out, " *  Linux logo %s\n", logoname);
-    fputs(" */\n\n", out);
-    fputs("#include <linux/linux_logo.h>\n\n", out);
-    fprintf(out, "static unsigned char %s_data[] __initdata = {\n",
-	    logoname);
+	/* open logo file */
+	if (outputname) {
+		out = fopen(outputname, "w");
+		if (!out)
+			die("Cannot create file %s: %s\n", outputname, strerror(errno));
+	} else {
+		out = stdout;
+	}
+
+	fputs("/*\n", out);
+	fputs(" *  DO NOT EDIT THIS FILE!\n", out);
+	fputs(" *\n", out);
+	fprintf(out, " *  It was automatically generated from %s\n", filename);
+	fputs(" *\n", out);
+	fprintf(out, " *  Linux logo %s\n", logoname);
+	fputs(" */\n\n", out);
+	fputs("#include <linux/linux_logo.h>\n\n", out);
+	fprintf(out, "static unsigned char %s_data[] __initdata = {\n",
+		logoname);
 }
 
 static void write_footer(void)
 {
-    fputs("\n};\n\n", out);
-    fprintf(out, "const struct linux_logo %s __initconst = {\n", logoname);
-    fprintf(out, "\t.type\t\t= %s,\n", logo_types[logo_type]);
-    fprintf(out, "\t.width\t\t= %d,\n", logo_width);
-    fprintf(out, "\t.height\t\t= %d,\n", logo_height);
-    if (logo_type == LINUX_LOGO_CLUT224) {
-	fprintf(out, "\t.clutsize\t= %d,\n", logo_clutsize);
-	fprintf(out, "\t.clut\t\t= %s_clut,\n", logoname);
-    }
-    fprintf(out, "\t.data\t\t= %s_data\n", logoname);
-    fputs("};\n\n", out);
-
-    /* close logo file */
-    if (outputname)
-	fclose(out);
+	fputs("\n};\n\n", out);
+	fprintf(out, "const struct linux_logo %s __initconst = {\n", logoname);
+	fprintf(out, "\t.type\t\t= %s,\n", logo_types[logo_type]);
+	fprintf(out, "\t.width\t\t= %d,\n", logo_width);
+	fprintf(out, "\t.height\t\t= %d,\n", logo_height);
+	if (logo_type == LINUX_LOGO_CLUT224) {
+		fprintf(out, "\t.clutsize\t= %d,\n", logo_clutsize);
+		fprintf(out, "\t.clut\t\t= %s_clut,\n", logoname);
+	}
+	fprintf(out, "\t.data\t\t= %s_data\n", logoname);
+	fputs("};\n\n", out);
+
+	/* close logo file */
+	if (outputname)
+		fclose(out);
 }
 
 static int write_hex_cnt;
 
 static void write_hex(unsigned char byte)
 {
-    if (write_hex_cnt % 12)
-	fprintf(out, ", 0x%02x", byte);
-    else if (write_hex_cnt)
-	fprintf(out, ",\n\t0x%02x", byte);
-    else
-	fprintf(out, "\t0x%02x", byte);
-    write_hex_cnt++;
+	if (write_hex_cnt % 12)
+		fprintf(out, ", 0x%02x", byte);
+	else if (write_hex_cnt)
+		fprintf(out, ",\n\t0x%02x", byte);
+	else
+		fprintf(out, "\t0x%02x", byte);
+	write_hex_cnt++;
 }
 
 static void write_logo_mono(void)
 {
-    unsigned int i, j;
-    unsigned char val, bit;
-
-    /* validate image */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++)
-	    if (!is_black(logo_data[i][j]) && !is_white(logo_data[i][j]))
-		die("Image must be monochrome\n");
-
-    /* write file header */
-    write_header();
-
-    /* write logo data */
-    for (i = 0; i < logo_height; i++) {
-	for (j = 0; j < logo_width;) {
-	    for (val = 0, bit = 0x80; bit && j < logo_width; j++, bit >>= 1)
-		if (logo_data[i][j].red)
-		    val |= bit;
-	    write_hex(val);
+	unsigned int i, j;
+	unsigned char val, bit;
+
+	/* validate image */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++)
+			if (!is_black(logo_data[i][j]) && !is_white(logo_data[i][j]))
+				die("Image must be monochrome\n");
+
+	/* write file header */
+	write_header();
+
+	/* write logo data */
+	for (i = 0; i < logo_height; i++) {
+		for (j = 0; j < logo_width;) {
+			for (val = 0, bit = 0x80; bit && j < logo_width; j++, bit >>= 1)
+				if (logo_data[i][j].red)
+					val |= bit;
+			write_hex(val);
+		}
 	}
-    }
 
-    /* write logo structure and file footer */
-    write_footer();
+	/* write logo structure and file footer */
+	write_footer();
 }
 
 static void write_logo_vga16(void)
 {
-    unsigned int i, j, k;
-    unsigned char val;
-
-    /* validate image */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++) {
-	    for (k = 0; k < 16; k++)
-		if (is_equal(logo_data[i][j], clut_vga16[k]))
-		    break;
-	    if (k == 16)
-		die("Image must use the 16 console colors only\n"
-		    "Use ppmquant(1) -map clut_vga16.ppm to reduce the number "
-		    "of colors\n");
-	}
+	unsigned int i, j, k;
+	unsigned char val;
 
-    /* write file header */
-    write_header();
-
-    /* write logo data */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++) {
-	    for (k = 0; k < 16; k++)
-		if (is_equal(logo_data[i][j], clut_vga16[k]))
-		    break;
-	    val = k<<4;
-	    if (++j < logo_width) {
-		for (k = 0; k < 16; k++)
-		    if (is_equal(logo_data[i][j], clut_vga16[k]))
-			break;
-		val |= k;
-	    }
-	    write_hex(val);
-	}
+	/* validate image */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++) {
+			for (k = 0; k < 16; k++)
+				if (is_equal(logo_data[i][j], clut_vga16[k]))
+					break;
+			if (k == 16)
+				die("Image must use the 16 console colors only\n"
+				    "Use ppmquant(1) -map clut_vga16.ppm to reduce the number "
+				    "of colors\n");
+		}
+
+	/* write file header */
+	write_header();
 
-    /* write logo structure and file footer */
-    write_footer();
+	/* write logo data */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++) {
+			for (k = 0; k < 16; k++)
+				if (is_equal(logo_data[i][j], clut_vga16[k]))
+					break;
+			val = k<<4;
+			if (++j < logo_width) {
+				for (k = 0; k < 16; k++)
+					if (is_equal(logo_data[i][j], clut_vga16[k]))
+						break;
+				val |= k;
+			}
+			write_hex(val);
+		}
+
+	/* write logo structure and file footer */
+	write_footer();
 }
 
 static void write_logo_clut224(void)
 {
-    unsigned int i, j, k;
-
-    /* validate image */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++) {
-	    for (k = 0; k < logo_clutsize; k++)
-		if (is_equal(logo_data[i][j], logo_clut[k]))
-		    break;
-	    if (k == logo_clutsize) {
-		if (logo_clutsize == MAX_LINUX_LOGO_COLORS)
-		    die("Image has more than %d colors\n"
-			"Use ppmquant(1) to reduce the number of colors\n",
-			MAX_LINUX_LOGO_COLORS);
-		logo_clut[logo_clutsize++] = logo_data[i][j];
-	    }
-	}
+	unsigned int i, j, k;
 
-    /* write file header */
-    write_header();
+	/* validate image */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++) {
+			for (k = 0; k < logo_clutsize; k++)
+				if (is_equal(logo_data[i][j], logo_clut[k]))
+					break;
+			if (k == logo_clutsize) {
+				if (logo_clutsize == MAX_LINUX_LOGO_COLORS)
+					die("Image has more than %d colors\n"
+					    "Use ppmquant(1) to reduce the number of colors\n",
+					    MAX_LINUX_LOGO_COLORS);
+				logo_clut[logo_clutsize++] = logo_data[i][j];
+			}
+		}
 
-    /* write logo data */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++) {
-	    for (k = 0; k < logo_clutsize; k++)
-		if (is_equal(logo_data[i][j], logo_clut[k]))
-		    break;
-	    write_hex(k+32);
+	/* write file header */
+	write_header();
+
+	/* write logo data */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++) {
+			for (k = 0; k < logo_clutsize; k++)
+				if (is_equal(logo_data[i][j], logo_clut[k]))
+					break;
+			write_hex(k+32);
+		}
+	fputs("\n};\n\n", out);
+
+	/* write logo clut */
+	fprintf(out, "static unsigned char %s_clut[] __initdata = {\n",
+		logoname);
+	write_hex_cnt = 0;
+	for (i = 0; i < logo_clutsize; i++) {
+		write_hex(logo_clut[i].red);
+		write_hex(logo_clut[i].green);
+		write_hex(logo_clut[i].blue);
 	}
-    fputs("\n};\n\n", out);
-
-    /* write logo clut */
-    fprintf(out, "static unsigned char %s_clut[] __initdata = {\n",
-	    logoname);
-    write_hex_cnt = 0;
-    for (i = 0; i < logo_clutsize; i++) {
-	write_hex(logo_clut[i].red);
-	write_hex(logo_clut[i].green);
-	write_hex(logo_clut[i].blue);
-    }
-
-    /* write logo structure and file footer */
-    write_footer();
+
+	/* write logo structure and file footer */
+	write_footer();
 }
 
 static void write_logo_gray256(void)
 {
-    unsigned int i, j;
+	unsigned int i, j;
 
-    /* validate image */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++)
-	    if (!is_gray(logo_data[i][j]))
-		die("Image must be grayscale\n");
+	/* validate image */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++)
+			if (!is_gray(logo_data[i][j]))
+				die("Image must be grayscale\n");
 
-    /* write file header */
-    write_header();
+	/* write file header */
+	write_header();
 
-    /* write logo data */
-    for (i = 0; i < logo_height; i++)
-	for (j = 0; j < logo_width; j++)
-	    write_hex(logo_data[i][j].red);
+	/* write logo data */
+	for (i = 0; i < logo_height; i++)
+		for (j = 0; j < logo_width; j++)
+			write_hex(logo_data[i][j].red);
 
-    /* write logo structure and file footer */
-    write_footer();
+	/* write logo structure and file footer */
+	write_footer();
 }
 
 static void die(const char *fmt, ...)
 {
-    va_list ap;
+	va_list ap;
 
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    va_end(ap);
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
 
-    exit(1);
+	exit(1);
 }
 
 static void usage(void)
 {
-    die("\n"
+	die("\n"
 	"Usage: %s [options] <filename>\n"
 	"\n"
 	"Valid options:\n"
-	"    -h          : display this usage information\n"
-	"    -n <name>   : specify logo name (default: linux_logo)\n"
-	"    -o <output> : output to file <output> instead of stdout\n"
-	"    -t <type>   : specify logo type, one of\n"
-	"                      mono    : monochrome black/white\n"
-	"                      vga16   : 16 colors VGA text palette\n"
-	"                      clut224 : 224 colors (default)\n"
-	"                      gray256 : 256 levels grayscale\n"
+	"	-h		  : display this usage information\n"
+	"	-n <name>   : specify logo name (default: linux_logo)\n"
+	"	-o <output> : output to file <output> instead of stdout\n"
+	"	-t <type>   : specify logo type, one of\n"
+	"					  mono	: monochrome black/white\n"
+	"					  vga16   : 16 colors VGA text palette\n"
+	"					  clut224 : 224 colors (default)\n"
+	"					  gray256 : 256 levels grayscale\n"
 	"\n", programname);
 }
 
 int main(int argc, char *argv[])
 {
-    int opt;
+	int opt;
 
-    programname = argv[0];
+	programname = argv[0];
 
-    opterr = 0;
-    while (1) {
-	opt = getopt(argc, argv, "hn:o:t:");
-	if (opt == -1)
-	    break;
+	opterr = 0;
+	while (1) {
+		opt = getopt(argc, argv, "hn:o:t:");
+		if (opt == -1)
+			break;
 
-	switch (opt) {
-	    case 'h':
-		usage();
-		break;
+		switch (opt) {
+		case 'h':
+			usage();
+			break;
 
-	    case 'n':
-		logoname = optarg;
-		break;
+		case 'n':
+			logoname = optarg;
+			break;
 
-	    case 'o':
-		outputname = optarg;
-		break;
+		case 'o':
+			outputname = optarg;
+			break;
 
-	    case 't':
-		if (!strcmp(optarg, "mono"))
-		    logo_type = LINUX_LOGO_MONO;
-		else if (!strcmp(optarg, "vga16"))
-		    logo_type = LINUX_LOGO_VGA16;
-		else if (!strcmp(optarg, "clut224"))
-		    logo_type = LINUX_LOGO_CLUT224;
-		else if (!strcmp(optarg, "gray256"))
-		    logo_type = LINUX_LOGO_GRAY256;
-		else
-		    usage();
-		break;
+		case 't':
+			if (!strcmp(optarg, "mono"))
+				logo_type = LINUX_LOGO_MONO;
+			else if (!strcmp(optarg, "vga16"))
+				logo_type = LINUX_LOGO_VGA16;
+			else if (!strcmp(optarg, "clut224"))
+				logo_type = LINUX_LOGO_CLUT224;
+			else if (!strcmp(optarg, "gray256"))
+				logo_type = LINUX_LOGO_GRAY256;
+			else
+				usage();
+			break;
 
-	    default:
-		usage();
-		break;
+		default:
+			usage();
+			break;
+		}
 	}
-    }
-    if (optind != argc-1)
-	usage();
+	if (optind != argc-1)
+		usage();
 
-    filename = argv[optind];
+	filename = argv[optind];
 
-    read_image();
-    switch (logo_type) {
+	read_image();
+	switch (logo_type) {
 	case LINUX_LOGO_MONO:
-	    write_logo_mono();
-	    break;
+		write_logo_mono();
+		break;
 
 	case LINUX_LOGO_VGA16:
-	    write_logo_vga16();
-	    break;
+		write_logo_vga16();
+		break;
 
 	case LINUX_LOGO_CLUT224:
-	    write_logo_clut224();
-	    break;
+		write_logo_clut224();
+		break;
 
 	case LINUX_LOGO_GRAY256:
-	    write_logo_gray256();
-	    break;
-    }
-    exit(0);
+		write_logo_gray256();
+		break;
+	}
+	exit(0);
 }

From 8f0e056068f26c0f1121f7f96a6b4515f59160f2 Mon Sep 17 00:00:00 2001
From: Nikita Romanyuk <ufh8945@gmail.com>
Date: Sat, 25 Feb 2023 10:12:29 +0300
Subject: [PATCH 245/411] drivers: video: logo: add SPDX comment, remove GPL
 notice in pnmtologo.c

Signed-off-by: Nikita Romanyuk <ufh8945@gmail.com>
Reviewed-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/logo/pnmtologo.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/video/logo/pnmtologo.c b/drivers/video/logo/pnmtologo.c
index 78cb95f3cfce6..ada5ef6e51b7a 100644
--- a/drivers/video/logo/pnmtologo.c
+++ b/drivers/video/logo/pnmtologo.c
@@ -1,14 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  *  Convert a logo in ASCII PNM format to C source suitable for inclusion in
  *  the Linux kernel
  *
  *  (C) Copyright 2001-2003 by Geert Uytterhoeven <geert@linux-m68k.org>
- *
- *  --------------------------------------------------------------------------
- *
- *  This file is subject to the terms and conditions of the GNU General Public
- *  License. See the file COPYING in the main directory of the Linux
- *  distribution for more details.
  */
 
 #include <ctype.h>

From 7f501aa71da9dc2eaae2b0118a151cad018d33b0 Mon Sep 17 00:00:00 2001
From: Lucy Mielke <mielkesteven@icloud.com>
Date: Tue, 7 Feb 2023 11:06:30 +0100
Subject: [PATCH 246/411] fbdev: omapfb: cleanup inconsistent indentation

This cleans up the indentation according to the Linux kernel coding
style, and should fix the warning created by the kernel test robot.

Fixes: 8b08cf2b64f5 ("OMAP: add TI OMAP framebuffer driver")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Lucy Mielke <mielkesteven@icloud.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/omap/omapfb_main.c | 30 +++++++++++++++-----------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/video/fbdev/omap/omapfb_main.c b/drivers/video/fbdev/omap/omapfb_main.c
index 1f3df2055ff0d..18736079843dc 100644
--- a/drivers/video/fbdev/omap/omapfb_main.c
+++ b/drivers/video/fbdev/omap/omapfb_main.c
@@ -544,19 +544,25 @@ static int set_fb_var(struct fb_info *fbi,
 		var->yoffset = var->yres_virtual - var->yres;
 
 	if (plane->color_mode == OMAPFB_COLOR_RGB444) {
-		var->red.offset	  = 8; var->red.length	 = 4;
-						var->red.msb_right   = 0;
-		var->green.offset = 4; var->green.length = 4;
-						var->green.msb_right = 0;
-		var->blue.offset  = 0; var->blue.length  = 4;
-						var->blue.msb_right  = 0;
+		var->red.offset		= 8;
+		var->red.length		= 4;
+		var->red.msb_right	= 0;
+		var->green.offset	= 4;
+		var->green.length	= 4;
+		var->green.msb_right	= 0;
+		var->blue.offset	= 0;
+		var->blue.length	= 4;
+		var->blue.msb_right	= 0;
 	} else {
-		var->red.offset	 = 11; var->red.length	 = 5;
-						var->red.msb_right   = 0;
-		var->green.offset = 5;  var->green.length = 6;
-						var->green.msb_right = 0;
-		var->blue.offset = 0;  var->blue.length  = 5;
-						var->blue.msb_right  = 0;
+		var->red.offset		= 11;
+		var->red.length		= 5;
+		var->red.msb_right	= 0;
+		var->green.offset	= 5;
+		var->green.length	= 6;
+		var->green.msb_right	= 0;
+		var->blue.offset	= 0;
+		var->blue.length	= 5;
+		var->blue.msb_right	= 0;
 	}
 
 	var->height		= -1;

From 33bf61c0a1a97e2abff3117751eabad28106a7c5 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 8 Mar 2023 08:19:21 +0100
Subject: [PATCH 247/411] MAINTAINERS: orphan SIS FRAMEBUFFER DRIVER

This was triggered by the fact that the webpage:

  http://www.winischhofer.net/linuxsisvga.shtml

cannot be reached anymore.

Thomas Winischhofer is still reachable at the given email address, but he
has not been active since 2005.

Mark the SIS FRAMEBUFFER DRIVER as orphan to reflect the current state.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 MAINTAINERS | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index ec57c42ed5440..d603402b0810f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19150,9 +19150,7 @@ W:	http://www.brownhat.org/sis900.html
 F:	drivers/net/ethernet/sis/sis900.*
 
 SIS FRAMEBUFFER DRIVER
-M:	Thomas Winischhofer <thomas@winischhofer.net>
-S:	Maintained
-W:	http://www.winischhofer.net/linuxsisvga.shtml
+S:	Orphan
 F:	Documentation/fb/sisfb.rst
 F:	drivers/video/fbdev/sis/
 F:	include/video/sisfb.h

From f90bd245de82c095187d8c2cabb8b488a39eaecc Mon Sep 17 00:00:00 2001
From: Wei Chen <harperchen1110@gmail.com>
Date: Tue, 7 Mar 2023 13:08:56 +0000
Subject: [PATCH 248/411] fbdev: tgafb: Fix potential divide by zero

fb_set_var would by called when user invokes ioctl with cmd
FBIOPUT_VSCREENINFO. User-provided data would finally reach
tgafb_check_var. In case var->pixclock is assigned to zero,
divide by zero would occur when checking whether reciprocal
of var->pixclock is too high.

Similar crashes have happened in other fbdev drivers. There
is no check and modification on var->pixclock along the call
chain to tgafb_check_var. We believe it could also be triggered
in driver tgafb from user site.

Signed-off-by: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/tgafb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/video/fbdev/tgafb.c b/drivers/video/fbdev/tgafb.c
index 14d37c49633c6..b44004880f0d1 100644
--- a/drivers/video/fbdev/tgafb.c
+++ b/drivers/video/fbdev/tgafb.c
@@ -173,6 +173,9 @@ tgafb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
 	struct tga_par *par = (struct tga_par *)info->par;
 
+	if (!var->pixclock)
+		return -EINVAL;
+
 	if (par->tga_type == TGA_TYPE_8PLANE) {
 		if (var->bits_per_pixel != 8)
 			return -EINVAL;

From 7eb1220f4bde36a15b26e8f54480406c72081bfa Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 8 Mar 2023 13:49:50 +0800
Subject: [PATCH 249/411] fbdev: clps711x-fb: Use
 devm_platform_get_and_ioremap_resource()

According to commit 890cc39a8799 ("drivers: provide
devm_platform_get_and_ioremap_resource()"), convert
platform_get_resource(), devm_ioremap_resource() to a single call to
devm_platform_get_and_ioremap_resource(), as this is exactly what this
function does.

Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/clps711x-fb.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/clps711x-fb.c b/drivers/video/fbdev/clps711x-fb.c
index 45c75ff01eca4..c8bfc608bd9c1 100644
--- a/drivers/video/fbdev/clps711x-fb.c
+++ b/drivers/video/fbdev/clps711x-fb.c
@@ -238,8 +238,7 @@ static int clps711x_fb_probe(struct platform_device *pdev)
 	info->fix.mmio_start = res->start;
 	info->fix.mmio_len = resource_size(res);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	info->screen_base = devm_ioremap_resource(dev, res);
+	info->screen_base = devm_platform_get_and_ioremap_resource(pdev, 1, &res);
 	if (IS_ERR(info->screen_base)) {
 		ret = PTR_ERR(info->screen_base);
 		goto out_fb_release;

From 096dc32bb73d20e0854b3a471379f577f903f0ee Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 10 Mar 2023 08:47:30 -0600
Subject: [PATCH 250/411] fbdev: Use of_property_read_bool() for boolean
 properties

It is preferred to use typed property access functions (i.e.
of_property_read_<type> functions) rather than low-level
of_get_property/of_find_property functions for reading properties.
Convert reading boolean properties to to of_property_read_bool().

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/offb.c     | 4 ++--
 drivers/video/fbdev/sm501fb.c  | 4 ++--
 drivers/video/fbdev/tcx.c      | 3 +--
 drivers/video/fbdev/xilinxfb.c | 3 +--
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/offb.c b/drivers/video/fbdev/offb.c
index f7ad6bc9d02d7..b97d251d894b7 100644
--- a/drivers/video/fbdev/offb.c
+++ b/drivers/video/fbdev/offb.c
@@ -549,10 +549,10 @@ static void offb_init_nodriver(struct platform_device *parent, struct device_nod
 	int foreign_endian = 0;
 
 #ifdef __BIG_ENDIAN
-	if (of_get_property(dp, "little-endian", NULL))
+	if (of_property_read_bool(dp, "little-endian"))
 		foreign_endian = FBINFO_FOREIGN_ENDIAN;
 #else
-	if (of_get_property(dp, "big-endian", NULL))
+	if (of_property_read_bool(dp, "big-endian"))
 		foreign_endian = FBINFO_FOREIGN_ENDIAN;
 #endif
 
diff --git a/drivers/video/fbdev/sm501fb.c b/drivers/video/fbdev/sm501fb.c
index f743bfbde2a6c..1f3cbe723def1 100644
--- a/drivers/video/fbdev/sm501fb.c
+++ b/drivers/video/fbdev/sm501fb.c
@@ -1737,10 +1737,10 @@ static int sm501fb_init_fb(struct fb_info *fb, enum sm501_controller head,
 
 #if defined(CONFIG_OF)
 #ifdef __BIG_ENDIAN
-	if (of_get_property(info->dev->parent->of_node, "little-endian", NULL))
+	if (of_property_read_bool(info->dev->parent->of_node, "little-endian"))
 		fb->flags |= FBINFO_FOREIGN_ENDIAN;
 #else
-	if (of_get_property(info->dev->parent->of_node, "big-endian", NULL))
+	if (of_property_read_bool(info->dev->parent->of_node, "big-endian"))
 		fb->flags |= FBINFO_FOREIGN_ENDIAN;
 #endif
 #endif
diff --git a/drivers/video/fbdev/tcx.c b/drivers/video/fbdev/tcx.c
index 01d87f53324d9..f2eaf6e7fff60 100644
--- a/drivers/video/fbdev/tcx.c
+++ b/drivers/video/fbdev/tcx.c
@@ -379,8 +379,7 @@ static int tcx_probe(struct platform_device *op)
 
 	spin_lock_init(&par->lock);
 
-	par->lowdepth =
-		(of_find_property(dp, "tcx-8-bit", NULL) != NULL);
+	par->lowdepth = of_property_read_bool(dp, "tcx-8-bit");
 
 	sbusfb_fill_var(&info->var, dp, 8);
 	info->var.red.length = 8;
diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c
index 1ac83900a21cc..c17cfffd9a84a 100644
--- a/drivers/video/fbdev/xilinxfb.c
+++ b/drivers/video/fbdev/xilinxfb.c
@@ -469,8 +469,7 @@ static int xilinxfb_of_probe(struct platform_device *pdev)
 		pdata.yvirt = prop[1];
 	}
 
-	if (of_find_property(pdev->dev.of_node, "rotate-display", NULL))
-		pdata.rotate_screen = 1;
+	pdata.rotate_screen = of_property_read_bool(pdev->dev.of_node, "rotate-display");
 
 	platform_set_drvdata(pdev, drvdata);
 	return xilinxfb_assign(pdev, drvdata, &pdata);

From 0db0a1eb444dc9f79241d673ea611741211acdae Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 14 Mar 2023 13:42:17 +0800
Subject: [PATCH 251/411] fbdev: pxa3xx-gcu: Use
 devm_platform_get_and_ioremap_resource()

According to commit 890cc39a8799 ("drivers: provide
devm_platform_get_and_ioremap_resource()"), convert
platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/pxa3xx-gcu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/pxa3xx-gcu.c b/drivers/video/fbdev/pxa3xx-gcu.c
index c3cd1e1cc01b4..d16729215423d 100644
--- a/drivers/video/fbdev/pxa3xx-gcu.c
+++ b/drivers/video/fbdev/pxa3xx-gcu.c
@@ -599,8 +599,7 @@ static int pxa3xx_gcu_probe(struct platform_device *pdev)
 	priv->misc_dev.fops	= &pxa3xx_gcu_miscdev_fops;
 
 	/* handle IO resources */
-	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->mmio_base = devm_ioremap_resource(dev, r);
+	priv->mmio_base = devm_platform_get_and_ioremap_resource(pdev, 0, &r);
 	if (IS_ERR(priv->mmio_base))
 		return PTR_ERR(priv->mmio_base);
 

From be66c2cbc05ed84821f3dde6439044ab44506525 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 14 Mar 2023 13:42:18 +0800
Subject: [PATCH 252/411] fbdev: wm8505fb: Use devm_platform_ioremap_resource()

According to commit 7945f929f1a7 ("drivers: provide
devm_platform_ioremap_resource()"), convert platform_get_resource(),
devm_ioremap_resource() to a single call to Use
devm_platform_ioremap_resource(), as this is exactly what this function
does.

Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/wm8505fb.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/wm8505fb.c b/drivers/video/fbdev/wm8505fb.c
index 8f4d674fa0d03..96a6f7623e197 100644
--- a/drivers/video/fbdev/wm8505fb.c
+++ b/drivers/video/fbdev/wm8505fb.c
@@ -261,7 +261,6 @@ static const struct fb_ops wm8505fb_ops = {
 static int wm8505fb_probe(struct platform_device *pdev)
 {
 	struct wm8505fb_info	*fbi;
-	struct resource	*res;
 	struct display_timings *disp_timing;
 	void			*addr;
 	int ret;
@@ -299,8 +298,7 @@ static int wm8505fb_probe(struct platform_device *pdev)
 	addr = addr + sizeof(struct wm8505fb_info);
 	fbi->fb.pseudo_palette	= addr;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	fbi->regbase = devm_ioremap_resource(&pdev->dev, res);
+	fbi->regbase = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(fbi->regbase))
 		return PTR_ERR(fbi->regbase);
 

From 4c7e8e05bca577380d03a2731a14b3cb5652b4f3 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Tue, 14 Mar 2023 13:42:19 +0800
Subject: [PATCH 253/411] fbdev: xilinxfb: Use
 devm_platform_get_and_ioremap_resource()

According to commit 890cc39a8799 ("drivers: provide
devm_platform_get_and_ioremap_resource()"), convert
platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/xilinxfb.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c
index c17cfffd9a84a..7911354827dc2 100644
--- a/drivers/video/fbdev/xilinxfb.c
+++ b/drivers/video/fbdev/xilinxfb.c
@@ -273,8 +273,7 @@ static int xilinxfb_assign(struct platform_device *pdev,
 	if (drvdata->flags & BUS_ACCESS_FLAG) {
 		struct resource *res;
 
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		drvdata->regs = devm_ioremap_resource(&pdev->dev, res);
+		drvdata->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 		if (IS_ERR(drvdata->regs))
 			return PTR_ERR(drvdata->regs);
 

From e140980ef211d537833500377ae411c3f232b41d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Mar 2023 17:27:10 +0100
Subject: [PATCH 254/411] fbdev: omapfb: remove omap1 osk driver

Commit 21a3e6eed423 ("ARM: omap1: remove osk-mistral add-on board
support") removed the platform_device definition for the "lcd_osk"
device, so this driver is now unused and can be removed as well.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/omap/Makefile  |  1 -
 drivers/video/fbdev/omap/lcd_osk.c | 86 ------------------------------
 2 files changed, 87 deletions(-)
 delete mode 100644 drivers/video/fbdev/omap/lcd_osk.c

diff --git a/drivers/video/fbdev/omap/Makefile b/drivers/video/fbdev/omap/Makefile
index 504edb9c09dd8..6d5082c76919d 100644
--- a/drivers/video/fbdev/omap/Makefile
+++ b/drivers/video/fbdev/omap/Makefile
@@ -18,7 +18,6 @@ objs-y$(CONFIG_FB_OMAP_LCDC_HWA742) += hwa742.o
 
 lcds-y$(CONFIG_MACH_AMS_DELTA) += lcd_ams_delta.o
 lcds-y$(CONFIG_MACH_OMAP_PALMTE) += lcd_palmte.o
-lcds-y$(CONFIG_MACH_OMAP_OSK) += lcd_osk.o
 
 lcds-y$(CONFIG_FB_OMAP_LCD_MIPID) += lcd_mipid.o
 
diff --git a/drivers/video/fbdev/omap/lcd_osk.c b/drivers/video/fbdev/omap/lcd_osk.c
deleted file mode 100644
index 8168ba0d47fd3..0000000000000
--- a/drivers/video/fbdev/omap/lcd_osk.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * LCD panel support for the TI OMAP OSK board
- *
- * Copyright (C) 2004 Nokia Corporation
- * Author: Imre Deak <imre.deak@nokia.com>
- * Adapted for OSK by <dirk.behme@de.bosch.com>
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/gpio.h>
-
-#include <linux/soc/ti/omap1-io.h>
-#include <linux/soc/ti/omap1-mux.h>
-
-#include "omapfb.h"
-
-static int osk_panel_enable(struct lcd_panel *panel)
-{
-	/* configure PWL pin */
-	omap_cfg_reg(PWL);
-
-	/* Enable PWL unit */
-	omap_writeb(0x01, OMAP_PWL_CLK_ENABLE);
-
-	/* Set PWL level */
-	omap_writeb(0xFF, OMAP_PWL_ENABLE);
-
-	/* set GPIO2 high (lcd power enabled) */
-	gpio_set_value(2, 1);
-
-	return 0;
-}
-
-static void osk_panel_disable(struct lcd_panel *panel)
-{
-	/* Set PWL level to zero */
-	omap_writeb(0x00, OMAP_PWL_ENABLE);
-
-	/* Disable PWL unit */
-	omap_writeb(0x00, OMAP_PWL_CLK_ENABLE);
-
-	/* set GPIO2 low */
-	gpio_set_value(2, 0);
-}
-
-static struct lcd_panel osk_panel = {
-	.name		= "osk",
-	.config		= OMAP_LCDC_PANEL_TFT,
-
-	.bpp		= 16,
-	.data_lines	= 16,
-	.x_res		= 240,
-	.y_res		= 320,
-	.pixel_clock	= 12500,
-	.hsw		= 40,
-	.hfp		= 40,
-	.hbp		= 72,
-	.vsw		= 1,
-	.vfp		= 1,
-	.vbp		= 0,
-	.pcd		= 12,
-
-	.enable		= osk_panel_enable,
-	.disable	= osk_panel_disable,
-};
-
-static int osk_panel_probe(struct platform_device *pdev)
-{
-	omapfb_register_panel(&osk_panel);
-	return 0;
-}
-
-static struct platform_driver osk_panel_driver = {
-	.probe		= osk_panel_probe,
-	.driver		= {
-		.name	= "lcd_osk",
-	},
-};
-
-module_platform_driver(osk_panel_driver);
-
-MODULE_AUTHOR("Imre Deak");
-MODULE_DESCRIPTION("LCD panel support for the TI OMAP OSK board");
-MODULE_LICENSE("GPL");

From 6fa7f537351c8fad0e43e9279efe76dbc942bea0 Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Mon, 13 Mar 2023 15:26:52 -0700
Subject: [PATCH 255/411] pm-graph: sleepgraph: Avoid crashing on binary data
 in device names

A regression has occurred in the hid-sensor code where a device
name string has not been initialized to 0, and ends up without
a NULL char and is printed with %s. This includes random binary
data in the device name, which makes its way into the ftrace output
and ends up crashing sleepgraph because it expects the ftrace output
to be ASCII only.

For example: "HID-SENSOR-INT-020b?.39.auto" ends up in ftrace instead
of "HID-SENSOR-INT-020b.39.auto". It causes this crash in sleepgraph:

  File "/usr/bin/sleepgraph", line 5579, in executeSuspend
    for line in fp:
  File "/usr/lib/python3.10/codecs.py", line 322, in decode
    (result, consumed) = self._buffer_decode(data, self.errors, final)
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position
1568: invalid start byte

The issue is present in 6.3-rc1 and is described in full here:
https://bugzilla.kernel.org/show_bug.cgi?id=217169

A separate fix has been submitted to have this issue repaired, but
it has also exposed a larger bug in sleepgraph, since nothing should
make sleepgraph crash. Sleepgraph needs to be able to handle binary
data showing up in ftrace gracefully.

Modify the ftrace processing code to treat it as potentially binary
and to filter out binary data and leave just the ASCII.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=217169
Fixes: 98c062e82451 ("HID: hid-sensor-custom: Allow more custom iio sensors")
Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
[ rjw: Subject and changelog edits ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/sleepgraph.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 82c09cd25cc21..bf4ac24a1c7aa 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -5556,9 +5556,8 @@ def executeSuspend(quiet=False):
 		if not quiet:
 			pprint('CAPTURING TRACE')
 		op = sv.writeDatafileHeader(sv.ftracefile, testdata)
-		fp = open(tp+'trace', 'r')
-		for line in fp:
-			op.write(line)
+		fp = open(tp+'trace', 'rb')
+		op.write(ascii(fp.read()))
 		op.close()
 		sv.fsetVal('', 'trace')
 		sv.platforminfo(cmdafter)

From 91d7b60a65d9f71230ea09b86d2058a884a3c2af Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Wed, 8 Mar 2023 11:26:32 +0000
Subject: [PATCH 256/411] ACPI: PPTT: Fix to avoid sleep in the atomic context
 when PPTT is absent

Commit 0c80f9e165f8 ("ACPI: PPTT: Leave the table mapped for the runtime usage")
enabled to map PPTT once on the first invocation of acpi_get_pptt() and
never unmapped the same allowing it to be used at runtime with out the
hassle of mapping and unmapping the table. This was needed to fetch LLC
information from the PPTT in the cpuhotplug path which is executed in
the atomic context as the acpi_get_table() might sleep waiting for a
mutex.

However it missed to handle the case when there is no PPTT on the system
which results in acpi_get_pptt() being called from all the secondary
CPUs attempting to fetch the LLC information in the atomic context
without knowing the absence of PPTT resulting in the splat like below:

 | BUG: sleeping function called from invalid context at kernel/locking/semaphore.c:164
 | in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 0, name: swapper/1
 | preempt_count: 1, expected: 0
 | RCU nest depth: 0, expected: 0
 | no locks held by swapper/1/0.
 | irq event stamp: 0
 | hardirqs last  enabled at (0): 0x0
 | hardirqs last disabled at (0): copy_process+0x61c/0x1b40
 | softirqs last  enabled at (0): copy_process+0x61c/0x1b40
 | softirqs last disabled at (0): 0x0
 | CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.3.0-rc1 #1
 | Call trace:
 |  dump_backtrace+0xac/0x138
 |  show_stack+0x30/0x48
 |  dump_stack_lvl+0x60/0xb0
 |  dump_stack+0x18/0x28
 |  __might_resched+0x160/0x270
 |  __might_sleep+0x58/0xb0
 |  down_timeout+0x34/0x98
 |  acpi_os_wait_semaphore+0x7c/0xc0
 |  acpi_ut_acquire_mutex+0x58/0x108
 |  acpi_get_table+0x40/0xe8
 |  acpi_get_pptt+0x48/0xa0
 |  acpi_get_cache_info+0x38/0x140
 |  init_cache_level+0xf4/0x118
 |  detect_cache_attributes+0x2e4/0x640
 |  update_siblings_masks+0x3c/0x330
 |  store_cpu_topology+0x88/0xf0
 |  secondary_start_kernel+0xd0/0x168
 |  __secondary_switched+0xb8/0xc0

Update acpi_get_pptt() to consider the fact that PPTT is once checked and
is not available on the system and return NULL avoiding any attempts to
fetch PPTT and thereby avoiding any possible sleep waiting for a mutex
in the atomic context.

Fixes: 0c80f9e165f8 ("ACPI: PPTT: Leave the table mapped for the runtime usage")
Reported-by: Aishwarya TCV <aishwarya.tcv@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Pierre Gondois <pierre.gondois@arm.com>
Cc: 6.0+ <stable@vger.kernel.org> # 6.0+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/pptt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c
index 10975bb603fb1..a35dd0e41c270 100644
--- a/drivers/acpi/pptt.c
+++ b/drivers/acpi/pptt.c
@@ -536,16 +536,19 @@ static int topology_get_acpi_cpu_tag(struct acpi_table_header *table,
 static struct acpi_table_header *acpi_get_pptt(void)
 {
 	static struct acpi_table_header *pptt;
+	static bool is_pptt_checked;
 	acpi_status status;
 
 	/*
 	 * PPTT will be used at runtime on every CPU hotplug in path, so we
 	 * don't need to call acpi_put_table() to release the table mapping.
 	 */
-	if (!pptt) {
+	if (!pptt && !is_pptt_checked) {
 		status = acpi_get_table(ACPI_SIG_PPTT, 0, &pptt);
 		if (ACPI_FAILURE(status))
 			acpi_pptt_warn_missing();
+
+		is_pptt_checked = true;
 	}
 
 	return pptt;

From 0bc23d8b2237a104d7f8379d687aa4cb82e2968b Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Wed, 8 Mar 2023 21:23:09 +0800
Subject: [PATCH 257/411] ACPI: tools: pfrut: Check if the input of level and
 type is in the right numeric range
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The user provides arbitrary non-numeic value to level and type,
which could bring unexpected behavior. In this case the expected
behavior would be to throw an error.

 pfrut -h
usage: pfrut [OPTIONS]
code injection:
-l, --load
-s, --stage
-a, --activate
-u, --update [stage and activate]
-q, --query
-d, --revid
update telemetry:
-G, --getloginfo
-T, --type(0:execution, 1:history)
-L, --level(0, 1, 2, 4)
-R, --read
-D, --revid log

 pfrut -T A
 pfrut -G
log_level:0
log_type:0
log_revid:2
max_data_size:65536
chunk1_size:0
chunk2_size:1530
rollover_cnt:0
reset_cnt:17

Fix this by restricting the input to be in the expected range.

Reported-by: Hariganesh Govindarajulu <hariganesh.govindarajulu@intel.com>
Suggested-by: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/acpi/tools/pfrut/pfrut.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/tools/power/acpi/tools/pfrut/pfrut.c b/tools/power/acpi/tools/pfrut/pfrut.c
index 52aa0351533c3..388c9e3ad0407 100644
--- a/tools/power/acpi/tools/pfrut/pfrut.c
+++ b/tools/power/acpi/tools/pfrut/pfrut.c
@@ -97,7 +97,7 @@ static struct option long_options[] = {
 static void parse_options(int argc, char **argv)
 {
 	int option_index = 0;
-	char *pathname;
+	char *pathname, *endptr;
 	int opt;
 
 	pathname = strdup(argv[0]);
@@ -125,11 +125,23 @@ static void parse_options(int argc, char **argv)
 			log_getinfo = 1;
 			break;
 		case 'T':
-			log_type = atoi(optarg);
+			log_type = strtol(optarg, &endptr, 0);
+			if (*endptr || (log_type != 0 && log_type != 1)) {
+				printf("Number expected: type(0:execution, 1:history) - Quit.\n");
+				exit(1);
+			}
+
 			set_log_type = 1;
 			break;
 		case 'L':
-			log_level = atoi(optarg);
+			log_level = strtol(optarg, &endptr, 0);
+			if (*endptr ||
+			    (log_level != 0 && log_level != 1 &&
+			     log_level != 2 && log_level != 4)) {
+				printf("Number expected: level(0, 1, 2, 4) - Quit.\n");
+				exit(1);
+			}
+
 			set_log_level = 1;
 			break;
 		case 'R':

From c2679254b9c9980d9045f0f722cf093a2b1f7590 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Fri, 10 Mar 2023 17:28:56 -0500
Subject: [PATCH 258/411] tracing: Make tracepoint lockdep check actually test
 something

A while ago where the trace events had the following:

   rcu_read_lock_sched_notrace();
   rcu_dereference_sched(...);
   rcu_read_unlock_sched_notrace();

If the tracepoint is enabled, it could trigger RCU issues if called in
the wrong place. And this warning was only triggered if lockdep was
enabled. If the tracepoint was never enabled with lockdep, the bug would
not be caught. To handle this, the above sequence was done when lockdep
was enabled regardless if the tracepoint was enabled or not (although the
always enabled code really didn't do anything, it would still trigger a
warning).

But a lot has changed since that lockdep code was added. One is, that
sequence no longer triggers any warning. Another is, the tracepoint when
enabled doesn't even do that sequence anymore.

The main check we care about today is whether RCU is "watching" or not.
So if lockdep is enabled, always check if rcu_is_watching() which will
trigger a warning if it is not (tracepoints require RCU to be watching).

Note, that old sequence did add a bit of overhead when lockdep was enabled,
and with the latest kernel updates, would cause the system to slow down
enough to trigger kernel "stalled" warnings.

Link: http://lore.kernel.org/lkml/20140806181801.GA4605@redhat.com
Link: http://lore.kernel.org/lkml/20140807175204.C257CAC5@viggo.jf.intel.com
Link: https://lore.kernel.org/lkml/20230307184645.521db5c9@gandalf.local.home/
Link: https://lore.kernel.org/linux-trace-kernel/20230310172856.77406446@gandalf.local.home

Cc: stable@vger.kernel.org
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Fixes: e6753f23d961 ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index fa1004fcf8109..2083f2d2f05bc 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -231,12 +231,11 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
  * not add unwanted padding between the beginning of the section and the
  * structure. Force alignment to the same alignment as the section start.
  *
- * When lockdep is enabled, we make sure to always do the RCU portions of
- * the tracepoint code, regardless of whether tracing is on. However,
- * don't check if the condition is false, due to interaction with idle
- * instrumentation. This lets us find RCU issues triggered with tracepoints
- * even when this tracepoint is off. This code has no purpose other than
- * poking RCU a bit.
+ * When lockdep is enabled, we make sure to always test if RCU is
+ * "watching" regardless if the tracepoint is enabled or not. Tracepoints
+ * require RCU to be active, and it should always warn at the tracepoint
+ * site if it is not watching, as it will need to be active when the
+ * tracepoint is enabled.
  */
 #define __DECLARE_TRACE(name, proto, args, cond, data_proto)		\
 	extern int __traceiter_##name(data_proto);			\
@@ -249,9 +248,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p)
 				TP_ARGS(args),				\
 				TP_CONDITION(cond), 0);			\
 		if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {		\
-			rcu_read_lock_sched_notrace();			\
-			rcu_dereference_sched(__tracepoint_##name.funcs);\
-			rcu_read_unlock_sched_notrace();		\
+			WARN_ON_ONCE(!rcu_is_watching());		\
 		}							\
 	}								\
 	__DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args),		\

From 211baef0eabf4169ce4f73ebd917749d1a7edd74 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl@samba.org>
Date: Mon, 13 Mar 2023 16:09:54 +0100
Subject: [PATCH 259/411] cifs: Fix smb2_set_path_size()

If cifs_get_writable_path() finds a writable file, smb2_compound_op()
must use that file's FID and not the COMPOUND_FID.

Cc: stable@vger.kernel.org
Signed-off-by: Volker Lendecke <vl@samba.org>
Reviewed-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2inode.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 9b956294e8643..8dd3791b5c538 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -234,15 +234,32 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
 		size[0] = 8; /* sizeof __le64 */
 		data[0] = ptr;
 
-		rc = SMB2_set_info_init(tcon, server,
-					&rqst[num_rqst], COMPOUND_FID,
-					COMPOUND_FID, current->tgid,
-					FILE_END_OF_FILE_INFORMATION,
-					SMB2_O_INFO_FILE, 0, data, size);
+		if (cfile) {
+			rc = SMB2_set_info_init(tcon, server,
+						&rqst[num_rqst],
+						cfile->fid.persistent_fid,
+						cfile->fid.volatile_fid,
+						current->tgid,
+						FILE_END_OF_FILE_INFORMATION,
+						SMB2_O_INFO_FILE, 0,
+						data, size);
+		} else {
+			rc = SMB2_set_info_init(tcon, server,
+						&rqst[num_rqst],
+						COMPOUND_FID,
+						COMPOUND_FID,
+						current->tgid,
+						FILE_END_OF_FILE_INFORMATION,
+						SMB2_O_INFO_FILE, 0,
+						data, size);
+			if (!rc) {
+				smb2_set_next_command(tcon, &rqst[num_rqst]);
+				smb2_set_related(&rqst[num_rqst]);
+			}
+		}
 		if (rc)
 			goto finished;
-		smb2_set_next_command(tcon, &rqst[num_rqst]);
-		smb2_set_related(&rqst[num_rqst++]);
+		num_rqst++;
 		trace_smb3_set_eof_enter(xid, ses->Suid, tcon->tid, full_path);
 		break;
 	case SMB2_OP_SET_INFO:

From 05ce0448c3f36febd8db0ee0e9e16557f3ab5ee8 Mon Sep 17 00:00:00 2001
From: Shyam Prasad N <sprasad@microsoft.com>
Date: Fri, 10 Mar 2023 15:32:01 +0000
Subject: [PATCH 260/411] cifs: generate signkey for the channel that's
 reconnecting

Before my changes to how multichannel reconnects work, the
primary channel was always used to do a non-binding session
setup. With my changes, that is not the case anymore.
Missed this place where channel at index 0 was forcibly
updated with the signing key.

Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
Reviewed-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2transport.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index 381babc1212c9..d827b7547ffad 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -425,7 +425,7 @@ generate_smb3signingkey(struct cifs_ses *ses,
 
 		/* safe to access primary channel, since it will never go away */
 		spin_lock(&ses->chan_lock);
-		memcpy(ses->chans[0].signkey, ses->smb3signingkey,
+		memcpy(ses->chans[chan_index].signkey, ses->smb3signingkey,
 		       SMB3_SIGN_KEY_SIZE);
 		spin_unlock(&ses->chan_lock);
 

From 9b0cb770f5d7b1ff40bea7ca385438ee94570eec Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 14 Mar 2023 11:21:54 -0700
Subject: [PATCH 261/411] loop: Fix use-after-free issues

do_req_filebacked() calls blk_mq_complete_request() synchronously or
asynchronously when using asynchronous I/O unless memory allocation fails.
Hence, modify loop_handle_cmd() such that it does not dereference 'cmd' nor
'rq' after do_req_filebacked() finished unless we are sure that the request
has not yet been completed. This patch fixes the following kernel crash:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000054
Call trace:
 css_put.42938+0x1c/0x1ac
 loop_process_work+0xc8c/0xfd4
 loop_rootcg_workfn+0x24/0x34
 process_one_work+0x244/0x558
 worker_thread+0x400/0x8fc
 kthread+0x16c/0x1e0
 ret_from_fork+0x10/0x20

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Dan Schatzberg <schatzberg.dan@gmail.com>
Fixes: c74d40e8b5e2 ("loop: charge i/o to mem and blk cg")
Fixes: bc07c10a3603 ("block: loop: support DIO & AIO")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230314182155.80625-1-bvanassche@acm.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/loop.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 839373451c2b7..28eb59fd71ca2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1859,35 +1859,44 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 
 static void loop_handle_cmd(struct loop_cmd *cmd)
 {
+	struct cgroup_subsys_state *cmd_blkcg_css = cmd->blkcg_css;
+	struct cgroup_subsys_state *cmd_memcg_css = cmd->memcg_css;
 	struct request *rq = blk_mq_rq_from_pdu(cmd);
 	const bool write = op_is_write(req_op(rq));
 	struct loop_device *lo = rq->q->queuedata;
 	int ret = 0;
 	struct mem_cgroup *old_memcg = NULL;
+	const bool use_aio = cmd->use_aio;
 
 	if (write && (lo->lo_flags & LO_FLAGS_READ_ONLY)) {
 		ret = -EIO;
 		goto failed;
 	}
 
-	if (cmd->blkcg_css)
-		kthread_associate_blkcg(cmd->blkcg_css);
-	if (cmd->memcg_css)
+	if (cmd_blkcg_css)
+		kthread_associate_blkcg(cmd_blkcg_css);
+	if (cmd_memcg_css)
 		old_memcg = set_active_memcg(
-			mem_cgroup_from_css(cmd->memcg_css));
+			mem_cgroup_from_css(cmd_memcg_css));
 
+	/*
+	 * do_req_filebacked() may call blk_mq_complete_request() synchronously
+	 * or asynchronously if using aio. Hence, do not touch 'cmd' after
+	 * do_req_filebacked() has returned unless we are sure that 'cmd' has
+	 * not yet been completed.
+	 */
 	ret = do_req_filebacked(lo, rq);
 
-	if (cmd->blkcg_css)
+	if (cmd_blkcg_css)
 		kthread_associate_blkcg(NULL);
 
-	if (cmd->memcg_css) {
+	if (cmd_memcg_css) {
 		set_active_memcg(old_memcg);
-		css_put(cmd->memcg_css);
+		css_put(cmd_memcg_css);
 	}
  failed:
 	/* complete non-aio request */
-	if (!cmd->use_aio || ret) {
+	if (!use_aio || ret) {
 		if (ret == -EOPNOTSUPP)
 			cmd->ret = ret;
 		else

From 00e885efcfbb8712d3e1bfc1ae30639c15ca1d3b Mon Sep 17 00:00:00 2001
From: Chris Leech <cleech@redhat.com>
Date: Fri, 10 Mar 2023 09:09:13 +0800
Subject: [PATCH 262/411] blk-mq: fix "bad unlock balance detected" on q->srcu
 in __blk_mq_run_dispatch_ops

The 'q' parameter of the macro __blk_mq_run_dispatch_ops may not be one
local variable, such as, it is rq->q, then request queue pointed by
this variable could be changed to another queue in case of
BLK_MQ_F_TAG_QUEUE_SHARED after 'dispatch_ops' returns, then
'bad unlock balance' is triggered.

Fixes the issue by adding one local variable for doing srcu lock/unlock.

Fixes: 2a904d00855f ("blk-mq: remove hctx_lock and hctx_unlock")
Cc: Marco Patalano <mpatalan@redhat.com>
Signed-off-by: Chris Leech <cleech@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Link: https://lore.kernel.org/r/20230310010913.1014789-1-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.h b/block/blk-mq.h
index ef59fee62780d..a7482d2cc82e7 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -378,12 +378,13 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
 do {								\
 	if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) {		\
+		struct blk_mq_tag_set *__tag_set = (q)->tag_set; \
 		int srcu_idx;					\
 								\
 		might_sleep_if(check_sleep);			\
-		srcu_idx = srcu_read_lock((q)->tag_set->srcu);	\
+		srcu_idx = srcu_read_lock(__tag_set->srcu);	\
 		(dispatch_ops);					\
-		srcu_read_unlock((q)->tag_set->srcu, srcu_idx);	\
+		srcu_read_unlock(__tag_set->srcu, srcu_idx);	\
 	} else {						\
 		rcu_read_lock();				\
 		(dispatch_ops);					\

From dc472c7612297ffc9aea655bf6e9538bec5bfedf Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@zary.sk>
Date: Sat, 11 Mar 2023 20:25:38 +0100
Subject: [PATCH 263/411] ata: pata_parport: fix parport release without claim

When adapter is not found, pi->disconnect() is called without previous
pi->connect(). This results in error like this:
parport0: pata_parport tried to release parport when not owner

Add missing out_disconnect label and use it correctly.

Signed-off-by: Ondrej Zary <linux@zary.sk>
Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 drivers/ata/pata_parport/pata_parport.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c
index 294a266a0dda5..31c9677a45e33 100644
--- a/drivers/ata/pata_parport/pata_parport.c
+++ b/drivers/ata/pata_parport/pata_parport.c
@@ -487,12 +487,13 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
 
 	pi_connect(pi);
 	if (ata_host_activate(host, 0, NULL, 0, &pata_parport_sht))
-		goto out_unreg_parport;
+		goto out_disconnect;
 
 	return pi;
 
-out_unreg_parport:
+out_disconnect:
 	pi_disconnect(pi);
+out_unreg_parport:
 	parport_unregister_device(pi->pardev);
 	if (pi->proto->release_proto)
 		pi->proto->release_proto(pi);

From b56bce502f55505a97e381d546ee881928183126 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Tue, 14 Mar 2023 20:32:53 -0300
Subject: [PATCH 264/411] cifs: set DFS root session in cifs_get_smb_ses()

Set the DFS root session pointer earlier when creating a new SMB
session to prevent racing with smb2_reconnect(), cifs_reconnect_tcon()
and DFS cache refresher.

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Cc: stable@vger.kernel.org # 6.2
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_dfs_ref.c |  1 +
 fs/cifs/cifsglob.h     |  1 -
 fs/cifs/connect.c      |  1 +
 fs/cifs/dfs.c          | 19 ++++++++-----------
 fs/cifs/dfs.h          |  3 ++-
 fs/cifs/fs_context.h   |  1 +
 6 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 2b1a8d55b4ec4..cb40074feb3e9 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -179,6 +179,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct path *path)
 	tmp.source = full_path;
 	tmp.leaf_fullpath = NULL;
 	tmp.UNC = tmp.prepath = NULL;
+	tmp.dfs_root_ses = NULL;
 
 	rc = smb3_fs_context_dup(ctx, &tmp);
 	if (rc) {
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a99883f16d946..1a8190f71c244 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1749,7 +1749,6 @@ struct cifs_mount_ctx {
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
-	struct cifs_ses *root_ses;
 	uuid_t mount_id;
 	char *origin_fullpath, *leaf_fullpath;
 };
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 5233f14f0636a..b96375f137fa8 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2229,6 +2229,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 	 * need to lock before changing something in the session.
 	 */
 	spin_lock(&cifs_tcp_ses_lock);
+	ses->dfs_root_ses = ctx->dfs_root_ses;
 	list_add(&ses->smb_ses_list, &server->smb_ses_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c
index b64d20374b9c8..6505f1b20147e 100644
--- a/fs/cifs/dfs.c
+++ b/fs/cifs/dfs.c
@@ -95,25 +95,22 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
 	ctx->leaf_fullpath = (char *)full_path;
 	rc = cifs_mount_get_session(mnt_ctx);
 	ctx->leaf_fullpath = NULL;
-	if (!rc) {
-		struct cifs_ses *ses = mnt_ctx->ses;
 
-		mutex_lock(&ses->session_mutex);
-		ses->dfs_root_ses = mnt_ctx->root_ses;
-		mutex_unlock(&ses->session_mutex);
-	}
 	return rc;
 }
 
 static void set_root_ses(struct cifs_mount_ctx *mnt_ctx)
 {
-	if (mnt_ctx->ses) {
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	struct cifs_ses *ses = mnt_ctx->ses;
+
+	if (ses) {
 		spin_lock(&cifs_tcp_ses_lock);
-		mnt_ctx->ses->ses_count++;
+		ses->ses_count++;
 		spin_unlock(&cifs_tcp_ses_lock);
-		dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses);
+		dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, ses);
 	}
-	mnt_ctx->root_ses = mnt_ctx->ses;
+	ctx->dfs_root_ses = mnt_ctx->ses;
 }
 
 static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path,
@@ -260,7 +257,7 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
 	rc = get_session(mnt_ctx, NULL);
 	if (rc)
 		return rc;
-	mnt_ctx->root_ses = mnt_ctx->ses;
+	ctx->dfs_root_ses = mnt_ctx->ses;
 	/*
 	 * If called with 'nodfs' mount option, then skip DFS resolving.  Otherwise unconditionally
 	 * try to get an DFS referral (even cached) to determine whether it is an DFS mount.
diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h
index 344bea6d8bab1..baf16df55d7e7 100644
--- a/fs/cifs/dfs.h
+++ b/fs/cifs/dfs.h
@@ -22,9 +22,10 @@ static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path)
 static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, const char *path,
 				   struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tl)
 {
+	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
 	struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb;
 
-	return dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls,
+	return dfs_cache_find(mnt_ctx->xid, ctx->dfs_root_ses, cifs_sb->local_nls,
 			      cifs_remap(cifs_sb), path, ref, tl);
 }
 
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 44cb5639ed3ba..1b8d4e27f831c 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -265,6 +265,7 @@ struct smb3_fs_context {
 	bool rootfs:1; /* if it's a SMB root file system */
 	bool witness:1; /* use witness protocol */
 	char *leaf_fullpath;
+	struct cifs_ses *dfs_root_ses;
 };
 
 extern const struct fs_parameter_spec smb3_fs_parameters[];

From 396935de145589c8bfe552fa03a5e38604071829 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Tue, 14 Mar 2023 20:32:54 -0300
Subject: [PATCH 265/411] cifs: fix use-after-free bug in
 refresh_cache_worker()

The UAF bug occurred because we were putting DFS root sessions in
cifs_umount() while DFS cache refresher was being executed.

Make DFS root sessions have same lifetime as DFS tcons so we can avoid
the use-after-free bug is DFS cache refresher and other places that
require IPCs to get new DFS referrals on.  Also, get rid of mount
group handling in DFS cache as we no longer need it.

This fixes below use-after-free bug catched by KASAN

[ 379.946955] BUG: KASAN: use-after-free in __refresh_tcon.isra.0+0x10b/0xc10 [cifs]
[ 379.947642] Read of size 8 at addr ffff888018f57030 by task kworker/u4:3/56
[ 379.948096]
[ 379.948208] CPU: 0 PID: 56 Comm: kworker/u4:3 Not tainted 6.2.0-rc7-lku #23
[ 379.948661] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS
rel-1.16.0-0-gd239552-rebuilt.opensuse.org 04/01/2014
[ 379.949368] Workqueue: cifs-dfscache refresh_cache_worker [cifs]
[ 379.949942] Call Trace:
[ 379.950113] <TASK>
[ 379.950260] dump_stack_lvl+0x50/0x67
[ 379.950510] print_report+0x16a/0x48e
[ 379.950759] ? __virt_addr_valid+0xd8/0x160
[ 379.951040] ? __phys_addr+0x41/0x80
[ 379.951285] kasan_report+0xdb/0x110
[ 379.951533] ? __refresh_tcon.isra.0+0x10b/0xc10 [cifs]
[ 379.952056] ? __refresh_tcon.isra.0+0x10b/0xc10 [cifs]
[ 379.952585] __refresh_tcon.isra.0+0x10b/0xc10 [cifs]
[ 379.953096] ? __pfx___refresh_tcon.isra.0+0x10/0x10 [cifs]
[ 379.953637] ? __pfx___mutex_lock+0x10/0x10
[ 379.953915] ? lock_release+0xb6/0x720
[ 379.954167] ? __pfx_lock_acquire+0x10/0x10
[ 379.954443] ? refresh_cache_worker+0x34e/0x6d0 [cifs]
[ 379.954960] ? __pfx_wb_workfn+0x10/0x10
[ 379.955239] refresh_cache_worker+0x4ad/0x6d0 [cifs]
[ 379.955755] ? __pfx_refresh_cache_worker+0x10/0x10 [cifs]
[ 379.956323] ? __pfx_lock_acquired+0x10/0x10
[ 379.956615] ? read_word_at_a_time+0xe/0x20
[ 379.956898] ? lockdep_hardirqs_on_prepare+0x12/0x220
[ 379.957235] process_one_work+0x535/0x990
[ 379.957509] ? __pfx_process_one_work+0x10/0x10
[ 379.957812] ? lock_acquired+0xb7/0x5f0
[ 379.958069] ? __list_add_valid+0x37/0xd0
[ 379.958341] ? __list_add_valid+0x37/0xd0
[ 379.958611] worker_thread+0x8e/0x630
[ 379.958861] ? __pfx_worker_thread+0x10/0x10
[ 379.959148] kthread+0x17d/0x1b0
[ 379.959369] ? __pfx_kthread+0x10/0x10
[ 379.959630] ret_from_fork+0x2c/0x50
[ 379.959879] </TASK>

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Cc: stable@vger.kernel.org # 6.2
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_fs_sb.h |   2 -
 fs/cifs/cifsglob.h   |   3 +-
 fs/cifs/connect.c    |   9 +--
 fs/cifs/dfs.c        |  52 ++++++++++++----
 fs/cifs/dfs.h        |  16 +++++
 fs/cifs/dfs_cache.c  | 140 -------------------------------------------
 fs/cifs/dfs_cache.h  |   2 -
 fs/cifs/misc.c       |   7 +++
 8 files changed, 67 insertions(+), 164 deletions(-)

diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 013a4bd65280c..6517591922801 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -61,8 +61,6 @@ struct cifs_sb_info {
 	/* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */
 	char *prepath;
 
-	/* randomly generated 128-bit number for indexing dfs mount groups in referral cache */
-	uuid_t dfs_mount_id;
 	/*
 	 * Indicate whether serverino option was turned off later
 	 * (cifs_autodisable_serverino) in order to match new mounts.
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1a8190f71c244..08a73dcb77864 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -1233,6 +1233,7 @@ struct cifs_tcon {
 	/* BB add field for back pointer to sb struct(s)? */
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	struct list_head ulist; /* cache update list */
+	struct list_head dfs_ses_list;
 #endif
 	struct delayed_work	query_interfaces; /* query interfaces workqueue job */
 };
@@ -1749,8 +1750,8 @@ struct cifs_mount_ctx {
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon;
-	uuid_t mount_id;
 	char *origin_fullpath, *leaf_fullpath;
+	struct list_head dfs_ses_list;
 };
 
 static inline void free_dfs_info_param(struct dfs_info3_param *param)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index b96375f137fa8..0eceddde7140a 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3408,7 +3408,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	bool isdfs;
 	int rc;
 
-	uuid_gen(&mnt_ctx.mount_id);
+	INIT_LIST_HEAD(&mnt_ctx.dfs_ses_list);
+
 	rc = dfs_mount_share(&mnt_ctx, &isdfs);
 	if (rc)
 		goto error;
@@ -3428,7 +3429,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	kfree(cifs_sb->prepath);
 	cifs_sb->prepath = ctx->prepath;
 	ctx->prepath = NULL;
-	uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id);
 
 out:
 	cifs_try_adding_channels(cifs_sb, mnt_ctx.ses);
@@ -3440,7 +3440,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 	return rc;
 
 error:
-	dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id);
+	dfs_put_root_smb_sessions(&mnt_ctx.dfs_ses_list);
 	kfree(mnt_ctx.origin_fullpath);
 	kfree(mnt_ctx.leaf_fullpath);
 	cifs_mount_put_conns(&mnt_ctx);
@@ -3638,9 +3638,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
 	spin_unlock(&cifs_sb->tlink_tree_lock);
 
 	kfree(cifs_sb->prepath);
-#ifdef CONFIG_CIFS_DFS_UPCALL
-	dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id);
-#endif
 	call_rcu(&cifs_sb->rcu, delayed_free);
 }
 
diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c
index 6505f1b20147e..c8bda52fa096c 100644
--- a/fs/cifs/dfs.c
+++ b/fs/cifs/dfs.c
@@ -99,18 +99,27 @@ static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path)
 	return rc;
 }
 
-static void set_root_ses(struct cifs_mount_ctx *mnt_ctx)
+static int get_root_smb_session(struct cifs_mount_ctx *mnt_ctx)
 {
 	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
+	struct dfs_root_ses *root_ses;
 	struct cifs_ses *ses = mnt_ctx->ses;
 
 	if (ses) {
+		root_ses = kmalloc(sizeof(*root_ses), GFP_KERNEL);
+		if (!root_ses)
+			return -ENOMEM;
+
+		INIT_LIST_HEAD(&root_ses->list);
+
 		spin_lock(&cifs_tcp_ses_lock);
 		ses->ses_count++;
 		spin_unlock(&cifs_tcp_ses_lock);
-		dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, ses);
+		root_ses->ses = ses;
+		list_add_tail(&root_ses->list, &mnt_ctx->dfs_ses_list);
 	}
-	ctx->dfs_root_ses = mnt_ctx->ses;
+	ctx->dfs_root_ses = ses;
+	return 0;
 }
 
 static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, const char *full_path,
@@ -118,7 +127,8 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co
 {
 	struct smb3_fs_context *ctx = mnt_ctx->fs_ctx;
 	struct dfs_info3_param ref = {};
-	int rc;
+	bool is_refsrv = false;
+	int rc, rc2;
 
 	rc = dfs_cache_get_tgt_referral(ref_path + 1, tit, &ref);
 	if (rc)
@@ -133,8 +143,7 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co
 	if (rc)
 		goto out;
 
-	if (ref.flags & DFSREF_REFERRAL_SERVER)
-		set_root_ses(mnt_ctx);
+	is_refsrv = !!(ref.flags & DFSREF_REFERRAL_SERVER);
 
 	rc = -EREMOTE;
 	if (ref.flags & DFSREF_STORAGE_SERVER) {
@@ -143,13 +152,17 @@ static int get_dfs_conn(struct cifs_mount_ctx *mnt_ctx, const char *ref_path, co
 			goto out;
 
 		/* some servers may not advertise referral capability under ref.flags */
-		if (!(ref.flags & DFSREF_REFERRAL_SERVER) &&
-		    is_tcon_dfs(mnt_ctx->tcon))
-			set_root_ses(mnt_ctx);
+		is_refsrv |= is_tcon_dfs(mnt_ctx->tcon);
 
 		rc = cifs_is_path_remote(mnt_ctx);
 	}
 
+	if (rc == -EREMOTE && is_refsrv) {
+		rc2 = get_root_smb_session(mnt_ctx);
+		if (rc2)
+			rc = rc2;
+	}
+
 out:
 	free_dfs_info_param(&ref);
 	return rc;
@@ -162,6 +175,7 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
 	char *ref_path = NULL, *full_path = NULL;
 	struct dfs_cache_tgt_iterator *tit;
 	struct TCP_Server_Info *server;
+	struct cifs_tcon *tcon;
 	char *origin_fullpath = NULL;
 	int num_links = 0;
 	int rc;
@@ -231,12 +245,22 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
 
 	if (!rc) {
 		server = mnt_ctx->server;
+		tcon = mnt_ctx->tcon;
 
 		mutex_lock(&server->refpath_lock);
-		server->origin_fullpath = origin_fullpath;
-		server->current_fullpath = server->leaf_fullpath;
+		if (!server->origin_fullpath) {
+			server->origin_fullpath = origin_fullpath;
+			server->current_fullpath = server->leaf_fullpath;
+			origin_fullpath = NULL;
+		}
 		mutex_unlock(&server->refpath_lock);
-		origin_fullpath = NULL;
+
+		if (list_empty(&tcon->dfs_ses_list)) {
+			list_replace_init(&mnt_ctx->dfs_ses_list,
+					  &tcon->dfs_ses_list);
+		} else {
+			dfs_put_root_smb_sessions(&mnt_ctx->dfs_ses_list);
+		}
 	}
 
 out:
@@ -277,7 +301,9 @@ int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs)
 	}
 
 	*isdfs = true;
-	set_root_ses(mnt_ctx);
+	rc = get_root_smb_session(mnt_ctx);
+	if (rc)
+		return rc;
 
 	return __dfs_mount_share(mnt_ctx);
 }
diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h
index baf16df55d7e7..13f26e01f7b97 100644
--- a/fs/cifs/dfs.h
+++ b/fs/cifs/dfs.h
@@ -10,6 +10,11 @@
 #include "fs_context.h"
 #include "cifs_unicode.h"
 
+struct dfs_root_ses {
+	struct list_head list;
+	struct cifs_ses *ses;
+};
+
 int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref,
 			      struct smb3_fs_context *ctx);
 int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx, bool *isdfs);
@@ -44,4 +49,15 @@ static inline char *dfs_get_automount_devname(struct dentry *dentry, void *page)
 							true);
 }
 
+static inline void dfs_put_root_smb_sessions(struct list_head *head)
+{
+	struct dfs_root_ses *root, *tmp;
+
+	list_for_each_entry_safe(root, tmp, head, list) {
+		list_del_init(&root->list);
+		cifs_put_smb_ses(root->ses);
+		kfree(root);
+	}
+}
+
 #endif /* _CIFS_DFS_H */
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index ac86bd0ebd637..1c59811bfa73a 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -49,17 +49,6 @@ struct cache_entry {
 	struct cache_dfs_tgt *tgthint;
 };
 
-/* List of referral server sessions per dfs mount */
-struct mount_group {
-	struct list_head list;
-	uuid_t id;
-	struct cifs_ses *sessions[CACHE_MAX_ENTRIES];
-	int num_sessions;
-	spinlock_t lock;
-	struct list_head refresh_list;
-	struct kref refcount;
-};
-
 static struct kmem_cache *cache_slab __read_mostly;
 static struct workqueue_struct *dfscache_wq __read_mostly;
 
@@ -76,85 +65,10 @@ static atomic_t cache_count;
 static struct hlist_head cache_htable[CACHE_HTABLE_SIZE];
 static DECLARE_RWSEM(htable_rw_lock);
 
-static LIST_HEAD(mount_group_list);
-static DEFINE_MUTEX(mount_group_list_lock);
-
 static void refresh_cache_worker(struct work_struct *work);
 
 static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker);
 
-static void __mount_group_release(struct mount_group *mg)
-{
-	int i;
-
-	for (i = 0; i < mg->num_sessions; i++)
-		cifs_put_smb_ses(mg->sessions[i]);
-	kfree(mg);
-}
-
-static void mount_group_release(struct kref *kref)
-{
-	struct mount_group *mg = container_of(kref, struct mount_group, refcount);
-
-	mutex_lock(&mount_group_list_lock);
-	list_del(&mg->list);
-	mutex_unlock(&mount_group_list_lock);
-	__mount_group_release(mg);
-}
-
-static struct mount_group *find_mount_group_locked(const uuid_t *id)
-{
-	struct mount_group *mg;
-
-	list_for_each_entry(mg, &mount_group_list, list) {
-		if (uuid_equal(&mg->id, id))
-			return mg;
-	}
-	return ERR_PTR(-ENOENT);
-}
-
-static struct mount_group *__get_mount_group_locked(const uuid_t *id)
-{
-	struct mount_group *mg;
-
-	mg = find_mount_group_locked(id);
-	if (!IS_ERR(mg))
-		return mg;
-
-	mg = kmalloc(sizeof(*mg), GFP_KERNEL);
-	if (!mg)
-		return ERR_PTR(-ENOMEM);
-	kref_init(&mg->refcount);
-	uuid_copy(&mg->id, id);
-	mg->num_sessions = 0;
-	spin_lock_init(&mg->lock);
-	list_add(&mg->list, &mount_group_list);
-	return mg;
-}
-
-static struct mount_group *get_mount_group(const uuid_t *id)
-{
-	struct mount_group *mg;
-
-	mutex_lock(&mount_group_list_lock);
-	mg = __get_mount_group_locked(id);
-	if (!IS_ERR(mg))
-		kref_get(&mg->refcount);
-	mutex_unlock(&mount_group_list_lock);
-
-	return mg;
-}
-
-static void free_mount_group_list(void)
-{
-	struct mount_group *mg, *tmp_mg;
-
-	list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) {
-		list_del_init(&mg->list);
-		__mount_group_release(mg);
-	}
-}
-
 /**
  * dfs_cache_canonical_path - get a canonical DFS path
  *
@@ -704,7 +618,6 @@ void dfs_cache_destroy(void)
 {
 	cancel_delayed_work_sync(&refresh_task);
 	unload_nls(cache_cp);
-	free_mount_group_list();
 	flush_cache_ents();
 	kmem_cache_destroy(cache_slab);
 	destroy_workqueue(dfscache_wq);
@@ -1111,54 +1024,6 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter
 	return rc;
 }
 
-/**
- * dfs_cache_add_refsrv_session - add SMB session of referral server
- *
- * @mount_id: mount group uuid to lookup.
- * @ses: reference counted SMB session of referral server.
- */
-void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses)
-{
-	struct mount_group *mg;
-
-	if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses))
-		return;
-
-	mg = get_mount_group(mount_id);
-	if (WARN_ON_ONCE(IS_ERR(mg)))
-		return;
-
-	spin_lock(&mg->lock);
-	if (mg->num_sessions < ARRAY_SIZE(mg->sessions))
-		mg->sessions[mg->num_sessions++] = ses;
-	spin_unlock(&mg->lock);
-	kref_put(&mg->refcount, mount_group_release);
-}
-
-/**
- * dfs_cache_put_refsrv_sessions - put all referral server sessions
- *
- * Put all SMB sessions from the given mount group id.
- *
- * @mount_id: mount group uuid to lookup.
- */
-void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id)
-{
-	struct mount_group *mg;
-
-	if (!mount_id || uuid_is_null(mount_id))
-		return;
-
-	mutex_lock(&mount_group_list_lock);
-	mg = find_mount_group_locked(mount_id);
-	if (IS_ERR(mg)) {
-		mutex_unlock(&mount_group_list_lock);
-		return;
-	}
-	mutex_unlock(&mount_group_list_lock);
-	kref_put(&mg->refcount, mount_group_release);
-}
-
 /* Extract share from DFS target and return a pointer to prefix path or NULL */
 static const char *parse_target_share(const char *target, char **share)
 {
@@ -1384,11 +1249,6 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb)
 		cifs_dbg(FYI, "%s: not a dfs mount\n", __func__);
 		return 0;
 	}
-
-	if (uuid_is_null(&cifs_sb->dfs_mount_id)) {
-		cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__);
-		return -EINVAL;
-	}
 	/*
 	 * After reconnecting to a different server, unique ids won't match anymore, so we disable
 	 * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE).
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index be3b5a44cf827..e0d39393035a9 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -40,8 +40,6 @@ int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iter
 			       struct dfs_info3_param *ref);
 int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share,
 			    char **prefix);
-void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id);
-void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses);
 char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap);
 int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb);
 
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index a0d286ee723dd..6f9c78650528a 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -22,6 +22,7 @@
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dns_resolve.h"
 #include "dfs_cache.h"
+#include "dfs.h"
 #endif
 #include "fs_context.h"
 #include "cached_dir.h"
@@ -134,6 +135,9 @@ tconInfoAlloc(void)
 	spin_lock_init(&ret_buf->stat_lock);
 	atomic_set(&ret_buf->num_local_opens, 0);
 	atomic_set(&ret_buf->num_remote_opens, 0);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+	INIT_LIST_HEAD(&ret_buf->dfs_ses_list);
+#endif
 
 	return ret_buf;
 }
@@ -149,6 +153,9 @@ tconInfoFree(struct cifs_tcon *tcon)
 	atomic_dec(&tconInfoAllocCount);
 	kfree(tcon->nativeFileSystem);
 	kfree_sensitive(tcon->password);
+#ifdef CONFIG_CIFS_DFS_UPCALL
+	dfs_put_root_smb_sessions(&tcon->dfs_ses_list);
+#endif
 	kfree(tcon);
 }
 

From 47dd902aaee9b9341808a3a994793199e7eddb88 Mon Sep 17 00:00:00 2001
From: Dylan Jhong <dylan@andestech.com>
Date: Fri, 10 Mar 2023 15:50:21 +0800
Subject: [PATCH 266/411] RISC-V: mm: Support huge page in vmalloc_fault()

Since RISC-V supports ioremap() with huge page (pud/pmd) mapping,
However, vmalloc_fault() assumes that the vmalloc range is limited
to pte mappings. To complete the vmalloc_fault() function by adding
huge page support.

Fixes: 310f541a027b ("riscv: Enable HAVE_ARCH_HUGE_VMAP for 64BIT")
Cc: stable@vger.kernel.org
Signed-off-by: Dylan Jhong <dylan@andestech.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20230310075021.3919290-1-dylan@andestech.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/fault.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 460f785f6e09c..d5f3e501dffb3 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -143,6 +143,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 		no_context(regs, addr);
 		return;
 	}
+	if (pud_leaf(*pud_k))
+		goto flush_tlb;
 
 	/*
 	 * Since the vmalloc area is global, it is unnecessary
@@ -153,6 +155,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 		no_context(regs, addr);
 		return;
 	}
+	if (pmd_leaf(*pmd_k))
+		goto flush_tlb;
 
 	/*
 	 * Make sure the actual PTE exists as well to
@@ -172,6 +176,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
 	 * ordering constraint, not a cache flush; it is
 	 * necessary even after writing invalid entries.
 	 */
+flush_tlb:
 	local_flush_tlb_page(addr);
 }
 

From 6015b1aca1a233379625385feb01dd014aca60b5 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 14 Mar 2023 19:32:38 -0700
Subject: [PATCH 267/411] sched_getaffinity: don't assume 'cpumask_size()' is
 fully initialized

The getaffinity() system call uses 'cpumask_size()' to decide how big
the CPU mask is - so far so good.  It is indeed the allocation size of a
cpumask.

But the code also assumes that the whole allocation is initialized
without actually doing so itself.  That's wrong, because we might have
fixed-size allocations (making copying and clearing more efficient), but
not all of it is then necessarily used if 'nr_cpu_ids' is smaller.

Having checked other users of 'cpumask_size()', they all seem to be ok,
either using it purely for the allocation size, or explicitly zeroing
the cpumask before using the size in bytes to copy it.

See for example the ublk_ctrl_get_queue_affinity() function that uses
the proper 'zalloc_cpumask_var()' to make sure that the whole mask is
cleared, whether the storage is on the stack or if it was an external
allocation.

Fix this by just zeroing the allocation before using it.  Do the same
for the compat version of sched_getaffinity(), which had the same logic.

Also, for consistency, make sched_getaffinity() use 'cpumask_bits()' to
access the bits.  For a cpumask_var_t, it ends up being a pointer to the
same data either way, but it's just a good idea to treat it like you
would a 'cpumask_t'.  The compat case already did that.

Reported-by: Ryan Roberts <ryan.roberts@arm.com>
Link: https://lore.kernel.org/lkml/7d026744-6bd6-6827-0471-b5e8eae0be3f@arm.com/
Cc: Yury Norov <yury.norov@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/compat.c     | 2 +-
 kernel/sched/core.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/compat.c b/kernel/compat.c
index 55551989d9da5..fb50f29d9b361 100644
--- a/kernel/compat.c
+++ b/kernel/compat.c
@@ -152,7 +152,7 @@ COMPAT_SYSCALL_DEFINE3(sched_getaffinity, compat_pid_t,  pid, unsigned int, len,
 	if (len & (sizeof(compat_ulong_t)-1))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
 		return -ENOMEM;
 
 	ret = sched_getaffinity(pid, mask);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index af017e038b482..488655f2319f5 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8414,14 +8414,14 @@ SYSCALL_DEFINE3(sched_getaffinity, pid_t, pid, unsigned int, len,
 	if (len & (sizeof(unsigned long)-1))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
 		return -ENOMEM;
 
 	ret = sched_getaffinity(pid, mask);
 	if (ret == 0) {
 		unsigned int retlen = min(len, cpumask_size());
 
-		if (copy_to_user(user_mask_ptr, mask, retlen))
+		if (copy_to_user(user_mask_ptr, cpumask_bits(mask), retlen))
 			ret = -EFAULT;
 		else
 			ret = retlen;

From f446a630802f154ef0087771683bd4f8e9d08384 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Tue, 14 Mar 2023 20:32:55 -0300
Subject: [PATCH 268/411] cifs: return DFS root session id in DebugData

Return the DFS root session id in /proc/fs/cifs/DebugData to make it
easier to track which IPC tcon was used to get new DFS referrals for a
specific connection, and aids in debugging.

A simple output of it would be

  Sessions:
  1) Address: 192.168.1.13 Uses: 1 Capability: 0x300067   Session Status: 1
  Security type: RawNTLMSSP  SessionId: 0xd80000000009
  User: 0 Cred User: 0
  DFS root session id: 0x128006c000035

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Cc: stable@vger.kernel.org # 6.2
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_debug.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 1911f7016fa1d..19a70a69c7603 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -420,6 +420,11 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 				   from_kuid(&init_user_ns, ses->linux_uid),
 				   from_kuid(&init_user_ns, ses->cred_uid));
 
+			if (ses->dfs_root_ses) {
+				seq_printf(m, "\n\tDFS root session id: 0x%llx",
+					   ses->dfs_root_ses->Suid);
+			}
+
 			spin_lock(&ses->chan_lock);
 			if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0))
 				seq_puts(m, "\tPrimary channel: DISCONNECTED ");

From 6284e46bdd47743a064fe6ac834a7ac05b1fd206 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@manguebit.com>
Date: Tue, 14 Mar 2023 20:32:56 -0300
Subject: [PATCH 269/411] cifs: use DFS root session instead of tcon ses

Use DFS root session whenever possible to get new DFS referrals
otherwise we might end up with an IPC tcon (tcon->ses->tcon_ipc) that
doesn't respond to them.  It should be safe accessing
@ses->dfs_root_ses directly in cifs_inval_name_dfs_link_error() as it
has same lifetime as of @tcon.

Signed-off-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
Cc: stable@vger.kernel.org # 6.2
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/misc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 6f9c78650528a..b44fb51968bfb 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1262,6 +1262,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid,
 		 * removing cached DFS targets that the client would eventually
 		 * need during failover.
 		 */
+		ses = CIFS_DFS_ROOT_SES(ses);
 		if (ses->server->ops->get_dfs_refer &&
 		    !ses->server->ops->get_dfs_refer(xid, ses, ref_path, &refs,
 						     &num_refs, cifs_sb->local_nls,

From c753ccb2629f536b8c4feae5c223d5873c814d23 Mon Sep 17 00:00:00 2001
From: Tzafrir Cohen <nvidia@cohens.org.il>
Date: Tue, 14 Mar 2023 15:02:48 +0200
Subject: [PATCH 270/411] Makefile: Make kernelrelease target work with M=

That commit required the use of filechk_kernel.release for the
kernelrelease Makefile target. It is currently only being set when
KBUILD_EXTMOD is not set. Make sure it is set in that case as well.

Fixes: 1cb86b6c3136 ("kbuild: save overridden KERNELRELEASE in include/config/kernel.release")
Signed-off-by: Tzafrir Cohen <nvidia@cohens.org.il>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Makefile b/Makefile
index d7bd0eb9b3463..d0a0ba8e5a2e6 100644
--- a/Makefile
+++ b/Makefile
@@ -1886,6 +1886,8 @@ endif
 
 else # KBUILD_EXTMOD
 
+filechk_kernel.release = echo $(KERNELRELEASE)
+
 ###
 # External module support.
 # When building external modules the kernel used as basis is considered

From 2fd6c4553c962ec7ea8a60c0a3632c7e984800f0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Mar 2023 05:07:26 +0900
Subject: [PATCH 271/411] kbuild: deb-pkg: make debian source package working
 again

Since commit c5bf2efb058d ("kbuild: deb-pkg: fix binary-arch and clean
in debian/rules"), the source package generated by 'make deb-pkg' fails
to build.

I terribly missed the fact that the intdeb-pkg target may regenerate
include/config/kernel.release due to the following in the top Makefile:

  %pkg: include/config/kernel.release FORCE

Restore KERNELRELEASE= option to avoid the kernel.release disagreement
between build-arch and binary-arch.

Fixes: c5bf2efb058d ("kbuild: deb-pkg: fix binary-arch and clean in debian/rules")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/mkdebian | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index f74380036bb54..c6fbfb9f74ba1 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -239,6 +239,7 @@ cat <<EOF > debian/rules
 #!$(command -v $MAKE) -f
 
 srctree ?= .
+KERNELRELEASE = ${KERNELRELEASE}
 
 build-indep:
 build-arch:
@@ -250,7 +251,9 @@ build: build-arch
 
 binary-indep:
 binary-arch: build-arch
-	\$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} intdeb-pkg
+	\$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
+	KERNELRELEASE=\$(KERNELRELEASE) intdeb-pkg
+
 clean:
 	rm -rf debian/files debian/linux-*
 	\$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} clean

From 7a531c21f83d7c62825d00bee3a76c1ccfb5de9f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Mar 2023 05:07:27 +0900
Subject: [PATCH 272/411] kbuild: deb-pkg: do not take KERNELRELEASE from the
 source version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

KERNELRELEASE does not need to match the package version in changelog.
Rather, it conventially matches what is called 'ABINAME', which is a
part of the binary package names.

Both are the same by default, but the former might be overridden by
KDEB_PKGVERSION. In this case, the resulting package would not boot
because /lib/modules/$(uname -r) does not point the module directory.

Partially revert 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability
of source package").

Reported-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Fixes: 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source package")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Tested-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
---
 scripts/package/deb-build-option | 9 ++++-----
 scripts/package/mkdebian         | 1 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/scripts/package/deb-build-option b/scripts/package/deb-build-option
index b079b0d121d47..bd53624318f20 100755
--- a/scripts/package/deb-build-option
+++ b/scripts/package/deb-build-option
@@ -8,9 +8,8 @@ if [ -z "${CROSS_COMPILE}${cross_compiling}" -a "${DEB_HOST_ARCH}" != "${DEB_BUI
 fi
 
 version=$(dpkg-parsechangelog -S Version)
-version_upstream="${version%-*}"
-debian_revision="${version#${version_upstream}}"
-debian_revision="${debian_revision#*-}"
+debian_revision="${version##*-}"
 
-echo KERNELRELEASE=${version_upstream}
-echo KBUILD_BUILD_VERSION=${debian_revision}
+if [ "${version}" != "${debian_revision}" ]; then
+	echo KBUILD_BUILD_VERSION=${debian_revision}
+fi
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index c6fbfb9f74ba1..31b050368cd07 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -244,6 +244,7 @@ KERNELRELEASE = ${KERNELRELEASE}
 build-indep:
 build-arch:
 	\$(MAKE) -f \$(srctree)/Makefile ARCH=${ARCH} \
+	KERNELRELEASE=\$(KERNELRELEASE) \
 	\$(shell \$(srctree)/scripts/package/deb-build-option) \
 	olddefconfig all
 

From f50aa51c4498d7886cfd9dbf439a2332f234a755 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Mar 2023 05:07:28 +0900
Subject: [PATCH 273/411] kbuild: deb-pkg: set CROSS_COMPILE only when
 undefined

Commit 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source
package") set needless CROSS_COMPILE.

For example, 'make allnoconfig bindeb-pkg' on a x86_64 system will set
CROSS_COMPILE=i686-linux-gnu-, where the biarch compiler 'gcc' should
work for building the i386 kernel.

  $ uname -m
  x86_64
  $ make allnoconfig bindeb-pkg >/dev/null
  dpkg-architecture: warning: specified GNU system type i686-linux-gnu does not match CC system type x86_64-linux-gnu, try setting a correct CC environment variable
   dpkg-source --before-build .
   debian/rules binary
  scripts/Kconfig.include:39: C compiler 'i686-linux-gnu-gcc' not found
  make[6]: *** [scripts/kconfig/Makefile:77: olddefconfig] Error 1
  make[5]: *** [Makefile:693: olddefconfig] Error 2
  make[4]: *** [Makefile:358: __build_one_by_one] Error 2
  make[3]: *** [debian/rules:7: build-arch] Error 2
  dpkg-buildpackage: error: debian/rules binary subprocess returned exit status 2
  make[2]: *** [scripts/Makefile.package:127: bindeb-pkg] Error 2
  make[1]: *** [Makefile:1657: bindeb-pkg] Error 2
  make: *** [Makefile:358: __build_one_by_one] Error 2

Check whether CROSS_COMPILE is defined, instead of whether it is non-empty.

If you invoke debian/rules via Kbuild, CROSS_COMPILE is always defined
in the top Makefile.

Fixes: 3ab18a625ce4 ("kbuild: deb-pkg: improve the usability of source package")
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/deb-build-option | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/package/deb-build-option b/scripts/package/deb-build-option
index bd53624318f20..7950eff01781a 100755
--- a/scripts/package/deb-build-option
+++ b/scripts/package/deb-build-option
@@ -1,9 +1,8 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0-only
 
-# Set up CROSS_COMPILE if we are cross-compiling, but not called from the
-# kernel toplevel Makefile
-if [ -z "${CROSS_COMPILE}${cross_compiling}" -a "${DEB_HOST_ARCH}" != "${DEB_BUILD_ARCH}" ]; then
+# Set up CROSS_COMPILE if not defined yet
+if [ "${CROSS_COMPILE+set}" != "set" -a "${DEB_HOST_ARCH}" != "${DEB_BUILD_ARCH}" ]; then
 	echo CROSS_COMPILE=${DEB_HOST_GNU_TYPE}-
 fi
 

From b611daae5efc64e817171a54021d3b334cc1bc41 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Mar 2023 05:07:29 +0900
Subject: [PATCH 274/411] kbuild: deb-pkg: split image and debug objects
 staging out into functions

Prepare for the refactoring in the next commit.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/builddeb | 222 ++++++++++++++++++++-------------------
 1 file changed, 116 insertions(+), 106 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index ff5e7d8e380bb..906889b304a4c 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -51,6 +51,115 @@ create_package() {
 	dpkg-deb $dpkg_deb_opts ${KDEB_COMPRESS:+-Z$KDEB_COMPRESS} --build "$pdir" ..
 }
 
+install_linux_image () {
+	pdir=$1
+	pname=$2
+
+	rm -rf ${pdir}
+
+	# Only some architectures with OF support have this target
+	if is_enabled CONFIG_OF_EARLY_FLATTREE && [ -d "${srctree}/arch/${SRCARCH}/boot/dts" ]; then
+		${MAKE} -f ${srctree}/Makefile INSTALL_DTBS_PATH="${pdir}/usr/lib/linux-image-${KERNELRELEASE}" dtbs_install
+	fi
+
+	if is_enabled CONFIG_MODULES; then
+		${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${pdir}" modules_install
+		rm -f "${pdir}/lib/modules/${KERNELRELEASE}/build"
+		rm -f "${pdir}/lib/modules/${KERNELRELEASE}/source"
+		if [ "${SRCARCH}" = um ] ; then
+			mkdir -p "${pdir}/usr/lib/uml/modules"
+			mv "${pdir}/lib/modules/${KERNELRELEASE}" "${pdir}/usr/lib/uml/modules/${KERNELRELEASE}"
+		fi
+	fi
+
+	# Install the kernel
+	if [ "${ARCH}" = um ] ; then
+		mkdir -p "${pdir}/usr/bin" "${pdir}/usr/share/doc/${pname}"
+		cp System.map "${pdir}/usr/lib/uml/modules/${KERNELRELEASE}/System.map"
+		cp ${KCONFIG_CONFIG} "${pdir}/usr/share/doc/${pname}/config"
+		gzip "${pdir}/usr/share/doc/${pname}/config"
+	else
+		mkdir -p "${pdir}/boot"
+		cp System.map "${pdir}/boot/System.map-${KERNELRELEASE}"
+		cp ${KCONFIG_CONFIG} "${pdir}/boot/config-${KERNELRELEASE}"
+	fi
+
+	# Not all arches have the same installed path in debian
+	# XXX: have each arch Makefile export a variable of the canonical image install
+	# path instead
+	case "${SRCARCH}" in
+	um)
+		installed_image_path="usr/bin/linux-${KERNELRELEASE}";;
+	parisc|mips|powerpc)
+		installed_image_path="boot/vmlinux-${KERNELRELEASE}";;
+	*)
+		installed_image_path="boot/vmlinuz-${KERNELRELEASE}";;
+	esac
+	cp "$(${MAKE} -s -f ${srctree}/Makefile image_name)" "${pdir}/${installed_image_path}"
+
+	# Install the maintainer scripts
+	# Note: hook scripts under /etc/kernel are also executed by official Debian
+	# kernel packages, as well as kernel packages built using make-kpkg.
+	# make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and
+	# so do we; recent versions of dracut and initramfs-tools will obey this.
+	debhookdir=${KDEB_HOOKDIR:-/etc/kernel}
+	for script in postinst postrm preinst prerm; do
+		mkdir -p "${pdir}${debhookdir}/${script}.d"
+
+		mkdir -p "${pdir}/DEBIAN"
+		cat <<-EOF > "${pdir}/DEBIAN/${script}"
+
+		#!/bin/sh
+
+		set -e
+
+		# Pass maintainer script parameters to hook scripts
+		export DEB_MAINT_PARAMS="\$*"
+
+		# Tell initramfs builder whether it's wanted
+		export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No)
+
+		test -d ${debhookdir}/${script}.d && run-parts --arg="${KERNELRELEASE}" --arg="/${installed_image_path}" ${debhookdir}/${script}.d
+		exit 0
+		EOF
+		chmod 755 "${pdir}/DEBIAN/${script}"
+	done
+}
+
+install_linux_image_dbg () {
+	pdir=$1
+	image_pdir=$2
+
+	rm -rf ${pdir}
+
+	for module in $(find ${image_pdir}/lib/modules/ -name *.ko -printf '%P\n'); do
+		module=lib/modules/${module}
+		mkdir -p $(dirname ${pdir}/usr/lib/debug/${module})
+		# only keep debug symbols in the debug file
+		${OBJCOPY} --only-keep-debug ${image_pdir}/${module} ${pdir}/usr/lib/debug/${module}
+		# strip original module from debug symbols
+		${OBJCOPY} --strip-debug ${image_pdir}/${module}
+		# then add a link to those
+		${OBJCOPY} --add-gnu-debuglink=${pdir}/usr/lib/debug/${module} ${image_pdir}/${module}
+	done
+
+	# re-sign stripped modules
+	if is_enabled CONFIG_MODULE_SIG_ALL; then
+		${MAKE} -f ${srctree}/Makefile INSTALL_MOD_PATH="${image_pdir}" modules_sign
+	fi
+
+	# Build debug package
+	# Different tools want the image in different locations
+	# perf
+	mkdir -p ${pdir}/usr/lib/debug/lib/modules/${KERNELRELEASE}/
+	cp vmlinux ${pdir}/usr/lib/debug/lib/modules/${KERNELRELEASE}/
+	# systemtap
+	mkdir -p ${pdir}/usr/lib/debug/boot/
+	ln -s ../lib/modules/${KERNELRELEASE}/vmlinux ${pdir}/usr/lib/debug/boot/vmlinux-${KERNELRELEASE}
+	# kdump-tools
+	ln -s lib/modules/${KERNELRELEASE}/vmlinux ${pdir}/usr/lib/debug/vmlinux-${KERNELRELEASE}
+}
+
 deploy_kernel_headers () {
 	pdir=$1
 
@@ -105,8 +214,6 @@ deploy_libc_headers () {
 }
 
 version=$KERNELRELEASE
-tmpdir=debian/linux-image
-dbg_dir=debian/linux-image-dbg
 packagename=linux-image-$version
 dbg_packagename=$packagename-dbg
 
@@ -114,97 +221,7 @@ if [ "$ARCH" = "um" ] ; then
 	packagename=user-mode-linux-$version
 fi
 
-# Not all arches have the same installed path in debian
-# XXX: have each arch Makefile export a variable of the canonical image install
-# path instead
-case $ARCH in
-um)
-	installed_image_path="usr/bin/linux-$version"
-	;;
-parisc|mips|powerpc)
-	installed_image_path="boot/vmlinux-$version"
-	;;
-*)
-	installed_image_path="boot/vmlinuz-$version"
-esac
-
-BUILD_DEBUG=$(if_enabled_echo CONFIG_DEBUG_INFO Yes)
-
-# Setup the directory structure
-rm -rf "$tmpdir" "$dbg_dir" debian/files
-mkdir -m 755 -p "$tmpdir/DEBIAN"
-mkdir -p "$tmpdir/lib" "$tmpdir/boot"
-
-# Install the kernel
-if [ "$ARCH" = "um" ] ; then
-	mkdir -p "$tmpdir/usr/lib/uml/modules/$version" "$tmpdir/usr/bin" "$tmpdir/usr/share/doc/$packagename"
-	cp System.map "$tmpdir/usr/lib/uml/modules/$version/System.map"
-	cp $KCONFIG_CONFIG "$tmpdir/usr/share/doc/$packagename/config"
-	gzip "$tmpdir/usr/share/doc/$packagename/config"
-else
-	cp System.map "$tmpdir/boot/System.map-$version"
-	cp $KCONFIG_CONFIG "$tmpdir/boot/config-$version"
-fi
-cp "$($MAKE -s -f $srctree/Makefile image_name)" "$tmpdir/$installed_image_path"
-
-if is_enabled CONFIG_OF_EARLY_FLATTREE; then
-	# Only some architectures with OF support have this target
-	if [ -d "${srctree}/arch/$SRCARCH/boot/dts" ]; then
-		$MAKE -f $srctree/Makefile INSTALL_DTBS_PATH="$tmpdir/usr/lib/$packagename" dtbs_install
-	fi
-fi
-
-if is_enabled CONFIG_MODULES; then
-	INSTALL_MOD_PATH="$tmpdir" $MAKE -f $srctree/Makefile modules_install
-	rm -f "$tmpdir/lib/modules/$version/build"
-	rm -f "$tmpdir/lib/modules/$version/source"
-	if [ "$ARCH" = "um" ] ; then
-		mv "$tmpdir/lib/modules/$version"/* "$tmpdir/usr/lib/uml/modules/$version/"
-		rmdir "$tmpdir/lib/modules/$version"
-	fi
-	if [ -n "$BUILD_DEBUG" ] ; then
-		for module in $(find $tmpdir/lib/modules/ -name *.ko -printf '%P\n'); do
-			module=lib/modules/$module
-			mkdir -p $(dirname $dbg_dir/usr/lib/debug/$module)
-			# only keep debug symbols in the debug file
-			$OBJCOPY --only-keep-debug $tmpdir/$module $dbg_dir/usr/lib/debug/$module
-			# strip original module from debug symbols
-			$OBJCOPY --strip-debug $tmpdir/$module
-			# then add a link to those
-			$OBJCOPY --add-gnu-debuglink=$dbg_dir/usr/lib/debug/$module $tmpdir/$module
-		done
-
-		# resign stripped modules
-		if is_enabled CONFIG_MODULE_SIG_ALL; then
-			INSTALL_MOD_PATH="$tmpdir" $MAKE -f $srctree/Makefile modules_sign
-		fi
-	fi
-fi
-
-# Install the maintainer scripts
-# Note: hook scripts under /etc/kernel are also executed by official Debian
-# kernel packages, as well as kernel packages built using make-kpkg.
-# make-kpkg sets $INITRD to indicate whether an initramfs is wanted, and
-# so do we; recent versions of dracut and initramfs-tools will obey this.
-debhookdir=${KDEB_HOOKDIR:-/etc/kernel}
-for script in postinst postrm preinst prerm ; do
-	mkdir -p "$tmpdir$debhookdir/$script.d"
-	cat <<EOF > "$tmpdir/DEBIAN/$script"
-#!/bin/sh
-
-set -e
-
-# Pass maintainer script parameters to hook scripts
-export DEB_MAINT_PARAMS="\$*"
-
-# Tell initramfs builder whether it's wanted
-export INITRD=$(if_enabled_echo CONFIG_BLK_DEV_INITRD Yes No)
-
-test -d $debhookdir/$script.d && run-parts --arg="$version" --arg="/$installed_image_path" $debhookdir/$script.d
-exit 0
-EOF
-	chmod 755 "$tmpdir/DEBIAN/$script"
-done
+rm -f debian/files
 
 if [ "$ARCH" != "um" ]; then
 	if is_enabled CONFIG_MODULES; then
@@ -216,20 +233,13 @@ if [ "$ARCH" != "um" ]; then
 	create_package linux-libc-dev debian/linux-libc-dev
 fi
 
-create_package "$packagename" "$tmpdir"
+install_linux_image debian/linux-image "$packagename"
 
-if [ -n "$BUILD_DEBUG" ] ; then
-	# Build debug package
-	# Different tools want the image in different locations
-	# perf
-	mkdir -p $dbg_dir/usr/lib/debug/lib/modules/$version/
-	cp vmlinux $dbg_dir/usr/lib/debug/lib/modules/$version/
-	# systemtap
-	mkdir -p $dbg_dir/usr/lib/debug/boot/
-	ln -s ../lib/modules/$version/vmlinux $dbg_dir/usr/lib/debug/boot/vmlinux-$version
-	# kdump-tools
-	ln -s lib/modules/$version/vmlinux $dbg_dir/usr/lib/debug/vmlinux-$version
-	create_package "$dbg_packagename" "$dbg_dir"
+if is_enabled CONFIG_DEBUG_INFO; then
+	install_linux_image_dbg debian/linux-image-dbg debian/linux-image
+	create_package "$dbg_packagename" debian/linux-image-dbg
 fi
 
+create_package "$packagename" debian/linux-image
+
 exit 0

From 36862e14e31611f9786622db366327209a7aede7 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Mon, 13 Mar 2023 05:07:30 +0900
Subject: [PATCH 275/411] kbuild: deb-pkg: use dh_listpackages to know enabled
 packages

Use dh_listpackages to get a list of all binary packages.

With this, debian/control lists which binary packages will be produced.
Previously, ARCH=um listed linux-libc-dev in debian/control, but it
was not generated because each of mkdebian and builddeb independently
maintained the if-conditionals.

Another motivation is to allow scripts/package/builddeb to get the
package name (linux-image-*, etc.) dynamically from debian/control.

This will also allow the BuildProfile to control the generation of
the binary packages.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/builddeb | 61 ++++++++++++++++++++++------------------
 scripts/package/mkdebian |  7 ++++-
 2 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/scripts/package/builddeb b/scripts/package/builddeb
index 906889b304a4c..c5ae57167d7ce 100755
--- a/scripts/package/builddeb
+++ b/scripts/package/builddeb
@@ -160,7 +160,7 @@ install_linux_image_dbg () {
 	ln -s lib/modules/${KERNELRELEASE}/vmlinux ${pdir}/usr/lib/debug/vmlinux-${KERNELRELEASE}
 }
 
-deploy_kernel_headers () {
+install_kernel_headers () {
 	pdir=$1
 
 	rm -rf $pdir
@@ -198,7 +198,7 @@ deploy_kernel_headers () {
 	ln -s /usr/src/linux-headers-$version $pdir/lib/modules/$version/build
 }
 
-deploy_libc_headers () {
+install_libc_headers () {
 	pdir=$1
 
 	rm -rf $pdir
@@ -213,33 +213,38 @@ deploy_libc_headers () {
 	mv $pdir/usr/include/asm $pdir/usr/include/$host_arch/
 }
 
-version=$KERNELRELEASE
-packagename=linux-image-$version
-dbg_packagename=$packagename-dbg
-
-if [ "$ARCH" = "um" ] ; then
-	packagename=user-mode-linux-$version
-fi
-
 rm -f debian/files
 
-if [ "$ARCH" != "um" ]; then
-	if is_enabled CONFIG_MODULES; then
-		deploy_kernel_headers debian/linux-headers
-		create_package linux-headers-$version debian/linux-headers
-	fi
-
-	deploy_libc_headers debian/linux-libc-dev
-	create_package linux-libc-dev debian/linux-libc-dev
-fi
-
-install_linux_image debian/linux-image "$packagename"
-
-if is_enabled CONFIG_DEBUG_INFO; then
-	install_linux_image_dbg debian/linux-image-dbg debian/linux-image
-	create_package "$dbg_packagename" debian/linux-image-dbg
-fi
-
-create_package "$packagename" debian/linux-image
+packages_enabled=$(dh_listpackages)
+
+for package in ${packages_enabled}
+do
+	case ${package} in
+	*-dbg)
+		# This must be done after linux-image, that is, we expect the
+		# debug package appears after linux-image in debian/control.
+		install_linux_image_dbg debian/linux-image-dbg debian/linux-image;;
+	linux-image-*|user-mode-linux-*)
+		install_linux_image debian/linux-image ${package};;
+	linux-libc-dev)
+		install_libc_headers debian/linux-libc-dev;;
+	linux-headers-*)
+		install_kernel_headers debian/linux-headers;;
+	esac
+done
+
+for package in ${packages_enabled}
+do
+	case ${package} in
+	*-dbg)
+		create_package ${package} debian/linux-image-dbg;;
+	linux-image-*|user-mode-linux-*)
+		create_package ${package} debian/linux-image;;
+	linux-libc-dev)
+		create_package ${package} debian/linux-libc-dev;;
+	linux-headers-*)
+		create_package ${package} debian/linux-headers;;
+	esac
+done
 
 exit 0
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index 31b050368cd07..e80a661a79ee6 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -192,7 +192,7 @@ Section: kernel
 Priority: optional
 Maintainer: $maintainer
 Rules-Requires-Root: no
-Build-Depends: bc, rsync, kmod, cpio, bison, flex $extra_build_depends
+Build-Depends: bc, debhelper, rsync, kmod, cpio, bison, flex $extra_build_depends
 Homepage: https://www.kernel.org/
 
 Package: $packagename-$version
@@ -200,6 +200,10 @@ Architecture: $debarch
 Description: Linux kernel, version $version
  This package contains the Linux kernel, modules and corresponding other
  files, version: $version.
+EOF
+
+if [ "${SRCARCH}" != um ]; then
+cat <<EOF >> debian/control
 
 Package: linux-libc-dev
 Section: devel
@@ -222,6 +226,7 @@ Description: Linux kernel headers for $version on $debarch
  This is useful for people who need to build external modules
 EOF
 fi
+fi
 
 if is_enabled CONFIG_DEBUG_INFO; then
 cat <<EOF >> debian/control

From 248401cb2c4612d83eb0c352ee8103b78b8eb365 Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Fri, 10 Mar 2023 11:48:33 -0800
Subject: [PATCH 276/411] ice: avoid bonding causing auxiliary plug/unplug
 under RTNL lock

RDMA is not supported in ice on a PF that has been added to a bonded
interface. To enforce this, when an interface enters a bond, we unplug
the auxiliary device that supports RDMA functionality.  This unplug
currently happens in the context of handling the netdev bonding event.
This event is sent to the ice driver under RTNL context.  This is causing
a deadlock where the RDMA driver is waiting for the RTNL lock to complete
the removal.

Defer the unplugging/re-plugging of the auxiliary device to the service
task so that it is not performed under the RTNL lock context.

Cc: stable@vger.kernel.org # 6.1.x
Reported-by: Jaroslav Pulchart <jaroslav.pulchart@gooddata.com>
Link: https://lore.kernel.org/netdev/CAK8fFZ6A_Gphw_3-QMGKEFQk=sfCw1Qmq0TVZK3rtAi7vb621A@mail.gmail.com/
Fixes: 5cb1ebdbc434 ("ice: Fix race condition during interface enslave")
Fixes: 4eace75e0853 ("RDMA/irdma: Report the correct link speed")
Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Arpana Arland <arpanax.arland@intel.com> (A Contingent worker at Intel)
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20230310194833.3074601-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/ice/ice.h      | 14 +++++---------
 drivers/net/ethernet/intel/ice/ice_main.c | 19 ++++++++-----------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index b0e29e3424018..e809249500e18 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -509,6 +509,7 @@ enum ice_pf_flags {
 	ICE_FLAG_VF_VLAN_PRUNING,
 	ICE_FLAG_LINK_LENIENT_MODE_ENA,
 	ICE_FLAG_PLUG_AUX_DEV,
+	ICE_FLAG_UNPLUG_AUX_DEV,
 	ICE_FLAG_MTU_CHANGED,
 	ICE_FLAG_GNSS,			/* GNSS successfully initialized */
 	ICE_PF_FLAGS_NBITS		/* must be last */
@@ -955,16 +956,11 @@ static inline void ice_set_rdma_cap(struct ice_pf *pf)
  */
 static inline void ice_clear_rdma_cap(struct ice_pf *pf)
 {
-	/* We can directly unplug aux device here only if the flag bit
-	 * ICE_FLAG_PLUG_AUX_DEV is not set because ice_unplug_aux_dev()
-	 * could race with ice_plug_aux_dev() called from
-	 * ice_service_task(). In this case we only clear that bit now and
-	 * aux device will be unplugged later once ice_plug_aux_device()
-	 * called from ice_service_task() finishes (see ice_service_task()).
+	/* defer unplug to service task to avoid RTNL lock and
+	 * clear PLUG bit so that pending plugs don't interfere
 	 */
-	if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
-		ice_unplug_aux_dev(pf);
-
+	clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags);
+	set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags);
 	clear_bit(ICE_FLAG_RDMA_ENA, pf->flags);
 }
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 567694bf098ba..c233464b8f6bf 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2316,18 +2316,15 @@ static void ice_service_task(struct work_struct *work)
 		}
 	}
 
-	if (test_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags)) {
-		/* Plug aux device per request */
-		ice_plug_aux_dev(pf);
+	/* unplug aux dev per request, if an unplug request came in
+	 * while processing a plug request, this will handle it
+	 */
+	if (test_and_clear_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags))
+		ice_unplug_aux_dev(pf);
 
-		/* Mark plugging as done but check whether unplug was
-		 * requested during ice_plug_aux_dev() call
-		 * (e.g. from ice_clear_rdma_cap()) and if so then
-		 * plug aux device.
-		 */
-		if (!test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
-			ice_unplug_aux_dev(pf);
-	}
+	/* Plug aux device per request */
+	if (test_and_clear_bit(ICE_FLAG_PLUG_AUX_DEV, pf->flags))
+		ice_plug_aux_dev(pf);
 
 	if (test_and_clear_bit(ICE_FLAG_MTU_CHANGED, pf->flags)) {
 		struct iidc_event *event;

From 4b397c06cb987935b1b097336532aa6b4210e091 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 10 Mar 2023 19:11:09 +0000
Subject: [PATCH 277/411] net: tunnels: annotate lockless accesses to
 dev->needed_headroom

IP tunnels can apparently update dev->needed_headroom
in their xmit path.

This patch takes care of three tunnels xmit, and also the
core LL_RESERVED_SPACE() and LL_RESERVED_SPACE_EXTRA()
helpers.

More changes might be needed for completeness.

BUG: KCSAN: data-race in ip_tunnel_xmit / ip_tunnel_xmit

read to 0xffff88815b9da0ec of 2 bytes by task 888 on cpu 1:
ip_tunnel_xmit+0x1270/0x1730 net/ipv4/ip_tunnel.c:803
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228
ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:444 [inline]
ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126
iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228
ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:444 [inline]
ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126
iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228
ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:444 [inline]
ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126
iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228
ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:444 [inline]
ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126
iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228
ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:444 [inline]
ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126
iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip_finish_output2+0x740/0x840 net/ipv4/ip_output.c:228
ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:316
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:430
dst_output include/net/dst.h:444 [inline]
ip_local_out+0x64/0x80 net/ipv4/ip_output.c:126
iptunnel_xmit+0x34a/0x4b0 net/ipv4/ip_tunnel_core.c:82
ip_tunnel_xmit+0x1451/0x1730 net/ipv4/ip_tunnel.c:813
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246

write to 0xffff88815b9da0ec of 2 bytes by task 2379 on cpu 0:
ip_tunnel_xmit+0x1294/0x1730 net/ipv4/ip_tunnel.c:804
__gre_xmit net/ipv4/ip_gre.c:469 [inline]
ipgre_xmit+0x516/0x570 net/ipv4/ip_gre.c:661
__netdev_start_xmit include/linux/netdevice.h:4881 [inline]
netdev_start_xmit include/linux/netdevice.h:4895 [inline]
xmit_one net/core/dev.c:3580 [inline]
dev_hard_start_xmit+0x127/0x400 net/core/dev.c:3596
__dev_queue_xmit+0x1007/0x1eb0 net/core/dev.c:4246
dev_queue_xmit include/linux/netdevice.h:3051 [inline]
neigh_direct_output+0x17/0x20 net/core/neighbour.c:1623
neigh_output include/net/neighbour.h:546 [inline]
ip6_finish_output2+0x9bc/0xc50 net/ipv6/ip6_output.c:134
__ip6_finish_output net/ipv6/ip6_output.c:195 [inline]
ip6_finish_output+0x39a/0x4e0 net/ipv6/ip6_output.c:206
NF_HOOK_COND include/linux/netfilter.h:291 [inline]
ip6_output+0xeb/0x220 net/ipv6/ip6_output.c:227
dst_output include/net/dst.h:444 [inline]
NF_HOOK include/linux/netfilter.h:302 [inline]
mld_sendpack+0x438/0x6a0 net/ipv6/mcast.c:1820
mld_send_cr net/ipv6/mcast.c:2121 [inline]
mld_ifc_work+0x519/0x7b0 net/ipv6/mcast.c:2653
process_one_work+0x3e6/0x750 kernel/workqueue.c:2390
worker_thread+0x5f2/0xa10 kernel/workqueue.c:2537
kthread+0x1ac/0x1e0 kernel/kthread.c:376
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308

value changed: 0x0dd4 -> 0x0e14

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 2379 Comm: kworker/0:0 Not tainted 6.3.0-rc1-syzkaller-00002-g8ca09d5fa354-dirty #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/02/2023
Workqueue: mld mld_ifc_work

Fixes: 8eb30be0352d ("ipv6: Create ip6_tnl_xmit")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://lore.kernel.org/r/20230310191109.2384387-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netdevice.h |  6 ++++--
 net/ipv4/ip_tunnel.c      | 12 ++++++------
 net/ipv6/ip6_tunnel.c     |  4 ++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6a14b7b117668..470085b121d3c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -297,9 +297,11 @@ struct hh_cache {
  * relationship HH alignment <= LL alignment.
  */
 #define LL_RESERVED_SPACE(dev) \
-	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+	((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom)) \
+	  & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
-	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
+	((((dev)->hard_header_len + READ_ONCE((dev)->needed_headroom) + (extra)) \
+	  & ~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 
 struct header_ops {
 	int	(*create) (struct sk_buff *skb, struct net_device *dev,
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index de90b09dfe78f..2541083d49ad6 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -614,10 +614,10 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
-	if (headroom > dev->needed_headroom)
-		dev->needed_headroom = headroom;
+	if (headroom > READ_ONCE(dev->needed_headroom))
+		WRITE_ONCE(dev->needed_headroom, headroom);
 
-	if (skb_cow_head(skb, dev->needed_headroom)) {
+	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
 		ip_rt_put(rt);
 		goto tx_dropped;
 	}
@@ -800,10 +800,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 
 	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
 			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
-	if (max_headroom > dev->needed_headroom)
-		dev->needed_headroom = max_headroom;
+	if (max_headroom > READ_ONCE(dev->needed_headroom))
+		WRITE_ONCE(dev->needed_headroom, max_headroom);
 
-	if (skb_cow_head(skb, dev->needed_headroom)) {
+	if (skb_cow_head(skb, READ_ONCE(dev->needed_headroom))) {
 		ip_rt_put(rt);
 		DEV_STATS_INC(dev, tx_dropped);
 		kfree_skb(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 47b6607a13706..5e80e517f0710 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1240,8 +1240,8 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 	 */
 	max_headroom = LL_RESERVED_SPACE(dst->dev) + sizeof(struct ipv6hdr)
 			+ dst->header_len + t->hlen;
-	if (max_headroom > dev->needed_headroom)
-		dev->needed_headroom = max_headroom;
+	if (max_headroom > READ_ONCE(dev->needed_headroom))
+		WRITE_ONCE(dev->needed_headroom, max_headroom);
 
 	err = ip6_tnl_encap(skb, t, &proto, fl6);
 	if (err)

From c22c3bbf351e4ce905f082649cffa1ff893ea8c1 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 11 Mar 2023 19:34:45 +0100
Subject: [PATCH 278/411] net: phy: smsc: bail out in lan87xx_read_status if
 genphy_read_status fails

If genphy_read_status fails then further access to the PHY may result
in unpredictable behavior. To prevent this bail out immediately if
genphy_read_status fails.

Fixes: 4223dbffed9f ("net: phy: smsc: Re-enable EDPD mode for LAN87xx")
Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Link: https://lore.kernel.org/r/026aa4f2-36f5-1c10-ab9f-cdb17dda6ac4@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/smsc.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 00d9eff91dcfa..df2c5435c5c49 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -199,8 +199,11 @@ static int lan95xx_config_aneg_ext(struct phy_device *phydev)
 static int lan87xx_read_status(struct phy_device *phydev)
 {
 	struct smsc_phy_priv *priv = phydev->priv;
+	int err;
 
-	int err = genphy_read_status(phydev);
+	err = genphy_read_status(phydev);
+	if (err)
+		return err;
 
 	if (!phydev->link && priv->energy_enable && phydev->irq == PHY_POLL) {
 		/* Disable EDPD to wake up PHY */

From d9ba9934285514f1f95d96326a82398a22dc77f2 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Sat, 11 Mar 2023 19:19:03 -0800
Subject: [PATCH 279/411] tcp: Fix bind() conflict check for dual-stack
 wildcard address.

Paul Holzinger reported [0] that commit 5456262d2baa ("net: Fix
incorrect address comparison when searching for a bind2 bucket")
introduced a bind() regression.  Paul also gave a nice repro that
calls two types of bind() on the same port, both of which now
succeed, but the second call should fail:

  bind(fd1, ::, port) + bind(fd2, 127.0.0.1, port)

The cited commit added address family tests in three functions to
fix the uninit-value KMSAN report. [1]  However, the test added to
inet_bind2_bucket_match_addr_any() removed a necessary conflict
check; the dual-stack wildcard address no longer conflicts with
an IPv4 non-wildcard address.

If tb->family is AF_INET6 and sk->sk_family is AF_INET in
inet_bind2_bucket_match_addr_any(), we still need to check
if tb has the dual-stack wildcard address.

Note that the IPv4 wildcard address does not conflict with
IPv6 non-wildcard addresses.

[0]: https://lore.kernel.org/netdev/e21bf153-80b0-9ec0-15ba-e04a4ad42c34@redhat.com/
[1]: https://lore.kernel.org/netdev/CAG_fn=Ud3zSW7AZWXc+asfMhZVL5ETnvuY44Pmyv4NPv-ijN-A@mail.gmail.com/

Fixes: 5456262d2baa ("net: Fix incorrect address comparison when searching for a bind2 bucket")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Reported-by: Paul Holzinger <pholzing@redhat.com>
Link: https://lore.kernel.org/netdev/CAG_fn=Ud3zSW7AZWXc+asfMhZVL5ETnvuY44Pmyv4NPv-ijN-A@mail.gmail.com/
Reviewed-by: Eric Dumazet <edumazet@google.com>
Tested-by: Paul Holzinger <pholzing@redhat.com>
Reviewed-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/inet_hashtables.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e41fdc38ce196..6edae38868855 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -828,8 +828,14 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const
 #if IS_ENABLED(CONFIG_IPV6)
 	struct in6_addr addr_any = {};
 
-	if (sk->sk_family != tb->family)
+	if (sk->sk_family != tb->family) {
+		if (sk->sk_family == AF_INET)
+			return net_eq(ib2_net(tb), net) && tb->port == port &&
+				tb->l3mdev == l3mdev &&
+				ipv6_addr_equal(&tb->v6_rcv_saddr, &addr_any);
+
 		return false;
+	}
 
 	if (sk->sk_family == AF_INET6)
 		return net_eq(ib2_net(tb), net) && tb->port == port &&

From 13715acf8ab5b32a6d7e42686fceeb66df114185 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Sat, 11 Mar 2023 19:19:04 -0800
Subject: [PATCH 280/411] selftest: Add test for bind() conflicts.

The test checks if (IPv4, IPv6) address pair properly conflict or not.

  * IPv4
    * 0.0.0.0
    * 127.0.0.1

  * IPv6
    * ::
    * ::1

If the IPv6 address is [::], the second bind() always fails.

Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/.gitignore      |   1 +
 tools/testing/selftests/net/Makefile        |   1 +
 tools/testing/selftests/net/bind_wildcard.c | 114 ++++++++++++++++++++
 3 files changed, 116 insertions(+)
 create mode 100644 tools/testing/selftests/net/bind_wildcard.c

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index a6911cae368c7..80f06aa620345 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 bind_bhash
 bind_timewait
+bind_wildcard
 csum
 cmsg_sender
 diag_uid
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 6cd8993454d7e..80fbfe0330f6e 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -80,6 +80,7 @@ TEST_GEN_FILES += sctp_hello
 TEST_GEN_FILES += csum
 TEST_GEN_FILES += nat6to4.o
 TEST_GEN_FILES += ip_local_port_range
+TEST_GEN_FILES += bind_wildcard
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
new file mode 100644
index 0000000000000..58edfc15d28bd
--- /dev/null
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "../kselftest_harness.h"
+
+FIXTURE(bind_wildcard)
+{
+	struct sockaddr_in addr4;
+	struct sockaddr_in6 addr6;
+	int expected_errno;
+};
+
+FIXTURE_VARIANT(bind_wildcard)
+{
+	const __u32 addr4_const;
+	const struct in6_addr *addr6_const;
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_any,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
+{
+	.addr4_const = INADDR_ANY,
+	.addr6_const = &in6addr_loopback,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_any,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
+{
+	.addr4_const = INADDR_LOOPBACK,
+	.addr6_const = &in6addr_loopback,
+};
+
+FIXTURE_SETUP(bind_wildcard)
+{
+	self->addr4.sin_family = AF_INET;
+	self->addr4.sin_port = htons(0);
+	self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+
+	self->addr6.sin6_family = AF_INET6;
+	self->addr6.sin6_port = htons(0);
+	self->addr6.sin6_addr = *variant->addr6_const;
+
+	if (variant->addr6_const == &in6addr_any)
+		self->expected_errno = EADDRINUSE;
+	else
+		self->expected_errno = 0;
+}
+
+FIXTURE_TEARDOWN(bind_wildcard)
+{
+}
+
+void bind_sockets(struct __test_metadata *_metadata,
+		  FIXTURE_DATA(bind_wildcard) *self,
+		  struct sockaddr *addr1, socklen_t addrlen1,
+		  struct sockaddr *addr2, socklen_t addrlen2)
+{
+	int fd[2];
+	int ret;
+
+	fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
+	ASSERT_GT(fd[0], 0);
+
+	ret = bind(fd[0], addr1, addrlen1);
+	ASSERT_EQ(ret, 0);
+
+	ret = getsockname(fd[0], addr1, &addrlen1);
+	ASSERT_EQ(ret, 0);
+
+	((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+
+	fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
+	ASSERT_GT(fd[1], 0);
+
+	ret = bind(fd[1], addr2, addrlen2);
+	if (self->expected_errno) {
+		ASSERT_EQ(ret, -1);
+		ASSERT_EQ(errno, self->expected_errno);
+	} else {
+		ASSERT_EQ(ret, 0);
+	}
+
+	close(fd[1]);
+	close(fd[0]);
+}
+
+TEST_F(bind_wildcard, v4_v6)
+{
+	bind_sockets(_metadata, self,
+		     (struct sockaddr *)&self->addr4, sizeof(self->addr6),
+		     (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+}
+
+TEST_F(bind_wildcard, v6_v4)
+{
+	bind_sockets(_metadata, self,
+		     (struct sockaddr *)&self->addr6, sizeof(self->addr6),
+		     (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+}
+
+TEST_HARNESS_MAIN

From 5000fe6c27827a61d8250a7e4a1d26c3298ef4f6 Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz@163.com>
Date: Mon, 13 Mar 2023 00:08:37 +0800
Subject: [PATCH 281/411] nfc: st-nci: Fix use after free bug in ndlc_remove
 due to race condition

This bug influences both st_nci_i2c_remove and st_nci_spi_remove.
Take st_nci_i2c_remove as an example.

In st_nci_i2c_probe, it called ndlc_probe and bound &ndlc->sm_work
with llt_ndlc_sm_work.

When it calls ndlc_recv or timeout handler, it will finally call
schedule_work to start the work.

When we call st_nci_i2c_remove to remove the driver, there
may be a sequence as follows:

Fix it by finishing the work before cleanup in ndlc_remove

CPU0                  CPU1

                    |llt_ndlc_sm_work
st_nci_i2c_remove   |
  ndlc_remove       |
     st_nci_remove  |
     nci_free_device|
     kfree(ndev)    |
//free ndlc->ndev   |
                    |llt_ndlc_rcv_queue
                    |nci_recv_frame
                    |//use ndlc->ndev

Fixes: 35630df68d60 ("NFC: st21nfcb: Add driver for STMicroelectronics ST21NFCB NFC chip")
Signed-off-by: Zheng Wang <zyytlz.wz@163.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230312160837.2040857-1-zyytlz.wz@163.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/st-nci/ndlc.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/nfc/st-nci/ndlc.c b/drivers/nfc/st-nci/ndlc.c
index 755460a73c0dc..d2aa9f766738e 100644
--- a/drivers/nfc/st-nci/ndlc.c
+++ b/drivers/nfc/st-nci/ndlc.c
@@ -282,13 +282,15 @@ EXPORT_SYMBOL(ndlc_probe);
 
 void ndlc_remove(struct llt_ndlc *ndlc)
 {
-	st_nci_remove(ndlc->ndev);
-
 	/* cancel timers */
 	del_timer_sync(&ndlc->t1_timer);
 	del_timer_sync(&ndlc->t2_timer);
 	ndlc->t2_active = false;
 	ndlc->t1_active = false;
+	/* cancel work */
+	cancel_work_sync(&ndlc->sm_work);
+
+	st_nci_remove(ndlc->ndev);
 
 	skb_queue_purge(&ndlc->rcv_q);
 	skb_queue_purge(&ndlc->send_q);

From 5ce76fe1eead179c058d9151ee1f4088cfdc1c6b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 14 Mar 2023 00:08:40 +0100
Subject: [PATCH 282/411] veth: rely on rtnl_dereference() instead of on
 rcu_dereference() in veth_set_xdp_features()

Fix the following kernel warning in veth_set_xdp_features routine
relying on rtnl_dereference() instead of on rcu_dereference():

=============================
WARNING: suspicious RCU usage
6.3.0-rc1-00144-g064d70527aaa #149 Not tainted
-----------------------------
drivers/net/veth.c:1265 suspicious rcu_dereference_check() usage!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 1
1 lock held by ip/135:
(net/core/rtnetlink.c:6172)

stack backtrace:
CPU: 1 PID: 135 Comm: ip Not tainted 6.3.0-rc1-00144-g064d70527aaa #149
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1
04/01/2014
Call Trace:
 <TASK>
dump_stack_lvl (lib/dump_stack.c:107)
lockdep_rcu_suspicious (include/linux/context_tracking.h:152)
veth_set_xdp_features (drivers/net/veth.c:1265 (discriminator 9))
veth_newlink (drivers/net/veth.c:1892)
? veth_set_features (drivers/net/veth.c:1774)
? kasan_save_stack (mm/kasan/common.c:47)
? kasan_save_stack (mm/kasan/common.c:46)
? kasan_set_track (mm/kasan/common.c:52)
? alloc_netdev_mqs (include/linux/slab.h:737)
? rcu_read_lock_sched_held (kernel/rcu/update.c:125)
? trace_kmalloc (include/trace/events/kmem.h:54)
? __xdp_rxq_info_reg (net/core/xdp.c:188)
? alloc_netdev_mqs (net/core/dev.c:10657)
? rtnl_create_link (net/core/rtnetlink.c:3312)
rtnl_newlink_create (net/core/rtnetlink.c:3440)
? rtnl_link_get_net_capable.constprop.0 (net/core/rtnetlink.c:3391)
__rtnl_newlink (net/core/rtnetlink.c:3657)
? lock_downgrade (kernel/locking/lockdep.c:5321)
? rtnl_link_unregister (net/core/rtnetlink.c:3487)
rtnl_newlink (net/core/rtnetlink.c:3671)
rtnetlink_rcv_msg (net/core/rtnetlink.c:6174)
? rtnl_link_fill (net/core/rtnetlink.c:6070)
? mark_usage (kernel/locking/lockdep.c:4914)
? mark_usage (kernel/locking/lockdep.c:4914)
netlink_rcv_skb (net/netlink/af_netlink.c:2574)
? rtnl_link_fill (net/core/rtnetlink.c:6070)
? netlink_ack (net/netlink/af_netlink.c:2551)
? lock_acquire (kernel/locking/lockdep.c:467)
? net_generic (include/linux/rcupdate.h:805)
? netlink_deliver_tap (include/linux/rcupdate.h:805)
netlink_unicast (net/netlink/af_netlink.c:1340)
? netlink_attachskb (net/netlink/af_netlink.c:1350)
netlink_sendmsg (net/netlink/af_netlink.c:1942)
? netlink_unicast (net/netlink/af_netlink.c:1861)
? netlink_unicast (net/netlink/af_netlink.c:1861)
sock_sendmsg (net/socket.c:727)
____sys_sendmsg (net/socket.c:2501)
? kernel_sendmsg (net/socket.c:2448)
? __copy_msghdr (net/socket.c:2428)
___sys_sendmsg (net/socket.c:2557)
? mark_usage (kernel/locking/lockdep.c:4914)
? do_recvmmsg (net/socket.c:2544)
? lock_acquire (kernel/locking/lockdep.c:467)
? find_held_lock (kernel/locking/lockdep.c:5159)
? __lock_release (kernel/locking/lockdep.c:5345)
? __might_fault (mm/memory.c:5625)
? lock_downgrade (kernel/locking/lockdep.c:5321)
? __fget_light (include/linux/atomic/atomic-arch-fallback.h:227)
__sys_sendmsg (include/linux/file.h:31)
? __sys_sendmsg_sock (net/socket.c:2572)
? rseq_get_rseq_cs (kernel/rseq.c:275)
? lockdep_hardirqs_on_prepare.part.0 (kernel/locking/lockdep.c:4263)
do_syscall_64 (arch/x86/entry/common.c:50)
entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
RIP: 0033:0x7f0d1aadeb17
Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e
fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00
f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10

Fixes: fccca038f300 ("veth: take into account device reconfiguration for xdp_features flag")
Suggested-by: Eric Dumazet <edumazet@google.com>
Reported-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/netdev/cover.1678364612.git.lorenzo@kernel.org/T/#me4c9d8e985ec7ebee981cfdb5bc5ec651ef4035d
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reported-by: syzbot+c3d0d9c42d59ff644ea6@syzkaller.appspotmail.com
Reviewed-by: Eric Dumazet <edumazet@google.com>
Tested-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/r/dfd6a9a7d85e9113063165e1f47b466b90ad7b8a.1678748579.git.lorenzo@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/veth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 293dc3b2c84a6..4da74ac27f9a2 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1262,7 +1262,7 @@ static void veth_set_xdp_features(struct net_device *dev)
 	struct veth_priv *priv = netdev_priv(dev);
 	struct net_device *peer;
 
-	peer = rcu_dereference(priv->peer);
+	peer = rtnl_dereference(priv->peer);
 	if (peer && peer->real_num_tx_queues <= dev->real_num_rx_queues) {
 		xdp_features_t val = NETDEV_XDP_ACT_BASIC |
 				     NETDEV_XDP_ACT_REDIRECT |

From 35c356924fe3669dfbb1185607ce3b37f70bfa80 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Mon, 13 Mar 2023 18:21:24 +0100
Subject: [PATCH 283/411] mlxsw: spectrum: Fix incorrect parsing depth after
 reload

Spectrum ASICs have a configurable limit on how deep into the packet
they parse. By default, the limit is 96 bytes.

There are several cases where this parsing depth is not enough and there
is a need to increase it. For example, timestamping of PTP packets and a
FIB multipath hash policy that requires hashing on inner fields. The
driver therefore maintains a reference count that reflects the number of
consumers that require an increased parsing depth.

During reload_down() the parsing depth reference count does not
necessarily drop to zero, but the parsing depth itself is restored to
the default during reload_up() when the firmware is reset. It is
therefore possible to end up in situations where the driver thinks that
the parsing depth was increased (reference count is non-zero), when it
is not.

Fix by making sure that all the consumers that increase the parsing
depth reference count also decrease it during reload_down().
Specifically, make sure that when the routing code is de-initialized it
drops the reference count if it was increased because of a FIB multipath
hash policy that requires hashing on inner fields.

Add a warning if the reference count is not zero after the driver was
de-initialized and explicitly reset it to zero during initialization for
good measures.

Fixes: 2d91f0803b84 ("mlxsw: spectrum: Add infrastructure for parsing configuration")
Reported-by: Maksym Yaremchuk <maksymy@nvidia.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Link: https://lore.kernel.org/r/9c35e1b3e6c1d8f319a2449d14e2b86373f3b3ba.1678727526.git.petrm@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  2 ++
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  | 14 ++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index a8f94b7544eea..02a327744a61b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -2937,6 +2937,7 @@ static int mlxsw_sp_netdevice_event(struct notifier_block *unused,
 
 static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp)
 {
+	refcount_set(&mlxsw_sp->parsing.parsing_depth_ref, 0);
 	mlxsw_sp->parsing.parsing_depth = MLXSW_SP_DEFAULT_PARSING_DEPTH;
 	mlxsw_sp->parsing.vxlan_udp_dport = MLXSW_SP_DEFAULT_VXLAN_UDP_DPORT;
 	mutex_init(&mlxsw_sp->parsing.lock);
@@ -2945,6 +2946,7 @@ static void mlxsw_sp_parsing_init(struct mlxsw_sp *mlxsw_sp)
 static void mlxsw_sp_parsing_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	mutex_destroy(&mlxsw_sp->parsing.lock);
+	WARN_ON_ONCE(refcount_read(&mlxsw_sp->parsing.parsing_depth_ref));
 }
 
 struct mlxsw_sp_ipv6_addr_node {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 09e32778b012d..4a73e2fe95ef9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -10381,11 +10381,23 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 					      old_inc_parsing_depth);
 	return err;
 }
+
+static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	bool old_inc_parsing_depth = mlxsw_sp->router->inc_parsing_depth;
+
+	mlxsw_sp_mp_hash_parsing_depth_adjust(mlxsw_sp, old_inc_parsing_depth,
+					      false);
+}
 #else
 static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
 {
 	return 0;
 }
+
+static void mlxsw_sp_mp_hash_fini(struct mlxsw_sp *mlxsw_sp)
+{
+}
 #endif
 
 static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
@@ -10615,6 +10627,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
 err_register_inetaddr_notifier:
 	mlxsw_core_flush_owq();
 err_dscp_init:
+	mlxsw_sp_mp_hash_fini(mlxsw_sp);
 err_mp_hash_init:
 	mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
@@ -10655,6 +10668,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
 	mlxsw_core_flush_owq();
+	mlxsw_sp_mp_hash_fini(mlxsw_sp);
 	mlxsw_sp_neigh_fini(mlxsw_sp);
 	mlxsw_sp_lb_rif_fini(mlxsw_sp);
 	mlxsw_sp_vrs_fini(mlxsw_sp);

From 13085e1b5cab8ad802904d72e6a6dae85ae0cd20 Mon Sep 17 00:00:00 2001
From: Wenjia Zhang <wenjia@linux.ibm.com>
Date: Mon, 13 Mar 2023 11:08:28 +0100
Subject: [PATCH 284/411] net/smc: fix deadlock triggered by
 cancel_delayed_work_syn()

The following LOCKDEP was detected:
		Workqueue: events smc_lgr_free_work [smc]
		WARNING: possible circular locking dependency detected
		6.1.0-20221027.rc2.git8.56bc5b569087.300.fc36.s390x+debug #1 Not tainted
		------------------------------------------------------
		kworker/3:0/176251 is trying to acquire lock:
		00000000f1467148 ((wq_completion)smc_tx_wq-00000000#2){+.+.}-{0:0},
			at: __flush_workqueue+0x7a/0x4f0
		but task is already holding lock:
		0000037fffe97dc8 ((work_completion)(&(&lgr->free_work)->work)){+.+.}-{0:0},
			at: process_one_work+0x232/0x730
		which lock already depends on the new lock.
		the existing dependency chain (in reverse order) is:
		-> #4 ((work_completion)(&(&lgr->free_work)->work)){+.+.}-{0:0}:
		       __lock_acquire+0x58e/0xbd8
		       lock_acquire.part.0+0xe2/0x248
		       lock_acquire+0xac/0x1c8
		       __flush_work+0x76/0xf0
		       __cancel_work_timer+0x170/0x220
		       __smc_lgr_terminate.part.0+0x34/0x1c0 [smc]
		       smc_connect_rdma+0x15e/0x418 [smc]
		       __smc_connect+0x234/0x480 [smc]
		       smc_connect+0x1d6/0x230 [smc]
		       __sys_connect+0x90/0xc0
		       __do_sys_socketcall+0x186/0x370
		       __do_syscall+0x1da/0x208
		       system_call+0x82/0xb0
		-> #3 (smc_client_lgr_pending){+.+.}-{3:3}:
		       __lock_acquire+0x58e/0xbd8
		       lock_acquire.part.0+0xe2/0x248
		       lock_acquire+0xac/0x1c8
		       __mutex_lock+0x96/0x8e8
		       mutex_lock_nested+0x32/0x40
		       smc_connect_rdma+0xa4/0x418 [smc]
		       __smc_connect+0x234/0x480 [smc]
		       smc_connect+0x1d6/0x230 [smc]
		       __sys_connect+0x90/0xc0
		       __do_sys_socketcall+0x186/0x370
		       __do_syscall+0x1da/0x208
		       system_call+0x82/0xb0
		-> #2 (sk_lock-AF_SMC){+.+.}-{0:0}:
		       __lock_acquire+0x58e/0xbd8
		       lock_acquire.part.0+0xe2/0x248
		       lock_acquire+0xac/0x1c8
		       lock_sock_nested+0x46/0xa8
		       smc_tx_work+0x34/0x50 [smc]
		       process_one_work+0x30c/0x730
		       worker_thread+0x62/0x420
		       kthread+0x138/0x150
		       __ret_from_fork+0x3c/0x58
		       ret_from_fork+0xa/0x40
		-> #1 ((work_completion)(&(&smc->conn.tx_work)->work)){+.+.}-{0:0}:
		       __lock_acquire+0x58e/0xbd8
		       lock_acquire.part.0+0xe2/0x248
		       lock_acquire+0xac/0x1c8
		       process_one_work+0x2bc/0x730
		       worker_thread+0x62/0x420
		       kthread+0x138/0x150
		       __ret_from_fork+0x3c/0x58
		       ret_from_fork+0xa/0x40
		-> #0 ((wq_completion)smc_tx_wq-00000000#2){+.+.}-{0:0}:
		       check_prev_add+0xd8/0xe88
		       validate_chain+0x70c/0xb20
		       __lock_acquire+0x58e/0xbd8
		       lock_acquire.part.0+0xe2/0x248
		       lock_acquire+0xac/0x1c8
		       __flush_workqueue+0xaa/0x4f0
		       drain_workqueue+0xaa/0x158
		       destroy_workqueue+0x44/0x2d8
		       smc_lgr_free+0x9e/0xf8 [smc]
		       process_one_work+0x30c/0x730
		       worker_thread+0x62/0x420
		       kthread+0x138/0x150
		       __ret_from_fork+0x3c/0x58
		       ret_from_fork+0xa/0x40
		other info that might help us debug this:
		Chain exists of:
		  (wq_completion)smc_tx_wq-00000000#2
	  	  --> smc_client_lgr_pending
		  --> (work_completion)(&(&lgr->free_work)->work)
		 Possible unsafe locking scenario:
		       CPU0                    CPU1
		       ----                    ----
		  lock((work_completion)(&(&lgr->free_work)->work));
		                   lock(smc_client_lgr_pending);
		                   lock((work_completion)
					(&(&lgr->free_work)->work));
		  lock((wq_completion)smc_tx_wq-00000000#2);
		 *** DEADLOCK ***
		2 locks held by kworker/3:0/176251:
		 #0: 0000000080183548
			((wq_completion)events){+.+.}-{0:0},
				at: process_one_work+0x232/0x730
		 #1: 0000037fffe97dc8
			((work_completion)
			 (&(&lgr->free_work)->work)){+.+.}-{0:0},
				at: process_one_work+0x232/0x730
		stack backtrace:
		CPU: 3 PID: 176251 Comm: kworker/3:0 Not tainted
		Hardware name: IBM 8561 T01 701 (z/VM 7.2.0)
		Call Trace:
		 [<000000002983c3e4>] dump_stack_lvl+0xac/0x100
		 [<0000000028b477ae>] check_noncircular+0x13e/0x160
		 [<0000000028b48808>] check_prev_add+0xd8/0xe88
		 [<0000000028b49cc4>] validate_chain+0x70c/0xb20
		 [<0000000028b4bd26>] __lock_acquire+0x58e/0xbd8
		 [<0000000028b4cf6a>] lock_acquire.part.0+0xe2/0x248
		 [<0000000028b4d17c>] lock_acquire+0xac/0x1c8
		 [<0000000028addaaa>] __flush_workqueue+0xaa/0x4f0
		 [<0000000028addf9a>] drain_workqueue+0xaa/0x158
		 [<0000000028ae303c>] destroy_workqueue+0x44/0x2d8
		 [<000003ff8029af26>] smc_lgr_free+0x9e/0xf8 [smc]
		 [<0000000028adf3d4>] process_one_work+0x30c/0x730
		 [<0000000028adf85a>] worker_thread+0x62/0x420
		 [<0000000028aeac50>] kthread+0x138/0x150
		 [<0000000028a63914>] __ret_from_fork+0x3c/0x58
		 [<00000000298503da>] ret_from_fork+0xa/0x40
		INFO: lockdep is turned off.
===================================================================

This deadlock occurs because cancel_delayed_work_sync() waits for
the work(&lgr->free_work) to finish, while the &lgr->free_work
waits for the work(lgr->tx_wq), which needs the sk_lock-AF_SMC, that
is already used under the mutex_lock.

The solution is to use cancel_delayed_work() instead, which kills
off a pending work.

Fixes: a52bcc919b14 ("net/smc: improve termination processing")
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Jan Karcher <jaka@linux.ibm.com>
Reviewed-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d52060b2680cf..454356771cda5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1464,7 +1464,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
 	if (lgr->terminating)
 		return;	/* lgr already terminating */
 	/* cancel free_work sync, will terminate when lgr->freeing is set */
-	cancel_delayed_work_sync(&lgr->free_work);
+	cancel_delayed_work(&lgr->free_work);
 	lgr->terminating = 1;
 
 	/* kill remaining link group connections */

From 9d876d3ef27fa84355597ad269939772192356d8 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <raspl@linux.ibm.com>
Date: Mon, 13 Mar 2023 11:08:29 +0100
Subject: [PATCH 285/411] net/smc: Fix device de-init sequence

CLC message initialization was not properly reversed in error handling path.

Reported-and-suggested-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Wenjia Zhang <wenjia@linux.ibm.com>
Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index ff6dd86bdc9f3..c6b4a62276f6d 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -3501,6 +3501,7 @@ static int __init smc_init(void)
 out_nl:
 	smc_nl_exit();
 out_ism:
+	smc_clc_exit();
 	smc_ism_exit();
 out_pernet_subsys_stat:
 	unregister_pernet_subsys(&smc_net_stat_ops);

From d8b228318935044dafe3a5bc07ee71a1f1424b8d Mon Sep 17 00:00:00 2001
From: Szymon Heidrich <szymon.heidrich@gmail.com>
Date: Mon, 13 Mar 2023 23:00:45 +0100
Subject: [PATCH 286/411] net: usb: smsc75xx: Limit packet length to skb->len

Packet length retrieved from skb data may be larger than
the actual socket buffer length (up to 9026 bytes). In such
case the cloned skb passed up the network stack will leak
kernel memory contents.

Fixes: d0cad871703b ("smsc75xx: SMSC LAN75xx USB gigabit ethernet adapter driver")
Signed-off-by: Szymon Heidrich <szymon.heidrich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/smsc75xx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 95de452ff4dad..db34f8d1d6051 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2212,7 +2212,8 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 				dev->net->stats.rx_frame_errors++;
 		} else {
 			/* MAX_SINGLE_PACKET_SIZE + 4(CRC) + 2(COE) + 4(Vlan) */
-			if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12))) {
+			if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12) ||
+				     size > skb->len)) {
 				netif_dbg(dev, rx_err, dev->net,
 					  "size err rx_cmd_a=0x%08x\n",
 					  rx_cmd_a);

From 611e2dabb4b3243d176739fd6a5a34d007fa3f86 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 14 Mar 2023 00:34:26 +0000
Subject: [PATCH 287/411] net: ethernet: mtk_eth_soc: reset PCS state
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reset the internal PCS state machine when changing interface mode.
This prevents confusing the state machine when changing interface
modes, e.g. from SGMII to 2500Base-X or vice-versa.

Fixes: 7e538372694b ("net: ethernet: mediatek: Re-add support SGMII")
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 4 ++++
 drivers/net/ethernet/mediatek/mtk_sgmii.c   | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index b65de174c3d9b..084a6badef6d9 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -542,6 +542,10 @@
 #define SGMII_SEND_AN_ERROR_EN		BIT(11)
 #define SGMII_IF_MODE_MASK		GENMASK(5, 1)
 
+/* Register to reset SGMII design */
+#define SGMII_RESERVED_0	0x34
+#define SGMII_SW_RESET		BIT(0)
+
 /* Register to set SGMII speed, ANA RG_ Control Signals III*/
 #define SGMSYS_ANA_RG_CS3	0x2028
 #define RG_PHY_SPEED_MASK	(BIT(2) | BIT(3))
diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
index bb00de1003ac4..612f65bb03454 100644
--- a/drivers/net/ethernet/mediatek/mtk_sgmii.c
+++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
@@ -88,6 +88,10 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
 		regmap_update_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL,
 				   SGMII_PHYA_PWD, SGMII_PHYA_PWD);
 
+		/* Reset SGMII PCS state */
+		regmap_update_bits(mpcs->regmap, SGMII_RESERVED_0,
+				   SGMII_SW_RESET, SGMII_SW_RESET);
+
 		if (interface == PHY_INTERFACE_MODE_2500BASEX)
 			rgc3 = RG_PHY_SPEED_3_125G;
 		else

From 6e933a804c7db8be64f367f33e63cd7dcc302ebb Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Tue, 14 Mar 2023 00:34:45 +0000
Subject: [PATCH 288/411] net: ethernet: mtk_eth_soc: only write values if
 needed
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only restart auto-negotiation and write link timer if actually
necessary. This prevents losing the link in case of minor
changes.

Fixes: 7e538372694b ("net: ethernet: mediatek: Re-add support SGMII")
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Tested-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_sgmii.c | 24 +++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_sgmii.c b/drivers/net/ethernet/mediatek/mtk_sgmii.c
index 612f65bb03454..83976dc868875 100644
--- a/drivers/net/ethernet/mediatek/mtk_sgmii.c
+++ b/drivers/net/ethernet/mediatek/mtk_sgmii.c
@@ -38,20 +38,16 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
 			  const unsigned long *advertising,
 			  bool permit_pause_to_mac)
 {
+	bool mode_changed = false, changed, use_an;
 	struct mtk_pcs *mpcs = pcs_to_mtk_pcs(pcs);
 	unsigned int rgc3, sgm_mode, bmcr;
 	int advertise, link_timer;
-	bool changed, use_an;
 
 	advertise = phylink_mii_c22_pcs_encode_advertisement(interface,
 							     advertising);
 	if (advertise < 0)
 		return advertise;
 
-	link_timer = phylink_get_link_timer_ns(interface);
-	if (link_timer < 0)
-		return link_timer;
-
 	/* Clearing IF_MODE_BIT0 switches the PCS to BASE-X mode, and
 	 * we assume that fixes it's speed at bitrate = line rate (in
 	 * other words, 1000Mbps or 2500Mbps).
@@ -77,13 +73,16 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
 	}
 
 	if (use_an) {
-		/* FIXME: Do we need to set AN_RESTART here? */
-		bmcr = SGMII_AN_RESTART | SGMII_AN_ENABLE;
+		bmcr = SGMII_AN_ENABLE;
 	} else {
 		bmcr = 0;
 	}
 
 	if (mpcs->interface != interface) {
+		link_timer = phylink_get_link_timer_ns(interface);
+		if (link_timer < 0)
+			return link_timer;
+
 		/* PHYA power down */
 		regmap_update_bits(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL,
 				   SGMII_PHYA_PWD, SGMII_PHYA_PWD);
@@ -101,16 +100,17 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
 		regmap_update_bits(mpcs->regmap, mpcs->ana_rgc3,
 				   RG_PHY_SPEED_3_125G, rgc3);
 
+		/* Setup the link timer */
+		regmap_write(mpcs->regmap, SGMSYS_PCS_LINK_TIMER, link_timer / 2 / 8);
+
 		mpcs->interface = interface;
+		mode_changed = true;
 	}
 
 	/* Update the advertisement, noting whether it has changed */
 	regmap_update_bits_check(mpcs->regmap, SGMSYS_PCS_ADVERTISE,
 				 SGMII_ADVERTISE, advertise, &changed);
 
-	/* Setup the link timer and QPHY power up inside SGMIISYS */
-	regmap_write(mpcs->regmap, SGMSYS_PCS_LINK_TIMER, link_timer / 2 / 8);
-
 	/* Update the sgmsys mode register */
 	regmap_update_bits(mpcs->regmap, SGMSYS_SGMII_MODE,
 			   SGMII_REMOTE_FAULT_DIS | SGMII_SPEED_DUPLEX_AN |
@@ -118,7 +118,7 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
 
 	/* Update the BMCR */
 	regmap_update_bits(mpcs->regmap, SGMSYS_PCS_CONTROL_1,
-			   SGMII_AN_RESTART | SGMII_AN_ENABLE, bmcr);
+			   SGMII_AN_ENABLE, bmcr);
 
 	/* Release PHYA power down state
 	 * Only removing bit SGMII_PHYA_PWD isn't enough.
@@ -132,7 +132,7 @@ static int mtk_pcs_config(struct phylink_pcs *pcs, unsigned int mode,
 	usleep_range(50, 100);
 	regmap_write(mpcs->regmap, SGMSYS_QPHY_PWR_STATE_CTRL, 0);
 
-	return changed;
+	return changed || mode_changed;
 }
 
 static void mtk_pcs_restart_an(struct phylink_pcs *pcs)

From 0d3c9333d976af41d7dbc6bf4d9d2e95fbdf9c89 Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Tue, 14 Mar 2023 13:50:35 +0800
Subject: [PATCH 289/411] drm/bridge: Fix returned array size name for
 atomic_get_input_bus_fmts kdoc

The returned array size for input formats is set through
atomic_get_input_bus_fmts()'s 'num_input_fmts' argument, so use
'num_input_fmts' to represent the array size in the function's kdoc,
not 'num_output_fmts'.

Fixes: 91ea83306bfa ("drm/bridge: Fix the bridge kernel doc")
Fixes: f32df58acc68 ("drm/bridge: Add the necessary bits to support bus format negotiation")
Signed-off-by: Liu Ying <victor.liu@nxp.com>
Reviewed-by: Robert Foss <rfoss@kernel.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230314055035.3731179-1-victor.liu@nxp.com
---
 include/drm/drm_bridge.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/drm/drm_bridge.h b/include/drm/drm_bridge.h
index 42f86327b40a7..bf964cdfb3300 100644
--- a/include/drm/drm_bridge.h
+++ b/include/drm/drm_bridge.h
@@ -423,11 +423,11 @@ struct drm_bridge_funcs {
 	 *
 	 * The returned array must be allocated with kmalloc() and will be
 	 * freed by the caller. If the allocation fails, NULL should be
-	 * returned. num_output_fmts must be set to the returned array size.
+	 * returned. num_input_fmts must be set to the returned array size.
 	 * Formats listed in the returned array should be listed in decreasing
 	 * preference order (the core will try all formats until it finds one
 	 * that works). When the format is not supported NULL should be
-	 * returned and num_output_fmts should be set to 0.
+	 * returned and num_input_fmts should be set to 0.
 	 *
 	 * This method is called on all elements of the bridge chain as part of
 	 * the bus format negotiation process that happens in

From 4028cbf867f70a3c599c9b0c9509334c56ed97d7 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 9 Mar 2023 16:24:46 +0100
Subject: [PATCH 290/411] drm/meson: dw-hdmi: Fix devm_regulator_*get_enable*()
 conversion again
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

devm_regulator_get_enable_optional() returns -ENODEV if requested
optional regulator is not present. Adjust code for that, because in the
67d0a30128c9 I've incorrectly assumed that it also returns 0 when
regulator is not present.

Reported-by: Ricardo Cañuelo <ricardo.canuelo@collabora.com>
Fixes: 67d0a30128c9 ("drm/meson: dw-hdmi: Fix devm_regulator_*get_enable*() conversion")
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://patchwork.freedesktop.org/patch/msgid/20230309152446.104913-1-m.szyprowski@samsung.com
---
 drivers/gpu/drm/meson/meson_dw_hdmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index 534621a13a34d..3d046878ce6cb 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -718,7 +718,7 @@ static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
 	dw_plat_data = &meson_dw_hdmi->dw_plat_data;
 
 	ret = devm_regulator_get_enable_optional(dev, "hdmi");
-	if (ret < 0)
+	if (ret < 0 && ret != -ENODEV)
 		return ret;
 
 	meson_dw_hdmi->hdmitx_apb = devm_reset_control_get_exclusive(dev,

From f2c7e3562b4c4f1699acc1538ebf3e75f5cced35 Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Fri, 10 Mar 2023 16:08:34 +1100
Subject: [PATCH 291/411] powerpc/mm: Fix false detection of read faults
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To support detection of read faults with Radix execute-only memory, the
vma_is_accessible() check in access_error() (which checks for PROT_NONE)
was replaced with a check to see if VM_READ was missing, and if so,
returns true to assert the fault was caused by a bad read.

This is incorrect, as it ignores that both VM_WRITE and VM_EXEC imply
read on powerpc, as defined in protection_map[].  This causes mappings
containing VM_WRITE or VM_EXEC without VM_READ to misreport the cause of
page faults, since the MMU is still allowing reads.

Correct this by restoring the original vma_is_accessible() check for
PROT_NONE mappings, and adding a separate check for Radix PROT_EXEC-only
mappings.

Fixes: 395cac7752b9 ("powerpc/mm: Support execute-only memory on the Radix MMU")
Reported-by: Michal Suchánek <msuchanek@suse.de>
Link: https://lore.kernel.org/r/20230308152702.GR19419@kitsune.suse.cz
Tested-by: Benjamin Gray <bgray@linux.ibm.com>
Signed-off-by: Russell Currey <ruscur@russell.cc>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://msgid.link/20230310050834.63105-1-ruscur@russell.cc
---
 arch/powerpc/mm/fault.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 2bef19cc1b98c..af46aa88422bf 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -271,11 +271,16 @@ static bool access_error(bool is_write, bool is_exec, struct vm_area_struct *vma
 	}
 
 	/*
-	 * Check for a read fault.  This could be caused by a read on an
-	 * inaccessible page (i.e. PROT_NONE), or a Radix MMU execute-only page.
+	 * VM_READ, VM_WRITE and VM_EXEC all imply read permissions, as
+	 * defined in protection_map[].  Read faults can only be caused by
+	 * a PROT_NONE mapping, or with a PROT_EXEC-only mapping on Radix.
 	 */
-	if (unlikely(!(vma->vm_flags & VM_READ)))
+	if (unlikely(!vma_is_accessible(vma)))
 		return true;
+
+	if (unlikely(radix_enabled() && ((vma->vm_flags & VM_ACCESS_FLAGS) == VM_EXEC)))
+		return true;
+
 	/*
 	 * We should ideally do the vma pkey access check here. But in the
 	 * fault path, handle_mm_fault() also does the same check. To avoid

From 63f886597085f346276e3b3c8974de0100d65f32 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Tue, 14 Mar 2023 13:11:05 +0900
Subject: [PATCH 292/411] block: null_blk: Fix handling of fake timeout request

When injecting a fake timeout into the null_blk driver using
fail_io_timeout, the request timeout handler does not execute
blk_mq_complete_request(), so the complete callback is never executed
for a timedout request.

The null_blk driver also has a driver-specific fake timeout mechanism
which does not have this problem. Fix the problem with fail_io_timeout
by using the same meachanism as null_blk internal timeout feature, using
the fake_timeout field of null_blk commands.

Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
Fixes: de3510e52b0a ("null_blk: fix command timeout completion handling")
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Link: https://lore.kernel.org/r/20230314041106.19173-2-damien.lemoal@opensource.wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 4c601ca9552a0..7d95ad203c977 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1413,8 +1413,7 @@ static inline void nullb_complete_cmd(struct nullb_cmd *cmd)
 	case NULL_IRQ_SOFTIRQ:
 		switch (cmd->nq->dev->queue_mode) {
 		case NULL_Q_MQ:
-			if (likely(!blk_should_fake_timeout(cmd->rq->q)))
-				blk_mq_complete_request(cmd->rq);
+			blk_mq_complete_request(cmd->rq);
 			break;
 		case NULL_Q_BIO:
 			/*
@@ -1675,7 +1674,8 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 	cmd->rq = bd->rq;
 	cmd->error = BLK_STS_OK;
 	cmd->nq = nq;
-	cmd->fake_timeout = should_timeout_request(bd->rq);
+	cmd->fake_timeout = should_timeout_request(bd->rq) ||
+		blk_should_fake_timeout(bd->rq->q);
 
 	blk_mq_start_request(bd->rq);
 

From b6402014cab0481bdfd1ffff3e1dad714e8e1205 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Tue, 14 Mar 2023 13:11:06 +0900
Subject: [PATCH 293/411] block: null_blk: cleanup null_queue_rq()

Use a local struct request pointer variable to avoid having to
dereference struct blk_mq_queue_data multiple times. While at it, also
fix the function argument indentation and remove a useless "else" after
a return.

Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Reviewed-by: Pankaj Raghav <p.raghav@samsung.com>
Link: https://lore.kernel.org/r/20230314041106.19173-2-damien.lemoal@opensource.wdc.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/null_blk/main.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 7d95ad203c977..9e6b032c8ecc2 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -1657,12 +1657,13 @@ static enum blk_eh_timer_return null_timeout_rq(struct request *rq)
 }
 
 static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
-			 const struct blk_mq_queue_data *bd)
+				  const struct blk_mq_queue_data *bd)
 {
-	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
+	struct request *rq = bd->rq;
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 	struct nullb_queue *nq = hctx->driver_data;
-	sector_t nr_sectors = blk_rq_sectors(bd->rq);
-	sector_t sector = blk_rq_pos(bd->rq);
+	sector_t nr_sectors = blk_rq_sectors(rq);
+	sector_t sector = blk_rq_pos(rq);
 	const bool is_poll = hctx->type == HCTX_TYPE_POLL;
 
 	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
@@ -1671,15 +1672,15 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 		hrtimer_init(&cmd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		cmd->timer.function = null_cmd_timer_expired;
 	}
-	cmd->rq = bd->rq;
+	cmd->rq = rq;
 	cmd->error = BLK_STS_OK;
 	cmd->nq = nq;
-	cmd->fake_timeout = should_timeout_request(bd->rq) ||
-		blk_should_fake_timeout(bd->rq->q);
+	cmd->fake_timeout = should_timeout_request(rq) ||
+		blk_should_fake_timeout(rq->q);
 
-	blk_mq_start_request(bd->rq);
+	blk_mq_start_request(rq);
 
-	if (should_requeue_request(bd->rq)) {
+	if (should_requeue_request(rq)) {
 		/*
 		 * Alternate between hitting the core BUSY path, and the
 		 * driver driven requeue path
@@ -1687,22 +1688,20 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
 		nq->requeue_selection++;
 		if (nq->requeue_selection & 1)
 			return BLK_STS_RESOURCE;
-		else {
-			blk_mq_requeue_request(bd->rq, true);
-			return BLK_STS_OK;
-		}
+		blk_mq_requeue_request(rq, true);
+		return BLK_STS_OK;
 	}
 
 	if (is_poll) {
 		spin_lock(&nq->poll_lock);
-		list_add_tail(&bd->rq->queuelist, &nq->poll_list);
+		list_add_tail(&rq->queuelist, &nq->poll_list);
 		spin_unlock(&nq->poll_lock);
 		return BLK_STS_OK;
 	}
 	if (cmd->fake_timeout)
 		return BLK_STS_OK;
 
-	return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
+	return null_handle_cmd(cmd, sector, nr_sectors, req_op(rq));
 }
 
 static void cleanup_queue(struct nullb_queue *nq)

From a5fc1441af7719e93dc7a638a960befb694ade89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Koutn=C3=BD?= <mkoutny@suse.com>
Date: Tue, 14 Mar 2023 19:33:32 +0100
Subject: [PATCH 294/411] io_uring/sqpoll: Do not set PF_NO_SETAFFINITY on
 sqpoll threads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Users may specify a CPU where the sqpoll thread would run. This may
conflict with cpuset operations because of strict PF_NO_SETAFFINITY
requirement. That flag is unnecessary for polling "kernel" threads, see
the reasoning in commit 01e68ce08a30 ("io_uring/io-wq: stop setting
PF_NO_SETAFFINITY on io-wq workers"). Drop the flag on poll threads too.

Fixes: 01e68ce08a30 ("io_uring/io-wq: stop setting PF_NO_SETAFFINITY on io-wq workers")
Link: https://lore.kernel.org/all/20230314162559.pnyxdllzgw7jozgx@blackpad/
Signed-off-by: Michal Koutný <mkoutny@suse.com>
Link: https://lore.kernel.org/r/20230314183332.25834-1-mkoutny@suse.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/sqpoll.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c
index 0119d3f1a5569..9db4bc1f521a3 100644
--- a/io_uring/sqpoll.c
+++ b/io_uring/sqpoll.c
@@ -233,7 +233,6 @@ static int io_sq_thread(void *data)
 		set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
 	else
 		set_cpus_allowed_ptr(current, cpu_online_mask);
-	current->flags |= PF_NO_SETAFFINITY;
 
 	mutex_lock(&sqd->lock);
 	while (1) {

From 54686b611eb054a5c84976e6f6e03788fa8e6a38 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 8 Mar 2023 15:41:32 +0100
Subject: [PATCH 295/411] MAINTAINERS: repair malformed T: entries in NVM
 EXPRESS DRIVERS

The T: entries shall be composed of a SCM tree type (git, hg, quilt, stgit
or topgit) and location.

Add the SCM tree type to the T: entry, and reorder the file entries in
alphabetical order.

Fixes: b508fc354f6d ("nvme: update maintainers information")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 MAINTAINERS | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 8d5bc223f3053..1296426b38791 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14872,12 +14872,12 @@ M:	Sagi Grimberg <sagi@grimberg.me>
 L:	linux-nvme@lists.infradead.org
 S:	Supported
 W:	http://git.infradead.org/nvme.git
-T:	git://git.infradead.org/nvme.git
+T:	git git://git.infradead.org/nvme.git
 F:	Documentation/nvme/
-F:	drivers/nvme/host/
 F:	drivers/nvme/common/
-F:	include/linux/nvme.h
+F:	drivers/nvme/host/
 F:	include/linux/nvme-*.h
+F:	include/linux/nvme.h
 F:	include/uapi/linux/nvme_ioctl.h
 
 NVM EXPRESS FABRICS AUTHENTICATION
@@ -14912,7 +14912,7 @@ M:	Chaitanya Kulkarni <kch@nvidia.com>
 L:	linux-nvme@lists.infradead.org
 S:	Supported
 W:	http://git.infradead.org/nvme.git
-T:	git://git.infradead.org/nvme.git
+T:	git git://git.infradead.org/nvme.git
 F:	drivers/nvme/target/
 
 NVMEM FRAMEWORK

From 37f0dc2ec78af0c3f35dd05578763de059f6fe77 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Sat, 4 Mar 2023 07:13:45 +0800
Subject: [PATCH 296/411] nvme: fix handling single range discard request

When investigating one customer report on warning in nvme_setup_discard,
we observed the controller(nvme/tcp) actually exposes
queue_max_discard_segments(req->q) == 1.

Obviously the current code can't handle this situation, since contiguity
merge like normal RW request is taken.

Fix the issue by building range from request sector/nr_sectors directly.

Fixes: b35ba01ea697 ("nvme: support ranged discard requests")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2730b116dc68..d4be525f8100a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -781,16 +781,26 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req,
 		range = page_address(ns->ctrl->discard_page);
 	}
 
-	__rq_for_each_bio(bio, req) {
-		u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
-		u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
-
-		if (n < segments) {
-			range[n].cattr = cpu_to_le32(0);
-			range[n].nlb = cpu_to_le32(nlb);
-			range[n].slba = cpu_to_le64(slba);
+	if (queue_max_discard_segments(req->q) == 1) {
+		u64 slba = nvme_sect_to_lba(ns, blk_rq_pos(req));
+		u32 nlb = blk_rq_sectors(req) >> (ns->lba_shift - 9);
+
+		range[0].cattr = cpu_to_le32(0);
+		range[0].nlb = cpu_to_le32(nlb);
+		range[0].slba = cpu_to_le64(slba);
+		n = 1;
+	} else {
+		__rq_for_each_bio(bio, req) {
+			u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
+			u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
+
+			if (n < segments) {
+				range[n].cattr = cpu_to_le32(0);
+				range[n].nlb = cpu_to_le32(nlb);
+				range[n].slba = cpu_to_le64(slba);
+			}
+			n++;
 		}
-		n++;
 	}
 
 	if (WARN_ON_ONCE(n != segments)) {

From a61d265533b7fe0026a02a49916aa564ffe38e4c Mon Sep 17 00:00:00 2001
From: Irvin Cote <irvincoteg@gmail.com>
Date: Wed, 8 Mar 2023 18:05:08 -0300
Subject: [PATCH 297/411] nvme-pci: fixing memory leak in probe teardown path

In case the nvme_probe teardown path is triggered the ctrl ref count does
not reach 0 thus creating a memory leak upon failure of nvme_probe.

Signed-off-by: Irvin Cote <irvincoteg@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5b95c94ee40f2..e77a8a873b1ae 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3073,6 +3073,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	nvme_dev_unmap(dev);
 out_uninit_ctrl:
 	nvme_uninit_ctrl(&dev->ctrl);
+	nvme_put_ctrl(&dev->ctrl);
 	return result;
 }
 

From 9630d80655bfe7e62e4aff2889dc4eae7ceeb887 Mon Sep 17 00:00:00 2001
From: Elmer Miroslav Mosher Golovin <miroslav@mishamosher.com>
Date: Wed, 8 Mar 2023 19:19:29 +0300
Subject: [PATCH 298/411] nvme-pci: add NVME_QUIRK_BOGUS_NID for Netac NV3000

Added a quirk to fix the Netac NV3000 SSD reporting duplicate NGUIDs.

Cc: <stable@vger.kernel.org>
Signed-off-by: Elmer Miroslav Mosher Golovin <miroslav@mishamosher.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e77a8a873b1ae..8a536d5300a42 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3416,6 +3416,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
 	{ PCI_DEVICE(0x2646, 0x501E),   /* KINGSTON OM3PGP4xxxxQ OS21011 NVMe SSD */
 		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+	{ PCI_DEVICE(0x1f40, 0x1202),   /* Netac Technologies Co. NV3000 NVMe SSD */
+		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1f40, 0x5236),   /* Netac Technologies Co. NV7000 NVMe SSD */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1e4B, 0x1001),   /* MAXIO MAP1001 */

From b65d44fa0fe072c91bf41cd8756baa2b4c77eff2 Mon Sep 17 00:00:00 2001
From: Philipp Geulen <p.geulen@js-elektronik.de>
Date: Mon, 13 Mar 2023 11:11:50 +0100
Subject: [PATCH 299/411] nvme-pci: add NVME_QUIRK_BOGUS_NID for Lexar NM620

Added a quirk to fix Lexar NM620 1TB SSD reporting duplicate NGUIDs.

Signed-off-by: Philipp Geulen <p.geulen@js-elektronik.de>
Reviewed-by: Chaitanya Kulkarni <kkch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8a536d5300a42..b615906263f37 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3438,6 +3438,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1d97, 0x2263), /* Lexar NM610 */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
+	{ PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */
+		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
 		.driver_data = NVME_QUIRK_BOGUS_NID, },
 	{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),

From a3406352c54fbc476f4f6b98159c3ea1c7dbb6fc Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 13 Mar 2023 10:56:22 +0200
Subject: [PATCH 300/411] nvme-tcp: fix opcode reporting in the timeout handler

For non in-capsule writes we reuse the request pdu space for a h2cdata
pdu in order to avoid over allocating space (either preallocate or
dynamically upon receving an r2t pdu). However if the request times out
the core expects to find the opcode in the start of the request, which
we override.

In order to prevent that, without sacrificing additional 24 bytes per
request, we just use the tail of the command pdu space instead (last
24 bytes from the 72 bytes command pdu). That should make the command
opcode always available, and we get away from allocating more space.

If in the future we would need the last 24 bytes of the nvme command
available we would need to allocate a dedicated space for it in the
request, but until then we can avoid doing so.

Reported-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kkch@nvidia.com>
Tested-by: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7723a49895244..2e174fad57d77 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -208,6 +208,18 @@ static inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
 	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
 }
 
+static inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req)
+{
+	return req->pdu;
+}
+
+static inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req)
+{
+	/* use the pdu space in the back for the data pdu */
+	return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) -
+		sizeof(struct nvme_tcp_data_pdu);
+}
+
 static inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
 {
 	if (nvme_is_fabrics(req->req.cmd))
@@ -614,7 +626,7 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
 
 static void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
 {
-	struct nvme_tcp_data_pdu *data = req->pdu;
+	struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req);
 	struct nvme_tcp_queue *queue = req->queue;
 	struct request *rq = blk_mq_rq_from_pdu(req);
 	u32 h2cdata_sent = req->pdu_len;
@@ -1038,7 +1050,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	bool inline_data = nvme_tcp_has_inline_data(req);
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) + hdgst - req->offset;
@@ -1077,7 +1089,7 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
 static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
 {
 	struct nvme_tcp_queue *queue = req->queue;
-	struct nvme_tcp_data_pdu *pdu = req->pdu;
+	struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
 	u8 hdgst = nvme_tcp_hdgst_len(queue);
 	int len = sizeof(*pdu) - req->offset + hdgst;
 	int ret;
@@ -2284,7 +2296,7 @@ static enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
 	int qid = nvme_tcp_queue_id(req->queue);
 
@@ -2323,7 +2335,7 @@ static blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
 			struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	struct nvme_command *c = &pdu->cmd;
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
@@ -2343,7 +2355,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
 		struct request *rq)
 {
 	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_tcp_cmd_pdu *pdu = req->pdu;
+	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
 	struct nvme_tcp_queue *queue = req->queue;
 	u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
 	blk_status_t ret;

From 7e87965d3807ab1f518ef2365f91d5ba6b0c5abe Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 13 Mar 2023 10:56:23 +0200
Subject: [PATCH 301/411] nvme-tcp: add nvme-tcp pdu size build protection

Make sure that we don't somehow mess up the wire structures in the spec.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <kkch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2e174fad57d77..42c0598c31f2b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2694,6 +2694,15 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
+	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
+
 	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
 			WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
 	if (!nvme_tcp_wq)

From 8e19b87cfce2de2125f11363d7dea3d08f16ccae Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Thu, 9 Mar 2023 23:31:18 +0900
Subject: [PATCH 302/411] nvme-trace: show more opcode names

We have more commands to show in the trace. Sync up.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 include/linux/nvme.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 4fad4aa245fb0..779507ac750b8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -812,6 +812,7 @@ enum nvme_opcode {
 		nvme_opcode_name(nvme_cmd_compare),		\
 		nvme_opcode_name(nvme_cmd_write_zeroes),	\
 		nvme_opcode_name(nvme_cmd_dsm),			\
+		nvme_opcode_name(nvme_cmd_verify),		\
 		nvme_opcode_name(nvme_cmd_resv_register),	\
 		nvme_opcode_name(nvme_cmd_resv_report),		\
 		nvme_opcode_name(nvme_cmd_resv_acquire),	\
@@ -1144,10 +1145,14 @@ enum nvme_admin_opcode {
 		nvme_admin_opcode_name(nvme_admin_ns_mgmt),		\
 		nvme_admin_opcode_name(nvme_admin_activate_fw),		\
 		nvme_admin_opcode_name(nvme_admin_download_fw),		\
+		nvme_admin_opcode_name(nvme_admin_dev_self_test),	\
 		nvme_admin_opcode_name(nvme_admin_ns_attach),		\
 		nvme_admin_opcode_name(nvme_admin_keep_alive),		\
 		nvme_admin_opcode_name(nvme_admin_directive_send),	\
 		nvme_admin_opcode_name(nvme_admin_directive_recv),	\
+		nvme_admin_opcode_name(nvme_admin_virtual_mgmt),	\
+		nvme_admin_opcode_name(nvme_admin_nvme_mi_send),	\
+		nvme_admin_opcode_name(nvme_admin_nvme_mi_recv),	\
 		nvme_admin_opcode_name(nvme_admin_dbbuf),		\
 		nvme_admin_opcode_name(nvme_admin_format_nvm),		\
 		nvme_admin_opcode_name(nvme_admin_security_send),	\

From 6173a77b7e9d3e202bdb9897b23f2a8afe7bf286 Mon Sep 17 00:00:00 2001
From: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Date: Mon, 6 Mar 2023 10:13:13 +0900
Subject: [PATCH 303/411] nvmet: avoid potential UAF in nvmet_req_complete()

An nvme target ->queue_response() operation implementation may free the
request passed as argument. Such implementation potentially could result
in a use after free of the request pointer when percpu_ref_put() is
called in nvmet_req_complete().

Avoid such problem by using a local variable to save the sq pointer
before calling __nvmet_req_complete(), thus avoiding dereferencing the
req pointer after that function call.

Fixes: a07b4970f464 ("nvmet: add a generic NVMe target")
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index f66ed13d7c11d..3935165048e74 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -756,8 +756,10 @@ static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 
 void nvmet_req_complete(struct nvmet_req *req, u16 status)
 {
+	struct nvmet_sq *sq = req->sq;
+
 	__nvmet_req_complete(req, status);
-	percpu_ref_put(&req->sq->ref);
+	percpu_ref_put(&sq->ref);
 }
 EXPORT_SYMBOL_GPL(nvmet_req_complete);
 

From 6030363199e3a6341afb467ddddbed56640cbf6a Mon Sep 17 00:00:00 2001
From: Liang He <windhl@126.com>
Date: Wed, 15 Mar 2023 14:20:32 +0800
Subject: [PATCH 304/411] block: sunvdc: add check for mdesc_grab() returning
 NULL

In vdc_port_probe(), we should check the return value of mdesc_grab() as
it may return NULL, which can cause potential NPD bug.

Fixes: 43fdf27470b2 ("[SPARC64]: Abstract out mdesc accesses for better MD update handling.")
Signed-off-by: Liang He <windhl@126.com>
Link: https://lore.kernel.org/r/20230315062032.1741692-1-windhl@126.com
[axboe: style cleanup]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/sunvdc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
index fb855da971ee7..9fa821fa76b07 100644
--- a/drivers/block/sunvdc.c
+++ b/drivers/block/sunvdc.c
@@ -972,6 +972,8 @@ static int vdc_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 	print_version();
 
 	hp = mdesc_grab();
+	if (!hp)
+		return -ENODEV;
 
 	err = -ENODEV;
 	if ((vdev->dev_no << PARTITION_SHIFT) & ~(u64)MINORMASK) {

From 5f27571382ca42daa3e3d40d1b252bf18c2b61d2 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Thu, 23 Feb 2023 17:12:26 +0800
Subject: [PATCH 305/411] block: count 'ios' and 'sectors' when io is done for
 bio-based device

While using iostat for raid, I observed very strange 'await'
occasionally, and turns out it's due to that 'ios' and 'sectors' is
counted in bdev_start_io_acct(), while 'nsecs' is counted in
bdev_end_io_acct(). I'm not sure why they are ccounted like that
but I think this behaviour is obviously wrong because user will get
wrong disk stats.

Fix the problem by counting 'ios' and 'sectors' when io is done, like
what rq-based device does.

Fixes: 394ffa503bc4 ("blk: introduce generic io stat accounting help function")
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20230223091226.1135678-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c              | 16 ++++++----------
 drivers/md/dm.c               |  6 +++---
 drivers/nvme/host/multipath.c |  8 ++++----
 include/linux/blkdev.h        |  5 ++---
 4 files changed, 15 insertions(+), 20 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 9e5e0277a4d95..42926e6cb83c8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -959,16 +959,11 @@ void update_io_ticks(struct block_device *part, unsigned long now, bool end)
 	}
 }
 
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
 				 unsigned long start_time)
 {
-	const int sgrp = op_stat_group(op);
-
 	part_stat_lock();
 	update_io_ticks(bdev, start_time, false);
-	part_stat_inc(bdev, ios[sgrp]);
-	part_stat_add(bdev, sectors[sgrp], sectors);
 	part_stat_local_inc(bdev, in_flight[op_is_write(op)]);
 	part_stat_unlock();
 
@@ -984,13 +979,12 @@ EXPORT_SYMBOL(bdev_start_io_acct);
  */
 unsigned long bio_start_io_acct(struct bio *bio)
 {
-	return bdev_start_io_acct(bio->bi_bdev, bio_sectors(bio),
-				  bio_op(bio), jiffies);
+	return bdev_start_io_acct(bio->bi_bdev, bio_op(bio), jiffies);
 }
 EXPORT_SYMBOL_GPL(bio_start_io_acct);
 
 void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-		      unsigned long start_time)
+		      unsigned int sectors, unsigned long start_time)
 {
 	const int sgrp = op_stat_group(op);
 	unsigned long now = READ_ONCE(jiffies);
@@ -998,6 +992,8 @@ void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
 
 	part_stat_lock();
 	update_io_ticks(bdev, now, true);
+	part_stat_inc(bdev, ios[sgrp]);
+	part_stat_add(bdev, sectors[sgrp], sectors);
 	part_stat_add(bdev, nsecs[sgrp], jiffies_to_nsecs(duration));
 	part_stat_local_dec(bdev, in_flight[op_is_write(op)]);
 	part_stat_unlock();
@@ -1007,7 +1003,7 @@ EXPORT_SYMBOL(bdev_end_io_acct);
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,
 			      struct block_device *orig_bdev)
 {
-	bdev_end_io_acct(orig_bdev, bio_op(bio), start_time);
+	bdev_end_io_acct(orig_bdev, bio_op(bio), bio_sectors(bio), start_time);
 }
 EXPORT_SYMBOL_GPL(bio_end_io_acct_remapped);
 
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index eace45a18d456..f5cc330bb549e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -512,10 +512,10 @@ static void dm_io_acct(struct dm_io *io, bool end)
 		sectors = io->sectors;
 
 	if (!end)
-		bdev_start_io_acct(bio->bi_bdev, sectors, bio_op(bio),
-				   start_time);
+		bdev_start_io_acct(bio->bi_bdev, bio_op(bio), start_time);
 	else
-		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), start_time);
+		bdev_end_io_acct(bio->bi_bdev, bio_op(bio), sectors,
+				 start_time);
 
 	if (static_branch_unlikely(&stats_enabled) &&
 	    unlikely(dm_stats_used(&md->stats))) {
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index fc39d01e7b63b..9171452e2f6d4 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -123,9 +123,8 @@ void nvme_mpath_start_request(struct request *rq)
 		return;
 
 	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
-	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0,
-					blk_rq_bytes(rq) >> SECTOR_SHIFT,
-					req_op(rq), jiffies);
+	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
+						      jiffies);
 }
 EXPORT_SYMBOL_GPL(nvme_mpath_start_request);
 
@@ -136,7 +135,8 @@ void nvme_mpath_end_request(struct request *rq)
 	if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
 		return;
 	bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
-		nvme_req(rq)->start_time);
+			 blk_rq_bytes(rq) >> SECTOR_SHIFT,
+			 nvme_req(rq)->start_time);
 }
 
 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1aee08f8c181..941304f17492f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1446,11 +1446,10 @@ static inline void blk_wake_io_task(struct task_struct *waiter)
 		wake_up_process(waiter);
 }
 
-unsigned long bdev_start_io_acct(struct block_device *bdev,
-				 unsigned int sectors, enum req_op op,
+unsigned long bdev_start_io_acct(struct block_device *bdev, enum req_op op,
 				 unsigned long start_time);
 void bdev_end_io_acct(struct block_device *bdev, enum req_op op,
-		unsigned long start_time);
+		      unsigned int sectors, unsigned long start_time);
 
 unsigned long bio_start_io_acct(struct bio *bio);
 void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time,

From 6c0f5898836c05c6d850a750ed7940ba29e4e6c5 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Mon, 13 Mar 2023 13:29:17 -0700
Subject: [PATCH 306/411] md: select BLOCK_LEGACY_AUTOLOAD

When BLOCK_LEGACY_AUTOLOAD is not enable, mdadm is not able to
activate new arrays unless "CREATE names=yes" appears in
mdadm.conf

As this is a regression we need to always enable BLOCK_LEGACY_AUTOLOAD
for when MD is selected - at least until mdadm is updated and the
updates widely available.

Cc: stable@vger.kernel.org # v5.18+
Fixes: fbdee71bb5d8 ("block: deprecate autoloading based on dev_t")
Signed-off-by: NeilBrown <neilb@suse.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 998a5cfdbc4e9..662d219c39bf4 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -16,6 +16,10 @@ if MD
 config BLK_DEV_MD
 	tristate "RAID support"
 	select BLOCK_HOLDER_DEPRECATED if SYSFS
+	# BLOCK_LEGACY_AUTOLOAD requirement should be removed
+	# after relevant mdadm enhancements - to make "names=yes"
+	# the default - are widely available.
+	select BLOCK_LEGACY_AUTOLOAD
 	help
 	  This driver lets you combine several hard disk partitions into one
 	  logical block device. This can be used to simply append one

From eb81a2ed4f52be831c9fb879752d89645a312c13 Mon Sep 17 00:00:00 2001
From: Yang Jihong <yangjihong1@huawei.com>
Date: Tue, 14 Mar 2023 04:47:35 +0000
Subject: [PATCH 307/411] perf/core: Fix perf_output_begin parameter is
 incorrectly invoked in perf_event_bpf_output

syzkaller reportes a KASAN issue with stack-out-of-bounds.
The call trace is as follows:
  dump_stack+0x9c/0xd3
  print_address_description.constprop.0+0x19/0x170
  __kasan_report.cold+0x6c/0x84
  kasan_report+0x3a/0x50
  __perf_event_header__init_id+0x34/0x290
  perf_event_header__init_id+0x48/0x60
  perf_output_begin+0x4a4/0x560
  perf_event_bpf_output+0x161/0x1e0
  perf_iterate_sb_cpu+0x29e/0x340
  perf_iterate_sb+0x4c/0xc0
  perf_event_bpf_event+0x194/0x2c0
  __bpf_prog_put.constprop.0+0x55/0xf0
  __cls_bpf_delete_prog+0xea/0x120 [cls_bpf]
  cls_bpf_delete_prog_work+0x1c/0x30 [cls_bpf]
  process_one_work+0x3c2/0x730
  worker_thread+0x93/0x650
  kthread+0x1b8/0x210
  ret_from_fork+0x1f/0x30

commit 267fb27352b6 ("perf: Reduce stack usage of perf_output_begin()")
use on-stack struct perf_sample_data of the caller function.

However, perf_event_bpf_output uses incorrect parameter to convert
small-sized data (struct perf_bpf_event) into large-sized data
(struct perf_sample_data), which causes memory overwriting occurs in
__perf_event_header__init_id.

Fixes: 267fb27352b6 ("perf: Reduce stack usage of perf_output_begin()")
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230314044735.56551-1-yangjihong1@huawei.com
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f79fd8b87f75e..296617edbda1c 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -9187,7 +9187,7 @@ static void perf_event_bpf_output(struct perf_event *event, void *data)
 
 	perf_event_header__init_id(&bpf_event->event_id.header,
 				   &sample, event);
-	ret = perf_output_begin(&handle, data, event,
+	ret = perf_output_begin(&handle, &sample, event,
 				bpf_event->event_id.header.size);
 	if (ret)
 		return;

From baf1b12a67f5b24f395baca03e442ce27cab0c18 Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Mon, 13 Mar 2023 10:16:08 -0700
Subject: [PATCH 308/411] perf: fix perf_event_context->time

Time readers rely on perf_event_context->[time|timestamp|timeoffset] to get
accurate time_enabled and time_running for an event. The difference between
ctx->timestamp and ctx->time is the among of time when the context is not
enabled. __update_context_time(ctx, false) is used to increase timestamp,
but not time. Therefore, it should only be called in ctx_sched_in() when
EVENT_TIME was not enabled.

Fixes: 09f5e7dc7ad7 ("perf: Fix perf_event_read_local() time")
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lkml.kernel.org/r/20230313171608.298734-1-song@kernel.org
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 296617edbda1c..52b4aa0b3bd17 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3872,7 +3872,7 @@ ctx_sched_in(struct perf_event_context *ctx, enum event_type_t event_type)
 	if (likely(!ctx->nr_events))
 		return;
 
-	if (is_active ^ EVENT_TIME) {
+	if (!(is_active & EVENT_TIME)) {
 		/* start ctx time */
 		__update_context_time(ctx, false);
 		perf_cgroup_set_timestamp(cpuctx);

From fd0815f632c24878e325821943edccc7fde947a2 Mon Sep 17 00:00:00 2001
From: Budimir Markovic <markovicbudimir@gmail.com>
Date: Wed, 15 Mar 2023 00:29:01 -0700
Subject: [PATCH 309/411] perf: Fix check before add_event_to_groups() in
 perf_group_detach()

Events should only be added to a groups rb tree if they have not been
removed from their context by list_del_event(). Since remove_on_exec
made it possible to call list_del_event() on individual events before
they are detached from their group, perf_group_detach() should check each
sibling's attach_state before calling add_event_to_groups() on it.

Fixes: 2e498d0a74e5 ("perf: Add support for event removal on exec")
Signed-off-by: Budimir Markovic <markovicbudimir@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/ZBFzvQV9tEqoHEtH@gentoo
---
 kernel/events/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 52b4aa0b3bd17..fb3e436bcd4ac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -2163,7 +2163,7 @@ static void perf_group_detach(struct perf_event *event)
 		/* Inherit group flags from the previous leader */
 		sibling->group_caps = event->group_caps;
 
-		if (!RB_EMPTY_NODE(&event->group_node)) {
+		if (sibling->attach_state & PERF_ATTACH_CONTEXT) {
 			add_event_to_groups(sibling, event->ctx);
 
 			if (sibling->state == PERF_EVENT_STATE_ACTIVE)

From 709671ffb15dcd1b4f6afe2a9d8c67c7c4ead4a1 Mon Sep 17 00:00:00 2001
From: Saaem Rizvi <SyedSaaem.Rizvi@amd.com>
Date: Mon, 27 Feb 2023 18:55:07 -0500
Subject: [PATCH 310/411] drm/amd/display: Remove OTG DIV register write for
 Virtual signals.

[WHY]
Hot plugging and then hot unplugging leads to k1 and k2 values to
change, as signal is detected as a virtual signal on hot unplug. Writing
these values to OTG_PIXEL_RATE_DIV register might cause primary display
to blank (known hw bug).

[HOW]
No longer write k1 and k2 values to register if signal is virtual, we
have safe guards in place in the case that k1 and k2 is unassigned so
that an unknown value is not written to the register either.

Cc: stable@vger.kernel.org
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Samson Tam <Samson.Tam@amd.com>
Reviewed-by: Alvin Lee <Alvin.Lee2@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Saaem Rizvi <SyedSaaem.Rizvi@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index 16f892125b6fa..9d14045cccd63 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -1104,7 +1104,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
 			*k2_div = PIXEL_RATE_DIV_BY_2;
 		else
 			*k2_div = PIXEL_RATE_DIV_BY_4;
-	} else if (dc_is_dp_signal(stream->signal) || dc_is_virtual_signal(stream->signal)) {
+	} else if (dc_is_dp_signal(stream->signal)) {
 		if (two_pix_per_container) {
 			*k1_div = PIXEL_RATE_DIV_BY_1;
 			*k2_div = PIXEL_RATE_DIV_BY_2;

From 56574f89dbd84004c3fd6485bcaafb5aa9b8be14 Mon Sep 17 00:00:00 2001
From: Wesley Chalmers <Wesley.Chalmers@amd.com>
Date: Thu, 3 Nov 2022 22:29:31 -0400
Subject: [PATCH 311/411] drm/amd/display: Do not set DRR on pipe Commit

[WHY]
Writing to DRR registers such as OTG_V_TOTAL_MIN on the same frame as a
pipe commit can cause underflow.

Cc: stable@vger.kernel.org
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Jun Lei <Jun.Lei@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Wesley Chalmers <Wesley.Chalmers@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
index 3b4d4d68359bb..df787fcf8e86e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
@@ -998,8 +998,5 @@ void dcn30_prepare_bandwidth(struct dc *dc,
 			dc->clk_mgr->funcs->set_max_memclk(dc->clk_mgr, dc->clk_mgr->bw_params->clk_table.entries[dc->clk_mgr->bw_params->clk_table.num_entries - 1].memclk_mhz);
 
 	dcn20_prepare_bandwidth(dc, context);
-
-	dc_dmub_srv_p_state_delegate(dc,
-		context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching, context);
 }
 

From cbd6c1b17d3b42b7935526a86ad5f66838767d03 Mon Sep 17 00:00:00 2001
From: Cruise Hung <Cruise.Hung@amd.com>
Date: Thu, 2 Mar 2023 10:33:51 +0800
Subject: [PATCH 312/411] drm/amd/display: Fix DP MST sinks removal issue

[Why]
In USB4 DP tunneling, it's possible to have this scenario that
the path becomes unavailable and CM tears down the path a little bit late.
So, in this case, the HPD is high but fails to read any DPCD register.
That causes the link connection type to be set to sst.
And not all sinks are removed behind the MST branch.

[How]
Restore the link connection type if it fails to read DPCD register.

Cc: stable@vger.kernel.org
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Wenjing Liu <Wenjing.Liu@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Cruise Hung <Cruise.Hung@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/link/link_detection.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/gpu/drm/amd/display/dc/link/link_detection.c b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
index 38216c789d777..f70025ef7b69e 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_detection.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_detection.c
@@ -855,6 +855,7 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 	struct dc_sink *prev_sink = NULL;
 	struct dpcd_caps prev_dpcd_caps;
 	enum dc_connection_type new_connection_type = dc_connection_none;
+	enum dc_connection_type pre_connection_type = link->type;
 	const uint32_t post_oui_delay = 30; // 30ms
 
 	DC_LOGGER_INIT(link->ctx->logger);
@@ -957,6 +958,8 @@ static bool detect_link_and_local_sink(struct dc_link *link,
 			}
 
 			if (!detect_dp(link, &sink_caps, reason)) {
+				link->type = pre_connection_type;
+
 				if (prev_sink)
 					dc_sink_release(prev_sink);
 				return false;
@@ -1244,11 +1247,16 @@ bool link_detect(struct dc_link *link, enum dc_detect_reason reason)
 	bool is_delegated_to_mst_top_mgr = false;
 	enum dc_connection_type pre_link_type = link->type;
 
+	DC_LOGGER_INIT(link->ctx->logger);
+
 	is_local_sink_detect_success = detect_link_and_local_sink(link, reason);
 
 	if (is_local_sink_detect_success && link->local_sink)
 		verify_link_capability(link, link->local_sink, reason);
 
+	DC_LOG_DC("%s: link_index=%d is_local_sink_detect_success=%d pre_link_type=%d link_type=%d\n", __func__,
+				link->link_index, is_local_sink_detect_success, pre_link_type, link->type);
+
 	if (is_local_sink_detect_success && link->local_sink &&
 			dc_is_dp_signal(link->local_sink->sink_signal) &&
 			link->dpcd_caps.is_mst_capable)

From 7304ee979b6b6422f41a1312391a5e505fc29ccd Mon Sep 17 00:00:00 2001
From: Ayush Gupta <ayugupta@amd.com>
Date: Thu, 2 Mar 2023 09:58:05 -0500
Subject: [PATCH 313/411] drm/amd/display: disconnect MPCC only on OTG change

[Why]
Framedrops are observed while playing Vp9 and Av1 10 bit
video on 8k resolution using VSR while playback controls
are disappeared/appeared

[How]
Now ODM 2 to 1 is disabled for 5k or greater resolutions on VSR.

Cc: stable@vger.kernel.org
Cc: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Alvin Lee <Alvin.Lee2@amd.com>
Acked-by: Qingqing Zhuo <qingqing.zhuo@amd.com>
Signed-off-by: Ayush Gupta <ayugupta@amd.com>
Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index 74e50c09bb62f..d024007f0f650 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -1915,6 +1915,7 @@ int dcn32_populate_dml_pipes_from_context(
 	bool subvp_in_use = false;
 	uint8_t is_pipe_split_expected[MAX_PIPES] = {0};
 	struct dc_crtc_timing *timing;
+	bool vsr_odm_support = false;
 
 	dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
 
@@ -1932,12 +1933,15 @@ int dcn32_populate_dml_pipes_from_context(
 		timing = &pipe->stream->timing;
 
 		pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
+		vsr_odm_support = (res_ctx->pipe_ctx[i].stream->src.width >= 5120 &&
+				res_ctx->pipe_ctx[i].stream->src.width > res_ctx->pipe_ctx[i].stream->dst.width);
 		if (context->stream_count == 1 &&
 				context->stream_status[0].plane_count == 1 &&
 				!dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) &&
 				is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) &&
 				pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ &&
-				dc->debug.enable_single_display_2to1_odm_policy) {
+				dc->debug.enable_single_display_2to1_odm_policy &&
+				!vsr_odm_support) { //excluding 2to1 ODM combine on >= 5k vsr
 			pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_2to1;
 		}
 		pipe_cnt++;

From 0424a7dfe9129b93f29b277511a60e87f052ac6b Mon Sep 17 00:00:00 2001
From: Shawn Wang <shawnwang@linux.alibaba.com>
Date: Tue, 17 Jan 2023 13:14:50 -0800
Subject: [PATCH 314/411] x86/resctrl: Clear staged_config[] before and after
 it is used

As a temporary storage, staged_config[] in rdt_domain should be cleared
before and after it is used. The stale value in staged_config[] could
cause an MSR access error.

Here is a reproducer on a system with 16 usable CLOSIDs for a 15-way L3
Cache (MBA should be disabled if the number of CLOSIDs for MB is less than
16.) :
	mount -t resctrl resctrl -o cdp /sys/fs/resctrl
	mkdir /sys/fs/resctrl/p{1..7}
	umount /sys/fs/resctrl/
	mount -t resctrl resctrl /sys/fs/resctrl
	mkdir /sys/fs/resctrl/p{1..8}

An error occurs when creating resource group named p8:
    unchecked MSR access error: WRMSR to 0xca0 (tried to write 0x00000000000007ff) at rIP: 0xffffffff82249142 (cat_wrmsr+0x32/0x60)
    Call Trace:
     <IRQ>
     __flush_smp_call_function_queue+0x11d/0x170
     __sysvec_call_function+0x24/0xd0
     sysvec_call_function+0x89/0xc0
     </IRQ>
     <TASK>
     asm_sysvec_call_function+0x16/0x20

When creating a new resource control group, hardware will be configured
by the following process:
    rdtgroup_mkdir()
      rdtgroup_mkdir_ctrl_mon()
        rdtgroup_init_alloc()
          resctrl_arch_update_domains()

resctrl_arch_update_domains() iterates and updates all resctrl_conf_type
whose have_new_ctrl is true. Since staged_config[] holds the same values as
when CDP was enabled, it will continue to update the CDP_CODE and CDP_DATA
configurations. When group p8 is created, get_config_index() called in
resctrl_arch_update_domains() will return 16 and 17 as the CLOSIDs for
CDP_CODE and CDP_DATA, which will be translated to an invalid register -
0xca0 in this scenario.

Fix it by clearing staged_config[] before and after it is used.

[reinette: re-order commit tags]

Fixes: 75408e43509e ("x86/resctrl: Allow different CODE/DATA configurations to be staged")
Suggested-by: Xin Hao <xhao@linux.alibaba.com>
Signed-off-by: Shawn Wang <shawnwang@linux.alibaba.com>
Signed-off-by: Reinette Chatre <reinette.chatre@intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Reinette Chatre <reinette.chatre@intel.com>
Cc:stable@vger.kernel.org
Link: https://lore.kernel.org/all/2fad13f49fbe89687fc40e9a5a61f23a28d1507a.1673988935.git.reinette.chatre%40intel.com
---
 arch/x86/kernel/cpu/resctrl/ctrlmondata.c |  7 ++-----
 arch/x86/kernel/cpu/resctrl/internal.h    |  1 +
 arch/x86/kernel/cpu/resctrl/rdtgroup.c    | 25 +++++++++++++++++++----
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index eb07d4435391b..b44c487727d45 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -368,7 +368,6 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 {
 	struct resctrl_schema *s;
 	struct rdtgroup *rdtgrp;
-	struct rdt_domain *dom;
 	struct rdt_resource *r;
 	char *tok, *resname;
 	int ret = 0;
@@ -397,10 +396,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 		goto out;
 	}
 
-	list_for_each_entry(s, &resctrl_schema_all, list) {
-		list_for_each_entry(dom, &s->res->domains, list)
-			memset(dom->staged_config, 0, sizeof(dom->staged_config));
-	}
+	rdt_staged_configs_clear();
 
 	while ((tok = strsep(&buf, "\n")) != NULL) {
 		resname = strim(strsep(&tok, ":"));
@@ -445,6 +441,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
 	}
 
 out:
+	rdt_staged_configs_clear();
 	rdtgroup_kn_unlock(of->kn);
 	cpus_read_unlock();
 	return ret ?: nbytes;
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 8edecc5763d8e..85ceaf9a31ac2 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -555,5 +555,6 @@ void __check_limbo(struct rdt_domain *d, bool force_free);
 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
 void __init thread_throttle_mode_init(void);
 void __init mbm_config_rftype_init(const char *config);
+void rdt_staged_configs_clear(void);
 
 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 884b6e9a7e31c..6ad33f355861f 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -78,6 +78,19 @@ void rdt_last_cmd_printf(const char *fmt, ...)
 	va_end(ap);
 }
 
+void rdt_staged_configs_clear(void)
+{
+	struct rdt_resource *r;
+	struct rdt_domain *dom;
+
+	lockdep_assert_held(&rdtgroup_mutex);
+
+	for_each_alloc_capable_rdt_resource(r) {
+		list_for_each_entry(dom, &r->domains, list)
+			memset(dom->staged_config, 0, sizeof(dom->staged_config));
+	}
+}
+
 /*
  * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
  * we can keep a bitmap of free CLOSIDs in a single integer.
@@ -3107,7 +3120,9 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 {
 	struct resctrl_schema *s;
 	struct rdt_resource *r;
-	int ret;
+	int ret = 0;
+
+	rdt_staged_configs_clear();
 
 	list_for_each_entry(s, &resctrl_schema_all, list) {
 		r = s->res;
@@ -3119,20 +3134,22 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
 		} else {
 			ret = rdtgroup_init_cat(s, rdtgrp->closid);
 			if (ret < 0)
-				return ret;
+				goto out;
 		}
 
 		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
 		if (ret < 0) {
 			rdt_last_cmd_puts("Failed to initialize allocations\n");
-			return ret;
+			goto out;
 		}
 
 	}
 
 	rdtgrp->mode = RDT_MODE_SHAREABLE;
 
-	return 0;
+out:
+	rdt_staged_configs_clear();
+	return ret;
 }
 
 static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,

From 20bc9f76b6a2455c6b54b91ae7634f147f64987f Mon Sep 17 00:00:00 2001
From: David Belanger <david.belanger@amd.com>
Date: Tue, 28 Feb 2023 14:11:24 -0500
Subject: [PATCH 315/411] drm/amdkfd: Fixed kfd_process cleanup on module exit.

Handle case when module is unloaded (kfd_exit) before a process space
(mm_struct) is released.

v2: Fixed potential race conditions by removing all kfd_process from
the process table first, then working on releasing the resources.

v3: Fixed loop element access / synchronization.  Fixed extra empty lines.

Signed-off-by: David Belanger <david.belanger@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_module.c  |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h    |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 67 +++++++++++++++++++++---
 3 files changed, 62 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index 09b966dc37681..aee2212e52f69 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -77,6 +77,7 @@ static int kfd_init(void)
 
 static void kfd_exit(void)
 {
+	kfd_cleanup_processes();
 	kfd_debugfs_fini();
 	kfd_process_destroy_wq();
 	kfd_procfs_shutdown();
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bfa30d12406b3..7e4d992e48b3c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -928,6 +928,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev);
 
 int kfd_process_create_wq(void);
 void kfd_process_destroy_wq(void);
+void kfd_cleanup_processes(void);
 struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *task);
 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 7acd55a814b2f..4208e0f01064d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1167,6 +1167,17 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn)
 	kfd_unref_process(container_of(mn, struct kfd_process, mmu_notifier));
 }
 
+static void kfd_process_notifier_release_internal(struct kfd_process *p)
+{
+	cancel_delayed_work_sync(&p->eviction_work);
+	cancel_delayed_work_sync(&p->restore_work);
+
+	/* Indicate to other users that MM is no longer valid */
+	p->mm = NULL;
+
+	mmu_notifier_put(&p->mmu_notifier);
+}
+
 static void kfd_process_notifier_release(struct mmu_notifier *mn,
 					struct mm_struct *mm)
 {
@@ -1181,17 +1192,22 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 		return;
 
 	mutex_lock(&kfd_processes_mutex);
+	/*
+	 * Do early return if table is empty.
+	 *
+	 * This could potentially happen if this function is called concurrently
+	 * by mmu_notifier and by kfd_cleanup_pocesses.
+	 *
+	 */
+	if (hash_empty(kfd_processes_table)) {
+		mutex_unlock(&kfd_processes_mutex);
+		return;
+	}
 	hash_del_rcu(&p->kfd_processes);
 	mutex_unlock(&kfd_processes_mutex);
 	synchronize_srcu(&kfd_processes_srcu);
 
-	cancel_delayed_work_sync(&p->eviction_work);
-	cancel_delayed_work_sync(&p->restore_work);
-
-	/* Indicate to other users that MM is no longer valid */
-	p->mm = NULL;
-
-	mmu_notifier_put(&p->mmu_notifier);
+	kfd_process_notifier_release_internal(p);
 }
 
 static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
@@ -1200,6 +1216,43 @@ static const struct mmu_notifier_ops kfd_process_mmu_notifier_ops = {
 	.free_notifier = kfd_process_free_notifier,
 };
 
+/*
+ * This code handles the case when driver is being unloaded before all
+ * mm_struct are released.  We need to safely free the kfd_process and
+ * avoid race conditions with mmu_notifier that might try to free them.
+ *
+ */
+void kfd_cleanup_processes(void)
+{
+	struct kfd_process *p;
+	struct hlist_node *p_temp;
+	unsigned int temp;
+	HLIST_HEAD(cleanup_list);
+
+	/*
+	 * Move all remaining kfd_process from the process table to a
+	 * temp list for processing.   Once done, callback from mmu_notifier
+	 * release will not see the kfd_process in the table and do early return,
+	 * avoiding double free issues.
+	 */
+	mutex_lock(&kfd_processes_mutex);
+	hash_for_each_safe(kfd_processes_table, temp, p_temp, p, kfd_processes) {
+		hash_del_rcu(&p->kfd_processes);
+		synchronize_srcu(&kfd_processes_srcu);
+		hlist_add_head(&p->kfd_processes, &cleanup_list);
+	}
+	mutex_unlock(&kfd_processes_mutex);
+
+	hlist_for_each_entry_safe(p, p_temp, &cleanup_list, kfd_processes)
+		kfd_process_notifier_release_internal(p);
+
+	/*
+	 * Ensures that all outstanding free_notifier get called, triggering
+	 * the release of the kfd_process struct.
+	 */
+	mmu_notifier_synchronize();
+}
+
 static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
 {
 	unsigned long  offset;

From f3921a9a641483784448fb982b2eb738b383d9b9 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Mon, 13 Mar 2023 20:03:08 -0400
Subject: [PATCH 316/411] drm/amdgpu: Don't resume IOMMU after incomplete init

Check kfd->init_complete in kgd2kfd_iommu_resume, consistent with other
kgd2kfd calls. This should fix IOMMU errors on resume from suspend when
KFD IOMMU initialization failed.

Reported-by: Matt Fagnani <matt.fagnani@bell.net>
Link: https://lore.kernel.org/r/4a3b225c-2ffd-e758-4de1-447375e34cad@bell.net/
Link: https://bugzilla.kernel.org/show_bug.cgi?id=217170
Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2454
Cc: Vasant Hegde <vasant.hegde@amd.com>
Cc: Linux regression tracking (Thorsten Leemhuis) <regressions@leemhuis.info>
Cc: stable@vger.kernel.org
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Tested-by: Matt Fagnani <matt.fagnani@bell.net>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3de7f616a001c..ec70a1658dc38 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -59,6 +59,7 @@ static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
 				unsigned int chunk_size);
 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
 
+static int kfd_resume_iommu(struct kfd_dev *kfd);
 static int kfd_resume(struct kfd_dev *kfd);
 
 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
@@ -624,7 +625,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 
 	svm_migrate_init(kfd->adev);
 
-	if (kgd2kfd_resume_iommu(kfd))
+	if (kfd_resume_iommu(kfd))
 		goto device_iommu_error;
 
 	if (kfd_resume(kfd))
@@ -772,6 +773,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
 }
 
 int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
+{
+	if (!kfd->init_complete)
+		return 0;
+
+	return kfd_resume_iommu(kfd);
+}
+
+static int kfd_resume_iommu(struct kfd_dev *kfd)
 {
 	int err = 0;
 

From 37beabe9a891b92174cd1aafbfa881fe9e05aa87 Mon Sep 17 00:00:00 2001
From: Emeel Hakim <ehakim@nvidia.com>
Date: Wed, 8 Feb 2023 14:25:54 +0200
Subject: [PATCH 317/411] net/mlx5e: Fix macsec ASO context alignment

Currently mlx5e_macsec_umr struct does not satisfy hardware memory
alignment requirement. Hence the result of querying advanced steering
operation (ASO) is not copied to the memory region as expected.

Fix by satisfying hardware memory alignment requirement and move
context to be first field in struct for better readability.

Fixes: 1f53da676439 ("net/mlx5e: Create advanced steering operation (ASO) object for MACsec")
Signed-off-by: Emeel Hakim <ehakim@nvidia.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
index 08d0929e82603..8af53178e40d4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c
@@ -89,8 +89,8 @@ struct mlx5e_macsec_rx_sc {
 };
 
 struct mlx5e_macsec_umr {
+	u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(macsec_aso)];
 	dma_addr_t dma_addr;
-	u8 ctx[MLX5_ST_SZ_BYTES(macsec_aso)];
 	u32 mkey;
 };
 

From 9a92fe1db9e57ea94388a1d768e8ee42af858377 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Fri, 12 Mar 2021 07:21:29 -0600
Subject: [PATCH 318/411] net/mlx5e: Don't cache tunnel offloads capability

When mlx5e attaches again after device health recovery, the device
capabilities might have changed by the eswitch manager.

For example in one flow when ECPF changes the eswitch mode between
legacy and switchdev, it updates the flow table tunnel capability.

The cached value is only used in one place, so just check the capability
there instead.

Fixes: 5bef709d76a2 ("net/mlx5: Enable host PF HCA after eswitch is initialized")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h          | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c     | 4 +---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c      | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 1 -
 4 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 4276c6eb68201..4a19ef4a98110 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -313,7 +313,6 @@ struct mlx5e_params {
 		} channel;
 	} mqprio;
 	bool rx_cqe_compress_def;
-	bool tunneled_offload_en;
 	struct dim_cq_moder rx_cq_moderation;
 	struct dim_cq_moder tx_cq_moderation;
 	struct mlx5e_packet_merge_param packet_merge;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 51b5f3cca5047..56fc2aebb9eee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4979,8 +4979,6 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
 	/* TX inline */
 	mlx5_query_min_inline(mdev, &params->tx_min_inline_mode);
 
-	params->tunneled_offload_en = mlx5_tunnel_inner_ft_supported(mdev);
-
 	/* AF_XDP */
 	params->xsk = xsk;
 
@@ -5285,7 +5283,7 @@ static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
 	}
 
 	features = MLX5E_RX_RES_FEATURE_PTP;
-	if (priv->channels.params.tunneled_offload_en)
+	if (mlx5_tunnel_inner_ft_supported(mdev))
 		features |= MLX5E_RX_RES_FEATURE_INNER_FT;
 	err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, features,
 				priv->max_nch, priv->drop_rq.rqn,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 43fd12fb87b87..8ff654b4e9e14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -755,7 +755,6 @@ static void mlx5e_build_rep_params(struct net_device *netdev)
 	mlx5e_set_rx_cq_mode_params(params, cq_period_mode);
 
 	params->mqprio.num_tc       = 1;
-	params->tunneled_offload_en = false;
 	if (rep->vport != MLX5_VPORT_UPLINK)
 		params->vlan_strip_disable = true;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index c2a4f86bc8909..baa7ef8123139 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -70,7 +70,6 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
 
 	params->packet_merge.type = MLX5E_PACKET_MERGE_NONE;
 	params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
-	params->tunneled_offload_en = false;
 
 	/* CQE compression is not supported for IPoIB */
 	params->rx_cqe_compress_def = false;

From ba5d8f72b82cc197355c9340ef89dab813815865 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@nvidia.com>
Date: Thu, 24 Jun 2021 18:22:57 +0300
Subject: [PATCH 319/411] net/mlx5: Fix setting ec_function bit in MANAGE_PAGES

When ECPF is a page supplier, reclaim pages missed to honor the
ec_function bit provided by the firmware. It always used the ec_function
to true during driver unload flow for ECPF. This is incorrect.

Honor the ec_function bit provided by device during page allocation
request event.

Fixes: d6945242f45d ("net/mlx5: Hold pages RB tree per VF")
Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/core/pagealloc.c   | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 64d4e7125e9bb..95dc67fb30015 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -82,6 +82,16 @@ static u16 func_id_to_type(struct mlx5_core_dev *dev, u16 func_id, bool ec_funct
 	return func_id <= mlx5_core_max_vfs(dev) ?  MLX5_VF : MLX5_SF;
 }
 
+static u32 mlx5_get_ec_function(u32 function)
+{
+	return function >> 16;
+}
+
+static u32 mlx5_get_func_id(u32 function)
+{
+	return function & 0xffff;
+}
+
 static struct rb_root *page_root_per_function(struct mlx5_core_dev *dev, u32 function)
 {
 	struct rb_root *root;
@@ -665,20 +675,22 @@ static int optimal_reclaimed_pages(void)
 }
 
 static int mlx5_reclaim_root_pages(struct mlx5_core_dev *dev,
-				   struct rb_root *root, u16 func_id)
+				   struct rb_root *root, u32 function)
 {
 	u64 recl_pages_to_jiffies = msecs_to_jiffies(mlx5_tout_ms(dev, RECLAIM_PAGES));
 	unsigned long end = jiffies + recl_pages_to_jiffies;
 
 	while (!RB_EMPTY_ROOT(root)) {
+		u32 ec_function = mlx5_get_ec_function(function);
+		u32 function_id = mlx5_get_func_id(function);
 		int nclaimed;
 		int err;
 
-		err = reclaim_pages(dev, func_id, optimal_reclaimed_pages(),
-				    &nclaimed, false, mlx5_core_is_ecpf(dev));
+		err = reclaim_pages(dev, function_id, optimal_reclaimed_pages(),
+				    &nclaimed, false, ec_function);
 		if (err) {
-			mlx5_core_warn(dev, "failed reclaiming pages (%d) for func id 0x%x\n",
-				       err, func_id);
+			mlx5_core_warn(dev, "reclaim_pages err (%d) func_id=0x%x ec_func=0x%x\n",
+				       err, function_id, ec_function);
 			return err;
 		}
 

From 7ba930fc25def6fd736abcdfa224272948a65cf7 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens <danielj@nvidia.com>
Date: Thu, 20 Oct 2022 00:13:50 +0300
Subject: [PATCH 320/411] net/mlx5: Disable eswitch before waiting for VF pages

The offending commit changed the ordering of moving to legacy mode and
waiting for the VF pages. Moving to legacy mode is important in
bluefield, because it sends the host driver into error state, and frees
its pages. Without this transition we end up waiting 2 minutes for
pages that aren't coming before carrying on with the unload process.

Fixes: f019679ea5f2 ("net/mlx5: E-switch, Remove dependency between sriov and eswitch mode")
Signed-off-by: Daniel Jurgens <danielj@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 540840e80493b..f36a3aa4b5c8f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1364,8 +1364,8 @@ static void mlx5_unload(struct mlx5_core_dev *dev)
 {
 	mlx5_devlink_traps_unregister(priv_to_devlink(dev));
 	mlx5_sf_dev_table_destroy(dev);
-	mlx5_sriov_detach(dev);
 	mlx5_eswitch_disable(dev->priv.eswitch);
+	mlx5_sriov_detach(dev);
 	mlx5_lag_remove_mdev(dev);
 	mlx5_ec_cleanup(dev);
 	mlx5_sf_hw_table_destroy(dev);

From 1313d78ac0c1cfcff7bdece8da54b080e71487c4 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Tue, 7 Feb 2023 15:07:00 +0200
Subject: [PATCH 321/411] net/mlx5: E-switch, Fix wrong usage of source port
 rewrite in split rules

In few cases, rules with mirror use case are split to two FTEs, one which
do the mirror action and forward to second FTE which do the rest of the rule
actions and the second redirect action.
In case of mirror rules which do split and forward to ovs internal port or
VF stack devices, source port rewrite should be used in the second FTE but
it is wrongly also set in the first FTE which break the offload.

Fix this issue by removing the wrong check if source port rewrite is needed to
be used on the first FTE of the split and instead return EOPNOTSUPP which will
block offload of rules which mirror to ovs internal port or VF stack devices
which isn't supported.

Fixes: 10742efc20a4 ("net/mlx5e: VF tunnel TX traffic offloading")
Fixes: a508728a4c8b ("net/mlx5e: VF tunnel RX traffic offloading")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d766a64b18234..22075943bb582 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -723,11 +723,11 @@ mlx5_eswitch_add_fwd_rule(struct mlx5_eswitch *esw,
 
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	for (i = 0; i < esw_attr->split_count; i++) {
-		if (esw_is_indir_table(esw, attr))
-			err = esw_setup_indir_table(dest, &flow_act, esw, attr, false, &i);
-		else if (esw_is_chain_src_port_rewrite(esw, esw_attr))
-			err = esw_setup_chain_src_port_rewrite(dest, &flow_act, esw, chains, attr,
-							       &i);
+		if (esw_attr->dests[i].flags & MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
+			/* Source port rewrite (forward to ovs internal port or statck device) isn't
+			 * supported in the rule of split action.
+			 */
+			err = -EOPNOTSUPP;
 		else
 			esw_setup_vport_dest(dest, &flow_act, esw, esw_attr, i, i, false);
 

From 28d3815a629cbdee660dd1c9de28d77cb3d77917 Mon Sep 17 00:00:00 2001
From: Maor Dickman <maord@nvidia.com>
Date: Wed, 8 Feb 2023 11:37:41 +0200
Subject: [PATCH 322/411] net/mlx5: E-switch, Fix missing set of split_count
 when forward to ovs internal port

Rules with mirror actions are split to two FTEs when the actions after the mirror
action contains pedit, vlan push/pop or ct. Forward to ovs internal port adds
implicit header rewrite (pedit) but missing trigger to do split.

Fix by setting split_count when forwarding to ovs internal port which
will trigger split in mirror rules.

Fixes: 27484f7170ed ("net/mlx5e: Offload tc rules that redirect to ovs internal port")
Signed-off-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 70b8d2dfa751f..90944bf271ce4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4304,6 +4304,7 @@ int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
 
 	esw_attr->dest_int_port = dest_int_port;
 	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
+	esw_attr->split_count = out_index;
 
 	/* Forward to root fdb for matching against the new source vport */
 	attr->dest_chain = 0;

From c9668f0b1d28570327dbba189f2c61f6f9e43ae7 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@nvidia.com>
Date: Sun, 12 Feb 2023 11:01:43 +0200
Subject: [PATCH 323/411] net/mlx5e: Fix cleanup null-ptr deref on encap lock

During module is unloaded while a peer tc flow is still offloaded,
first the peer uplink rep profile is changed to a nic profile, and so
neigh encap lock is destroyed. Next during unload, the VF reps netdevs
are unregistered which causes the original non-peer tc flow to be deleted,
which deletes the peer flow. The peer flow deletion detaches the encap
entry and try to take the already destroyed encap lock, causing the
below trace.

Fix this by clearing peer flows during tc eswitch cleanup
(mlx5e_tc_esw_cleanup()).

Relevant trace:
[ 4316.837128] BUG: kernel NULL pointer dereference, address: 00000000000001d8
[ 4316.842239] RIP: 0010:__mutex_lock+0xb5/0xc40
[ 4316.851897] Call Trace:
[ 4316.852481]  <TASK>
[ 4316.857214]  mlx5e_rep_neigh_entry_release+0x93/0x790 [mlx5_core]
[ 4316.858258]  mlx5e_rep_encap_entry_detach+0xa7/0xf0 [mlx5_core]
[ 4316.859134]  mlx5e_encap_dealloc+0xa3/0xf0 [mlx5_core]
[ 4316.859867]  clean_encap_dests.part.0+0x5c/0xe0 [mlx5_core]
[ 4316.860605]  mlx5e_tc_del_fdb_flow+0x32a/0x810 [mlx5_core]
[ 4316.862609]  __mlx5e_tc_del_fdb_peer_flow+0x1a2/0x250 [mlx5_core]
[ 4316.863394]  mlx5e_tc_del_flow+0x(/0x630 [mlx5_core]
[ 4316.864090]  mlx5e_flow_put+0x5f/0x100 [mlx5_core]
[ 4316.864771]  mlx5e_delete_flower+0x4de/0xa40 [mlx5_core]
[ 4316.865486]  tc_setup_cb_reoffload+0x20/0x80
[ 4316.865905]  fl_reoffload+0x47c/0x510 [cls_flower]
[ 4316.869181]  tcf_block_playback_offloads+0x91/0x1d0
[ 4316.869649]  tcf_block_unbind+0xe7/0x1b0
[ 4316.870049]  tcf_block_offload_cmd.isra.0+0x1ee/0x270
[ 4316.879266]  tcf_block_offload_unbind+0x61/0xa0
[ 4316.879711]  __tcf_block_put+0xa4/0x310

Fixes: 04de7dda7394 ("net/mlx5e: Infrastructure for duplicated offloading of TC flows")
Fixes: 1418ddd96afd ("net/mlx5e: Duplicate offloaded TC eswitch rules under uplink LAG")
Signed-off-by: Paul Blakey <paulb@nvidia.com>
Reviewed-by: Chris Mi <cmi@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 90944bf271ce4..cc35cbc9934d1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5464,6 +5464,16 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
 
 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
 {
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5_eswitch *esw;
+	struct mlx5e_priv *priv;
+
+	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
+	priv = netdev_priv(rpriv->netdev);
+	esw = priv->mdev->priv.eswitch;
+
+	mlx5e_tc_clean_fdb_peer_flows(esw);
+
 	mlx5e_tc_tun_cleanup(uplink_priv->encap);
 
 	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);

From dd64572490c3d7aab04083db8791fab157a941ed Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Tue, 24 Jan 2023 17:34:32 +0200
Subject: [PATCH 324/411] net/mlx5e: kTLS, Fix missing error unwind on
 unsupported cipher type

Do proper error unwinding when adding an unsupported TX/RX cipher type.
Move the switch case prior to key creation so there's less to unwind,
and change the goto label name to describe the action performed instead
of what failed.

Fixes: 4960c414db35 ("net/mlx5e: Support 256 bit keys with kTLS device offload")
Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/en_accel/ktls_rx.c     | 24 ++++++++++---------
 .../mellanox/mlx5/core/en_accel/ktls_tx.c     | 22 +++++++++--------
 2 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
index 4be770443b0cd..9b597cb245985 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c
@@ -621,15 +621,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
 	if (unlikely(!priv_rx))
 		return -ENOMEM;
 
-	dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info);
-	if (IS_ERR(dek)) {
-		err = PTR_ERR(dek);
-		goto err_create_key;
-	}
-	priv_rx->dek = dek;
-
-	INIT_LIST_HEAD(&priv_rx->list);
-	spin_lock_init(&priv_rx->lock);
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128:
 		priv_rx->crypto_info.crypto_info_128 =
@@ -642,9 +633,20 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
 	default:
 		WARN_ONCE(1, "Unsupported cipher type %u\n",
 			  crypto_info->cipher_type);
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_cipher_type;
 	}
 
+	dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info);
+	if (IS_ERR(dek)) {
+		err = PTR_ERR(dek);
+		goto err_cipher_type;
+	}
+	priv_rx->dek = dek;
+
+	INIT_LIST_HEAD(&priv_rx->list);
+	spin_lock_init(&priv_rx->lock);
+
 	rxq = mlx5e_ktls_sk_get_rxq(sk);
 	priv_rx->rxq = rxq;
 	priv_rx->sk = sk;
@@ -677,7 +679,7 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk,
 	mlx5e_tir_destroy(&priv_rx->tir);
 err_create_tir:
 	mlx5_ktls_destroy_key(priv->tls->dek_pool, priv_rx->dek);
-err_create_key:
+err_cipher_type:
 	kfree(priv_rx);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 60b3e08a10286..0e4c0a093293a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -469,14 +469,6 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
 	if (IS_ERR(priv_tx))
 		return PTR_ERR(priv_tx);
 
-	dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info);
-	if (IS_ERR(dek)) {
-		err = PTR_ERR(dek);
-		goto err_create_key;
-	}
-	priv_tx->dek = dek;
-
-	priv_tx->expected_seq = start_offload_tcp_sn;
 	switch (crypto_info->cipher_type) {
 	case TLS_CIPHER_AES_GCM_128:
 		priv_tx->crypto_info.crypto_info_128 =
@@ -489,8 +481,18 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
 	default:
 		WARN_ONCE(1, "Unsupported cipher type %u\n",
 			  crypto_info->cipher_type);
-		return -EOPNOTSUPP;
+		err = -EOPNOTSUPP;
+		goto err_pool_push;
 	}
+
+	dek = mlx5_ktls_create_key(priv->tls->dek_pool, crypto_info);
+	if (IS_ERR(dek)) {
+		err = PTR_ERR(dek);
+		goto err_pool_push;
+	}
+
+	priv_tx->dek = dek;
+	priv_tx->expected_seq = start_offload_tcp_sn;
 	priv_tx->tx_ctx = tls_offload_ctx_tx(tls_ctx);
 
 	mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx);
@@ -500,7 +502,7 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk,
 
 	return 0;
 
-err_create_key:
+err_pool_push:
 	pool_push(pool, priv_tx);
 	return err;
 }

From 031a163f2c476adcb2c01e27a7d323e66174ac11 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 28 Feb 2023 10:36:19 +0200
Subject: [PATCH 325/411] net/mlx5: Set BREAK_FW_WAIT flag first when removing
 driver

Currently, BREAK_FW_WAIT flag is set after syncing with fw_reset.
However, fw_reset can call mlx5_load_one() which is waiting for fw
init bit and BREAK_FW_WAIT flag is intended to stop. e.g.: the driver
might wait on a loop it should exit.
Fix it by setting the flag before syncing with fw_reset.

Fixes: 8324a02c342a ("net/mlx5: Add exit route when waiting for FW")
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f36a3aa4b5c8f..f1de152a61135 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1789,11 +1789,11 @@ static void remove_one(struct pci_dev *pdev)
 	struct mlx5_core_dev *dev  = pci_get_drvdata(pdev);
 	struct devlink *devlink = priv_to_devlink(dev);
 
+	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
 	/* mlx5_drain_fw_reset() is using devlink APIs. Hence, we must drain
 	 * fw_reset before unregistering the devlink.
 	 */
 	mlx5_drain_fw_reset(dev);
-	set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state);
 	devlink_unregister(devlink);
 	mlx5_sriov_disable(pdev);
 	mlx5_crdump_disable(dev);

From 78dee7befd56987283c13877b834c0aa97ad51b9 Mon Sep 17 00:00:00 2001
From: Adham Faris <afaris@nvidia.com>
Date: Mon, 23 Jan 2023 10:09:01 +0200
Subject: [PATCH 326/411] net/mlx5e: Lower maximum allowed MTU in XSK to match
 XDP prerequisites

XSK redirecting XDP programs require linearity, hence applies
restrictions on the MTU. For PAGE_SIZE=4K, MTU shouldn't exceed 3498.

Features that contradict with XDP such HW-LRO and HW-GRO are enforced
by the driver in advance, during XSK params validation, except for MTU,
which was not enforced before this patch.

This has been spotted during test scenario described below:
Attaching xdpsock program (PAGE_SIZE=4K), with MTU < 3498, detaching
XDP program, changing the MTU to arbitrary value in the range
[3499, 3754], attaching XDP program again, which ended up with failure
since MTU is > 3498.

This commit lowers the XSK MTU limitation to be aligned with XDP MTU
limitation, since XSK socket is meaningless without XDP program.

Signed-off-by: Adham Faris <afaris@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 56fc2aebb9eee..a7f2ab22cc40c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4169,13 +4169,17 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
 		struct xsk_buff_pool *xsk_pool =
 			mlx5e_xsk_get_pool(&chs->params, chs->params.xsk, ix);
 		struct mlx5e_xsk_param xsk;
+		int max_xdp_mtu;
 
 		if (!xsk_pool)
 			continue;
 
 		mlx5e_build_xsk_param(xsk_pool, &xsk);
+		max_xdp_mtu = mlx5e_xdp_max_mtu(new_params, &xsk);
 
-		if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev)) {
+		/* Validate XSK params and XDP MTU in advance */
+		if (!mlx5e_validate_xsk_param(new_params, &xsk, mdev) ||
+		    new_params->sw_mtu > max_xdp_mtu) {
 			u32 hr = mlx5e_get_linear_rq_headroom(new_params, &xsk);
 			int max_mtu_frame, max_mtu_page, max_mtu;
 
@@ -4185,9 +4189,9 @@ static bool mlx5e_xsk_validate_mtu(struct net_device *netdev,
 			 */
 			max_mtu_frame = MLX5E_HW2SW_MTU(new_params, xsk.chunk_size - hr);
 			max_mtu_page = MLX5E_HW2SW_MTU(new_params, SKB_MAX_HEAD(0));
-			max_mtu = min(max_mtu_frame, max_mtu_page);
+			max_mtu = min3(max_mtu_frame, max_mtu_page, max_xdp_mtu);
 
-			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u. Try MTU <= %d\n",
+			netdev_err(netdev, "MTU %d is too big for an XSK running on channel %u or its redirection XDP program. Try MTU <= %d\n",
 				   new_params->sw_mtu, ix, max_mtu);
 			return false;
 		}

From d1a0075ad6b693c2bd41e7aedb8a4f3b74b6999c Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Thu, 16 Feb 2023 12:34:21 +0000
Subject: [PATCH 327/411] net/sched: TC, fix raw counter initialization

Freed counters may be reused by fs core.
As such, raw counters may not be initialized to zero.

Cache the counter values when the action stats object is initialized to
have a proper base value for calculating the difference from the previous
query.

Fixes: 2b68d659a704 ("net/mlx5e: TC, support per action stats")
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c
index 626cb7470fa57..07c1895a2b234 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act_stats.c
@@ -64,6 +64,7 @@ mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle,
 {
 	struct mlx5e_tc_act_stats *act_stats, *old_act_stats;
 	struct rhashtable *ht = &handle->ht;
+	u64 lastused;
 	int err = 0;
 
 	act_stats = kvzalloc(sizeof(*act_stats), GFP_KERNEL);
@@ -73,6 +74,10 @@ mlx5e_tc_act_stats_add(struct mlx5e_tc_act_stats_handle *handle,
 	act_stats->tc_act_cookie = act_cookie;
 	act_stats->counter = counter;
 
+	mlx5_fc_query_cached_raw(counter,
+				 &act_stats->lastbytes,
+				 &act_stats->lastpackets, &lastused);
+
 	rcu_read_lock();
 	old_act_stats = rhashtable_lookup_get_insert_fast(ht,
 							  &act_stats->hash,

From 1166add424dae14ccdb64a6eefbf26766c9d0ef2 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Tue, 21 Feb 2023 14:46:56 +0000
Subject: [PATCH 328/411] net/mlx5e: TC, fix missing error code

Missing error code when mlx5e_tc_act_stats_create fails

Fixes: d13674b1d14c ("net/mlx5e: TC, map tc action cookie to a hw counter")
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index cc35cbc9934d1..d2e191ee07047 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -5305,8 +5305,10 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 	mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));
 
 	tc->action_stats_handle = mlx5e_tc_act_stats_create();
-	if (IS_ERR(tc->action_stats_handle))
+	if (IS_ERR(tc->action_stats_handle)) {
+		err = PTR_ERR(tc->action_stats_handle);
 		goto err_act_stats;
+	}
 
 	return 0;
 
@@ -5441,8 +5443,10 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
 	}
 
 	uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
-	if (IS_ERR(uplink_priv->action_stats_handle))
+	if (IS_ERR(uplink_priv->action_stats_handle)) {
+		err = PTR_ERR(uplink_priv->action_stats_handle);
 		goto err_action_counter;
+	}
 
 	return 0;
 

From b23bf10cca59b2955fa5c2b5e4f753962b4f88ca Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Tue, 21 Feb 2023 15:24:39 +0000
Subject: [PATCH 329/411] net/mlx5e: TC, fix cloned flow attribute

Currently the cloned flow attr resets the original tc action cookies
count.
Fix that by resetting the cloned flow attribute.

Fixes: cca7eac13856 ("net/mlx5e: TC, store tc action cookies per attr")
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index d2e191ee07047..6bfed633343ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3752,7 +3752,7 @@ mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
 	parse_attr->filter_dev = attr->parse_attr->filter_dev;
 	attr2->action = 0;
 	attr2->counter = NULL;
-	attr->tc_act_cookies_count = 0;
+	attr2->tc_act_cookies_count = 0;
 	attr2->flags = 0;
 	attr2->parse_attr = parse_attr;
 	attr2->dest_chain = 0;

From c7b7c64ab5821352db0b3fbaa92773e5a60bfaa7 Mon Sep 17 00:00:00 2001
From: Oz Shlomo <ozsh@nvidia.com>
Date: Wed, 22 Feb 2023 10:03:36 +0000
Subject: [PATCH 330/411] net/mlx5e: TC, Remove error message log print

The cited commit attempts to update the hw stats when dumping tc actions.
However, the driver may be called to update the stats of a police action
that may not be in hardware. In such cases the driver will fail to lookup
the police action object and will output an error message both to extack
and dmesg. The dmesg error is confusing as it may not indicate an actual
error.

Remove the dmesg error.

Fixes: 2b68d659a704 ("net/mlx5e: TC, support per action stats")
Signed-off-by: Oz Shlomo <ozsh@nvidia.com>
Reviewed-by: Paul Blakey <paulb@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
index c4378afdec09e..1bd1c94fb9776 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/police.c
@@ -178,7 +178,6 @@ tc_act_police_stats(struct mlx5e_priv *priv,
 	meter = mlx5e_tc_meter_get(priv->mdev, &params);
 	if (IS_ERR(meter)) {
 		NL_SET_ERR_MSG_MOD(fl_act->extack, "Failed to get flow meter");
-		mlx5_core_err(priv->mdev, "Failed to get flow meter %d\n", params.index);
 		return PTR_ERR(meter);
 	}
 

From 6acd352dfee558194643adbed7e849fe80fd1b93 Mon Sep 17 00:00:00 2001
From: Li zeming <zeming@nfschina.com>
Date: Sat, 18 Mar 2023 02:25:38 +0800
Subject: [PATCH 331/411] io_uring: rsrc: Optimize return value variable 'ret'

The initialization assignment of the variable ret is changed to 0, only
in 'goto fail;' Use the ret variable as the function return value.

Signed-off-by: Li zeming <zeming@nfschina.com>
Link: https://lore.kernel.org/r/20230317182538.3027-1-zeming@nfschina.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rsrc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 056f40946ff68..09a16d709cb55 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -410,7 +410,7 @@ __cold static int io_rsrc_data_alloc(struct io_ring_ctx *ctx,
 				     unsigned nr, struct io_rsrc_data **pdata)
 {
 	struct io_rsrc_data *data;
-	int ret = -ENOMEM;
+	int ret = 0;
 	unsigned i;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);

From a5bb73b3f5db1a4e91402ad132b59b13d2651ed9 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 14 Mar 2023 02:31:45 -0700
Subject: [PATCH 332/411] hwmon: (adm1266) Set `can_sleep` flag for GPIO chip

The adm1266 driver uses I2C bus access in its GPIO chip `set` and `get`
implementation. This means these functions can sleep and the GPIO chip
should set the `can_sleep` property to true.

This will ensure that a warning is printed when trying to set or get the
GPIO value from a context that potentially can't sleep.

Fixes: d98dfad35c38 ("hwmon: (pmbus/adm1266) Add support for GPIOs")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Link: https://lore.kernel.org/r/20230314093146.2443845-1-lars@metafoo.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/pmbus/adm1266.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/pmbus/adm1266.c b/drivers/hwmon/pmbus/adm1266.c
index ec5f932fc6f0f..1ac2b2f4c5705 100644
--- a/drivers/hwmon/pmbus/adm1266.c
+++ b/drivers/hwmon/pmbus/adm1266.c
@@ -301,6 +301,7 @@ static int adm1266_config_gpio(struct adm1266_data *data)
 	data->gc.label = name;
 	data->gc.parent = &data->client->dev;
 	data->gc.owner = THIS_MODULE;
+	data->gc.can_sleep = true;
 	data->gc.base = -1;
 	data->gc.names = data->gpio_names;
 	data->gc.ngpio = ARRAY_SIZE(data->gpio_names);

From ab00709310eedcd8dae0df1f66d332f9bc64c99e Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Tue, 14 Mar 2023 02:31:46 -0700
Subject: [PATCH 333/411] hwmon: (ltc2992) Set `can_sleep` flag for GPIO chip

The ltc2992 drivers uses a mutex and I2C bus access in its GPIO chip `set`
and `get` implementation. This means these functions can sleep and the GPIO
chip should set the `can_sleep` property to true.

This will ensure that a warning is printed when trying to set or get the
GPIO value from a context that potentially can't sleep.

Fixes: 9ca26df1ba25 ("hwmon: (ltc2992) Add support for GPIOs.")
Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Link: https://lore.kernel.org/r/20230314093146.2443845-2-lars@metafoo.de
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
---
 drivers/hwmon/ltc2992.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c
index 88514152d9306..69341de397cb9 100644
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -323,6 +323,7 @@ static int ltc2992_config_gpio(struct ltc2992_state *st)
 	st->gc.label = name;
 	st->gc.parent = &st->client->dev;
 	st->gc.owner = THIS_MODULE;
+	st->gc.can_sleep = true;
 	st->gc.base = -1;
 	st->gc.names = st->gpio_names;
 	st->gc.ngpio = ARRAY_SIZE(st->gpio_names);

From 7c10131803e45269ddc6c817f19ed649110f3cae Mon Sep 17 00:00:00 2001
From: Shawn Bohrer <sbohrer@cloudflare.com>
Date: Tue, 14 Mar 2023 10:33:51 -0500
Subject: [PATCH 334/411] veth: Fix use after free in XDP_REDIRECT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 718a18a0c8a6 ("veth: Rework veth_xdp_rcv_skb in order
to accept non-linear skb") introduced a bug where it tried to
use pskb_expand_head() if the headroom was less than
XDP_PACKET_HEADROOM.  This however uses kmalloc to expand the head,
which will later allow consume_skb() to free the skb while is it still
in use by AF_XDP.

Previously if the headroom was less than XDP_PACKET_HEADROOM we
continued on to allocate a new skb from pages so this restores that
behavior.

BUG: KASAN: use-after-free in __xsk_rcv+0x18d/0x2c0
Read of size 78 at addr ffff888976250154 by task napi/iconduit-g/148640

CPU: 5 PID: 148640 Comm: napi/iconduit-g Kdump: loaded Tainted: G           O       6.1.4-cloudflare-kasan-2023.1.2 #1
Hardware name: Quanta Computer Inc. QuantaPlex T41S-2U/S2S-MB, BIOS S2S_3B10.03 06/21/2018
Call Trace:
  <TASK>
  dump_stack_lvl+0x34/0x48
  print_report+0x170/0x473
  ? __xsk_rcv+0x18d/0x2c0
  kasan_report+0xad/0x130
  ? __xsk_rcv+0x18d/0x2c0
  kasan_check_range+0x149/0x1a0
  memcpy+0x20/0x60
  __xsk_rcv+0x18d/0x2c0
  __xsk_map_redirect+0x1f3/0x490
  ? veth_xdp_rcv_skb+0x89c/0x1ba0 [veth]
  xdp_do_redirect+0x5ca/0xd60
  veth_xdp_rcv_skb+0x935/0x1ba0 [veth]
  ? __netif_receive_skb_list_core+0x671/0x920
  ? veth_xdp+0x670/0x670 [veth]
  veth_xdp_rcv+0x304/0xa20 [veth]
  ? do_xdp_generic+0x150/0x150
  ? veth_xdp_rcv_one+0xde0/0xde0 [veth]
  ? _raw_spin_lock_bh+0xe0/0xe0
  ? newidle_balance+0x887/0xe30
  ? __perf_event_task_sched_in+0xdb/0x800
  veth_poll+0x139/0x571 [veth]
  ? veth_xdp_rcv+0xa20/0xa20 [veth]
  ? _raw_spin_unlock+0x39/0x70
  ? finish_task_switch.isra.0+0x17e/0x7d0
  ? __switch_to+0x5cf/0x1070
  ? __schedule+0x95b/0x2640
  ? io_schedule_timeout+0x160/0x160
  __napi_poll+0xa1/0x440
  napi_threaded_poll+0x3d1/0x460
  ? __napi_poll+0x440/0x440
  ? __kthread_parkme+0xc6/0x1f0
  ? __napi_poll+0x440/0x440
  kthread+0x2a2/0x340
  ? kthread_complete_and_exit+0x20/0x20
  ret_from_fork+0x22/0x30
  </TASK>

Freed by task 148640:
  kasan_save_stack+0x23/0x50
  kasan_set_track+0x21/0x30
  kasan_save_free_info+0x2a/0x40
  ____kasan_slab_free+0x169/0x1d0
  slab_free_freelist_hook+0xd2/0x190
  __kmem_cache_free+0x1a1/0x2f0
  skb_release_data+0x449/0x600
  consume_skb+0x9f/0x1c0
  veth_xdp_rcv_skb+0x89c/0x1ba0 [veth]
  veth_xdp_rcv+0x304/0xa20 [veth]
  veth_poll+0x139/0x571 [veth]
  __napi_poll+0xa1/0x440
  napi_threaded_poll+0x3d1/0x460
  kthread+0x2a2/0x340
  ret_from_fork+0x22/0x30

The buggy address belongs to the object at ffff888976250000
  which belongs to the cache kmalloc-2k of size 2048
The buggy address is located 340 bytes inside of
  2048-byte region [ffff888976250000, ffff888976250800)

The buggy address belongs to the physical page:
page:00000000ae18262a refcount:2 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x976250
head:00000000ae18262a order:3 compound_mapcount:0 compound_pincount:0
flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff)
raw: 002ffff800010200 0000000000000000 dead000000000122 ffff88810004cf00
raw: 0000000000000000 0000000080080008 00000002ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
  ffff888976250000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  ffff888976250080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ffff888976250100: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                  ^
  ffff888976250180: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
  ffff888976250200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb

Fixes: 718a18a0c8a6 ("veth: Rework veth_xdp_rcv_skb in order to accept non-linear skb")
Signed-off-by: Shawn Bohrer <sbohrer@cloudflare.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@kernel.org>
Link: https://lore.kernel.org/r/20230314153351.2201328-1-sbohrer@cloudflare.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/veth.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 4da74ac27f9a2..a30a66cace143 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -708,7 +708,8 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 	u32 frame_sz;
 
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
-	    skb_shinfo(skb)->nr_frags) {
+	    skb_shinfo(skb)->nr_frags ||
+	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
 		u32 size, len, max_head_size, off;
 		struct sk_buff *nskb;
 		struct page *page;
@@ -773,9 +774,6 @@ static int veth_convert_skb_to_xdp_buff(struct veth_rq *rq,
 
 		consume_skb(skb);
 		skb = nskb;
-	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
-		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
-		goto drop;
 	}
 
 	/* SKB "head" area always have tailroom for skb_shared_info */

From cd356010ce4c69ac7e1a40586112df24d22c6a4b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 14 Mar 2023 17:30:25 +0200
Subject: [PATCH 335/411] net: phy: mscc: fix deadlock in
 phy_ethtool_{get,set}_wol()

Since the blamed commit, phy_ethtool_get_wol() and phy_ethtool_set_wol()
acquire phydev->lock, but the mscc phy driver implementations,
vsc85xx_wol_get() and vsc85xx_wol_set(), acquire the same lock as well,
resulting in a deadlock.

$ ip link set swp3 down
============================================
WARNING: possible recursive locking detected
mscc_felix 0000:00:00.5 swp3: Link is Down
--------------------------------------------
ip/375 is trying to acquire lock:
ffff3d7e82e987a8 (&dev->lock){+.+.}-{4:4}, at: vsc85xx_wol_get+0x2c/0xf4

but task is already holding lock:
ffff3d7e82e987a8 (&dev->lock){+.+.}-{4:4}, at: phy_ethtool_get_wol+0x3c/0x6c

other info that might help us debug this:
 Possible unsafe locking scenario:

       CPU0
       ----
  lock(&dev->lock);
  lock(&dev->lock);

 *** DEADLOCK ***

 May be due to missing lock nesting notation

2 locks held by ip/375:
 #0: ffffd43b2a955788 (rtnl_mutex){+.+.}-{4:4}, at: rtnetlink_rcv_msg+0x144/0x58c
 #1: ffff3d7e82e987a8 (&dev->lock){+.+.}-{4:4}, at: phy_ethtool_get_wol+0x3c/0x6c

Call trace:
 __mutex_lock+0x98/0x454
 mutex_lock_nested+0x2c/0x38
 vsc85xx_wol_get+0x2c/0xf4
 phy_ethtool_get_wol+0x50/0x6c
 phy_suspend+0x84/0xcc
 phy_state_machine+0x1b8/0x27c
 phy_stop+0x70/0x154
 phylink_stop+0x34/0xc0
 dsa_port_disable_rt+0x2c/0xa4
 dsa_slave_close+0x38/0xec
 __dev_close_many+0xc8/0x16c
 __dev_change_flags+0xdc/0x218
 dev_change_flags+0x24/0x6c
 do_setlink+0x234/0xea4
 __rtnl_newlink+0x46c/0x878
 rtnl_newlink+0x50/0x7c
 rtnetlink_rcv_msg+0x16c/0x58c

Removing the mutex_lock(&phydev->lock) calls from the driver restores
the functionality.

Fixes: 2f987d486610 ("net: phy: Add locks to ethtool functions")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20230314153025.2372970-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/mscc/mscc_main.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 8a13b1ad9a330..62bf99e45af16 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -280,12 +280,9 @@ static int vsc85xx_wol_set(struct phy_device *phydev,
 	u16 pwd[3] = {0, 0, 0};
 	struct ethtool_wolinfo *wol_conf = wol;
 
-	mutex_lock(&phydev->lock);
 	rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2);
-	if (rc < 0) {
-		rc = phy_restore_page(phydev, rc, rc);
-		goto out_unlock;
-	}
+	if (rc < 0)
+		return phy_restore_page(phydev, rc, rc);
 
 	if (wol->wolopts & WAKE_MAGIC) {
 		/* Store the device address for the magic packet */
@@ -323,7 +320,7 @@ static int vsc85xx_wol_set(struct phy_device *phydev,
 
 	rc = phy_restore_page(phydev, rc, rc > 0 ? 0 : rc);
 	if (rc < 0)
-		goto out_unlock;
+		return rc;
 
 	if (wol->wolopts & WAKE_MAGIC) {
 		/* Enable the WOL interrupt */
@@ -331,22 +328,19 @@ static int vsc85xx_wol_set(struct phy_device *phydev,
 		reg_val |= MII_VSC85XX_INT_MASK_WOL;
 		rc = phy_write(phydev, MII_VSC85XX_INT_MASK, reg_val);
 		if (rc)
-			goto out_unlock;
+			return rc;
 	} else {
 		/* Disable the WOL interrupt */
 		reg_val = phy_read(phydev, MII_VSC85XX_INT_MASK);
 		reg_val &= (~MII_VSC85XX_INT_MASK_WOL);
 		rc = phy_write(phydev, MII_VSC85XX_INT_MASK, reg_val);
 		if (rc)
-			goto out_unlock;
+			return rc;
 	}
 	/* Clear WOL iterrupt status */
 	reg_val = phy_read(phydev, MII_VSC85XX_INT_STATUS);
 
-out_unlock:
-	mutex_unlock(&phydev->lock);
-
-	return rc;
+	return 0;
 }
 
 static void vsc85xx_wol_get(struct phy_device *phydev,
@@ -358,10 +352,9 @@ static void vsc85xx_wol_get(struct phy_device *phydev,
 	u16 pwd[3] = {0, 0, 0};
 	struct ethtool_wolinfo *wol_conf = wol;
 
-	mutex_lock(&phydev->lock);
 	rc = phy_select_page(phydev, MSCC_PHY_PAGE_EXTENDED_2);
 	if (rc < 0)
-		goto out_unlock;
+		goto out_restore_page;
 
 	reg_val = __phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL);
 	if (reg_val & SECURE_ON_ENABLE)
@@ -377,9 +370,8 @@ static void vsc85xx_wol_get(struct phy_device *phydev,
 		}
 	}
 
-out_unlock:
+out_restore_page:
 	phy_restore_page(phydev, rc, rc > 0 ? 0 : rc);
-	mutex_unlock(&phydev->lock);
 }
 
 #if IS_ENABLED(CONFIG_OF_MDIO)

From 5bc9e2d43f86105a95f86fa096fb4e517bb0ce73 Mon Sep 17 00:00:00 2001
From: Ondrej Zary <linux@zary.sk>
Date: Tue, 14 Mar 2023 23:58:05 +0100
Subject: [PATCH 336/411] ata: pata_parport: fix memory leaks

When ida_alloc() fails, "pi" is not freed although the misleading
comment says otherwise.
Move the ida_alloc() call up so we really don't have to free "pi" in
case of ida_alloc() failure.

Also move ida_free() call from pi_remove_one() to
pata_parport_dev_release(). It was dereferencing already freed dev
pointer.

Testing revealed leak even in non-failure case which was tracked down
to missing put_device() call after bus_find_device_by_name(). As a
result, pata_parport_dev_release() was never called.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Link: https://lore.kernel.org/r/202303111822.IHNchbkp-lkp@intel.com/
Signed-off-by: Ondrej Zary <linux@zary.sk>
Signed-off-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
---
 drivers/ata/pata_parport/pata_parport.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/ata/pata_parport/pata_parport.c b/drivers/ata/pata_parport/pata_parport.c
index 31c9677a45e33..c1576d943b436 100644
--- a/drivers/ata/pata_parport/pata_parport.c
+++ b/drivers/ata/pata_parport/pata_parport.c
@@ -381,6 +381,7 @@ static void pata_parport_dev_release(struct device *dev)
 {
 	struct pi_adapter *pi = container_of(dev, struct pi_adapter, dev);
 
+	ida_free(&pata_parport_bus_dev_ids, dev->id);
 	kfree(pi);
 }
 
@@ -433,23 +434,27 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
 	if (bus_for_each_dev(&pata_parport_bus_type, NULL, &match, pi_find_dev))
 		return NULL;
 
+	id = ida_alloc(&pata_parport_bus_dev_ids, GFP_KERNEL);
+	if (id < 0)
+		return NULL;
+
 	pi = kzalloc(sizeof(struct pi_adapter), GFP_KERNEL);
-	if (!pi)
+	if (!pi) {
+		ida_free(&pata_parport_bus_dev_ids, id);
 		return NULL;
+	}
 
 	/* set up pi->dev before pi_probe_unit() so it can use dev_printk() */
 	pi->dev.parent = &pata_parport_bus;
 	pi->dev.bus = &pata_parport_bus_type;
 	pi->dev.driver = &pr->driver;
 	pi->dev.release = pata_parport_dev_release;
-	id = ida_alloc(&pata_parport_bus_dev_ids, GFP_KERNEL);
-	if (id < 0)
-		return NULL; /* pata_parport_dev_release will do kfree(pi) */
 	pi->dev.id = id;
 	dev_set_name(&pi->dev, "pata_parport.%u", pi->dev.id);
 	if (device_register(&pi->dev)) {
 		put_device(&pi->dev);
-		goto out_ida_free;
+		/* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */
+		return NULL;
 	}
 
 	pi->proto = pr;
@@ -464,8 +469,7 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
 	pi->port = parport->base;
 
 	par_cb.private = pi;
-	pi->pardev = parport_register_dev_model(parport, DRV_NAME, &par_cb,
-						pi->dev.id);
+	pi->pardev = parport_register_dev_model(parport, DRV_NAME, &par_cb, id);
 	if (!pi->pardev)
 		goto out_module_put;
 
@@ -501,8 +505,7 @@ static struct pi_adapter *pi_init_one(struct parport *parport,
 	module_put(pi->proto->owner);
 out_unreg_dev:
 	device_unregister(&pi->dev);
-out_ida_free:
-	ida_free(&pata_parport_bus_dev_ids, pi->dev.id);
+	/* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */
 	return NULL;
 }
 
@@ -627,8 +630,7 @@ static void pi_remove_one(struct device *dev)
 	pi_disconnect(pi);
 	pi_release(pi);
 	device_unregister(dev);
-	ida_free(&pata_parport_bus_dev_ids, dev->id);
-	/* pata_parport_dev_release will do kfree(pi) */
+	/* pata_parport_dev_release will do ida_free(dev->id) and kfree(pi) */
 }
 
 static ssize_t delete_device_store(struct bus_type *bus, const char *buf,
@@ -644,6 +646,7 @@ static ssize_t delete_device_store(struct bus_type *bus, const char *buf,
 	}
 
 	pi_remove_one(dev);
+	put_device(dev);
 	mutex_unlock(&pi_mutex);
 
 	return count;

From 7ad2c39860dc0ca01d2152232224d2124e160fe3 Mon Sep 17 00:00:00 2001
From: Yu Zhe <yuzhe@nfschina.com>
Date: Thu, 16 Mar 2023 16:39:54 +0800
Subject: [PATCH 337/411] xen: remove unnecessary (void*) conversions

Pointer variables of void * type do not require type cast.

Signed-off-by: Yu Zhe <yuzhe@nfschina.com>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20230316083954.4223-1-yuzhe@nfschina.com
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/xen/xenfs/xensyms.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/xen/xenfs/xensyms.c b/drivers/xen/xenfs/xensyms.c
index c6c73a33c44d5..b799bc759c15f 100644
--- a/drivers/xen/xenfs/xensyms.c
+++ b/drivers/xen/xenfs/xensyms.c
@@ -64,7 +64,7 @@ static int xensyms_next_sym(struct xensyms *xs)
 
 static void *xensyms_start(struct seq_file *m, loff_t *pos)
 {
-	struct xensyms *xs = (struct xensyms *)m->private;
+	struct xensyms *xs = m->private;
 
 	xs->op.u.symdata.symnum = *pos;
 
@@ -76,7 +76,7 @@ static void *xensyms_start(struct seq_file *m, loff_t *pos)
 
 static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	struct xensyms *xs = (struct xensyms *)m->private;
+	struct xensyms *xs = m->private;
 
 	xs->op.u.symdata.symnum = ++(*pos);
 
@@ -88,7 +88,7 @@ static void *xensyms_next(struct seq_file *m, void *p, loff_t *pos)
 
 static int xensyms_show(struct seq_file *m, void *p)
 {
-	struct xensyms *xs = (struct xensyms *)m->private;
+	struct xensyms *xs = m->private;
 	struct xenpf_symdata *symdata = &xs->op.u.symdata;
 
 	seq_printf(m, "%016llx %c %s\n", symdata->address,
@@ -120,7 +120,7 @@ static int xensyms_open(struct inode *inode, struct file *file)
 		return ret;
 
 	m = file->private_data;
-	xs = (struct xensyms *)m->private;
+	xs = m->private;
 
 	xs->namelen = XEN_KSYM_NAME_LEN + 1;
 	xs->name = kzalloc(xs->namelen, GFP_KERNEL);
@@ -138,7 +138,7 @@ static int xensyms_open(struct inode *inode, struct file *file)
 static int xensyms_release(struct inode *inode, struct file *file)
 {
 	struct seq_file *m = file->private_data;
-	struct xensyms *xs = (struct xensyms *)m->private;
+	struct xensyms *xs = m->private;
 
 	kfree(xs->name);
 	return seq_release_private(inode, file);

From cbebd68f59f03633469f3ecf9bea99cd6cce3854 Mon Sep 17 00:00:00 2001
From: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Date: Mon, 6 Mar 2023 08:06:56 -0800
Subject: [PATCH 338/411] x86/mm: Fix use of uninitialized buffer in
 sme_enable()

cmdline_find_option() may fail before doing any initialization of
the buffer array. This may lead to unpredictable results when the same
buffer is used later in calls to strncmp() function.  Fix the issue by
returning early if cmdline_find_option() returns an error.

Found by Linux Verification Center (linuxtesting.org) with static
analysis tool SVACE.

Fixes: aca20d546214 ("x86/mm: Add support to make use of Secure Memory Encryption")
Signed-off-by: Nikita Zhandarovich <n.zhandarovich@fintech.ru>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Cc: <stable@kernel.org>
Link: https://lore.kernel.org/r/20230306160656.14844-1-n.zhandarovich@fintech.ru
---
 arch/x86/mm/mem_encrypt_identity.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 88cccd65029db..c6efcf559d882 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -600,7 +600,8 @@ void __init sme_enable(struct boot_params *bp)
 	cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
 				     ((u64)bp->ext_cmd_line_ptr << 32));
 
-	cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
+	if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0)
+		return;
 
 	if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
 		sme_me_mask = me_mask;

From 5da28edd7bd5518f97175ecea77615bb729a7a28 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 16 Mar 2023 12:11:42 +0000
Subject: [PATCH 339/411] io_uring/msg_ring: let target know allocated index

msg_ring requests transferring files support auto index selection via
IORING_FILE_INDEX_ALLOC, however they don't return the selected index
to the target ring and there is no other good way for the userspace to
know where is the receieved file.

Return the index for allocated slots and 0 otherwise, which is
consistent with other fixed file installing requests.

Cc: stable@vger.kernel.org # v6.0+
Fixes: e6130eba8a848 ("io_uring: add support for passing fixed file descriptors")
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://github.com/axboe/liburing/issues/809
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/msg_ring.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 8803c0979e2a9..85fd7ce5f05b8 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -202,7 +202,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
 	 * completes with -EOVERFLOW, then the sender must ensure that a
 	 * later IORING_OP_MSG_RING delivers the message.
 	 */
-	if (!io_post_aux_cqe(target_ctx, msg->user_data, msg->len, 0))
+	if (!io_post_aux_cqe(target_ctx, msg->user_data, ret, 0))
 		ret = -EOVERFLOW;
 out_unlock:
 	io_double_unlock_ctx(target_ctx);
@@ -229,6 +229,8 @@ static int io_msg_send_fd(struct io_kiocb *req, unsigned int issue_flags)
 	struct io_ring_ctx *ctx = req->ctx;
 	struct file *src_file = msg->src_file;
 
+	if (msg->len)
+		return -EINVAL;
 	if (target_ctx == ctx)
 		return -EINVAL;
 	if (target_ctx->flags & IORING_SETUP_R_DISABLED)

From 81f59a26f3d59c6aeb137b7b5546848779222c65 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 16 Mar 2023 00:50:17 +0900
Subject: [PATCH 340/411] kbuild: rpm-pkg: move source components to
 rpmbuild/SOURCES

Prepare to add more files to the source RPM.

Also, fix the build error when KCONFIG_CONFIG is set:
  error: Bad file: ./.config: No such file or directory

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 .gitignore               |  1 +
 Makefile                 |  2 +-
 scripts/Makefile.package |  2 +-
 scripts/package/mkspec   | 12 ++++++++----
 4 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 8fe465f251c03..70ec6037fa7ac 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,7 @@ modules.order
 # RPM spec file (make rpm-pkg)
 #
 /*.spec
+/rpmbuild/
 
 #
 # Debian directory (make deb-pkg)
diff --git a/Makefile b/Makefile
index d0a0ba8e5a2e6..dfff9f8d28e54 100644
--- a/Makefile
+++ b/Makefile
@@ -1605,7 +1605,7 @@ MRPROPER_FILES += include/config include/generated          \
 		  certs/signing_key.pem \
 		  certs/x509.genkey \
 		  vmlinux-gdb.py \
-		  *.spec \
+		  *.spec rpmbuild \
 		  rust/libmacros.so
 
 # clean - Delete most, but leave enough to build external modules
diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index b941e6341b364..a0355bdeebff4 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -89,7 +89,7 @@ PHONY += srcrpm-pkg
 srcrpm-pkg: linux.tar.gz
 	$(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
 	+rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -bs kernel.spec \
-	--define='_smp_mflags %{nil}' --define='_sourcedir .' --define='_srcrpmdir .'
+	--define='_smp_mflags %{nil}' --define='_sourcedir rpmbuild/SOURCES' --define='_srcrpmdir .'
 
 # binrpm-pkg
 # ---------------------------------------------------------------------------
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 3c550960dd395..5f007137f5a02 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -15,15 +15,19 @@ if [ "$1" = prebuilt ]; then
 	MAKE="$MAKE -f $srctree/Makefile"
 else
 	S=
+
+	mkdir -p rpmbuild/SOURCES
+	cp linux.tar.gz rpmbuild/SOURCES
+	cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config
 fi
 
-if grep -q CONFIG_MODULES=y .config; then
+if grep -q CONFIG_MODULES=y include/config/auto.conf; then
 	M=
 else
 	M=DEL
 fi
 
-if grep -q CONFIG_DRM=y .config; then
+if grep -q CONFIG_DRM=y include/config/auto.conf; then
 	PROVIDES=kernel-drm
 fi
 
@@ -48,7 +52,7 @@ sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
 	Vendor: The Linux Community
 	URL: https://www.kernel.org
 $S	Source0: linux.tar.gz
-$S	Source1: .config
+$S	Source1: config
 	Provides: $PROVIDES
 $S	BuildRequires: bc binutils bison dwarves
 $S	BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
@@ -85,7 +89,7 @@ $S$M	against the $__KERNELRELEASE kernel package.
 $S$M
 $S	%prep
 $S	%setup -q -n linux
-$S	cp %{SOURCE1} .
+$S	cp %{SOURCE1} .config
 $S
 $S	%build
 $S	$MAKE %{?_smp_mflags} KERNELRELEASE=$KERNELRELEASE KBUILD_BUILD_VERSION=%{release}

From 05e96e96a315fa49faca4da2aedd1761a218b616 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 16 Mar 2023 00:50:18 +0900
Subject: [PATCH 341/411] kbuild: use git-archive for source package creation

Commit 5c3d1d0abb12 ("kbuild: add a tool to list files ignored by git")
added a new tool, scripts/list-gitignored. My intention was to create
source packages without cleaning the source tree, without relying on git.

Linus strongly objected to it, and suggested using 'git archive' instead.
[1] [2] [3]

This commit goes in that direction - Remove scripts/list-gitignored.c
and rewrites Makefiles and scripts to use 'git archive' for building
Debian and RPM source packages. It also makes 'make perf-tar*-src-pkg'
use 'git archive' again.

Going forward, building source packages is only possible in a git-managed
tree. Building binary packages does not require git.

[1]: https://lore.kernel.org/lkml/CAHk-=wi49sMaC7vY1yMagk7eqLK=1jHeHQ=yZ_k45P=xBccnmA@mail.gmail.com/
[2]: https://lore.kernel.org/lkml/CAHk-=wh5AixGsLeT0qH2oZHKq0FLUTbyTw4qY921L=PwYgoGVw@mail.gmail.com/
[3]: https://lore.kernel.org/lkml/CAHk-=wgM-W6Fu==EoAVCabxyX8eYBz9kNC88-tm9ExRQwA79UQ@mail.gmail.com/

Fixes: 5c3d1d0abb12 ("kbuild: add a tool to list files ignored by git")
Fixes: e0ca16749ac3 ("kbuild: make perf-tar*-src-pkg work without relying on git")
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Makefile                       |    7 +-
 scripts/.gitignore             |    1 -
 scripts/Makefile               |    2 +-
 scripts/Makefile.package       |  146 ++---
 scripts/check-git              |   14 +
 scripts/list-gitignored.c      | 1057 --------------------------------
 scripts/package/gen-diff-patch |   44 ++
 scripts/package/mkdebian       |   10 +-
 scripts/package/mkspec         |   10 +
 scripts/setlocalversion        |   45 +-
 10 files changed, 181 insertions(+), 1155 deletions(-)
 create mode 100755 scripts/check-git
 delete mode 100644 scripts/list-gitignored.c
 create mode 100755 scripts/package/gen-diff-patch

diff --git a/Makefile b/Makefile
index dfff9f8d28e54..0de1288dc4514 100644
--- a/Makefile
+++ b/Makefile
@@ -274,8 +274,7 @@ no-dot-config-targets := $(clean-targets) \
 			 cscope gtags TAGS tags help% %docs check% coccicheck \
 			 $(version_h) headers headers_% archheaders archscripts \
 			 %asm-generic kernelversion %src-pkg dt_binding_check \
-			 outputmakefile rustavailable rustfmt rustfmtcheck \
-			 scripts_package
+			 outputmakefile rustavailable rustfmt rustfmtcheck
 # Installation targets should not require compiler. Unfortunately, vdso_install
 # is an exception where build artifacts may be updated. This must be fixed.
 no-compiler-targets := $(no-dot-config-targets) install dtbs_install \
@@ -1656,10 +1655,6 @@ distclean: mrproper
 %pkg: include/config/kernel.release FORCE
 	$(Q)$(MAKE) -f $(srctree)/scripts/Makefile.package $@
 
-PHONY += scripts_package
-scripts_package: scripts_basic
-	$(Q)$(MAKE) $(build)=scripts scripts/list-gitignored
-
 # Brief documentation of the typical targets used
 # ---------------------------------------------------------------------------
 
diff --git a/scripts/.gitignore b/scripts/.gitignore
index feb43045d1b1e..6e9ce6720a05a 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -3,7 +3,6 @@
 /generate_rust_target
 /insert-sys-cert
 /kallsyms
-/list-gitignored
 /module.lds
 /recordmcount
 /sign-file
diff --git a/scripts/Makefile b/scripts/Makefile
index e8917975905ca..32b6ba7227284 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -38,7 +38,7 @@ HOSTCFLAGS_sorttable.o += -DMCOUNT_SORT_ENABLED
 endif
 
 # The following programs are only built on demand
-hostprogs += list-gitignored unifdef
+hostprogs += unifdef
 
 # The module linker script is preprocessed on demand
 targets += module.lds
diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index a0355bdeebff4..61f72eb8d9be7 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -2,6 +2,7 @@
 # Makefile for the different targets used to generate full packages of a kernel
 
 include $(srctree)/scripts/Kbuild.include
+include $(srctree)/scripts/Makefile.lib
 
 KERNELPATH := kernel-$(subst -,_,$(KERNELRELEASE))
 KBUILD_PKG_ROOTCMD ?="fakeroot -u"
@@ -26,54 +27,46 @@ fi ; \
 tar -I $(KGZIP) -c $(RCS_TAR_IGNORE) -f $(2).tar.gz \
 	--transform 's:^:$(2)/:S' $(TAR_CONTENT) $(3)
 
-# .tmp_filelist .tmp_filelist_exclude
+# tarball compression
 # ---------------------------------------------------------------------------
 
-scripts/list-gitignored: FORCE
-	$(Q)$(MAKE) -f $(srctree)/Makefile scripts_package
+%.tar.gz: %.tar
+	$(call cmd,gzip)
 
-# 1f5d3a6b6532e25a5cdf1f311956b2b03d343a48 removed '*.rej' from .gitignore,
-# but it is definitely a generated file.
-filechk_filelist = \
-	$< --exclude='*.rej' --output=$@_exclude --prefix=./ --rootdir=$(srctree) --stat=-
+%.tar.bz2: %.tar
+	$(call cmd,bzip2)
 
-.tmp_filelist: scripts/list-gitignored FORCE
-	$(call filechk,filelist)
+%.tar.xz: %.tar
+	$(call cmd,xzmisc)
 
-# tarball
-# ---------------------------------------------------------------------------
-
-quiet_cmd_tar = TAR     $@
-      cmd_tar = tar -c -f $@ $(tar-compress-opt) $(tar-exclude-opt) \
-                --owner=0 --group=0 --sort=name \
-                --transform 's:^\.:$*:S' -C $(tar-rootdir) .
-
-tar-rootdir := $(srctree)
+%.tar.zst: %.tar
+	$(call cmd,zstd)
 
-%.tar:
-	$(call cmd,tar)
-
-%.tar.gz: private tar-compress-opt := -I $(KGZIP)
-%.tar.gz:
-	$(call cmd,tar)
+# Git
+# ---------------------------------------------------------------------------
 
-%.tar.bz2: private tar-compress-opt := -I $(KBZIP2)
-%.tar.bz2:
-	$(call cmd,tar)
+filechk_HEAD = git -C $(srctree) rev-parse --verify HEAD 2>/dev/null
 
-%.tar.xz: private tar-compress-opt := -I $(XZ)
-%.tar.xz:
-	$(call cmd,tar)
+.tmp_HEAD: check-git FORCE
+	$(call filechk,HEAD)
 
-%.tar.zst: private tar-compress-opt := -I $(ZSTD)
-%.tar.zst:
-	$(call cmd,tar)
+PHONY += check-git
+check-git:
+	@if ! $(srctree)/scripts/check-git; then \
+		echo >&2 "error: creating source package requires git repository"; \
+		false; \
+	fi
 
 # Linux source tarball
 # ---------------------------------------------------------------------------
 
-linux.tar.gz: tar-exclude-opt = --exclude=./$@ --exclude-from=$<_exclude
-linux.tar.gz: .tmp_filelist
+quiet_cmd_archive_linux = ARCHIVE $@
+      cmd_archive_linux = \
+	git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ $$(cat $<)
+
+targets += linux.tar
+linux.tar: .tmp_HEAD FORCE
+	$(call if_changed,archive_linux)
 
 # rpm-pkg
 # ---------------------------------------------------------------------------
@@ -148,74 +141,62 @@ snap-pkg:
 # dir-pkg tar*-pkg - tarball targets
 # ---------------------------------------------------------------------------
 
-tar-pkg-tarball = linux-$(KERNELRELEASE)-$(ARCH).$(1)
-tar-pkg-phony = $(subst .,,$(1))-pkg
-
 tar-install: FORCE
 	$(Q)$(MAKE) -f $(srctree)/Makefile
 	+$(Q)$(srctree)/scripts/package/buildtar $@
 
+quiet_cmd_tar = TAR     $@
+      cmd_tar = cd $<; tar cf ../$@ --owner=root --group=root --sort=name *
+
+linux-$(KERNELRELEASE)-$(ARCH).tar: tar-install
+	$(call cmd,tar)
+
 PHONY += dir-pkg
 dir-pkg: tar-install
 	@echo "Kernel tree successfully created in $<"
 
-define tar-pkg-rule
-PHONY += $(tar-pkg-phony)
-$(tar-pkg-phony): $(tar-pkg-tarball)
+PHONY += tar-pkg
+tar-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar
 	@:
 
-$(tar-pkg-tarball): private tar-rootdir := tar-install
-$(tar-pkg-tarball): tar-install
-endef
-
-$(foreach x, tar tar.gz tar.bz2 tar.xz tar.zst, $(eval $(call tar-pkg-rule,$(x))))
+tar%-pkg: linux-$(KERNELRELEASE)-$(ARCH).tar.% FORCE
+	@:
 
 # perf-tar*-src-pkg - generate a source tarball with perf source
 # ---------------------------------------------------------------------------
 
-perf-tar-src-pkg-tarball = perf-$(KERNELVERSION).$(1)
-perf-tar-src-pkg-phony   = perf-$(subst .,,$(1))-src-pkg
-
-quiet_cmd_stage_perf_src = STAGE   $@
-      cmd_stage_perf_src = \
-	rm -rf $@; \
-	mkdir -p $@; \
-	tar -c -f - --exclude-from=$<_exclude -C $(srctree) --files-from=$(srctree)/tools/perf/MANIFEST | \
-	tar -x -f - -C $@
-
-.tmp_perf: .tmp_filelist
-	$(call cmd,stage_perf_src)
-
-filechk_perf_head = \
-	if test -z "$(git -C $(srctree) rev-parse --show-cdup 2>/dev/null)" && \
-	       head=$$(git -C $(srctree) rev-parse --verify HEAD 2>/dev/null); then \
-		echo $$head; \
-	else \
-		echo "not a git tree"; \
-	fi
+.tmp_perf:
+	$(Q)mkdir .tmp_perf
 
-.tmp_perf/HEAD: .tmp_perf FORCE
-	$(call filechk,perf_head)
+.tmp_perf/HEAD: .tmp_HEAD | .tmp_perf
+	$(call cmd,copy)
 
 quiet_cmd_perf_version_file = GEN     $@
       cmd_perf_version_file = cd $(srctree)/tools/perf; util/PERF-VERSION-GEN $(dir $(abspath $@))
 
-# PERF-VERSION-FILE and HEAD are independent, but this avoids updating the
+# PERF-VERSION-FILE and .tmp_HEAD are independent, but this avoids updating the
 # timestamp of PERF-VERSION-FILE.
 # The best is to fix tools/perf/util/PERF-VERSION-GEN.
-.tmp_perf/PERF-VERSION-FILE: .tmp_perf/HEAD $(srctree)/tools/perf/util/PERF-VERSION-GEN
+.tmp_perf/PERF-VERSION-FILE: .tmp_HEAD $(srctree)/tools/perf/util/PERF-VERSION-GEN | .tmp_perf
 	$(call cmd,perf_version_file)
 
-define perf-tar-src-pkg-rule
-PHONY += $(perf-tar-src-pkg-phony)
-$(perf-tar-src-pkg-phony): $(perf-tar-src-pkg-tarball)
-	@:
+quiet_cmd_archive_perf = ARCHIVE $@
+      cmd_archive_perf = \
+	git -C $(srctree) archive --output=$$(realpath $@) --prefix=$(basename $@)/ \
+	--add-file=$$(realpath $(word 2, $^)) \
+	--add-file=$$(realpath $(word 3, $^)) \
+	$$(cat $(word 2, $^))^{tree} $$(cat $<)
 
-$(perf-tar-src-pkg-tarball): private tar-rootdir := .tmp_perf
-$(perf-tar-src-pkg-tarball): .tmp_filelist .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE
-endef
+targets += perf-$(KERNELVERSION).tar
+perf-$(KERNELVERSION).tar: tools/perf/MANIFEST .tmp_perf/HEAD .tmp_perf/PERF-VERSION-FILE FORCE
+	$(call if_changed,archive_perf)
+
+PHONY += perf-tar-src-pkg
+perf-tar-src-pkg: perf-$(KERNELVERSION).tar
+	@:
 
-$(foreach x, tar tar.gz tar.bz2 tar.xz tar.zst, $(eval $(call perf-tar-src-pkg-rule,$(x))))
+perf-tar%-src-pkg: perf-$(KERNELVERSION).tar.% FORCE
+	@:
 
 # Help text displayed when executing 'make help'
 # ---------------------------------------------------------------------------
@@ -243,4 +224,13 @@ help:
 PHONY += FORCE
 FORCE:
 
+# Read all saved command lines and dependencies for the $(targets) we
+# may be building above, using $(if_changed{,_dep}). As an
+# optimization, we don't need to read them if the target does not
+# exist, we will rebuild anyway in that case.
+
+existing-targets := $(wildcard $(sort $(targets)))
+
+-include $(foreach f,$(existing-targets),$(dir $(f)).$(notdir $(f)).cmd)
+
 .PHONY: $(PHONY)
diff --git a/scripts/check-git b/scripts/check-git
new file mode 100755
index 0000000000000..2ca6c5df10dd7
--- /dev/null
+++ b/scripts/check-git
@@ -0,0 +1,14 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# succeed if we are in a git repository
+
+srctree="$(dirname $0)/.."
+
+if ! git -C "${srctree}" rev-parse --verify HEAD >/dev/null 2>/dev/null; then
+	exit 1
+fi
+
+if ! test -z $(git -C "${srctree}" rev-parse --show-cdup 2>/dev/null); then
+	exit 1
+fi
diff --git a/scripts/list-gitignored.c b/scripts/list-gitignored.c
deleted file mode 100644
index f9941f8dcd2b3..0000000000000
--- a/scripts/list-gitignored.c
+++ /dev/null
@@ -1,1057 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-//
-// Traverse the source tree, parsing all .gitignore files, and print file paths
-// that are ignored by git.
-// The output is suitable to the --exclude-from option of tar.
-// This is useful until the --exclude-vcs-ignores option gets working correctly.
-//
-// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
-//                      (a lot of code imported from GIT)
-
-#include <assert.h>
-#include <dirent.h>
-#include <errno.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <stdarg.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
-//
-//---------------------------(IMPORT FROM GIT BEGIN)---------------------------
-
-// Copied from environment.c
-
-static bool ignore_case;
-
-// Copied from git-compat-util.h
-
-/* Sane ctype - no locale, and works with signed chars */
-#undef isascii
-#undef isspace
-#undef isdigit
-#undef isalpha
-#undef isalnum
-#undef isprint
-#undef islower
-#undef isupper
-#undef tolower
-#undef toupper
-#undef iscntrl
-#undef ispunct
-#undef isxdigit
-
-static const unsigned char sane_ctype[256];
-#define GIT_SPACE 0x01
-#define GIT_DIGIT 0x02
-#define GIT_ALPHA 0x04
-#define GIT_GLOB_SPECIAL 0x08
-#define GIT_REGEX_SPECIAL 0x10
-#define GIT_PATHSPEC_MAGIC 0x20
-#define GIT_CNTRL 0x40
-#define GIT_PUNCT 0x80
-#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
-#define isascii(x) (((x) & ~0x7f) == 0)
-#define isspace(x) sane_istest(x,GIT_SPACE)
-#define isdigit(x) sane_istest(x,GIT_DIGIT)
-#define isalpha(x) sane_istest(x,GIT_ALPHA)
-#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
-#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
-#define islower(x) sane_iscase(x, 1)
-#define isupper(x) sane_iscase(x, 0)
-#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
-#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
-#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
-		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
-#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
-#define tolower(x) sane_case((unsigned char)(x), 0x20)
-#define toupper(x) sane_case((unsigned char)(x), 0)
-
-static inline int sane_case(int x, int high)
-{
-	if (sane_istest(x, GIT_ALPHA))
-		x = (x & ~0x20) | high;
-	return x;
-}
-
-static inline int sane_iscase(int x, int is_lower)
-{
-	if (!sane_istest(x, GIT_ALPHA))
-		return 0;
-
-	if (is_lower)
-		return (x & 0x20) != 0;
-	else
-		return (x & 0x20) == 0;
-}
-
-// Copied from ctype.c
-
-enum {
-	S = GIT_SPACE,
-	A = GIT_ALPHA,
-	D = GIT_DIGIT,
-	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
-	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
-	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
-	X = GIT_CNTRL,
-	U = GIT_PUNCT,
-	Z = GIT_CNTRL | GIT_SPACE
-};
-
-static const unsigned char sane_ctype[256] = {
-	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
-	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
-	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
-	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
-	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
-	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
-	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
-	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
-	/* Nothing in the 128.. range */
-};
-
-// Copied from hex.c
-
-static const signed char hexval_table[256] = {
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
-	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
-	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
-	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
-	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-67 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
-	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
-};
-
-// Copied from wildmatch.h
-
-#define WM_CASEFOLD 1
-#define WM_PATHNAME 2
-
-#define WM_NOMATCH 1
-#define WM_MATCH 0
-#define WM_ABORT_ALL -1
-#define WM_ABORT_TO_STARSTAR -2
-
-// Copied from wildmatch.c
-
-typedef unsigned char uchar;
-
-// local modification: remove NEGATE_CLASS(2)
-
-#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
-				    && *(class) == *(litmatch) \
-				    && strncmp((char*)class, litmatch, len) == 0)
-
-// local modification: simpilify macros
-#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
-#define ISGRAPH(c) (isprint(c) && !isspace(c))
-#define ISPRINT(c) isprint(c)
-#define ISDIGIT(c) isdigit(c)
-#define ISALNUM(c) isalnum(c)
-#define ISALPHA(c) isalpha(c)
-#define ISCNTRL(c) iscntrl(c)
-#define ISLOWER(c) islower(c)
-#define ISPUNCT(c) ispunct(c)
-#define ISSPACE(c) isspace(c)
-#define ISUPPER(c) isupper(c)
-#define ISXDIGIT(c) isxdigit(c)
-
-/* Match pattern "p" against "text" */
-static int dowild(const uchar *p, const uchar *text, unsigned int flags)
-{
-	uchar p_ch;
-	const uchar *pattern = p;
-
-	for ( ; (p_ch = *p) != '\0'; text++, p++) {
-		int matched, match_slash, negated;
-		uchar t_ch, prev_ch;
-		if ((t_ch = *text) == '\0' && p_ch != '*')
-			return WM_ABORT_ALL;
-		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
-			t_ch = tolower(t_ch);
-		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
-			p_ch = tolower(p_ch);
-		switch (p_ch) {
-		case '\\':
-			/* Literal match with following character.  Note that the test
-			 * in "default" handles the p[1] == '\0' failure case. */
-			p_ch = *++p;
-			/* FALLTHROUGH */
-		default:
-			if (t_ch != p_ch)
-				return WM_NOMATCH;
-			continue;
-		case '?':
-			/* Match anything but '/'. */
-			if ((flags & WM_PATHNAME) && t_ch == '/')
-				return WM_NOMATCH;
-			continue;
-		case '*':
-			if (*++p == '*') {
-				const uchar *prev_p = p - 2;
-				while (*++p == '*') {}
-				if (!(flags & WM_PATHNAME))
-					/* without WM_PATHNAME, '*' == '**' */
-					match_slash = 1;
-				else if ((prev_p < pattern || *prev_p == '/') &&
-				    (*p == '\0' || *p == '/' ||
-				     (p[0] == '\\' && p[1] == '/'))) {
-					/*
-					 * Assuming we already match 'foo/' and are at
-					 * <star star slash>, just assume it matches
-					 * nothing and go ahead match the rest of the
-					 * pattern with the remaining string. This
-					 * helps make foo/<*><*>/bar (<> because
-					 * otherwise it breaks C comment syntax) match
-					 * both foo/bar and foo/a/bar.
-					 */
-					if (p[0] == '/' &&
-					    dowild(p + 1, text, flags) == WM_MATCH)
-						return WM_MATCH;
-					match_slash = 1;
-				} else /* WM_PATHNAME is set */
-					match_slash = 0;
-			} else
-				/* without WM_PATHNAME, '*' == '**' */
-				match_slash = flags & WM_PATHNAME ? 0 : 1;
-			if (*p == '\0') {
-				/* Trailing "**" matches everything.  Trailing "*" matches
-				 * only if there are no more slash characters. */
-				if (!match_slash) {
-					if (strchr((char *)text, '/'))
-						return WM_NOMATCH;
-				}
-				return WM_MATCH;
-			} else if (!match_slash && *p == '/') {
-				/*
-				 * _one_ asterisk followed by a slash
-				 * with WM_PATHNAME matches the next
-				 * directory
-				 */
-				const char *slash = strchr((char*)text, '/');
-				if (!slash)
-					return WM_NOMATCH;
-				text = (const uchar*)slash;
-				/* the slash is consumed by the top-level for loop */
-				break;
-			}
-			while (1) {
-				if (t_ch == '\0')
-					break;
-				/*
-				 * Try to advance faster when an asterisk is
-				 * followed by a literal. We know in this case
-				 * that the string before the literal
-				 * must belong to "*".
-				 * If match_slash is false, do not look past
-				 * the first slash as it cannot belong to '*'.
-				 */
-				if (!is_glob_special(*p)) {
-					p_ch = *p;
-					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
-						p_ch = tolower(p_ch);
-					while ((t_ch = *text) != '\0' &&
-					       (match_slash || t_ch != '/')) {
-						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
-							t_ch = tolower(t_ch);
-						if (t_ch == p_ch)
-							break;
-						text++;
-					}
-					if (t_ch != p_ch)
-						return WM_NOMATCH;
-				}
-				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
-					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
-						return matched;
-				} else if (!match_slash && t_ch == '/')
-					return WM_ABORT_TO_STARSTAR;
-				t_ch = *++text;
-			}
-			return WM_ABORT_ALL;
-		case '[':
-			p_ch = *++p;
-			if (p_ch == '^')
-				p_ch = '!';
-			/* Assign literal 1/0 because of "matched" comparison. */
-			negated = p_ch == '!' ? 1 : 0;
-			if (negated) {
-				/* Inverted character class. */
-				p_ch = *++p;
-			}
-			prev_ch = 0;
-			matched = 0;
-			do {
-				if (!p_ch)
-					return WM_ABORT_ALL;
-				if (p_ch == '\\') {
-					p_ch = *++p;
-					if (!p_ch)
-						return WM_ABORT_ALL;
-					if (t_ch == p_ch)
-						matched = 1;
-				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
-					p_ch = *++p;
-					if (p_ch == '\\') {
-						p_ch = *++p;
-						if (!p_ch)
-							return WM_ABORT_ALL;
-					}
-					if (t_ch <= p_ch && t_ch >= prev_ch)
-						matched = 1;
-					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
-						uchar t_ch_upper = toupper(t_ch);
-						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
-							matched = 1;
-					}
-					p_ch = 0; /* This makes "prev_ch" get set to 0. */
-				} else if (p_ch == '[' && p[1] == ':') {
-					const uchar *s;
-					int i;
-					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
-					if (!p_ch)
-						return WM_ABORT_ALL;
-					i = p - s - 1;
-					if (i < 0 || p[-1] != ':') {
-						/* Didn't find ":]", so treat like a normal set. */
-						p = s - 2;
-						p_ch = '[';
-						if (t_ch == p_ch)
-							matched = 1;
-						continue;
-					}
-					if (CC_EQ(s,i, "alnum")) {
-						if (ISALNUM(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "alpha")) {
-						if (ISALPHA(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "blank")) {
-						if (ISBLANK(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "cntrl")) {
-						if (ISCNTRL(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "digit")) {
-						if (ISDIGIT(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "graph")) {
-						if (ISGRAPH(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "lower")) {
-						if (ISLOWER(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "print")) {
-						if (ISPRINT(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "punct")) {
-						if (ISPUNCT(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "space")) {
-						if (ISSPACE(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "upper")) {
-						if (ISUPPER(t_ch))
-							matched = 1;
-						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
-							matched = 1;
-					} else if (CC_EQ(s,i, "xdigit")) {
-						if (ISXDIGIT(t_ch))
-							matched = 1;
-					} else /* malformed [:class:] string */
-						return WM_ABORT_ALL;
-					p_ch = 0; /* This makes "prev_ch" get set to 0. */
-				} else if (t_ch == p_ch)
-					matched = 1;
-			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
-			if (matched == negated ||
-			    ((flags & WM_PATHNAME) && t_ch == '/'))
-				return WM_NOMATCH;
-			continue;
-		}
-	}
-
-	return *text ? WM_NOMATCH : WM_MATCH;
-}
-
-/* Match the "pattern" against the "text" string. */
-static int wildmatch(const char *pattern, const char *text, unsigned int flags)
-{
-	// local modification: move WM_CASEFOLD here
-	if (ignore_case)
-		flags |= WM_CASEFOLD;
-
-	return dowild((const uchar*)pattern, (const uchar*)text, flags);
-}
-
-// Copied from dir.h
-
-#define PATTERN_FLAG_NODIR 1
-#define PATTERN_FLAG_ENDSWITH 4
-#define PATTERN_FLAG_MUSTBEDIR 8
-#define PATTERN_FLAG_NEGATIVE 16
-
-// Copied from dir.c
-
-static int fspathncmp(const char *a, const char *b, size_t count)
-{
-	return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
-}
-
-static int simple_length(const char *match)
-{
-	int len = -1;
-
-	for (;;) {
-		unsigned char c = *match++;
-		len++;
-		if (c == '\0' || is_glob_special(c))
-			return len;
-	}
-}
-
-static int no_wildcard(const char *string)
-{
-	return string[simple_length(string)] == '\0';
-}
-
-static void parse_path_pattern(const char **pattern,
-			       int *patternlen,
-			       unsigned *flags,
-			       int *nowildcardlen)
-{
-	const char *p = *pattern;
-	size_t i, len;
-
-	*flags = 0;
-	if (*p == '!') {
-		*flags |= PATTERN_FLAG_NEGATIVE;
-		p++;
-	}
-	len = strlen(p);
-	if (len && p[len - 1] == '/') {
-		len--;
-		*flags |= PATTERN_FLAG_MUSTBEDIR;
-	}
-	for (i = 0; i < len; i++) {
-		if (p[i] == '/')
-			break;
-	}
-	if (i == len)
-		*flags |= PATTERN_FLAG_NODIR;
-	*nowildcardlen = simple_length(p);
-	/*
-	 * we should have excluded the trailing slash from 'p' too,
-	 * but that's one more allocation. Instead just make sure
-	 * nowildcardlen does not exceed real patternlen
-	 */
-	if (*nowildcardlen > len)
-		*nowildcardlen = len;
-	if (*p == '*' && no_wildcard(p + 1))
-		*flags |= PATTERN_FLAG_ENDSWITH;
-	*pattern = p;
-	*patternlen = len;
-}
-
-static void trim_trailing_spaces(char *buf)
-{
-	char *p, *last_space = NULL;
-
-	for (p = buf; *p; p++)
-		switch (*p) {
-		case ' ':
-			if (!last_space)
-				last_space = p;
-			break;
-		case '\\':
-			p++;
-			if (!*p)
-				return;
-			/* fallthrough */
-		default:
-			last_space = NULL;
-		}
-
-	if (last_space)
-		*last_space = '\0';
-}
-
-static int match_basename(const char *basename, int basenamelen,
-			  const char *pattern, int prefix, int patternlen,
-			  unsigned flags)
-{
-	if (prefix == patternlen) {
-		if (patternlen == basenamelen &&
-		    !fspathncmp(pattern, basename, basenamelen))
-			return 1;
-	} else if (flags & PATTERN_FLAG_ENDSWITH) {
-		/* "*literal" matching against "fooliteral" */
-		if (patternlen - 1 <= basenamelen &&
-		    !fspathncmp(pattern + 1,
-				   basename + basenamelen - (patternlen - 1),
-				   patternlen - 1))
-			return 1;
-	} else {
-		// local modification: call wildmatch() directly
-		if (!wildmatch(pattern, basename, flags))
-			return 1;
-	}
-	return 0;
-}
-
-static int match_pathname(const char *pathname, int pathlen,
-			  const char *base, int baselen,
-			  const char *pattern, int prefix, int patternlen)
-{
-	// local modification: remove local variables
-
-	/*
-	 * match with FNM_PATHNAME; the pattern has base implicitly
-	 * in front of it.
-	 */
-	if (*pattern == '/') {
-		pattern++;
-		patternlen--;
-		prefix--;
-	}
-
-	/*
-	 * baselen does not count the trailing slash. base[] may or
-	 * may not end with a trailing slash though.
-	 */
-	if (pathlen < baselen + 1 ||
-	    (baselen && pathname[baselen] != '/') ||
-	    fspathncmp(pathname, base, baselen))
-		return 0;
-
-	// local modification: simplified because always baselen > 0
-	pathname += baselen + 1;
-	pathlen -= baselen + 1;
-
-	if (prefix) {
-		/*
-		 * if the non-wildcard part is longer than the
-		 * remaining pathname, surely it cannot match.
-		 */
-		if (prefix > pathlen)
-			return 0;
-
-		if (fspathncmp(pattern, pathname, prefix))
-			return 0;
-		pattern += prefix;
-		patternlen -= prefix;
-		pathname += prefix;
-		pathlen -= prefix;
-
-		/*
-		 * If the whole pattern did not have a wildcard,
-		 * then our prefix match is all we need; we
-		 * do not need to call fnmatch at all.
-		 */
-		if (!patternlen && !pathlen)
-			return 1;
-	}
-
-	// local modification: call wildmatch() directly
-	return !wildmatch(pattern, pathname, WM_PATHNAME);
-}
-
-// Copied from git/utf8.c
-
-static const char utf8_bom[] = "\357\273\277";
-
-//----------------------------(IMPORT FROM GIT END)----------------------------
-
-struct pattern {
-	unsigned int flags;
-	int nowildcardlen;
-	int patternlen;
-	int dirlen;
-	char pattern[];
-};
-
-static struct pattern **pattern_list;
-static int nr_patterns, alloced_patterns;
-
-// Remember the number of patterns at each directory level
-static int *nr_patterns_at;
-// Track the current/max directory level;
-static int depth, max_depth;
-static bool debug_on;
-static FILE *out_fp, *stat_fp;
-static char *prefix = "";
-static char *progname;
-
-static void __attribute__((noreturn)) perror_exit(const char *s)
-{
-	perror(s);
-
-	exit(EXIT_FAILURE);
-}
-
-static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
-{
-	va_list args;
-
-	fprintf(stderr, "%s: error: ", progname);
-
-	va_start(args, fmt);
-	vfprintf(stderr, fmt, args);
-	va_end(args);
-
-	exit(EXIT_FAILURE);
-}
-
-static void debug(const char *fmt, ...)
-{
-	va_list args;
-	int i;
-
-	if (!debug_on)
-		return;
-
-	fprintf(stderr, "[DEBUG] ");
-
-	for (i = 0; i < depth * 2; i++)
-		fputc(' ', stderr);
-
-	va_start(args, fmt);
-	vfprintf(stderr, fmt, args);
-	va_end(args);
-}
-
-static void *xrealloc(void *ptr, size_t size)
-{
-	ptr = realloc(ptr, size);
-	if (!ptr)
-		perror_exit(progname);
-
-	return ptr;
-}
-
-static void *xmalloc(size_t size)
-{
-	return xrealloc(NULL, size);
-}
-
-// similar to last_matching_pattern_from_list() in GIT
-static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
-{
-	int i;
-
-	// Search in the reverse order because the last matching pattern wins.
-	for (i = nr_patterns - 1; i >= 0; i--) {
-		struct pattern *p = pattern_list[i];
-		unsigned int flags = p->flags;
-		const char *gitignore_dir = p->pattern + p->patternlen + 1;
-		bool ignored;
-
-		if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
-			continue;
-
-		if (flags & PATTERN_FLAG_NODIR) {
-			if (!match_basename(path + dirlen + 1,
-					    pathlen - dirlen - 1,
-					    p->pattern,
-					    p->nowildcardlen,
-					    p->patternlen,
-					    p->flags))
-				continue;
-		} else {
-			if (!match_pathname(path, pathlen,
-					    gitignore_dir, p->dirlen,
-					    p->pattern,
-					    p->nowildcardlen,
-					    p->patternlen))
-				continue;
-		}
-
-		debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
-		      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
-		      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
-		      gitignore_dir);
-
-		ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
-		if (ignored)
-			debug("Ignore: %s\n", path);
-
-		return ignored;
-	}
-
-	debug("%s: no match\n", path);
-
-	return false;
-}
-
-static void add_pattern(const char *string, const char *dir, int dirlen)
-{
-	struct pattern *p;
-	int patternlen, nowildcardlen;
-	unsigned int flags;
-
-	parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
-
-	if (patternlen == 0)
-		return;
-
-	p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);
-
-	memcpy(p->pattern, string, patternlen);
-	p->pattern[patternlen] = 0;
-	memcpy(p->pattern + patternlen + 1, dir, dirlen);
-	p->pattern[patternlen + 1 + dirlen] = 0;
-
-	p->patternlen = patternlen;
-	p->nowildcardlen = nowildcardlen;
-	p->dirlen = dirlen;
-	p->flags = flags;
-
-	debug("Add pattern: %s%s%s\n",
-	      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
-	      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");
-
-	if (nr_patterns >= alloced_patterns) {
-		alloced_patterns += 128;
-		pattern_list = xrealloc(pattern_list,
-					sizeof(*pattern_list) * alloced_patterns);
-	}
-
-	pattern_list[nr_patterns++] = p;
-}
-
-// similar to add_patterns_from_buffer() in GIT
-static void add_patterns_from_gitignore(const char *dir, int dirlen)
-{
-	struct stat st;
-	char path[PATH_MAX], *buf, *entry;
-	size_t size;
-	int fd, pathlen, i;
-
-	pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
-	if (pathlen >= sizeof(path))
-		error_exit("%s: too long path was truncated\n", path);
-
-	fd = open(path, O_RDONLY | O_NOFOLLOW);
-	if (fd < 0) {
-		if (errno != ENOENT)
-			return perror_exit(path);
-		return;
-	}
-
-	if (fstat(fd, &st) < 0)
-		perror_exit(path);
-
-	size = st.st_size;
-
-	buf = xmalloc(size + 1);
-	if (read(fd, buf, st.st_size) != st.st_size)
-		perror_exit(path);
-
-	buf[st.st_size] = '\n';
-	if (close(fd))
-		perror_exit(path);
-
-	debug("Parse %s\n", path);
-
-	entry = buf;
-
-	// skip utf8 bom
-	if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
-		entry += strlen(utf8_bom);
-
-	for (i = entry - buf; i < size; i++) {
-		if (buf[i] == '\n') {
-			if (entry != buf + i && entry[0] != '#') {
-				buf[i - (i && buf[i-1] == '\r')] = 0;
-				trim_trailing_spaces(entry);
-				add_pattern(entry, dir, dirlen);
-			}
-			entry = buf + i + 1;
-		}
-	}
-
-	free(buf);
-}
-
-// Save the current number of patterns and increment the depth
-static void increment_depth(void)
-{
-	if (depth >= max_depth) {
-		max_depth += 1;
-		nr_patterns_at = xrealloc(nr_patterns_at,
-					  sizeof(*nr_patterns_at) * max_depth);
-	}
-
-	nr_patterns_at[depth] = nr_patterns;
-	depth++;
-}
-
-// Decrement the depth, and free up the patterns of this directory level.
-static void decrement_depth(void)
-{
-	depth--;
-	assert(depth >= 0);
-
-	while (nr_patterns > nr_patterns_at[depth])
-		free(pattern_list[--nr_patterns]);
-}
-
-static void print_path(const char *path)
-{
-	// The path always starts with "./"
-	assert(strlen(path) >= 2);
-
-	// Replace the root directory with a preferred prefix.
-	// This is useful for the tar command.
-	fprintf(out_fp, "%s%s\n", prefix, path + 2);
-}
-
-static void print_stat(const char *path, struct stat *st)
-{
-	if (!stat_fp)
-		return;
-
-	if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
-		return;
-
-	assert(strlen(path) >= 2);
-
-	fprintf(stat_fp, "%c %9ld %10ld %s\n",
-		S_ISLNK(st->st_mode) ? 'l' : '-',
-		st->st_size, st->st_mtim.tv_sec, path + 2);
-}
-
-// Traverse the entire directory tree, parsing .gitignore files.
-// Print file paths that are not tracked by git.
-//
-// Return true if all files under the directory are ignored, false otherwise.
-static bool traverse_directory(const char *dir, int dirlen)
-{
-	bool all_ignored = true;
-	DIR *dirp;
-
-	debug("Enter[%d]: %s\n", depth, dir);
-	increment_depth();
-
-	add_patterns_from_gitignore(dir, dirlen);
-
-	dirp = opendir(dir);
-	if (!dirp)
-		perror_exit(dir);
-
-	while (1) {
-		struct dirent *d;
-		struct stat st;
-		char path[PATH_MAX];
-		int pathlen;
-		bool ignored;
-
-		errno = 0;
-		d = readdir(dirp);
-		if (!d) {
-			if (errno)
-				perror_exit(dir);
-			break;
-		}
-
-		if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, "."))
-			continue;
-
-		pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
-		if (pathlen >= sizeof(path))
-			error_exit("%s: too long path was truncated\n", path);
-
-		if (lstat(path, &st) < 0)
-			perror_exit(path);
-
-		if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) ||
-		    is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) {
-			ignored = true;
-		} else {
-			if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode))
-				// If all the files in a directory are ignored,
-				// let's ignore that directory as well. This
-				// will avoid empty directories in the tarball.
-				ignored = traverse_directory(path, pathlen);
-			else
-				ignored = false;
-		}
-
-		if (ignored) {
-			print_path(path);
-		} else {
-			print_stat(path, &st);
-			all_ignored = false;
-		}
-	}
-
-	if (closedir(dirp))
-		perror_exit(dir);
-
-	decrement_depth();
-	debug("Leave[%d]: %s\n", depth, dir);
-
-	return all_ignored;
-}
-
-static void usage(void)
-{
-	fprintf(stderr,
-		"usage: %s [options]\n"
-		"\n"
-		"Show files that are ignored by git\n"
-		"\n"
-		"options:\n"
-		"  -d, --debug                  print debug messages to stderr\n"
-		"  -e, --exclude PATTERN        add the given exclude pattern\n"
-		"  -h, --help                   show this help message and exit\n"
-		"  -i, --ignore-case            Ignore case differences between the patterns and the files\n"
-		"  -o, --output FILE            output the ignored files to a file (default: '-', i.e. stdout)\n"
-		"  -p, --prefix PREFIX          prefix added to each path (default: empty string)\n"
-		"  -r, --rootdir DIR            root of the source tree (default: current working directory)\n"
-		"  -s, --stat FILE              output the file stat of non-ignored files to a file\n",
-		progname);
-}
-
-static void open_output(const char *pathname, FILE **fp)
-{
-	if (strcmp(pathname, "-")) {
-		*fp = fopen(pathname, "w");
-		if (!*fp)
-			perror_exit(pathname);
-	} else {
-		*fp = stdout;
-	}
-}
-
-static void close_output(const char *pathname, FILE *fp)
-{
-	fflush(fp);
-
-	if (ferror(fp))
-		error_exit("not all data was written to the output\n");
-
-	if (fclose(fp))
-		perror_exit(pathname);
-}
-
-int main(int argc, char *argv[])
-{
-	const char *output = "-";
-	const char *rootdir = ".";
-	const char *stat = NULL;
-
-	progname = strrchr(argv[0], '/');
-	if (progname)
-		progname++;
-	else
-		progname = argv[0];
-
-	while (1) {
-		static struct option long_options[] = {
-			{"debug",       no_argument,       NULL, 'd'},
-			{"help",        no_argument,       NULL, 'h'},
-			{"ignore-case", no_argument,       NULL, 'i'},
-			{"output",      required_argument, NULL, 'o'},
-			{"prefix",      required_argument, NULL, 'p'},
-			{"rootdir",     required_argument, NULL, 'r'},
-			{"stat",        required_argument, NULL, 's'},
-			{"exclude",     required_argument, NULL, 'x'},
-			{},
-		};
-
-		int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);
-
-		if (c == -1)
-			break;
-
-		switch (c) {
-		case 'd':
-			debug_on = true;
-			break;
-		case 'h':
-			usage();
-			exit(0);
-		case 'i':
-			ignore_case = true;
-			break;
-		case 'o':
-			output = optarg;
-			break;
-		case 'p':
-			prefix = optarg;
-			break;
-		case 'r':
-			rootdir = optarg;
-			break;
-		case 's':
-			stat = optarg;
-			break;
-		case 'x':
-			add_pattern(optarg, ".", strlen("."));
-			break;
-		case '?':
-			usage();
-			/* fallthrough */
-		default:
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	open_output(output, &out_fp);
-	if (stat && stat[0])
-		open_output(stat, &stat_fp);
-
-	if (chdir(rootdir))
-		perror_exit(rootdir);
-
-	add_pattern(".git/", ".", strlen("."));
-
-	if (traverse_directory(".", strlen(".")))
-		print_path("./");
-
-	assert(depth == 0);
-
-	while (nr_patterns > 0)
-		free(pattern_list[--nr_patterns]);
-	free(pattern_list);
-	free(nr_patterns_at);
-
-	close_output(output, out_fp);
-	if (stat_fp)
-		close_output(stat, stat_fp);
-
-	return 0;
-}
diff --git a/scripts/package/gen-diff-patch b/scripts/package/gen-diff-patch
new file mode 100755
index 0000000000000..f842ab50a780c
--- /dev/null
+++ b/scripts/package/gen-diff-patch
@@ -0,0 +1,44 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+diff_patch="${1}"
+untracked_patch="${2}"
+srctree=$(dirname $0)/../..
+
+rm -f ${diff_patch} ${untracked_patch}
+
+if ! ${srctree}/scripts/check-git; then
+	exit
+fi
+
+mkdir -p "$(dirname ${diff_patch})" "$(dirname ${untracked_patch})"
+
+git -C "${srctree}" diff HEAD > "${diff_patch}"
+
+if [ ! -s "${diff_patch}" ]; then
+	rm -f "${diff_patch}"
+	exit
+fi
+
+git -C ${srctree} status --porcelain --untracked-files=all |
+while read stat path
+do
+	if [ "${stat}" = '??' ]; then
+
+		if ! diff -u /dev/null "${srctree}/${path}" > .tmp_diff &&
+			! head -n1 .tmp_diff | grep -q "Binary files"; then
+			{
+				echo "--- /dev/null"
+				echo "+++ linux/$path"
+				cat .tmp_diff | tail -n +3
+			} >> ${untracked_patch}
+		fi
+	fi
+done
+
+rm -f .tmp_diff
+
+if [ ! -s "${diff_patch}" ]; then
+	rm -f "${diff_patch}"
+	exit
+fi
diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index e80a661a79ee6..e20a2b5be9eb2 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -91,7 +91,7 @@ version=$KERNELRELEASE
 if [ -n "$KDEB_PKGVERSION" ]; then
 	packageversion=$KDEB_PKGVERSION
 else
-	packageversion=$version-$($srctree/init/build-version)
+	packageversion=$(${srctree}/scripts/setlocalversion --no-local ${srctree})-$($srctree/init/build-version)
 fi
 sourcename=${KDEB_SOURCENAME:-linux-upstream}
 
@@ -152,6 +152,14 @@ mkdir -p debian/patches
 } > debian/patches/config
 echo config > debian/patches/series
 
+$(dirname $0)/gen-diff-patch debian/patches/diff.patch debian/patches/untracked.patch
+if [ -f debian/patches/diff.patch ]; then
+	echo diff.patch >> debian/patches/series
+fi
+if [ -f debian/patches/untracked.patch ]; then
+	echo untracked.patch >> debian/patches/series
+fi
+
 echo $debarch > debian/arch
 extra_build_depends=", $(if_enabled_echo CONFIG_UNWINDER_ORC libelf-dev:native)"
 extra_build_depends="$extra_build_depends, $(if_enabled_echo CONFIG_SYSTEM_TRUSTED_KEYRING libssl-dev:native)"
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 5f007137f5a02..b7d1dc28a5d6d 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -19,6 +19,8 @@ else
 	mkdir -p rpmbuild/SOURCES
 	cp linux.tar.gz rpmbuild/SOURCES
 	cp "${KCONFIG_CONFIG}" rpmbuild/SOURCES/config
+	$(dirname $0)/gen-diff-patch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch
+	touch rpmbuild/SOURCES/diff.patch rpmbuild/SOURCES/untracked.patch
 fi
 
 if grep -q CONFIG_MODULES=y include/config/auto.conf; then
@@ -53,6 +55,8 @@ sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
 	URL: https://www.kernel.org
 $S	Source0: linux.tar.gz
 $S	Source1: config
+$S	Source2: diff.patch
+$S	Source3: untracked.patch
 	Provides: $PROVIDES
 $S	BuildRequires: bc binutils bison dwarves
 $S	BuildRequires: (elfutils-libelf-devel or libelf-devel) flex
@@ -90,6 +94,12 @@ $S$M
 $S	%prep
 $S	%setup -q -n linux
 $S	cp %{SOURCE1} .config
+$S	if [ -s %{SOURCE2} ]; then
+$S		patch -p1 < %{SOURCE2}
+$S	fi
+$S	if [ -s %{SOURCE3} ]; then
+$S		patch -p1 < %{SOURCE3}
+$S	fi
 $S
 $S	%build
 $S	$MAKE %{?_smp_mflags} KERNELRELEASE=$KERNELRELEASE KBUILD_BUILD_VERSION=%{release}
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index e54839a42d4b4..3d3babac82982 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -11,10 +11,16 @@
 #
 
 usage() {
-	echo "Usage: $0 [srctree]" >&2
+	echo "Usage: $0 [--no-local] [srctree]" >&2
 	exit 1
 }
 
+no_local=false
+if test "$1" = "--no-local"; then
+	no_local=true
+	shift
+fi
+
 srctree=.
 if test $# -gt 0; then
 	srctree=$1
@@ -26,14 +32,22 @@ fi
 
 scm_version()
 {
-	local short
+	local short=false
+	local no_dirty=false
 	local tag
-	short=false
+
+	while [ $# -gt 0 ];
+	do
+		case "$1" in
+		--short)
+			short=true;;
+		--no-dirty)
+			no_dirty=true;;
+		esac
+		shift
+	done
 
 	cd "$srctree"
-	if test "$1" = "--short"; then
-		short=true
-	fi
 
 	if test -n "$(git rev-parse --show-cdup 2>/dev/null)"; then
 		return
@@ -75,6 +89,10 @@ scm_version()
 		printf '%s%s' -g "$(echo $head | cut -c1-12)"
 	fi
 
+	if ${no_dirty}; then
+		return
+	fi
+
 	# Check for uncommitted changes.
 	# This script must avoid any write attempt to the source tree, which
 	# might be read-only.
@@ -110,11 +128,6 @@ collect_files()
 	echo "$res"
 }
 
-if ! test -e include/config/auto.conf; then
-	echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2
-	exit 1
-fi
-
 if [ -z "${KERNELVERSION}" ]; then
 	echo "KERNELVERSION is not set" >&2
 	exit 1
@@ -126,6 +139,16 @@ if test ! "$srctree" -ef .; then
 	file_localversion="${file_localversion}$(collect_files "$srctree"/localversion*)"
 fi
 
+if ${no_local}; then
+	echo "${KERNELVERSION}$(scm_version --no-dirty)"
+	exit 0
+fi
+
+if ! test -e include/config/auto.conf; then
+	echo "Error: kernelrelease not valid - run 'make prepare' to update it" >&2
+	exit 1
+fi
+
 # version string from CONFIG_LOCALVERSION
 config_localversion=$(sed -n 's/^CONFIG_LOCALVERSION=\(.*\)$/\1/p' include/config/auto.conf)
 

From 203873a535d627c668f293be0cb73e26c30f9cc7 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Thu, 16 Mar 2023 11:38:19 +0100
Subject: [PATCH 342/411] fbdev: stifb: Provide valid pixelclock and add
 fb_check_var() checks

Find a valid modeline depending on the machine graphic card
configuration and add the fb_check_var() function to validate
Xorg provided graphics settings.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: stable@vger.kernel.org
---
 drivers/video/fbdev/stifb.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 3feb6e40d56d8..ef8a4c5fc6875 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -921,6 +921,28 @@ SETUP_HCRX(struct stifb_info *fb)
 
 /* ------------------- driver specific functions --------------------------- */
 
+static int
+stifb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+	struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+	if (var->xres != fb->info.var.xres ||
+	    var->yres != fb->info.var.yres ||
+	    var->bits_per_pixel != fb->info.var.bits_per_pixel)
+		return -EINVAL;
+
+	var->xres_virtual = var->xres;
+	var->yres_virtual = var->yres;
+	var->xoffset = 0;
+	var->yoffset = 0;
+	var->grayscale = fb->info.var.grayscale;
+	var->red.length = fb->info.var.red.length;
+	var->green.length = fb->info.var.green.length;
+	var->blue.length = fb->info.var.blue.length;
+
+	return 0;
+}
+
 static int
 stifb_setcolreg(u_int regno, u_int red, u_int green,
 	      u_int blue, u_int transp, struct fb_info *info)
@@ -1145,6 +1167,7 @@ stifb_init_display(struct stifb_info *fb)
 
 static const struct fb_ops stifb_ops = {
 	.owner		= THIS_MODULE,
+	.fb_check_var	= stifb_check_var,
 	.fb_setcolreg	= stifb_setcolreg,
 	.fb_blank	= stifb_blank,
 	.fb_fillrect	= stifb_fillrect,
@@ -1164,6 +1187,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 	struct stifb_info *fb;
 	struct fb_info *info;
 	unsigned long sti_rom_address;
+	char modestr[32];
 	char *dev_name;
 	int bpp, xres, yres;
 
@@ -1342,6 +1366,9 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref)
 	info->flags = FBINFO_HWACCEL_COPYAREA | FBINFO_HWACCEL_FILLRECT;
 	info->pseudo_palette = &fb->pseudo_palette;
 
+	scnprintf(modestr, sizeof(modestr), "%dx%d-%d", xres, yres, bpp);
+	fb_find_mode(&info->var, info, modestr, NULL, 0, NULL, bpp);
+
 	/* This has to be done !!! */
 	if (fb_alloc_cmap(&info->cmap, NR_PALETTE, 0))
 		goto out_err1;

From 92e2a00f2987483e1f9253625828622edd442e61 Mon Sep 17 00:00:00 2001
From: Wei Chen <harperchen1110@gmail.com>
Date: Wed, 15 Mar 2023 07:18:31 +0000
Subject: [PATCH 343/411] fbdev: nvidia: Fix potential divide by zero

variable var->pixclock can be set by user. In case it
equals to zero, divide by zero would occur in nvidiafb_set_par.

Similar crashes have happened in other fbdev drivers. There
is no check and modification on var->pixclock along the call
chain to nvidia_check_var and nvidiafb_set_par. We believe it
could also be triggered in driver nvidia from user site.

Signed-off-by: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/nvidia/nvidia.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/video/fbdev/nvidia/nvidia.c b/drivers/video/fbdev/nvidia/nvidia.c
index e60a276b4855d..ea4ba3dfb96bb 100644
--- a/drivers/video/fbdev/nvidia/nvidia.c
+++ b/drivers/video/fbdev/nvidia/nvidia.c
@@ -764,6 +764,8 @@ static int nvidiafb_check_var(struct fb_var_screeninfo *var,
 	int pitch, err = 0;
 
 	NVTRACE_ENTER();
+	if (!var->pixclock)
+		return -EINVAL;
 
 	var->transp.offset = 0;
 	var->transp.length = 0;

From d823685486a3446d061fed7c7d2f80af984f119a Mon Sep 17 00:00:00 2001
From: Wei Chen <harperchen1110@gmail.com>
Date: Wed, 15 Mar 2023 08:33:47 +0000
Subject: [PATCH 344/411] fbdev: intelfb: Fix potential divide by zero

Variable var->pixclock is controlled by user and can be assigned
to zero. Without proper check, divide by zero would occur in
intelfbhw_validate_mode and intelfbhw_mode_to_hw.

Error out if var->pixclock is zero.

Signed-off-by: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/intelfb/intelfbdrv.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/video/fbdev/intelfb/intelfbdrv.c b/drivers/video/fbdev/intelfb/intelfbdrv.c
index 0a9e5067b2010..a81095b2b1ea5 100644
--- a/drivers/video/fbdev/intelfb/intelfbdrv.c
+++ b/drivers/video/fbdev/intelfb/intelfbdrv.c
@@ -1222,6 +1222,9 @@ static int intelfb_check_var(struct fb_var_screeninfo *var,
 
 	dinfo = GET_DINFO(info);
 
+	if (!var->pixclock)
+		return -EINVAL;
+
 	/* update the pitch */
 	if (intelfbhw_validate_mode(dinfo, var) != 0)
 		return -EINVAL;

From 61ac4b86a4c047c20d5cb423ddd87496f14d9868 Mon Sep 17 00:00:00 2001
From: Wei Chen <harperchen1110@gmail.com>
Date: Wed, 15 Mar 2023 09:05:18 +0000
Subject: [PATCH 345/411] fbdev: lxfb: Fix potential divide by zero

var->pixclock can be assigned to zero by user. Without proper
check, divide by zero would occur in lx_set_clock.

Error out if var->pixclock is zero.

Signed-off-by: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/geode/lxfb_core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/video/fbdev/geode/lxfb_core.c b/drivers/video/fbdev/geode/lxfb_core.c
index 8130e9eee2b4b..556d8b1a9e06a 100644
--- a/drivers/video/fbdev/geode/lxfb_core.c
+++ b/drivers/video/fbdev/geode/lxfb_core.c
@@ -235,6 +235,9 @@ static void get_modedb(struct fb_videomode **modedb, unsigned int *size)
 
 static int lxfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
 {
+	if (!var->pixclock)
+		return -EINVAL;
+
 	if (var->xres > 1920 || var->yres > 1440)
 		return -EINVAL;
 

From 44a3b36b42acfc433aaaf526191dd12fbb919fdb Mon Sep 17 00:00:00 2001
From: Wei Chen <harperchen1110@gmail.com>
Date: Wed, 15 Mar 2023 09:22:54 +0000
Subject: [PATCH 346/411] fbdev: au1200fb: Fix potential divide by zero

var->pixclock can be assigned to zero by user. Without
proper check, divide by zero would occur when invoking
macro PICOS2KHZ in au1200fb_fb_check_var.

Error out if var->pixclock is zero.

Signed-off-by: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/au1200fb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 81c3154544287..b6b22fa4a8a01 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1040,6 +1040,9 @@ static int au1200fb_fb_check_var(struct fb_var_screeninfo *var,
 	u32 pixclock;
 	int screen_size, plane;
 
+	if (!var->pixclock)
+		return -EINVAL;
+
 	plane = fbdev->plane;
 
 	/* Make sure that the mode respect all LCD controller and

From 29413f05fe34e8824551b91f660fde781249417d Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 10 Mar 2023 08:47:29 -0600
Subject: [PATCH 347/411] fbdev: Use of_property_present() for testing DT
 property presence

It is preferred to use typed property access functions (i.e.
of_property_read_<type> functions) rather than low-level
of_get_property/of_find_property functions for reading properties. As
part of this, convert of_get_property/of_find_property calls to the
recently added of_property_present() helper when we just want to test
for presence of a property and nothing more.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/amba-clcd.c                          | 2 +-
 drivers/video/fbdev/bw2.c                                | 2 +-
 drivers/video/fbdev/cg3.c                                | 2 +-
 drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/amba-clcd.c b/drivers/video/fbdev/amba-clcd.c
index f65c96d1394d3..e45338227be6e 100644
--- a/drivers/video/fbdev/amba-clcd.c
+++ b/drivers/video/fbdev/amba-clcd.c
@@ -854,7 +854,7 @@ static struct clcd_board *clcdfb_of_get_board(struct amba_device *dev)
 	board->caps = CLCD_CAP_ALL;
 	board->check = clcdfb_check;
 	board->decode = clcdfb_decode;
-	if (of_find_property(node, "memory-region", NULL)) {
+	if (of_property_present(node, "memory-region")) {
 		board->setup = clcdfb_of_vram_setup;
 		board->mmap = clcdfb_of_vram_mmap;
 		board->remove = clcdfb_of_vram_remove;
diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c
index 6403ae07970d6..9cbadcd18b256 100644
--- a/drivers/video/fbdev/bw2.c
+++ b/drivers/video/fbdev/bw2.c
@@ -306,7 +306,7 @@ static int bw2_probe(struct platform_device *op)
 	if (!par->regs)
 		goto out_release_fb;
 
-	if (!of_find_property(dp, "width", NULL)) {
+	if (!of_property_present(dp, "width")) {
 		err = bw2_do_default_mode(par, info, &linebytes);
 		if (err)
 			goto out_unmap_regs;
diff --git a/drivers/video/fbdev/cg3.c b/drivers/video/fbdev/cg3.c
index bdcc3f6ab6665..3a37fff4df366 100644
--- a/drivers/video/fbdev/cg3.c
+++ b/drivers/video/fbdev/cg3.c
@@ -393,7 +393,7 @@ static int cg3_probe(struct platform_device *op)
 
 	cg3_blank(FB_BLANK_UNBLANK, info);
 
-	if (!of_find_property(dp, "width", NULL)) {
+	if (!of_property_present(dp, "width")) {
 		err = cg3_do_default_mode(par);
 		if (err)
 			goto out_unmap_screen;
diff --git a/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c b/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c
index 0ae0cab252d3d..09f719af0d0c9 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/omapdss-boot-init.c
@@ -192,7 +192,7 @@ static int __init omapdss_boot_init(void)
 	omapdss_walk_device(dss, true);
 
 	for_each_available_child_of_node(dss, child) {
-		if (!of_find_property(child, "compatible", NULL))
+		if (!of_property_present(child, "compatible"))
 			continue;
 
 		omapdss_walk_device(child, true);

From d2acf789088bb562cea342b6a24e646df4d47839 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Thu, 16 Mar 2023 15:26:05 +0000
Subject: [PATCH 348/411] io_uring/rsrc: fix folio accounting

| BUG: Bad page state in process kworker/u8:0  pfn:5c001
| page:00000000bfda61c8 refcount:0 mapcount:0 mapping:0000000000000000 index:0x20001 pfn:0x5c001
| head:0000000011409842 order:9 entire_mapcount:0 nr_pages_mapped:0 pincount:1
| anon flags: 0x3fffc00000b0004(uptodate|head|mappedtodisk|swapbacked|node=0|zone=0|lastcpupid=0xffff)
| raw: 03fffc0000000000 fffffc0000700001 ffffffff00700903 0000000100000000
| raw: 0000000000000200 0000000000000000 00000000ffffffff 0000000000000000
| head: 03fffc00000b0004 dead000000000100 dead000000000122 ffff00000a809dc1
| head: 0000000000020000 0000000000000000 00000000ffffffff 0000000000000000
| page dumped because: nonzero pincount
| CPU: 3 PID: 9 Comm: kworker/u8:0 Not tainted 6.3.0-rc2-00001-gc6811bf0cd87 #1
| Hardware name: linux,dummy-virt (DT)
| Workqueue: events_unbound io_ring_exit_work
| Call trace:
|  dump_backtrace+0x13c/0x208
|  show_stack+0x34/0x58
|  dump_stack_lvl+0x150/0x1a8
|  dump_stack+0x20/0x30
|  bad_page+0xec/0x238
|  free_tail_pages_check+0x280/0x350
|  free_pcp_prepare+0x60c/0x830
|  free_unref_page+0x50/0x498
|  free_compound_page+0xcc/0x100
|  free_transhuge_page+0x1f0/0x2b8
|  destroy_large_folio+0x80/0xc8
|  __folio_put+0xc4/0xf8
|  gup_put_folio+0xd0/0x250
|  unpin_user_page+0xcc/0x128
|  io_buffer_unmap+0xec/0x2c0
|  __io_sqe_buffers_unregister+0xa4/0x1e0
|  io_ring_exit_work+0x68c/0x1188
|  process_one_work+0x91c/0x1a58
|  worker_thread+0x48c/0xe30
|  kthread+0x278/0x2f0
|  ret_from_fork+0x10/0x20

Mark reports an issue with the recent patches coalescing compound pages
while registering them in io_uring. The reason is that we try to drop
excessive references with folio_put_refs(), but pages were acquired
with pin_user_pages(), which has extra accounting and so should be put
down with matching unpin_user_pages() or at least gup_put_folio().

As a fix unpin_user_pages() all but first page instead, and let's figure
out a better API after.

Fixes: 57bebf807e2abcf8 ("io_uring/rsrc: optimise registered huge pages")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Tested-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/10efd5507d6d1f05ea0f3c601830e08767e189bd.1678980230.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 io_uring/rsrc.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 09a16d709cb55..e2bac9f899029 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1235,7 +1235,13 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, struct iovec *iov,
 			}
 		}
 		if (folio) {
-			folio_put_refs(folio, nr_pages - 1);
+			/*
+			 * The pages are bound to the folio, it doesn't
+			 * actually unpin them but drops all but one reference,
+			 * which is usually put down by io_buffer_unmap().
+			 * Note, needs a better helper.
+			 */
+			unpin_user_pages(&pages[1], nr_pages - 1);
 			nr_pages = 1;
 		}
 	}

From 8f0d196e4dc137470bbd5de98278d941c8002fcb Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 16 Mar 2023 12:16:30 +0100
Subject: [PATCH 349/411] block: remove obsolete config BLOCK_COMPAT

Before commit bdc1ddad3e5f ("compat_ioctl: block: move
blkdev_compat_ioctl() into ioctl.c"), the config BLOCK_COMPAT was used to
include compat_ioctl.c into the kernel build. With this commit, the code
is moved into ioctl.c and included with the config COMPAT. So, since then,
the config BLOCK_COMPAT has no effect and any further purpose.

Remove this obsolete config BLOCK_COMPAT.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20230316111630.4897-1-lukas.bulwahn@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/Kconfig | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/block/Kconfig b/block/Kconfig
index 5d9d9c84d5165..941b2dca70db7 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -204,9 +204,6 @@ config BLK_INLINE_ENCRYPTION_FALLBACK
 
 source "block/partitions/Kconfig"
 
-config BLOCK_COMPAT
-	def_bool COMPAT
-
 config BLK_MQ_PCI
 	def_bool PCI
 

From 077706165717686a2a6a71405fef036cd5b37ae0 Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <avkrasnov@sberdevices.ru>
Date: Tue, 14 Mar 2023 14:05:48 +0300
Subject: [PATCH 350/411] virtio/vsock: don't use skbuff state to account
 credit

'skb->len' can vary when we partially read the data, this complicates the
calculation of credit to be updated in 'virtio_transport_inc_rx_pkt()/
virtio_transport_dec_rx_pkt()'.

Also in 'virtio_transport_dec_rx_pkt()' we were miscalculating the
credit since 'skb->len' was redundant.

For these reasons, let's replace the use of skbuff state to calculate new
'rx_bytes'/'fwd_cnt' values with explicit value as input argument. This
makes code more simple, because it is not needed to change skbuff state
before each call to update 'rx_bytes'/'fwd_cnt'.

Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff")
Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a1581c77cf84a..618680fd9906f 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -241,21 +241,18 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
 }
 
 static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
-					struct sk_buff *skb)
+					u32 len)
 {
-	if (vvs->rx_bytes + skb->len > vvs->buf_alloc)
+	if (vvs->rx_bytes + len > vvs->buf_alloc)
 		return false;
 
-	vvs->rx_bytes += skb->len;
+	vvs->rx_bytes += len;
 	return true;
 }
 
 static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
-					struct sk_buff *skb)
+					u32 len)
 {
-	int len;
-
-	len = skb_headroom(skb) - sizeof(struct virtio_vsock_hdr) - skb->len;
 	vvs->rx_bytes -= len;
 	vvs->fwd_cnt += len;
 }
@@ -388,7 +385,9 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 		skb_pull(skb, bytes);
 
 		if (skb->len == 0) {
-			virtio_transport_dec_rx_pkt(vvs, skb);
+			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
+
+			virtio_transport_dec_rx_pkt(vvs, pkt_len);
 			consume_skb(skb);
 		} else {
 			__skb_queue_head(&vvs->rx_queue, skb);
@@ -437,17 +436,17 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 
 	while (!msg_ready) {
 		struct virtio_vsock_hdr *hdr;
+		size_t pkt_len;
 
 		skb = __skb_dequeue(&vvs->rx_queue);
 		if (!skb)
 			break;
 		hdr = virtio_vsock_hdr(skb);
+		pkt_len = (size_t)le32_to_cpu(hdr->len);
 
 		if (dequeued_len >= 0) {
-			size_t pkt_len;
 			size_t bytes_to_copy;
 
-			pkt_len = (size_t)le32_to_cpu(hdr->len);
 			bytes_to_copy = min(user_buf_len, pkt_len);
 
 			if (bytes_to_copy) {
@@ -484,7 +483,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 				msg->msg_flags |= MSG_EOR;
 		}
 
-		virtio_transport_dec_rx_pkt(vvs, skb);
+		virtio_transport_dec_rx_pkt(vvs, pkt_len);
 		kfree_skb(skb);
 	}
 
@@ -1040,7 +1039,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 
 	spin_lock_bh(&vvs->rx_lock);
 
-	can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb);
+	can_enqueue = virtio_transport_inc_rx_pkt(vvs, len);
 	if (!can_enqueue) {
 		free_pkt = true;
 		goto out;

From 6825e6b4f8e53799d83bc39ca6ec5baed4e2adde Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <avkrasnov@sberdevices.ru>
Date: Tue, 14 Mar 2023 14:06:53 +0300
Subject: [PATCH 351/411] virtio/vsock: remove redundant 'skb_pull()' call

Since we now no longer use 'skb->len' to update credit, there is no sense
to update skbuff state, because it is used only once after dequeue to
copy data and then will be released.

Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff")
Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 618680fd9906f..9a411475e2010 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -465,7 +465,6 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 					dequeued_len = err;
 				} else {
 					user_buf_len -= bytes_to_copy;
-					skb_pull(skb, bytes_to_copy);
 				}
 
 				spin_lock_bh(&vvs->rx_lock);

From 8daaf39f7f6ef53a11817f6a11ec104016c3545f Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <avkrasnov@sberdevices.ru>
Date: Tue, 14 Mar 2023 14:08:20 +0300
Subject: [PATCH 352/411] virtio/vsock: don't drop skbuff on copy failure

This returns behaviour of SOCK_STREAM read as before skbuff usage. When
copying to user fails current skbuff won't be dropped, but returned to
sockets's queue. Technically instead of 'skb_dequeue()', 'skb_peek()' is
called and when skbuff becomes empty, it is removed from queue by
'__skb_unlink()'.

Fixes: 71dc9ec9ac7d ("virtio/vsock: replace virtio_vsock_pkt with sk_buff")
Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Acked-by: Bobby Eshleman <bobby.eshleman@bytedance.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/vmw_vsock/virtio_transport_common.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 9a411475e2010..6564192e7f201 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -364,7 +364,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 
 	spin_lock_bh(&vvs->rx_lock);
 	while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
-		skb = __skb_dequeue(&vvs->rx_queue);
+		skb = skb_peek(&vvs->rx_queue);
 
 		bytes = len - total;
 		if (bytes > skb->len)
@@ -388,9 +388,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
 
 			virtio_transport_dec_rx_pkt(vvs, pkt_len);
+			__skb_unlink(skb, &vvs->rx_queue);
 			consume_skb(skb);
-		} else {
-			__skb_queue_head(&vvs->rx_queue, skb);
 		}
 	}
 

From 7e699d2a4e8104d304e921ac5e0a0c73f0f7b623 Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <avkrasnov@sberdevices.ru>
Date: Tue, 14 Mar 2023 14:09:27 +0300
Subject: [PATCH 353/411] test/vsock: copy to user failure test

This adds SOCK_STREAM and SOCK_SEQPACKET tests for invalid buffer case.
It tries to read data to NULL buffer (data already presents in socket's
queue), then uses valid buffer. For SOCK_STREAM second read must return
data, because skbuff is not dropped, but for SOCK_SEQPACKET skbuff will
be dropped by kernel, and 'recv()' will return EAGAIN.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/vsock/vsock_test.c | 118 +++++++++++++++++++++++++++++++
 1 file changed, 118 insertions(+)

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 67e9f9df3a8c4..3de10dbb50f52 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -860,6 +860,114 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
 	close(fd);
 }
 
+#define INV_BUF_TEST_DATA_LEN 512
+
+static void test_inv_buf_client(const struct test_opts *opts, bool stream)
+{
+	unsigned char data[INV_BUF_TEST_DATA_LEN] = {0};
+	ssize_t ret;
+	int fd;
+
+	if (stream)
+		fd = vsock_stream_connect(opts->peer_cid, 1234);
+	else
+		fd = vsock_seqpacket_connect(opts->peer_cid, 1234);
+
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SENDDONE");
+
+	/* Use invalid buffer here. */
+	ret = recv(fd, NULL, sizeof(data), 0);
+	if (ret != -1) {
+		fprintf(stderr, "expected recv(2) failure, got %zi\n", ret);
+		exit(EXIT_FAILURE);
+	}
+
+	if (errno != ENOMEM) {
+		fprintf(stderr, "unexpected recv(2) errno %d\n", errno);
+		exit(EXIT_FAILURE);
+	}
+
+	ret = recv(fd, data, sizeof(data), MSG_DONTWAIT);
+
+	if (stream) {
+		/* For SOCK_STREAM we must continue reading. */
+		if (ret != sizeof(data)) {
+			fprintf(stderr, "expected recv(2) success, got %zi\n", ret);
+			exit(EXIT_FAILURE);
+		}
+		/* Don't check errno in case of success. */
+	} else {
+		/* For SOCK_SEQPACKET socket's queue must be empty. */
+		if (ret != -1) {
+			fprintf(stderr, "expected recv(2) failure, got %zi\n", ret);
+			exit(EXIT_FAILURE);
+		}
+
+		if (errno != EAGAIN) {
+			fprintf(stderr, "unexpected recv(2) errno %d\n", errno);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+	control_writeln("DONE");
+
+	close(fd);
+}
+
+static void test_inv_buf_server(const struct test_opts *opts, bool stream)
+{
+	unsigned char data[INV_BUF_TEST_DATA_LEN] = {0};
+	ssize_t res;
+	int fd;
+
+	if (stream)
+		fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+	else
+		fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL);
+
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	res = send(fd, data, sizeof(data), 0);
+	if (res != sizeof(data)) {
+		fprintf(stderr, "unexpected send(2) result %zi\n", res);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("SENDDONE");
+
+	control_expectln("DONE");
+
+	close(fd);
+}
+
+static void test_stream_inv_buf_client(const struct test_opts *opts)
+{
+	test_inv_buf_client(opts, true);
+}
+
+static void test_stream_inv_buf_server(const struct test_opts *opts)
+{
+	test_inv_buf_server(opts, true);
+}
+
+static void test_seqpacket_inv_buf_client(const struct test_opts *opts)
+{
+	test_inv_buf_client(opts, false);
+}
+
+static void test_seqpacket_inv_buf_server(const struct test_opts *opts)
+{
+	test_inv_buf_server(opts, false);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -920,6 +1028,16 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_bigmsg_client,
 		.run_server = test_seqpacket_bigmsg_server,
 	},
+	{
+		.name = "SOCK_STREAM test invalid buffer",
+		.run_client = test_stream_inv_buf_client,
+		.run_server = test_stream_inv_buf_server,
+	},
+	{
+		.name = "SOCK_SEQPACKET test invalid buffer",
+		.run_client = test_seqpacket_inv_buf_client,
+		.run_server = test_seqpacket_inv_buf_server,
+	},
 	{},
 };
 

From b830c9642386867863ac64295185f896ff2928ac Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 14 Mar 2023 10:45:43 -0700
Subject: [PATCH 354/411] ice: xsk: disable txq irq before flushing hw

ice_qp_dis() intends to stop a given queue pair that is a target of xsk
pool attach/detach. One of the steps is to disable interrupts on these
queues. It currently is broken in a way that txq irq is turned off
*after* HW flush which in turn takes no effect.

ice_qp_dis():
-> ice_qvec_dis_irq()
--> disable rxq irq
--> flush hw
-> ice_vsi_stop_tx_ring()
-->disable txq irq

Below splat can be triggered by following steps:
- start xdpsock WITHOUT loading xdp prog
- run xdp_rxq_info with XDP_TX action on this interface
- start traffic
- terminate xdpsock

[  256.312485] BUG: kernel NULL pointer dereference, address: 0000000000000018
[  256.319560] #PF: supervisor read access in kernel mode
[  256.324775] #PF: error_code(0x0000) - not-present page
[  256.329994] PGD 0 P4D 0
[  256.332574] Oops: 0000 [#1] PREEMPT SMP NOPTI
[  256.337006] CPU: 3 PID: 32 Comm: ksoftirqd/3 Tainted: G           OE      6.2.0-rc5+ #51
[  256.345218] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
[  256.355807] RIP: 0010:ice_clean_rx_irq_zc+0x9c/0x7d0 [ice]
[  256.361423] Code: b7 8f 8a 00 00 00 66 39 ca 0f 84 f1 04 00 00 49 8b 47 40 4c 8b 24 d0 41 0f b7 45 04 66 25 ff 3f 66 89 04 24 0f 84 85 02 00 00 <49> 8b 44 24 18 0f b7 14 24 48 05 00 01 00 00 49 89 04 24 49 89 44
[  256.380463] RSP: 0018:ffffc900088bfd20 EFLAGS: 00010206
[  256.385765] RAX: 000000000000003c RBX: 0000000000000035 RCX: 000000000000067f
[  256.393012] RDX: 0000000000000775 RSI: 0000000000000000 RDI: ffff8881deb3ac80
[  256.400256] RBP: 000000000000003c R08: ffff889847982710 R09: 0000000000010000
[  256.407500] R10: ffffffff82c060c0 R11: 0000000000000004 R12: 0000000000000000
[  256.414746] R13: ffff88811165eea0 R14: ffffc9000d255000 R15: ffff888119b37600
[  256.421990] FS:  0000000000000000(0000) GS:ffff8897e0cc0000(0000) knlGS:0000000000000000
[  256.430207] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  256.436036] CR2: 0000000000000018 CR3: 0000000005c0a006 CR4: 00000000007706e0
[  256.443283] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  256.450527] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  256.457770] PKRU: 55555554
[  256.460529] Call Trace:
[  256.463015]  <TASK>
[  256.465157]  ? ice_xmit_zc+0x6e/0x150 [ice]
[  256.469437]  ice_napi_poll+0x46d/0x680 [ice]
[  256.473815]  ? _raw_spin_unlock_irqrestore+0x1b/0x40
[  256.478863]  __napi_poll+0x29/0x160
[  256.482409]  net_rx_action+0x136/0x260
[  256.486222]  __do_softirq+0xe8/0x2e5
[  256.489853]  ? smpboot_thread_fn+0x2c/0x270
[  256.494108]  run_ksoftirqd+0x2a/0x50
[  256.497747]  smpboot_thread_fn+0x1c1/0x270
[  256.501907]  ? __pfx_smpboot_thread_fn+0x10/0x10
[  256.506594]  kthread+0xea/0x120
[  256.509785]  ? __pfx_kthread+0x10/0x10
[  256.513597]  ret_from_fork+0x29/0x50
[  256.517238]  </TASK>

In fact, irqs were not disabled and napi managed to be scheduled and run
while xsk_pool pointer was still valid, but SW ring of xdp_buff pointers
was already freed.

To fix this, call ice_qvec_dis_irq() after ice_vsi_stop_tx_ring(). Also
while at it, remove redundant ice_clean_rx_ring() call - this is handled
in ice_qp_clean_rings().

Fixes: 2d4238f55697 ("ice: Add support for AF_XDP")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Reviewed-by: Larysa Zaremba <larysa.zaremba@intel.com>
Tested-by: Chandan Kumar Rout <chandanx.rout@intel.com> (A Contingent Worker at Intel)
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ice/ice_xsk.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 31565bbafa224..d1e489da7363f 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -184,8 +184,6 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
 	}
 	netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx));
 
-	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
-
 	ice_fill_txq_meta(vsi, tx_ring, &txq_meta);
 	err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta);
 	if (err)
@@ -200,10 +198,11 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx)
 		if (err)
 			return err;
 	}
+	ice_qvec_dis_irq(vsi, rx_ring, q_vector);
+
 	err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true);
 	if (err)
 		return err;
-	ice_clean_rx_ring(rx_ring);
 
 	ice_qvec_toggle_napi(vsi, q_vector, false);
 	ice_qp_clean_rings(vsi, q_idx);

From 636e8adf7878eab3614250234341bde45537f47a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 14 Mar 2023 20:24:04 +0200
Subject: [PATCH 355/411] net: dsa: don't error out when drivers return
 ETH_DATA_LEN in .port_max_mtu()

Currently, when dsa_slave_change_mtu() is called on a user port where
dev->max_mtu is 1500 (as returned by ds->ops->port_max_mtu()), the code
will stumble upon this check:

	if (new_master_mtu > mtu_limit)
		return -ERANGE;

because new_master_mtu is adjusted for the tagger overhead but mtu_limit
is not.

But it would be good if the logic went through, for example if the DSA
master really depends on an MTU adjustment to accept DSA-tagged frames.

To make the code pass through the check, we need to adjust mtu_limit for
the overhead as well, if the minimum restriction was caused by the DSA
user port's MTU (dev->max_mtu). A DSA user port MTU and a DSA master MTU
are always offset by the protocol overhead.

Currently no drivers return 1500 .port_max_mtu(), but this is only
temporary and a bug in itself - mv88e6xxx should have done that, but
since commit b9c587fed61c ("dsa: mv88e6xxx: Include tagger overhead when
setting MTU for DSA and CPU ports") it no longer does. This is a
preparation for fixing that.

Fixes: bfcb813203e6 ("net: dsa: configure the MTU for switch ports")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/slave.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 6957971c2db23..cac17183589fe 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1933,6 +1933,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 	int new_master_mtu;
 	int old_master_mtu;
 	int mtu_limit;
+	int overhead;
 	int cpu_mtu;
 	int err;
 
@@ -1961,9 +1962,10 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 			largest_mtu = slave_mtu;
 	}
 
-	mtu_limit = min_t(int, master->max_mtu, dev->max_mtu);
+	overhead = dsa_tag_protocol_overhead(cpu_dp->tag_ops);
+	mtu_limit = min_t(int, master->max_mtu, dev->max_mtu + overhead);
 	old_master_mtu = master->mtu;
-	new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops);
+	new_master_mtu = largest_mtu + overhead;
 	if (new_master_mtu > mtu_limit)
 		return -ERANGE;
 
@@ -1998,8 +2000,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu)
 
 out_port_failed:
 	if (new_master_mtu != old_master_mtu)
-		dsa_port_mtu_change(cpu_dp, old_master_mtu -
-				    dsa_tag_protocol_overhead(cpu_dp->tag_ops));
+		dsa_port_mtu_change(cpu_dp, old_master_mtu - overhead);
 out_cpu_failed:
 	if (new_master_mtu != old_master_mtu)
 		dev_set_mtu(master, old_master_mtu);

From 7e9517375a14f44ee830ca1c3278076dd65fcc8f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 14 Mar 2023 20:24:05 +0200
Subject: [PATCH 356/411] net: dsa: mv88e6xxx: fix max_mtu of 1492 on 6165,
 6191, 6220, 6250, 6290

There are 3 classes of switch families that the driver is aware of, as
far as mv88e6xxx_change_mtu() is concerned:

- MTU configuration is available per port. Here, the
  chip->info->ops->port_set_jumbo_size() method will be present.

- MTU configuration is global to the switch. Here, the
  chip->info->ops->set_max_frame_size() method will be present.

- We don't know how to change the MTU. Here, none of the above methods
  will be present.

Switch families MV88E6165, MV88E6191, MV88E6220, MV88E6250 and MV88E6290
fall in category 3.

The blamed commit has adjusted the MTU for all 3 categories by EDSA_HLEN
(8 bytes), resulting in a new maximum MTU of 1492 being reported by the
driver for these switches.

I don't have the hardware to test, but I do have a MV88E6390 switch on
which I can simulate this by commenting out its .port_set_jumbo_size
definition from mv88e6390_ops. The result is this set of messages at
probe time:

mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 1
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 2
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 3
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 4
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 5
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 6
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 7
mv88e6085 d0032004.mdio-mii:10: nonfatal error -34 setting MTU to 1500 on port 8

It is highly implausible that there exist Ethernet switches which don't
support the standard MTU of 1500 octets, and this is what the DSA
framework says as well - the error comes from dsa_slave_create() ->
dsa_slave_change_mtu(slave_dev, ETH_DATA_LEN).

But the error messages are alarming, and it would be good to suppress
them.

As a consequence of this unlikeliness, we reimplement mv88e6xxx_get_max_mtu()
and mv88e6xxx_change_mtu() on switches from the 3rd category as follows:
the maximum supported MTU is 1500, and any request to set the MTU to a
value larger than that fails in dev_validate_mtu().

Fixes: b9c587fed61c ("dsa: mv88e6xxx: Include tagger overhead when setting MTU for DSA and CPU ports")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 0a5d6c7bb128d..30383c4f8fd0e 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -3549,7 +3549,7 @@ static int mv88e6xxx_get_max_mtu(struct dsa_switch *ds, int port)
 		return 10240 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
 	else if (chip->info->ops->set_max_frame_size)
 		return 1632 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
-	return 1522 - VLAN_ETH_HLEN - EDSA_HLEN - ETH_FCS_LEN;
+	return ETH_DATA_LEN;
 }
 
 static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
@@ -3557,6 +3557,17 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int ret = 0;
 
+	/* For families where we don't know how to alter the MTU,
+	 * just accept any value up to ETH_DATA_LEN
+	 */
+	if (!chip->info->ops->port_set_jumbo_size &&
+	    !chip->info->ops->set_max_frame_size) {
+		if (new_mtu > ETH_DATA_LEN)
+			return -EINVAL;
+
+		return 0;
+	}
+
 	if (dsa_is_dsa_port(ds, port) || dsa_is_cpu_port(ds, port))
 		new_mtu += EDSA_HLEN;
 
@@ -3565,9 +3576,6 @@ static int mv88e6xxx_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
 		ret = chip->info->ops->port_set_jumbo_size(chip, port, new_mtu);
 	else if (chip->info->ops->set_max_frame_size)
 		ret = chip->info->ops->set_max_frame_size(chip, new_mtu);
-	else
-		if (new_mtu > 1522)
-			ret = -EINVAL;
 	mv88e6xxx_reg_unlock(chip);
 
 	return ret;

From 1a87e641d8a50c30b63b1d90819bc607b4327596 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 14 Mar 2023 14:18:27 -0500
Subject: [PATCH 357/411] net: Use of_property_read_bool() for boolean
 properties

It is preferred to use typed property access functions (i.e.
of_property_read_<type> functions) rather than low-level
of_get_property/of_find_property functions for reading properties.
Convert reading boolean properties to of_property_read_bool().

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> # for net/can
Acked-by: Kalle Valo <kvalo@kernel.org>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Acked-by: Francois Romieu <romieu@fr.zoreil.com>
Reviewed-by: Wei Fang <wei.fang@nxp.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/cc770/cc770_platform.c          | 12 ++++++------
 drivers/net/ethernet/cadence/macb_main.c        |  2 +-
 drivers/net/ethernet/davicom/dm9000.c           |  4 ++--
 drivers/net/ethernet/freescale/fec_main.c       |  2 +-
 drivers/net/ethernet/freescale/fec_mpc52xx.c    |  2 +-
 drivers/net/ethernet/freescale/gianfar.c        |  4 ++--
 drivers/net/ethernet/ibm/emac/core.c            |  8 ++++----
 drivers/net/ethernet/ibm/emac/rgmii.c           |  2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c |  3 +--
 drivers/net/ethernet/sun/niu.c                  |  2 +-
 drivers/net/ethernet/ti/cpsw-phy-sel.c          |  3 +--
 drivers/net/ethernet/ti/netcp_ethss.c           |  8 +++-----
 drivers/net/ethernet/via/via-velocity.c         |  3 +--
 drivers/net/ethernet/via/via-velocity.h         |  2 +-
 drivers/net/ethernet/xilinx/ll_temac_main.c     |  9 ++++-----
 drivers/net/wan/fsl_ucc_hdlc.c                  | 11 +++--------
 drivers/net/wireless/ti/wlcore/spi.c            |  3 +--
 net/ncsi/ncsi-manage.c                          |  4 ++--
 18 files changed, 36 insertions(+), 48 deletions(-)

diff --git a/drivers/net/can/cc770/cc770_platform.c b/drivers/net/can/cc770/cc770_platform.c
index 8d916e2ee6c25..8dcc32e4e30ef 100644
--- a/drivers/net/can/cc770/cc770_platform.c
+++ b/drivers/net/can/cc770/cc770_platform.c
@@ -93,20 +93,20 @@ static int cc770_get_of_node_data(struct platform_device *pdev,
 	if (priv->can.clock.freq > 8000000)
 		priv->cpu_interface |= CPUIF_DMC;
 
-	if (of_get_property(np, "bosch,divide-memory-clock", NULL))
+	if (of_property_read_bool(np, "bosch,divide-memory-clock"))
 		priv->cpu_interface |= CPUIF_DMC;
-	if (of_get_property(np, "bosch,iso-low-speed-mux", NULL))
+	if (of_property_read_bool(np, "bosch,iso-low-speed-mux"))
 		priv->cpu_interface |= CPUIF_MUX;
 
 	if (!of_get_property(np, "bosch,no-comperator-bypass", NULL))
 		priv->bus_config |= BUSCFG_CBY;
-	if (of_get_property(np, "bosch,disconnect-rx0-input", NULL))
+	if (of_property_read_bool(np, "bosch,disconnect-rx0-input"))
 		priv->bus_config |= BUSCFG_DR0;
-	if (of_get_property(np, "bosch,disconnect-rx1-input", NULL))
+	if (of_property_read_bool(np, "bosch,disconnect-rx1-input"))
 		priv->bus_config |= BUSCFG_DR1;
-	if (of_get_property(np, "bosch,disconnect-tx1-output", NULL))
+	if (of_property_read_bool(np, "bosch,disconnect-tx1-output"))
 		priv->bus_config |= BUSCFG_DT1;
-	if (of_get_property(np, "bosch,polarity-dominant", NULL))
+	if (of_property_read_bool(np, "bosch,polarity-dominant"))
 		priv->bus_config |= BUSCFG_POL;
 
 	prop = of_get_property(np, "bosch,clock-out-frequency", &prop_size);
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 6e141a8bbf43c..66e30561569eb 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -4990,7 +4990,7 @@ static int macb_probe(struct platform_device *pdev)
 		bp->jumbo_max_len = macb_config->jumbo_max_len;
 
 	bp->wol = 0;
-	if (of_get_property(np, "magic-packet", NULL))
+	if (of_property_read_bool(np, "magic-packet"))
 		bp->wol |= MACB_WOL_HAS_MAGIC_PACKET;
 	device_set_wakeup_capable(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET);
 
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index b21e56de61671..05a89ab6766c4 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1393,9 +1393,9 @@ static struct dm9000_plat_data *dm9000_parse_dt(struct device *dev)
 	if (!pdata)
 		return ERR_PTR(-ENOMEM);
 
-	if (of_find_property(np, "davicom,ext-phy", NULL))
+	if (of_property_read_bool(np, "davicom,ext-phy"))
 		pdata->flags |= DM9000_PLATF_EXT_PHY;
-	if (of_find_property(np, "davicom,no-eeprom", NULL))
+	if (of_property_read_bool(np, "davicom,no-eeprom"))
 		pdata->flags |= DM9000_PLATF_NO_EEPROM;
 
 	ret = of_get_mac_address(np, pdata->dev_addr);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c73e25f8995eb..f3b16a6673e2b 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -4251,7 +4251,7 @@ fec_probe(struct platform_device *pdev)
 	if (ret)
 		goto failed_ipc_init;
 
-	if (of_get_property(np, "fsl,magic-packet", NULL))
+	if (of_property_read_bool(np, "fsl,magic-packet"))
 		fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET;
 
 	ret = fec_enet_init_stop_mode(fep, np);
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index a7f4c3c29f3e4..b88816b71ddff 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -937,7 +937,7 @@ static int mpc52xx_fec_probe(struct platform_device *op)
 	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
 
 	/* the 7-wire property means don't use MII mode */
-	if (of_find_property(np, "fsl,7-wire-mode", NULL)) {
+	if (of_property_read_bool(np, "fsl,7-wire-mode")) {
 		priv->seven_wire_mode = 1;
 		dev_info(&ndev->dev, "using 7-wire PHY mode\n");
 	}
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index b2def295523ab..38d5013c6fedf 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -787,10 +787,10 @@ static int gfar_of_init(struct platform_device *ofdev, struct net_device **pdev)
 	else
 		priv->interface = gfar_get_interface(dev);
 
-	if (of_find_property(np, "fsl,magic-packet", NULL))
+	if (of_property_read_bool(np, "fsl,magic-packet"))
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_MAGIC_PACKET;
 
-	if (of_get_property(np, "fsl,wake-on-filer", NULL))
+	if (of_property_read_bool(np, "fsl,wake-on-filer"))
 		priv->device_flags |= FSL_GIANFAR_DEV_HAS_WAKE_ON_FILER;
 
 	priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 9b08e41ccc294..c97095abd26ab 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -2939,9 +2939,9 @@ static int emac_init_config(struct emac_instance *dev)
 	}
 
 	/* Fixup some feature bits based on the device tree */
-	if (of_get_property(np, "has-inverted-stacr-oc", NULL))
+	if (of_property_read_bool(np, "has-inverted-stacr-oc"))
 		dev->features |= EMAC_FTR_STACR_OC_INVERT;
-	if (of_get_property(np, "has-new-stacr-staopc", NULL))
+	if (of_property_read_bool(np, "has-new-stacr-staopc"))
 		dev->features |= EMAC_FTR_HAS_NEW_STACR;
 
 	/* CAB lacks the appropriate properties */
@@ -3042,7 +3042,7 @@ static int emac_probe(struct platform_device *ofdev)
 	 * property here for now, but new flat device trees should set a
 	 * status property to "disabled" instead.
 	 */
-	if (of_get_property(np, "unused", NULL) || !of_device_is_available(np))
+	if (of_property_read_bool(np, "unused") || !of_device_is_available(np))
 		return -ENODEV;
 
 	/* Find ourselves in the bootlist if we are there */
@@ -3333,7 +3333,7 @@ static void __init emac_make_bootlist(void)
 
 		if (of_match_node(emac_match, np) == NULL)
 			continue;
-		if (of_get_property(np, "unused", NULL))
+		if (of_property_read_bool(np, "unused"))
 			continue;
 		idx = of_get_property(np, "cell-index", NULL);
 		if (idx == NULL)
diff --git a/drivers/net/ethernet/ibm/emac/rgmii.c b/drivers/net/ethernet/ibm/emac/rgmii.c
index 242ef976fd15e..50358cf001306 100644
--- a/drivers/net/ethernet/ibm/emac/rgmii.c
+++ b/drivers/net/ethernet/ibm/emac/rgmii.c
@@ -242,7 +242,7 @@ static int rgmii_probe(struct platform_device *ofdev)
 	}
 
 	/* Check for RGMII flags */
-	if (of_get_property(ofdev->dev.of_node, "has-mdio", NULL))
+	if (of_property_read_bool(ofdev->dev.of_node, "has-mdio"))
 		dev->flags |= EMAC_RGMII_FLAG_HAS_MDIO;
 
 	/* CAB lacks the right properties, fix this up */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
index ac8580f501e2e..ac550d1ac0159 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -213,8 +213,7 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev)
 	struct device_node *np = dev->of_node;
 	int err = 0;
 
-	if (of_get_property(np, "snps,rmii_refclk_ext", NULL))
-		dwmac->rmii_refclk_ext = true;
+	dwmac->rmii_refclk_ext = of_property_read_bool(np, "snps,rmii_refclk_ext");
 
 	dwmac->clk_tx = devm_clk_get(dev, "tx");
 	if (IS_ERR(dwmac->clk_tx)) {
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index e6144d963eaaa..ab8b09a9ef61d 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -9271,7 +9271,7 @@ static int niu_get_of_props(struct niu *np)
 	if (model)
 		strcpy(np->vpd.model, model);
 
-	if (of_find_property(dp, "hot-swappable-phy", NULL)) {
+	if (of_property_read_bool(dp, "hot-swappable-phy")) {
 		np->flags |= (NIU_FLAGS_10G | NIU_FLAGS_FIBER |
 			NIU_FLAGS_HOTPLUG_PHY);
 	}
diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index e8f38e3f7706e..25e707d7b87ce 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -226,8 +226,7 @@ static int cpsw_phy_sel_probe(struct platform_device *pdev)
 	if (IS_ERR(priv->gmii_sel))
 		return PTR_ERR(priv->gmii_sel);
 
-	if (of_find_property(pdev->dev.of_node, "rmii-clock-ext", NULL))
-		priv->rmii_clock_external = true;
+	priv->rmii_clock_external = of_property_read_bool(pdev->dev.of_node, "rmii-clock-ext");
 
 	dev_set_drvdata(&pdev->dev, priv);
 
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index 751fb0bc65c50..2adf82a32bf6a 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3583,13 +3583,11 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 	/* init the hw stats lock */
 	spin_lock_init(&gbe_dev->hw_stats_lock);
 
-	if (of_find_property(node, "enable-ale", NULL)) {
-		gbe_dev->enable_ale = true;
+	gbe_dev->enable_ale = of_property_read_bool(node, "enable-ale");
+	if (gbe_dev->enable_ale)
 		dev_info(dev, "ALE enabled\n");
-	} else {
-		gbe_dev->enable_ale = false;
+	else
 		dev_dbg(dev, "ALE bypass enabled*\n");
-	}
 
 	ret = of_property_read_u32(node, "tx-queue",
 				   &gbe_dev->tx_queue_id);
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index a502812ac418a..86f7843b4591c 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2709,8 +2709,7 @@ static int velocity_get_platform_info(struct velocity_info *vptr)
 	struct resource res;
 	int ret;
 
-	if (of_get_property(vptr->dev->of_node, "no-eeprom", NULL))
-		vptr->no_eeprom = 1;
+	vptr->no_eeprom = of_property_read_bool(vptr->dev->of_node, "no-eeprom");
 
 	ret = of_address_to_resource(vptr->dev->of_node, 0, &res);
 	if (ret) {
diff --git a/drivers/net/ethernet/via/via-velocity.h b/drivers/net/ethernet/via/via-velocity.h
index ffdac6fac0549..f64ed39b93d83 100644
--- a/drivers/net/ethernet/via/via-velocity.h
+++ b/drivers/net/ethernet/via/via-velocity.h
@@ -1383,7 +1383,7 @@ struct velocity_info {
 	struct device *dev;
 	struct pci_dev *pdev;
 	struct net_device *netdev;
-	int no_eeprom;
+	bool no_eeprom;
 
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	u8 ip_addr[4];
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 1066420d6a83a..e0ac1bcd9925c 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1455,12 +1455,11 @@ static int temac_probe(struct platform_device *pdev)
 	 * endianness mode.  Default for OF devices is big-endian.
 	 */
 	little_endian = false;
-	if (temac_np) {
-		if (of_get_property(temac_np, "little-endian", NULL))
-			little_endian = true;
-	} else if (pdata) {
+	if (temac_np)
+		little_endian = of_property_read_bool(temac_np, "little-endian");
+	else if (pdata)
 		little_endian = pdata->reg_little_endian;
-	}
+
 	if (little_endian) {
 		lp->temac_ior = _temac_ior_le;
 		lp->temac_iow = _temac_iow_le;
diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c
index 1c53b55469270..47c2ad7a3e429 100644
--- a/drivers/net/wan/fsl_ucc_hdlc.c
+++ b/drivers/net/wan/fsl_ucc_hdlc.c
@@ -1177,14 +1177,9 @@ static int ucc_hdlc_probe(struct platform_device *pdev)
 	uhdlc_priv->dev = &pdev->dev;
 	uhdlc_priv->ut_info = ut_info;
 
-	if (of_get_property(np, "fsl,tdm-interface", NULL))
-		uhdlc_priv->tsa = 1;
-
-	if (of_get_property(np, "fsl,ucc-internal-loopback", NULL))
-		uhdlc_priv->loopback = 1;
-
-	if (of_get_property(np, "fsl,hdlc-bus", NULL))
-		uhdlc_priv->hdlc_bus = 1;
+	uhdlc_priv->tsa = of_property_read_bool(np, "fsl,tdm-interface");
+	uhdlc_priv->loopback = of_property_read_bool(np, "fsl,ucc-internal-loopback");
+	uhdlc_priv->hdlc_bus = of_property_read_bool(np, "fsl,hdlc-bus");
 
 	if (uhdlc_priv->tsa == 1) {
 		utdm = kzalloc(sizeof(*utdm), GFP_KERNEL);
diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c
index 2d2edddc77bdd..3f88e6a0a510e 100644
--- a/drivers/net/wireless/ti/wlcore/spi.c
+++ b/drivers/net/wireless/ti/wlcore/spi.c
@@ -447,8 +447,7 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue,
 	dev_info(&spi->dev, "selected chip family is %s\n",
 		 pdev_data->family->name);
 
-	if (of_find_property(dt_node, "clock-xtal", NULL))
-		pdev_data->ref_clock_xtal = true;
+	pdev_data->ref_clock_xtal = of_property_read_bool(dt_node, "clock-xtal");
 
 	/* optional clock frequency params */
 	of_property_read_u32(dt_node, "ref-clock-frequency",
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 80713febfac6d..d9da942ad53dd 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -1803,8 +1803,8 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev,
 	pdev = to_platform_device(dev->dev.parent);
 	if (pdev) {
 		np = pdev->dev.of_node;
-		if (np && (of_get_property(np, "mellanox,multi-host", NULL) ||
-			   of_get_property(np, "mlx,multi-host", NULL)))
+		if (np && (of_property_read_bool(np, "mellanox,multi-host") ||
+			   of_property_read_bool(np, "mlx,multi-host")))
 			ndp->mlx_multi_host = true;
 	}
 

From fa0f1ba7c8233118b6fdaa65e2f5ded563d3e1fa Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Wed, 15 Mar 2023 09:52:22 +0800
Subject: [PATCH 358/411] virtio_net: fix page_to_skb() miss headroom

Because headroom is not passed to page_to_skb(), this causes the shinfo
exceeds the range. Then the frags of shinfo are changed by other process.

[  157.724634] stack segment: 0000 [#1] PREEMPT SMP NOPTI
[  157.725358] CPU: 3 PID: 679 Comm: xdp_pass_user_f Tainted: G            E      6.2.0+ #150
[  157.726401] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/4
[  157.727820] RIP: 0010:skb_release_data+0x11b/0x180
[  157.728449] Code: 44 24 02 48 83 c3 01 39 d8 7e be 48 89 d8 48 c1 e0 04 41 80 7d 7e 00 49 8b 6c 04 30 79 0c 48 89 ef e8 89 b
[  157.730751] RSP: 0018:ffffc90000178b48 EFLAGS: 00010202
[  157.731383] RAX: 0000000000000010 RBX: 0000000000000001 RCX: 0000000000000000
[  157.732270] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff888100dd0b00
[  157.733117] RBP: 5d5d76010f6e2408 R08: ffff888100dd0b2c R09: 0000000000000000
[  157.734013] R10: ffffffff82effd30 R11: 000000000000a14e R12: ffff88810981ffc0
[  157.734904] R13: ffff888100dd0b00 R14: 0000000000000002 R15: 0000000000002310
[  157.735793] FS:  00007f06121d9740(0000) GS:ffff88842fcc0000(0000) knlGS:0000000000000000
[  157.736794] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  157.737522] CR2: 00007ffd9a56c084 CR3: 0000000104bda001 CR4: 0000000000770ee0
[  157.738420] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  157.739283] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  157.740146] PKRU: 55555554
[  157.740502] Call Trace:
[  157.740843]  <IRQ>
[  157.741117]  kfree_skb_reason+0x50/0x120
[  157.741613]  __udp4_lib_rcv+0x52b/0x5e0
[  157.742132]  ip_protocol_deliver_rcu+0xaf/0x190
[  157.742715]  ip_local_deliver_finish+0x77/0xa0
[  157.743280]  ip_sublist_rcv_finish+0x80/0x90
[  157.743834]  ip_list_rcv_finish.constprop.0+0x16f/0x190
[  157.744493]  ip_list_rcv+0x126/0x140
[  157.744952]  __netif_receive_skb_list_core+0x29b/0x2c0
[  157.745602]  __netif_receive_skb_list+0xed/0x160
[  157.746190]  ? udp4_gro_receive+0x275/0x350
[  157.746732]  netif_receive_skb_list_internal+0xf2/0x1b0
[  157.747398]  napi_gro_receive+0xd1/0x210
[  157.747911]  virtnet_receive+0x75/0x1c0
[  157.748422]  virtnet_poll+0x48/0x1b0
[  157.748878]  __napi_poll+0x29/0x1b0
[  157.749330]  net_rx_action+0x27a/0x340
[  157.749812]  __do_softirq+0xf3/0x2fb
[  157.750298]  do_softirq+0xa2/0xd0
[  157.750745]  </IRQ>
[  157.751563]  <TASK>
[  157.752329]  __local_bh_enable_ip+0x6d/0x80
[  157.753178]  virtnet_xdp_set+0x482/0x860
[  157.754159]  ? __pfx_virtnet_xdp+0x10/0x10
[  157.755129]  dev_xdp_install+0xa4/0xe0
[  157.756033]  dev_xdp_attach+0x20b/0x5e0
[  157.756933]  do_setlink+0x82e/0xc90
[  157.757777]  ? __nla_validate_parse+0x12b/0x1e0
[  157.758744]  rtnl_setlink+0xd8/0x170
[  157.759549]  ? mod_objcg_state+0xcb/0x320
[  157.760328]  ? security_capable+0x37/0x60
[  157.761209]  ? security_capable+0x37/0x60
[  157.762072]  rtnetlink_rcv_msg+0x145/0x3d0
[  157.762929]  ? ___slab_alloc+0x327/0x610
[  157.763754]  ? __alloc_skb+0x141/0x170
[  157.764533]  ? __pfx_rtnetlink_rcv_msg+0x10/0x10
[  157.765422]  netlink_rcv_skb+0x58/0x110
[  157.766229]  netlink_unicast+0x21f/0x330
[  157.766951]  netlink_sendmsg+0x240/0x4a0
[  157.767654]  sock_sendmsg+0x93/0xa0
[  157.768434]  ? sockfd_lookup_light+0x12/0x70
[  157.769245]  __sys_sendto+0xfe/0x170
[  157.770079]  ? handle_mm_fault+0xe9/0x2d0
[  157.770859]  ? preempt_count_add+0x51/0xa0
[  157.771645]  ? up_read+0x3c/0x80
[  157.772340]  ? do_user_addr_fault+0x1e9/0x710
[  157.773166]  ? kvm_read_and_reset_apf_flags+0x49/0x60
[  157.774087]  __x64_sys_sendto+0x29/0x30
[  157.774856]  do_syscall_64+0x3c/0x90
[  157.775518]  entry_SYSCALL_64_after_hwframe+0x72/0xdc
[  157.776382] RIP: 0033:0x7f06122def70

Fixes: 18117a842ab0 ("virtio-net: remove xdp related info from page_to_skb()")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 1a309cfb4976a..8ecf7a341d543 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -446,7 +446,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
 static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 				   struct receive_queue *rq,
 				   struct page *page, unsigned int offset,
-				   unsigned int len, unsigned int truesize)
+				   unsigned int len, unsigned int truesize,
+				   unsigned int headroom)
 {
 	struct sk_buff *skb;
 	struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -464,11 +465,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
 	else
 		hdr_padded_len = sizeof(struct padded_vnet_hdr);
 
-	buf = p;
+	buf = p - headroom;
 	len -= hdr_len;
 	offset += hdr_padded_len;
 	p += hdr_padded_len;
-	tailroom = truesize - hdr_padded_len - len;
+	tailroom = truesize - headroom  - hdr_padded_len - len;
 
 	shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
@@ -1009,7 +1010,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
 {
 	struct page *page = buf;
 	struct sk_buff *skb =
-		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+		page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
 
 	stats->bytes += len - vi->hdr_len;
 	if (unlikely(!skb))
@@ -1332,7 +1333,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	rcu_read_unlock();
 
 skip_xdp:
-	head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+	head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
 	curr_skb = head_skb;
 
 	if (unlikely(!curr_skb))

From 1a3bd6eabae35afc5c6dbe2651f21467cf8ad3fd Mon Sep 17 00:00:00 2001
From: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Date: Wed, 15 Mar 2023 09:52:23 +0800
Subject: [PATCH 359/411] virtio_net: free xdp shinfo frags when
 build_skb_from_xdp_buff() fails

build_skb_from_xdp_buff() may return NULL, in this case
we need to free the frags of xdp shinfo.

Fixes: fab89bafa95b ("virtio-net: support multi-buffer xdp")
Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/virtio_net.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 8ecf7a341d543..2396c28c01221 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -1273,9 +1273,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 
 		switch (act) {
 		case XDP_PASS:
+			head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
+			if (unlikely(!head_skb))
+				goto err_xdp_frags;
+
 			if (unlikely(xdp_page != page))
 				put_page(page);
-			head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
 			rcu_read_unlock();
 			return head_skb;
 		case XDP_TX:

From 7f5ebf5dae42e710162f1c481ebcf28ab7b741c7 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 15 Mar 2023 08:41:14 +0100
Subject: [PATCH 360/411] ravb: avoid PHY being resumed when interface is not
 up

RAVB doesn't need mdiobus suspend/resume, that's why it sets
'mac_managed_pm'. However, setting it needs to be moved from init to
probe, so mdiobus PM functions will really never be called (e.g. when
the interface is not up yet during suspend/resume).

Fixes: 4924c0cdce75 ("net: ravb: Fix PHY state warning splat during system resume")
Suggested-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/renesas/ravb_main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 0f54849a38235..894e2690c6437 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1455,8 +1455,6 @@ static int ravb_phy_init(struct net_device *ndev)
 		phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT);
 	}
 
-	/* Indicate that the MAC is responsible for managing PHY PM */
-	phydev->mac_managed_pm = true;
 	phy_attached_info(phydev);
 
 	return 0;
@@ -2379,6 +2377,8 @@ static int ravb_mdio_init(struct ravb_private *priv)
 {
 	struct platform_device *pdev = priv->pdev;
 	struct device *dev = &pdev->dev;
+	struct phy_device *phydev;
+	struct device_node *pn;
 	int error;
 
 	/* Bitbang init */
@@ -2400,6 +2400,14 @@ static int ravb_mdio_init(struct ravb_private *priv)
 	if (error)
 		goto out_free_bus;
 
+	pn = of_parse_phandle(dev->of_node, "phy-handle", 0);
+	phydev = of_phy_find_device(pn);
+	if (phydev) {
+		phydev->mac_managed_pm = true;
+		put_device(&phydev->mdio.dev);
+	}
+	of_node_put(pn);
+
 	return 0;
 
 out_free_bus:

From c6be7136afb224a01d4cde2983ddebac8da98693 Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Wed, 15 Mar 2023 08:41:15 +0100
Subject: [PATCH 361/411] sh_eth: avoid PHY being resumed when interface is not
 up

SH_ETH doesn't need mdiobus suspend/resume, that's why it sets
'mac_managed_pm'. However, setting it needs to be moved from init to
probe, so mdiobus PM functions will really never be called (e.g. when
the interface is not up yet during suspend/resume).

Fixes: 6a1dbfefdae4 ("net: sh_eth: Fix PHY state warning splat during system resume")
Suggested-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Reviewed-by: Sergey Shtylyov <s.shtylyov@omp.ru>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/renesas/sh_eth.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index ed17163d78114..d8ec729825be4 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2029,8 +2029,6 @@ static int sh_eth_phy_init(struct net_device *ndev)
 	if (mdp->cd->register_type != SH_ETH_REG_GIGABIT)
 		phy_set_max_speed(phydev, SPEED_100);
 
-	/* Indicate that the MAC is responsible for managing PHY PM */
-	phydev->mac_managed_pm = true;
 	phy_attached_info(phydev);
 
 	return 0;
@@ -3097,6 +3095,8 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
 	struct bb_info *bitbang;
 	struct platform_device *pdev = mdp->pdev;
 	struct device *dev = &mdp->pdev->dev;
+	struct phy_device *phydev;
+	struct device_node *pn;
 
 	/* create bit control struct for PHY */
 	bitbang = devm_kzalloc(dev, sizeof(struct bb_info), GFP_KERNEL);
@@ -3133,6 +3133,14 @@ static int sh_mdio_init(struct sh_eth_private *mdp,
 	if (ret)
 		goto out_free_bus;
 
+	pn = of_parse_phandle(dev->of_node, "phy-handle", 0);
+	phydev = of_phy_find_device(pn);
+	if (phydev) {
+		phydev->mac_managed_pm = true;
+		put_device(&phydev->mdio.dev);
+	}
+	of_node_put(pn);
+
 	return 0;
 
 out_free_bus:

From 8a2618e14f81604a9b6ad305d57e0c8da939cd65 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Wed, 15 Mar 2023 14:40:09 +0200
Subject: [PATCH 362/411] ipv4: Fix incorrect table ID in IOCTL path

Commit f96a3d74554d ("ipv4: Fix incorrect route flushing when source
address is deleted") started to take the table ID field in the FIB info
structure into account when determining if two structures are identical
or not. This field is initialized using the 'fc_table' field in the
route configuration structure, which is not set when adding a route via
IOCTL.

The above can result in user space being able to install two identical
routes that only differ in the table ID field of their associated FIB
info.

Fix by initializing the table ID field in the route configuration
structure in the IOCTL path.

Before the fix:

 # ip route add default via 192.0.2.2
 # route add default gw 192.0.2.2
 # ip -4 r show default
 # default via 192.0.2.2 dev dummy10
 # default via 192.0.2.2 dev dummy10

After the fix:

 # ip route add default via 192.0.2.2
 # route add default gw 192.0.2.2
 SIOCADDRT: File exists
 # ip -4 r show default
 default via 192.0.2.2 dev dummy10

Audited the code paths to ensure there are no other paths that do not
properly initialize the route configuration structure when installing a
route.

Fixes: 5a56a0b3a45d ("net: Don't delete routes in different VRFs")
Fixes: f96a3d74554d ("ipv4: Fix incorrect route flushing when source address is deleted")
Reported-by: gaoxingwang <gaoxingwang1@huawei.com>
Link: https://lore.kernel.org/netdev/20230314144159.2354729-1-gaoxingwang1@huawei.com/
Tested-by: gaoxingwang <gaoxingwang1@huawei.com>
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20230315124009.4015212-1-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/fib_frontend.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b5736ef16ed2d..390f4be7f7bec 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -576,6 +576,9 @@ static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt,
 			cfg->fc_scope = RT_SCOPE_UNIVERSE;
 	}
 
+	if (!cfg->fc_table)
+		cfg->fc_table = RT_TABLE_MAIN;
+
 	if (cmd == SIOCDELRT)
 		return 0;
 

From 43ffe6caccc7a1bb9d7442fbab521efbf6c1378c Mon Sep 17 00:00:00 2001
From: Szymon Heidrich <szymon.heidrich@gmail.com>
Date: Thu, 16 Mar 2023 12:05:40 +0100
Subject: [PATCH 363/411] net: usb: smsc75xx: Move packet length check to
 prevent kernel panic in skb_pull

Packet length check needs to be located after size and align_count
calculation to prevent kernel panic in skb_pull() in case
rx_cmd_a & RX_CMD_A_RED evaluates to true.

Fixes: d8b228318935 ("net: usb: smsc75xx: Limit packet length to skb->len")
Signed-off-by: Szymon Heidrich <szymon.heidrich@gmail.com>
Link: https://lore.kernel.org/r/20230316110540.77531-1-szymon.heidrich@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/smsc75xx.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index db34f8d1d6051..5d6454fedb3f1 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -2200,6 +2200,13 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 		size = (rx_cmd_a & RX_CMD_A_LEN) - RXW_PADDING;
 		align_count = (4 - ((size + RXW_PADDING) % 4)) % 4;
 
+		if (unlikely(size > skb->len)) {
+			netif_dbg(dev, rx_err, dev->net,
+				  "size err rx_cmd_a=0x%08x\n",
+				  rx_cmd_a);
+			return 0;
+		}
+
 		if (unlikely(rx_cmd_a & RX_CMD_A_RED)) {
 			netif_dbg(dev, rx_err, dev->net,
 				  "Error rx_cmd_a=0x%08x\n", rx_cmd_a);
@@ -2212,8 +2219,7 @@ static int smsc75xx_rx_fixup(struct usbnet *dev, struct sk_buff *skb)
 				dev->net->stats.rx_frame_errors++;
 		} else {
 			/* MAX_SINGLE_PACKET_SIZE + 4(CRC) + 2(COE) + 4(Vlan) */
-			if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12) ||
-				     size > skb->len)) {
+			if (unlikely(size > (MAX_SINGLE_PACKET_SIZE + ETH_HLEN + 12))) {
 				netif_dbg(dev, rx_err, dev->net,
 					  "size err rx_cmd_a=0x%08x\n",
 					  rx_cmd_a);

From 37d010399f7552add2b68e2b347901c83562dab8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 15 Mar 2023 13:55:38 +0100
Subject: [PATCH 364/411] net: atlantic: Fix crash when XDP is enabled but no
 program is loaded
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The aq_xdp_run_prog() function falls back to the XDP_ABORTED action
handler (using a goto) if the operations for any of the other actions fail.
The XDP_ABORTED handler in turn calls the bpf_warn_invalid_xdp_action()
tracepoint. However, the function also jumps into the XDP_PASS helper if no
XDP program is loaded on the device, which means the XDP_ABORTED handler
can be run with a NULL program pointer. This results in a NULL pointer
deref because the tracepoint dereferences the 'prog' pointer passed to it.

This situation can happen in multiple ways:
- If a packet arrives between the removal of the program from the interface
  and the static_branch_dec() in aq_xdp_setup()
- If there are multiple devices using the same driver in the system and
  one of them has an XDP program loaded and the other does not.

Fix this by refactoring the aq_xdp_run_prog() function to remove the 'goto
pass' handling if there is no XDP program loaded. Instead, factor out the
skb building in a separate small helper function.

Fixes: 26efaef759a1 ("net: atlantic: Implement xdp data plane")
Reported-by: Freysteinn Alfredsson <Freysteinn.Alfredsson@kau.se>
Tested-by: Freysteinn Alfredsson <Freysteinn.Alfredsson@kau.se>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20230315125539.103319-1-toke@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/aquantia/atlantic/aq_ring.c  | 28 ++++++++++++++-----
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index 1e8d902e1c8ea..7f933175cbdac 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -412,6 +412,25 @@ int aq_xdp_xmit(struct net_device *dev, int num_frames,
 	return num_frames - drop;
 }
 
+static struct sk_buff *aq_xdp_build_skb(struct xdp_buff *xdp,
+					struct net_device *dev,
+					struct aq_ring_buff_s *buff)
+{
+	struct xdp_frame *xdpf;
+	struct sk_buff *skb;
+
+	xdpf = xdp_convert_buff_to_frame(xdp);
+	if (unlikely(!xdpf))
+		return NULL;
+
+	skb = xdp_build_skb_from_frame(xdpf, dev);
+	if (!skb)
+		return NULL;
+
+	aq_get_rxpages_xdp(buff, xdp);
+	return skb;
+}
+
 static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic,
 				       struct xdp_buff *xdp,
 				       struct aq_ring_s *rx_ring,
@@ -431,7 +450,7 @@ static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic,
 
 	prog = READ_ONCE(rx_ring->xdp_prog);
 	if (!prog)
-		goto pass;
+		return aq_xdp_build_skb(xdp, aq_nic->ndev, buff);
 
 	prefetchw(xdp->data_hard_start); /* xdp_frame write */
 
@@ -442,17 +461,12 @@ static struct sk_buff *aq_xdp_run_prog(struct aq_nic_s *aq_nic,
 	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_PASS:
-pass:
-		xdpf = xdp_convert_buff_to_frame(xdp);
-		if (unlikely(!xdpf))
-			goto out_aborted;
-		skb = xdp_build_skb_from_frame(xdpf, aq_nic->ndev);
+		skb = aq_xdp_build_skb(xdp, aq_nic->ndev, buff);
 		if (!skb)
 			goto out_aborted;
 		u64_stats_update_begin(&rx_ring->stats.rx.syncp);
 		++rx_ring->stats.rx.xdp_pass;
 		u64_stats_update_end(&rx_ring->stats.rx.syncp);
-		aq_get_rxpages_xdp(buff, xdp);
 		return skb;
 	case XDP_TX:
 		xdpf = xdp_convert_buff_to_frame(xdp);

From 3d87debb8ed2649608ff432699e7c961c0c6f03b Mon Sep 17 00:00:00 2001
From: Alexandra Winter <wintera@linux.ibm.com>
Date: Wed, 15 Mar 2023 14:14:35 +0100
Subject: [PATCH 365/411] net/iucv: Fix size of interrupt data

iucv_irq_data needs to be 4 bytes larger.
These bytes are not used by the iucv module, but written by
the z/VM hypervisor in case a CPU is deconfigured.

Reported as:
BUG dma-kmalloc-64 (Not tainted): kmalloc Redzone overwritten
-----------------------------------------------------------------------------
0x0000000000400564-0x0000000000400567 @offset=1380. First byte 0x80 instead of 0xcc
Allocated in iucv_cpu_prepare+0x44/0xd0 age=167839 cpu=2 pid=1
__kmem_cache_alloc_node+0x166/0x450
kmalloc_node_trace+0x3a/0x70
iucv_cpu_prepare+0x44/0xd0
cpuhp_invoke_callback+0x156/0x2f0
cpuhp_issue_call+0xf0/0x298
__cpuhp_setup_state_cpuslocked+0x136/0x338
__cpuhp_setup_state+0xf4/0x288
iucv_init+0xf4/0x280
do_one_initcall+0x78/0x390
do_initcalls+0x11a/0x140
kernel_init_freeable+0x25e/0x2a0
kernel_init+0x2e/0x170
__ret_from_fork+0x3c/0x58
ret_from_fork+0xa/0x40
Freed in iucv_init+0x92/0x280 age=167839 cpu=2 pid=1
__kmem_cache_free+0x308/0x358
iucv_init+0x92/0x280
do_one_initcall+0x78/0x390
do_initcalls+0x11a/0x140
kernel_init_freeable+0x25e/0x2a0
kernel_init+0x2e/0x170
__ret_from_fork+0x3c/0x58
ret_from_fork+0xa/0x40
Slab 0x0000037200010000 objects=32 used=30 fp=0x0000000000400640 flags=0x1ffff00000010200(slab|head|node=0|zone=0|
Object 0x0000000000400540 @offset=1344 fp=0x0000000000000000
Redzone  0000000000400500: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  ................
Redzone  0000000000400510: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  ................
Redzone  0000000000400520: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  ................
Redzone  0000000000400530: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  ................
Object   0000000000400540: 00 01 00 03 00 00 00 00 00 00 00 00 00 00 00 00  ................
Object   0000000000400550: f3 86 81 f2 f4 82 f8 82 f0 f0 f0 f0 f0 f0 f0 f2  ................
Object   0000000000400560: 00 00 00 00 80 00 00 00 cc cc cc cc cc cc cc cc  ................
Object   0000000000400570: cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc cc  ................
Redzone  0000000000400580: cc cc cc cc cc cc cc cc                          ........
Padding  00000000004005d4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
Padding  00000000004005e4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a  ZZZZZZZZZZZZZZZZ
Padding  00000000004005f4: 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a              ZZZZZZZZZZZZ
CPU: 6 PID: 121030 Comm: 116-pai-crypto. Not tainted 6.3.0-20230221.rc0.git4.99b8246b2d71.300.fc37.s390x+debug #1
Hardware name: IBM 3931 A01 704 (z/VM 7.3.0)
Call Trace:
[<000000032aa034ec>] dump_stack_lvl+0xac/0x100
[<0000000329f5a6cc>] check_bytes_and_report+0x104/0x140
[<0000000329f5aa78>] check_object+0x370/0x3c0
[<0000000329f5ede6>] free_debug_processing+0x15e/0x348
[<0000000329f5f06a>] free_to_partial_list+0x9a/0x2f0
[<0000000329f5f4a4>] __slab_free+0x1e4/0x3a8
[<0000000329f61768>] __kmem_cache_free+0x308/0x358
[<000000032a91465c>] iucv_cpu_dead+0x6c/0x88
[<0000000329c2fc66>] cpuhp_invoke_callback+0x156/0x2f0
[<000000032aa062da>] _cpu_down.constprop.0+0x22a/0x5e0
[<0000000329c3243e>] cpu_device_down+0x4e/0x78
[<000000032a61dee0>] device_offline+0xc8/0x118
[<000000032a61e048>] online_store+0x60/0xe0
[<000000032a08b6b0>] kernfs_fop_write_iter+0x150/0x1e8
[<0000000329fab65c>] vfs_write+0x174/0x360
[<0000000329fab9fc>] ksys_write+0x74/0x100
[<000000032aa03a5a>] __do_syscall+0x1da/0x208
[<000000032aa177b2>] system_call+0x82/0xb0
INFO: lockdep is turned off.
FIX dma-kmalloc-64: Restoring kmalloc Redzone 0x0000000000400564-0x0000000000400567=0xcc
FIX dma-kmalloc-64: Object at 0x0000000000400540 not freed

Fixes: 2356f4cb1911 ("[S390]: Rewrite of the IUCV base code, part 2")
Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
Link: https://lore.kernel.org/r/20230315131435.4113889-1-wintera@linux.ibm.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/iucv/iucv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index eb0295d900395..fc3fddeb6f36d 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -83,7 +83,7 @@ struct iucv_irq_data {
 	u16 ippathid;
 	u8  ipflags1;
 	u8  iptype;
-	u32 res2[8];
+	u32 res2[9];
 };
 
 struct iucv_irq_list {

From f38373345c65529639a01fba3675eb8cb4c579c3 Mon Sep 17 00:00:00 2001
From: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Date: Wed, 15 Mar 2023 14:41:17 +0100
Subject: [PATCH 366/411] i825xx: sni_82596: use eth_hw_addr_set()

netdev->dev_addr is now const, we can't write to it directly.
Copy scrambled mac address octects into an array then eth_hw_addr_set().

Fixes: adeef3e32146 ("net: constify netdev->dev_addr")
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Link: https://lore.kernel.org/r/20230315134117.79511-1-tsbogend@alpha.franken.de
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/i825xx/sni_82596.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c
index daec9ce04531b..54bb4d9a0d1ea 100644
--- a/drivers/net/ethernet/i825xx/sni_82596.c
+++ b/drivers/net/ethernet/i825xx/sni_82596.c
@@ -78,6 +78,7 @@ static int sni_82596_probe(struct platform_device *dev)
 	void __iomem *mpu_addr;
 	void __iomem *ca_addr;
 	u8 __iomem *eth_addr;
+	u8 mac[ETH_ALEN];
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 	ca = platform_get_resource(dev, IORESOURCE_MEM, 1);
@@ -109,12 +110,13 @@ static int sni_82596_probe(struct platform_device *dev)
 		goto probe_failed;
 
 	/* someone seems to like messed up stuff */
-	netdevice->dev_addr[0] = readb(eth_addr + 0x0b);
-	netdevice->dev_addr[1] = readb(eth_addr + 0x0a);
-	netdevice->dev_addr[2] = readb(eth_addr + 0x09);
-	netdevice->dev_addr[3] = readb(eth_addr + 0x08);
-	netdevice->dev_addr[4] = readb(eth_addr + 0x07);
-	netdevice->dev_addr[5] = readb(eth_addr + 0x06);
+	mac[0] = readb(eth_addr + 0x0b);
+	mac[1] = readb(eth_addr + 0x0a);
+	mac[2] = readb(eth_addr + 0x09);
+	mac[3] = readb(eth_addr + 0x08);
+	mac[4] = readb(eth_addr + 0x07);
+	mac[5] = readb(eth_addr + 0x06);
+	eth_hw_addr_set(netdevice, mac);
 	iounmap(eth_addr);
 
 	if (netdevice->irq < 0) {

From 24994513ad13ff2c47ba91d2b5df82c3d496c370 Mon Sep 17 00:00:00 2001
From: Po-Hsu Lin <po-hsu.lin@canonical.com>
Date: Thu, 16 Mar 2023 00:53:53 +0800
Subject: [PATCH 367/411] selftests: net: devlink_port_split.py: skip test if
 no suitable device available

The `devlink -j port show` command output may not contain the "flavour"
key, an example from Ubuntu 22.10 s390x LPAR(5.19.0-37-generic), with
mlx4 driver and iproute2-5.15.0:
  {"port":{"pci/0001:00:00.0/1":{"type":"eth","netdev":"ens301"},
           "pci/0001:00:00.0/2":{"type":"eth","netdev":"ens301d1"},
           "pci/0002:00:00.0/1":{"type":"eth","netdev":"ens317"},
           "pci/0002:00:00.0/2":{"type":"eth","netdev":"ens317d1"}}}

This will cause a KeyError exception.

Create a validate_devlink_output() to check for this "flavour" from
devlink command output to avoid this KeyError exception. Also let
it handle the check for `devlink -j dev show` output in main().

Apart from this, if the test was not started because the max lanes of
the designated device is 0. The script will still return 0 and thus
causing a false-negative test result.

Use a found_max_lanes flag to determine if these tests were skipped
due to this reason and return KSFT_SKIP to make it more clear.

Link: https://bugs.launchpad.net/bugs/1937133
Fixes: f3348a82e727 ("selftests: net: Add port split test")
Signed-off-by: Po-Hsu Lin <po-hsu.lin@canonical.com>
Link: https://lore.kernel.org/r/20230315165353.229590-1-po-hsu.lin@canonical.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/net/devlink_port_split.py       | 36 ++++++++++++++++---
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
index 2b5d6ff873738..2d84c7a0be6b2 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -59,6 +59,8 @@ def get_if_names(dev):
         assert stderr == ""
         ports = json.loads(stdout)['port']
 
+        validate_devlink_output(ports, 'flavour')
+
         for port in ports:
             if dev in port:
                 if ports[port]['flavour'] == 'physical':
@@ -220,6 +222,27 @@ def split_splittable_port(port, k, lanes, dev):
     unsplit(port.bus_info)
 
 
+def validate_devlink_output(devlink_data, target_property=None):
+    """
+    Determine if test should be skipped by checking:
+      1. devlink_data contains values
+      2. The target_property exist in devlink_data
+    """
+    skip_reason = None
+    if any(devlink_data.values()):
+        if target_property:
+            skip_reason = "{} not found in devlink output, test skipped".format(target_property)
+            for key in devlink_data:
+                if target_property in devlink_data[key]:
+                    skip_reason = None
+    else:
+        skip_reason = 'devlink output is empty, test skipped'
+
+    if skip_reason:
+        print(skip_reason)
+        sys.exit(KSFT_SKIP)
+
+
 def make_parser():
     parser = argparse.ArgumentParser(description='A test for port splitting.')
     parser.add_argument('--dev',
@@ -240,12 +263,9 @@ def main(cmdline=None):
         stdout, stderr = run_command(cmd)
         assert stderr == ""
 
+        validate_devlink_output(json.loads(stdout))
         devs = json.loads(stdout)['dev']
-        if devs:
-            dev = list(devs.keys())[0]
-        else:
-            print("no devlink device was found, test skipped")
-            sys.exit(KSFT_SKIP)
+        dev = list(devs.keys())[0]
 
     cmd = "devlink dev show %s" % dev
     stdout, stderr = run_command(cmd)
@@ -255,6 +275,7 @@ def main(cmdline=None):
 
     ports = devlink_ports(dev)
 
+    found_max_lanes = False
     for port in ports.if_names:
         max_lanes = get_max_lanes(port.name)
 
@@ -277,6 +298,11 @@ def main(cmdline=None):
                 split_splittable_port(port, lane, max_lanes, dev)
 
                 lane //= 2
+        found_max_lanes = True
+
+    if not found_max_lanes:
+        print(f"Test not started, no port of device {dev} reports max_lanes")
+        sys.exit(KSFT_SKIP)
 
 
 if __name__ == "__main__":

From 470efd68a4653d9819d391489886432cd31bcd0b Mon Sep 17 00:00:00 2001
From: Daniil Tatianin <d-tatianin@yandex-team.ru>
Date: Wed, 15 Mar 2023 22:46:18 +0300
Subject: [PATCH 368/411] qed/qed_mng_tlv: correctly zero out ->min instead of
 ->hour

This fixes an issue where ->hour would erroneously get zeroed out
instead of ->min because of a bad copy paste.

Found by Linux Verification Center (linuxtesting.org) with the SVACE
static analysis tool.

Fixes: f240b6882211 ("qed: Add support for processing fcoe tlv request.")
Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
Link: https://lore.kernel.org/r/20230315194618.579286-1-d-tatianin@yandex-team.ru
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
index 6190adf965bca..f55eed092f25d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mng_tlv.c
@@ -422,7 +422,7 @@ qed_mfw_get_tlv_time_value(struct qed_mfw_tlv_time *p_time,
 	if (p_time->hour > 23)
 		p_time->hour = 0;
 	if (p_time->min > 59)
-		p_time->hour = 0;
+		p_time->min = 0;
 	if (p_time->msec > 999)
 		p_time->msec = 0;
 	if (p_time->usec > 999)

From 1b0120e4db0bf2838d1ce741195ce4b7cc100b91 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Wed, 15 Mar 2023 21:25:17 +0100
Subject: [PATCH 369/411] hsr: ratelimit only when errors are printed

Recently, when automatically merging -net and net-next in MPTCP devel
tree, our CI reported [1] a conflict in hsr, the same as the one
reported by Stephen in netdev [2].

When looking at the conflict, I noticed it is in fact the v1 [3] that
has been applied in -net and the v2 [4] in net-next. Maybe the v1 was
applied by accident.

As mentioned by Jakub Kicinski [5], the new condition makes more sense
before the net_ratelimit(), not to update net_ratelimit's state which is
unnecessary if we're not going to print either way.

Here, this modification applies the v2 but in -net.

Link: https://github.com/multipath-tcp/mptcp_net-next/actions/runs/4423171069 [1]
Link: https://lore.kernel.org/netdev/20230315100914.53fc1760@canb.auug.org.au/ [2]
Link: https://lore.kernel.org/netdev/20230307133229.127442-1-koverskeid@gmail.com/ [3]
Link: https://lore.kernel.org/netdev/20230309092302.179586-1-koverskeid@gmail.com/ [4]
Link: https://lore.kernel.org/netdev/20230308232001.2fb62013@kernel.org/ [5]
Fixes: 28e8cabe80f3 ("net: hsr: Don't log netdev_err message on unknown prp dst node")
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com>
Link: https://lore.kernel.org/r/20230315-net-20230315-hsr_framereg-ratelimit-v1-1-61d2ef176d11@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/hsr/hsr_framereg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 865eda39d6014..b77f1189d19d1 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -415,7 +415,7 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb,
 	node_dst = find_node_by_addr_A(&port->hsr->node_db,
 				       eth_hdr(skb)->h_dest);
 	if (!node_dst) {
-		if (net_ratelimit() && port->hsr->prot_version != PRP_V1)
+		if (port->hsr->prot_version != PRP_V1 && net_ratelimit())
 			netdev_err(skb->dev, "%s: Unknown node\n", __func__);
 		return;
 	}

From 054abb515f346b8f30a0a11953d9f786d3e76813 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Mar 2023 16:03:49 -0700
Subject: [PATCH 370/411] tools: ynl: make definitions optional again

definitions are optional, commit in question breaks cli for ethtool.

Fixes: 6517a60b0307 ("tools: ynl: move the enum classes to shared code")
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/nlspec.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 960a356e8225e..e01a72d066384 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -387,7 +387,8 @@ def _dictify_ops_directional(self):
     def resolve(self):
         self.resolve_up(super())
 
-        for elem in self.yaml['definitions']:
+        definitions = self.yaml.get('definitions', [])
+        for elem in definitions:
             if elem['type'] == 'enum' or elem['type'] == 'flags':
                 self.consts[elem['name']] = self.new_enum(elem)
             else:

From 4e16b6a748df52800c90f3ab181aef8129bd332f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Mar 2023 16:03:50 -0700
Subject: [PATCH 371/411] ynl: broaden the license even more

I relicensed Netlink spec code to GPL-2.0 OR BSD-3-Clause but
we still put a slightly different license on the uAPI header
than the rest of the code. Use the Linux-syscall-note on all
the specs and all generated code. It's moot for kernel code,
but should not hurt. This way the licenses match everywhere.

Cc: Chuck Lever <chuck.lever@oracle.com>
Fixes: 37d9df224d1e ("ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause")
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/genetlink-c.yaml        | 2 +-
 Documentation/netlink/genetlink-legacy.yaml   | 2 +-
 Documentation/netlink/genetlink.yaml          | 2 +-
 Documentation/netlink/specs/ethtool.yaml      | 2 +-
 Documentation/netlink/specs/fou.yaml          | 2 +-
 Documentation/netlink/specs/netdev.yaml       | 2 +-
 Documentation/userspace-api/netlink/specs.rst | 3 ++-
 include/uapi/linux/fou.h                      | 2 +-
 include/uapi/linux/netdev.h                   | 2 +-
 net/core/netdev-genl-gen.c                    | 2 +-
 net/core/netdev-genl-gen.h                    | 2 +-
 net/ipv4/fou_nl.c                             | 2 +-
 net/ipv4/fou_nl.h                             | 2 +-
 tools/include/uapi/linux/netdev.h             | 2 +-
 tools/net/ynl/ynl-gen-c.py                    | 8 ++++----
 15 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index f082a5ad7cf1d..5c3642b3f802d 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 %YAML 1.2
 ---
 $id: http://kernel.org/schemas/netlink/genetlink-c.yaml#
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index c6b8c77f7d12e..5e98c6d2b9aae 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 %YAML 1.2
 ---
 $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml#
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index b2d56ab9e615d..d35dcd6f8d82a 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 %YAML 1.2
 ---
 $id: http://kernel.org/schemas/netlink/genetlink-legacy.yaml#
diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 18ecb7d90cbe5..4727c067e2ba3 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 
 name: ethtool
 
diff --git a/Documentation/netlink/specs/fou.yaml b/Documentation/netlink/specs/fou.yaml
index cff104288723b..3e13826a3fdf1 100644
--- a/Documentation/netlink/specs/fou.yaml
+++ b/Documentation/netlink/specs/fou.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 
 name: fou
 
diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml
index 753e5914a8b7c..b99e7ffef7a15 100644
--- a/Documentation/netlink/specs/netdev.yaml
+++ b/Documentation/netlink/specs/netdev.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 
 name: netdev
 
diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index 2122e0c4a3998..a22442ba1d30b 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -24,7 +24,8 @@ YAML specifications can be found under ``Documentation/netlink/specs/``
 This document describes details of the schema.
 See :doc:`intro-specs` for a practical starting guide.
 
-All specs must be licensed under ``GPL-2.0-only OR BSD-3-Clause``
+All specs must be licensed under
+``((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)``
 to allow for easy adoption in user space code.
 
 Compatibility levels
diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index 5041c35984938..b5cd3e7b3775a 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/fou.yaml */
 /* YNL-GEN uapi header */
diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h
index ed134fbdfd32d..639524b59930b 100644
--- a/include/uapi/linux/netdev.h
+++ b/include/uapi/linux/netdev.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN uapi header */
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
index 9e10802587fc3..3abab70d66ddc 100644
--- a/net/core/netdev-genl-gen.c
+++ b/net/core/netdev-genl-gen.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN kernel source */
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
index 2c5fc7d1e8a74..74d74fc231679 100644
--- a/net/core/netdev-genl-gen.h
+++ b/net/core/netdev-genl-gen.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN kernel header */
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
index 5c14fe030eda5..6c37c4f98ccab 100644
--- a/net/ipv4/fou_nl.c
+++ b/net/ipv4/fou_nl.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/fou.yaml */
 /* YNL-GEN kernel source */
diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h
index 58b1e1ed4b3bb..dbd0780a5d34c 100644
--- a/net/ipv4/fou_nl.h
+++ b/net/ipv4/fou_nl.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/fou.yaml */
 /* YNL-GEN kernel header */
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index ed134fbdfd32d..639524b59930b 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
 /* Do not edit directly, auto-generated from: */
 /*	Documentation/netlink/specs/netdev.yaml */
 /* YNL-GEN uapi header */
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index d47376f19de72..3b4d03a50fc1b 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
 
 import argparse
 import collections
@@ -2068,12 +2068,12 @@ def main():
 
     _, spec_kernel = find_kernel_root(args.spec)
     if args.mode == 'uapi':
-        cw.p('/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */')
+        cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */')
     else:
         if args.header:
-            cw.p('/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */')
+            cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */')
         else:
-            cw.p('// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause')
+            cw.p('// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
     cw.p("/* Do not edit directly, auto-generated from: */")
     cw.p(f"/*\t{spec_kernel} */")
     cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")

From cfab77c0b54583816faac5f9abdf588c13895a9d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Mar 2023 16:03:51 -0700
Subject: [PATCH 372/411] ynl: make the tooling check the license

The (only recently documented) expectation is that all specs
are under a certain license, but we don't actually enforce it.
What's worse we then go ahead and assume the license was right,
outputting the expected license into generated files.

Fixes: 37d9df224d1e ("ynl: re-license uniformly under GPL-2.0 OR BSD-3-Clause")
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/net/ynl/lib/nlspec.py |  8 ++++++++
 tools/net/ynl/ynl-gen-c.py  | 13 +++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index e01a72d066384..d04450c2a44af 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -274,6 +274,7 @@ class SpecFamily(SpecElement):
 
     Attributes:
         proto     protocol type (e.g. genetlink)
+        license   spec license (loaded from an SPDX tag on the spec)
 
         attr_sets  dict of attribute sets
         msgs       dict of all messages (index by name)
@@ -283,6 +284,13 @@ class SpecFamily(SpecElement):
     """
     def __init__(self, spec_path, schema_path=None):
         with open(spec_path, "r") as stream:
+            prefix = '# SPDX-License-Identifier: '
+            first = stream.readline().strip()
+            if not first.startswith(prefix):
+                raise Exception('SPDX license tag required in the spec')
+            self.license = first[len(prefix):]
+
+            stream.seek(0)
             spec = yaml.safe_load(stream)
 
         self._resolution_list = []
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 3b4d03a50fc1b..c16671a02621b 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -2059,6 +2059,10 @@ def main():
 
     try:
         parsed = Family(args.spec)
+        if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)':
+            print('Spec license:', parsed.license)
+            print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
+            os.sys.exit(1)
     except yaml.YAMLError as exc:
         print(exc)
         os.sys.exit(1)
@@ -2067,13 +2071,10 @@ def main():
     cw = CodeWriter(BaseNlLib(), out_file)
 
     _, spec_kernel = find_kernel_root(args.spec)
-    if args.mode == 'uapi':
-        cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */')
+    if args.mode == 'uapi' or args.header:
+        cw.p(f'/* SPDX-License-Identifier: {parsed.license} */')
     else:
-        if args.header:
-            cw.p('/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */')
-        else:
-            cw.p('// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
+        cw.p(f'// SPDX-License-Identifier: {parsed.license}')
     cw.p("/* Do not edit directly, auto-generated from: */")
     cw.p(f"/*\t{spec_kernel} */")
     cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")

From 5ae06327a3a5bad4ee246d81df203b1b00a7b390 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 16 Mar 2023 01:19:16 +0200
Subject: [PATCH 373/411] net: dsa: microchip: fix RGMII delay configuration on
 KSZ8765/KSZ8794/KSZ8795

The blamed commit has replaced a ksz_write8() call to address
REG_PORT_5_CTRL_6 (0x56) with a ksz_set_xmii() -> ksz_pwrite8() call to
regs[P_XMII_CTRL_1], which is also defined as 0x56 for ksz8795_regs[].

The trouble is that, when compared to ksz_write8(), ksz_pwrite8() also
adjusts the register offset with the port base address. So in reality,
ksz_pwrite8(offset=0x56) accesses register 0x56 + 0x50 = 0xa6, which in
this switch appears to be unmapped, and the RGMII delay configuration on
the CPU port does nothing.

So if the switch wasn't fine with the RGMII delay configuration done
through pin strapping and relied on Linux to apply a different one in
order to pass traffic, this is now broken.

Using the offset translation logic imposed by ksz_pwrite8(), the correct
value for regs[P_XMII_CTRL_1] should have been 0x6 on ksz8795_regs[], in
order to really end up accessing register 0x56.

Static code analysis shows that, despite there being multiple other
accesses to regs[P_XMII_CTRL_1] in this driver, the only code path that
is applicable to ksz8795_regs[] and ksz8_dev_ops is ksz_set_xmii().
Therefore, the problem is isolated to RGMII delays.

In its current form, ksz8795_regs[] contains the same value for
P_XMII_CTRL_0 and for P_XMII_CTRL_1, and this raises valid suspicions
that writes made by the driver to regs[P_XMII_CTRL_0] might overwrite
writes made to regs[P_XMII_CTRL_1] or vice versa.

Again, static analysis shows that the only accesses to P_XMII_CTRL_0
from the driver are made from code paths which are not reachable with
ksz8_dev_ops. So the accesses made by ksz_set_xmii() are safe for this
switch family.

[ vladimiroltean: rewrote commit message ]

Fixes: c476bede4b0f ("net: dsa: microchip: ksz8795: use common xmii function")
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20230315231916.2998480-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/microchip/ksz_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 729b36eeb2c46..7fc2155d93d6e 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -319,7 +319,7 @@ static const u16 ksz8795_regs[] = {
 	[S_BROADCAST_CTRL]		= 0x06,
 	[S_MULTICAST_CTRL]		= 0x04,
 	[P_XMII_CTRL_0]			= 0x06,
-	[P_XMII_CTRL_1]			= 0x56,
+	[P_XMII_CTRL_1]			= 0x06,
 };
 
 static const u32 ksz8795_masks[] = {

From 8de2bd02439eb839a452a853c1004c2c45ff6fef Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 16 Mar 2023 11:37:52 +0800
Subject: [PATCH 374/411] Revert "net/sched: act_api: move TCA_EXT_WARN_MSG to
 the correct hierarchy"

This reverts commit 923b2e30dc9cd05931da0f64e2e23d040865c035.

This is not a correct fix as TCA_EXT_WARN_MSG is not a hierarchy to
TCA_ACT_TAB. I didn't notice the TC actions use different enum when adding
TCA_EXT_WARN_MSG. To fix the difference I will add a new WARN enum in
TCA_ROOT_MAX as Jamal suggested.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/sched/act_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 34c5086750416..fce5228860990 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1596,12 +1596,12 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 	if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
 		goto out_nlmsg_trim;
 
+	nla_nest_end(skb, nest);
+
 	if (extack && extack->_msg &&
 	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
 		goto out_nlmsg_trim;
 
-	nla_nest_end(skb, nest);
-
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;

From 2f59823fe696caa844249a90bb3f9aeda69cfe5c Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 16 Mar 2023 11:37:53 +0800
Subject: [PATCH 375/411] net/sched: act_api: add specific EXT_WARN_MSG for tc
 action

In my previous commit 0349b8779cc9 ("sched: add new attr TCA_EXT_WARN_MSG
to report tc extact message") I didn't notice the tc action use different
enum with filter. So we can't use TCA_EXT_WARN_MSG directly for tc action.
Let's add a TCA_ROOT_EXT_WARN_MSG for tc action specifically and put this
param before going to the TCA_ACT_TAB nest.

Fixes: 0349b8779cc9 ("sched: add new attr TCA_EXT_WARN_MSG to report tc extact message")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/rtnetlink.h | 1 +
 net/sched/act_api.c            | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 25a0af57dd5ed..51c13cf9c5aee 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -789,6 +789,7 @@ enum {
 	TCA_ROOT_FLAGS,
 	TCA_ROOT_COUNT,
 	TCA_ROOT_TIME_DELTA, /* in msecs */
+	TCA_ROOT_EXT_WARN_MSG,
 	__TCA_ROOT_MAX,
 #define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fce5228860990..296fc1afedd82 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1589,6 +1589,10 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
 
+	if (extack && extack->_msg &&
+	    nla_put_string(skb, TCA_ROOT_EXT_WARN_MSG, extack->_msg))
+		goto out_nlmsg_trim;
+
 	nest = nla_nest_start_noflag(skb, TCA_ACT_TAB);
 	if (!nest)
 		goto out_nlmsg_trim;
@@ -1598,10 +1602,6 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 
 	nla_nest_end(skb, nest);
 
-	if (extack && extack->_msg &&
-	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
-		goto out_nlmsg_trim;
-
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 
 	return skb->len;

From 769639c1fe8a98129aa97c8ee981639db1e8955c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 16 Mar 2023 15:02:34 -0700
Subject: [PATCH 376/411] net: xdp: don't call notifiers during driver init

Drivers will commonly perform feature setting during init, if they use
the xdp_set_features_flag() helper they'll likely run into an ASSERT_RTNL()
inside call_netdevice_notifiers_info().

Don't call the notifier until the device is actually registered.
Nothing should be tracking the device until its registered and
after its unregistration has started.

Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag")
Link: https://lore.kernel.org/r/20230316220234.598091-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/xdp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/core/xdp.c b/net/core/xdp.c
index 87e654b7d06c1..b5737e47ec412 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -781,7 +781,9 @@ void xdp_set_features_flag(struct net_device *dev, xdp_features_t val)
 		return;
 
 	dev->xdp_features = val;
-	call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+
+	if (dev->reg_state == NETREG_REGISTERED)
+		call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
 }
 EXPORT_SYMBOL_GPL(xdp_set_features_flag);
 

From dd172d0c2cea3c14ca9d007eeab51bec7676ece7 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 16 Mar 2023 09:51:33 -0500
Subject: [PATCH 377/411] net: ipa: reg: include <linux/bug.h>

When "reg.h" got created, it included calls to WARN() and WARN_ON().
Those macros are defined via <linux/bug.h>.  In addition, it uses
is_power_of_2(), which is defined in <linux/log2.h>.  Include those
files so IPA "reg.h" has access to all definitions it requires.

Meanwhile, <linux/bits.h> is included but nothing defined therein
is required directly in "reg.h", so get rid of that.

Fixes: 81772e444dbe ("net: ipa: start generalizing "ipa_reg"")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/reg.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/reg.h b/drivers/net/ipa/reg.h
index 57b457f39b6e2..2ee07eebca677 100644
--- a/drivers/net/ipa/reg.h
+++ b/drivers/net/ipa/reg.h
@@ -6,7 +6,8 @@
 #define _REG_H_
 
 #include <linux/types.h>
-#include <linux/bits.h>
+#include <linux/log2.h>
+#include <linux/bug.h>
 
 /**
  * struct reg - A register descriptor

From 55c49e5c94411a82a0ba06dc615ed12b6387dec5 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 16 Mar 2023 09:51:34 -0500
Subject: [PATCH 378/411] net: ipa: add two missing declarations

When gsi_reg_init() got added, its declaration was added to
"gsi_reg.h" without declaring the two struct pointer types it uses.
Add these struct declarations to "gsi_reg.h".

Fixes: 3c506add35c7 ("net: ipa: introduce gsi_reg_init()")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi_reg.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ipa/gsi_reg.h b/drivers/net/ipa/gsi_reg.h
index f62f0a5c653d1..48fde65fa2e8a 100644
--- a/drivers/net/ipa/gsi_reg.h
+++ b/drivers/net/ipa/gsi_reg.h
@@ -10,6 +10,10 @@
 
 #include <linux/bits.h>
 
+struct platform_device;
+
+struct gsi;
+
 /**
  * DOC: GSI Registers
  *

From 786bbe50e1d5777f0b6ec7b4c2de6189d6f7feb4 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 16 Mar 2023 09:51:35 -0500
Subject: [PATCH 379/411] net: ipa: kill FILT_ROUT_CACHE_CFG IPA register

A recent commit defined a few IPA registers used for IPA v5.0+.
One of those was a mistake.  Although the filter and router caches
get *flushed* using a single register, they use distinct registers
(ENDP_FILTER_CACHE_CFG and ENDP_ROUTER_CACHE_CFG) for configuration.

And although there *exists* a FILT_ROUT_CACHE_CFG register, it is
not needed in upstream code.  So get rid of definitions related to
FILT_ROUT_CACHE_CFG, because they are not needed.

Fixes: 8ba59716d16a ("net: ipa: define IPA v5.0+ registers")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_reg.c | 4 ++--
 drivers/net/ipa/ipa_reg.h | 9 ---------
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c
index 735fa65916097..463a31dfa9f47 100644
--- a/drivers/net/ipa/ipa_reg.c
+++ b/drivers/net/ipa/ipa_reg.c
@@ -39,7 +39,8 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id)
 		return version <= IPA_VERSION_3_1;
 
 	case ENDP_FILTER_ROUTER_HSH_CFG:
-		return version != IPA_VERSION_4_2;
+		return version < IPA_VERSION_5_0 &&
+			version != IPA_VERSION_4_2;
 
 	case IRQ_SUSPEND_EN:
 	case IRQ_SUSPEND_CLR:
@@ -52,7 +53,6 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id)
 	case QSB_MAX_WRITES:
 	case QSB_MAX_READS:
 	case FILT_ROUT_HASH_EN:
-	case FILT_ROUT_CACHE_CFG:
 	case FILT_ROUT_HASH_FLUSH:
 	case FILT_ROUT_CACHE_FLUSH:
 	case STATE_AGGR_ACTIVE:
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index 28aa1351dd488..ff2be8be0f683 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -61,7 +61,6 @@ enum ipa_reg_id {
 	QSB_MAX_WRITES,
 	QSB_MAX_READS,
 	FILT_ROUT_HASH_EN,				/* Not IPA v5.0+ */
-	FILT_ROUT_CACHE_CFG,				/* IPA v5.0+ */
 	FILT_ROUT_HASH_FLUSH,				/* Not IPA v5.0+ */
 	FILT_ROUT_CACHE_FLUSH,				/* IPA v5.0+ */
 	STATE_AGGR_ACTIVE,
@@ -206,14 +205,6 @@ enum ipa_reg_qsb_max_reads_field_id {
 	GEN_QMB_1_MAX_READS_BEATS,			/* IPA v4.0+ */
 };
 
-/* FILT_ROUT_CACHE_CFG register */
-enum ipa_reg_filt_rout_cache_cfg_field_id {
-	ROUTER_CACHE_EN,
-	FILTER_CACHE_EN,
-	LOW_PRI_HASH_HIT_DISABLE,
-	LRU_EVICTION_THRESHOLD,
-};
-
 /* FILT_ROUT_HASH_EN and FILT_ROUT_HASH_FLUSH registers */
 enum ipa_reg_filt_rout_hash_field_id {
 	IPV6_ROUTER_HASH,

From 21e8aaca401ce2b45ece1d8fabd29d422de7b48e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 16 Mar 2023 09:51:36 -0500
Subject: [PATCH 380/411] net: ipa: fix some register validity checks

A recent commit defined HW_PARAM_4 as a GSI register ID but did not
add it to gsi_reg_id_valid() to indicate it's valid (for IPA v5.0+).
Add version checks for the HW_PARAM_2 and INTER_EE IRQ GSI registers
there as well.

IPA v5.0 supports up to 8 source and destination resource groups.
Update the validity check (and the comments where the register IDs
are defined) to reflect that.  Similarly update comments and
validity checks for the hash/cache-related registers.

Note that this patch fixes an omission and constrains things
further, but these don't technically represent bugs.

Fixes: f651334e1ef5 ("net: ipa: add HW_PARAM_4 GSI register")
Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/gsi_reg.c |  9 ++++++++-
 drivers/net/ipa/ipa_reg.c | 24 ++++++++++++++++--------
 drivers/net/ipa/ipa_reg.h | 12 ++++++------
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ipa/gsi_reg.c b/drivers/net/ipa/gsi_reg.c
index 1412b67304c8e..1651fbad4bd54 100644
--- a/drivers/net/ipa/gsi_reg.c
+++ b/drivers/net/ipa/gsi_reg.c
@@ -15,6 +15,14 @@ static bool gsi_reg_id_valid(struct gsi *gsi, enum gsi_reg_id reg_id)
 	switch (reg_id) {
 	case INTER_EE_SRC_CH_IRQ_MSK:
 	case INTER_EE_SRC_EV_CH_IRQ_MSK:
+		return gsi->version >= IPA_VERSION_3_5;
+
+	case HW_PARAM_2:
+		return gsi->version >= IPA_VERSION_3_5_1;
+
+	case HW_PARAM_4:
+		return gsi->version >= IPA_VERSION_5_0;
+
 	case CH_C_CNTXT_0:
 	case CH_C_CNTXT_1:
 	case CH_C_CNTXT_2:
@@ -43,7 +51,6 @@ static bool gsi_reg_id_valid(struct gsi *gsi, enum gsi_reg_id reg_id)
 	case CH_CMD:
 	case EV_CH_CMD:
 	case GENERIC_CMD:
-	case HW_PARAM_2:
 	case CNTXT_TYPE_IRQ:
 	case CNTXT_TYPE_IRQ_MSK:
 	case CNTXT_SRC_CH_IRQ:
diff --git a/drivers/net/ipa/ipa_reg.c b/drivers/net/ipa/ipa_reg.c
index 463a31dfa9f47..3f475428ddddb 100644
--- a/drivers/net/ipa/ipa_reg.c
+++ b/drivers/net/ipa/ipa_reg.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
- * Copyright (C) 2019-2022 Linaro Ltd.
+ * Copyright (C) 2019-2023 Linaro Ltd.
  */
 
 #include <linux/io.h>
@@ -15,6 +15,17 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id)
 	enum ipa_version version = ipa->version;
 
 	switch (reg_id) {
+	case FILT_ROUT_HASH_EN:
+		return version == IPA_VERSION_4_2;
+
+	case FILT_ROUT_HASH_FLUSH:
+		return version < IPA_VERSION_5_0 && version != IPA_VERSION_4_2;
+
+	case FILT_ROUT_CACHE_FLUSH:
+	case ENDP_FILTER_CACHE_CFG:
+	case ENDP_ROUTER_CACHE_CFG:
+		return version >= IPA_VERSION_5_0;
+
 	case IPA_BCR:
 	case COUNTER_CFG:
 		return version < IPA_VERSION_4_5;
@@ -32,11 +43,13 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id)
 	case SRC_RSRC_GRP_45_RSRC_TYPE:
 	case DST_RSRC_GRP_45_RSRC_TYPE:
 		return version <= IPA_VERSION_3_1 ||
-		       version == IPA_VERSION_4_5;
+		       version == IPA_VERSION_4_5 ||
+		       version == IPA_VERSION_5_0;
 
 	case SRC_RSRC_GRP_67_RSRC_TYPE:
 	case DST_RSRC_GRP_67_RSRC_TYPE:
-		return version <= IPA_VERSION_3_1;
+		return version <= IPA_VERSION_3_1 ||
+		       version == IPA_VERSION_5_0;
 
 	case ENDP_FILTER_ROUTER_HSH_CFG:
 		return version < IPA_VERSION_5_0 &&
@@ -52,9 +65,6 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id)
 	case SHARED_MEM_SIZE:
 	case QSB_MAX_WRITES:
 	case QSB_MAX_READS:
-	case FILT_ROUT_HASH_EN:
-	case FILT_ROUT_HASH_FLUSH:
-	case FILT_ROUT_CACHE_FLUSH:
 	case STATE_AGGR_ACTIVE:
 	case LOCAL_PKT_PROC_CNTXT:
 	case AGGR_FORCE_CLOSE:
@@ -76,8 +86,6 @@ static bool ipa_reg_id_valid(struct ipa *ipa, enum ipa_reg_id reg_id)
 	case ENDP_INIT_RSRC_GRP:
 	case ENDP_INIT_SEQ:
 	case ENDP_STATUS:
-	case ENDP_FILTER_CACHE_CFG:
-	case ENDP_ROUTER_CACHE_CFG:
 	case IPA_IRQ_STTS:
 	case IPA_IRQ_EN:
 	case IPA_IRQ_CLR:
diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h
index ff2be8be0f683..7dd65d39333dd 100644
--- a/drivers/net/ipa/ipa_reg.h
+++ b/drivers/net/ipa/ipa_reg.h
@@ -60,8 +60,8 @@ enum ipa_reg_id {
 	SHARED_MEM_SIZE,
 	QSB_MAX_WRITES,
 	QSB_MAX_READS,
-	FILT_ROUT_HASH_EN,				/* Not IPA v5.0+ */
-	FILT_ROUT_HASH_FLUSH,				/* Not IPA v5.0+ */
+	FILT_ROUT_HASH_EN,				/* IPA v4.2 */
+	FILT_ROUT_HASH_FLUSH,			/* Not IPA v4.2 nor IPA v5.0+ */
 	FILT_ROUT_CACHE_FLUSH,				/* IPA v5.0+ */
 	STATE_AGGR_ACTIVE,
 	IPA_BCR,					/* Not IPA v4.5+ */
@@ -76,12 +76,12 @@ enum ipa_reg_id {
 	TIMERS_PULSE_GRAN_CFG,				/* IPA v4.5+ */
 	SRC_RSRC_GRP_01_RSRC_TYPE,
 	SRC_RSRC_GRP_23_RSRC_TYPE,
-	SRC_RSRC_GRP_45_RSRC_TYPE,		/* Not IPA v3.5+, IPA v4.5 */
-	SRC_RSRC_GRP_67_RSRC_TYPE,			/* Not IPA v3.5+ */
+	SRC_RSRC_GRP_45_RSRC_TYPE,	/* Not IPA v3.5+; IPA v4.5, IPA v5.0 */
+	SRC_RSRC_GRP_67_RSRC_TYPE,		/* Not IPA v3.5+; IPA v5.0 */
 	DST_RSRC_GRP_01_RSRC_TYPE,
 	DST_RSRC_GRP_23_RSRC_TYPE,
-	DST_RSRC_GRP_45_RSRC_TYPE,		/* Not IPA v3.5+, IPA v4.5 */
-	DST_RSRC_GRP_67_RSRC_TYPE,			/* Not IPA v3.5+ */
+	DST_RSRC_GRP_45_RSRC_TYPE,	/* Not IPA v3.5+; IPA v4.5, IPA v5.0 */
+	DST_RSRC_GRP_67_RSRC_TYPE,		/* Not IPA v3.5+; IPA v5.0 */
 	ENDP_INIT_CTRL,		/* Not IPA v4.2+ for TX, not IPA v4.0+ for RX */
 	ENDP_INIT_CFG,
 	ENDP_INIT_NAT,			/* TX only */

From c00133a9e87ea5324d0b883d801eb6656f26739b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=B6nig?= <ckoenig.leichtzumerken@gmail.com>
Date: Thu, 16 Mar 2023 08:26:47 +0100
Subject: [PATCH 381/411] drm/ttm: drop extra ttm_bo_put in ttm_bo_cleanup_refs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That was accidentially left over when we switched to the delayed delete
worker.

Suggested-by: Matthew Auld <matthew.william.auld@gmail.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Fixes: 9bff18d13473 ("drm/ttm: use per BO cleanup workers")
Reported-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230316072647.406707-1-christian.koenig@amd.com
---
 drivers/gpu/drm/ttm/ttm_bo.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 326a3d13a8295..c286c6ffe07f6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -295,8 +295,6 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
 	if (unlock_resv)
 		dma_resv_unlock(bo->base.resv);
 
-	ttm_bo_put(bo);
-
 	return 0;
 }
 

From 90de546d9a0b3c771667af18bb3f80567eabb89b Mon Sep 17 00:00:00 2001
From: Liang He <windhl@126.com>
Date: Wed, 15 Mar 2023 14:00:21 +0800
Subject: [PATCH 382/411] ethernet: sun: add check for the mdesc_grab()

In vnet_port_probe() and vsw_port_probe(), we should
check the return value of mdesc_grab() as it may
return NULL which can caused NPD bugs.

Fixes: 5d01fa0c6bd8 ("ldmvsw: Add ldmvsw.c driver code")
Fixes: 43fdf27470b2 ("[SPARC64]: Abstract out mdesc accesses for better MD update handling.")
Signed-off-by: Liang He <windhl@126.com>
Reviewed-by: Piotr Raczynski <piotr.raczynski@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/ldmvsw.c  | 3 +++
 drivers/net/ethernet/sun/sunvnet.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 8addee6d04bd8..734a817d3c945 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -287,6 +287,9 @@ static int vsw_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 
 	hp = mdesc_grab();
 
+	if (!hp)
+		return -ENODEV;
+
 	rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
 	err = -ENODEV;
 	if (!rmac) {
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index fe86fbd585861..e220620d0ffc9 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -433,6 +433,9 @@ static int vnet_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
 
 	hp = mdesc_grab();
 
+	if (!hp)
+		return -ENODEV;
+
 	vp = vnet_find_parent(hp, vdev->mp, vdev);
 	if (IS_ERR(vp)) {
 		pr_err("Cannot find port parent vnet\n");

From e05bb97d9c9dd4ba5739a27921044c935a7fb3be Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 15 Mar 2023 16:04:23 +0900
Subject: [PATCH 383/411] net: renesas: rswitch: Fix the output value of quote
 from rswitch_rx()

If the RX descriptor doesn't have any data, the output value of quote
from rswitch_rx() will be increased unexpectedily. So, fix it.

Reported-by: Volodymyr Babchuk <volodymyr_babchuk@epam.com>
Fixes: 3590918b5d07 ("net: ethernet: renesas: Add support for "Ethernet Switch"")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/rswitch.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 853394e5bb8b9..46d8d9c8fc19f 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -702,13 +702,14 @@ static bool rswitch_rx(struct net_device *ndev, int *quota)
 	u16 pkt_len;
 	u32 get_ts;
 
+	if (*quota <= 0)
+		return true;
+
 	boguscnt = min_t(int, gq->ring_size, *quota);
 	limit = boguscnt;
 
 	desc = &gq->rx_ring[gq->cur];
 	while ((desc->desc.die_dt & DT_MASK) != DT_FEMPTY) {
-		if (--boguscnt < 0)
-			break;
 		dma_rmb();
 		pkt_len = le16_to_cpu(desc->desc.info_ds) & RX_DS;
 		skb = gq->skbs[gq->cur];
@@ -734,6 +735,9 @@ static bool rswitch_rx(struct net_device *ndev, int *quota)
 
 		gq->cur = rswitch_next_queue_index(gq, true, 1);
 		desc = &gq->rx_ring[gq->cur];
+
+		if (--boguscnt <= 0)
+			break;
 	}
 
 	num = rswitch_get_num_cur_queues(gq);
@@ -745,7 +749,7 @@ static bool rswitch_rx(struct net_device *ndev, int *quota)
 		goto err;
 	gq->dirty = rswitch_next_queue_index(gq, false, num);
 
-	*quota -= limit - (++boguscnt);
+	*quota -= limit - boguscnt;
 
 	return boguscnt <= 0;
 

From 2c59e993c86ad57afde26eaf1beb35694da5fbfe Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Wed, 15 Mar 2023 16:04:24 +0900
Subject: [PATCH 384/411] net: renesas: rswitch: Fix GWTSDIE register handling

Since the GWCA has the TX timestamp feature, this driver
should not disable it if one of ports is opened. So, fix it.

Reported-by: Phong Hoang <phong.hoang.wz@renesas.com>
Fixes: 33f5d733b589 ("net: renesas: rswitch: Improve TX timestamp accuracy")
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/rswitch.c | 9 +++++++--
 drivers/net/ethernet/renesas/rswitch.h | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c
index 46d8d9c8fc19f..c4f93d24c6a42 100644
--- a/drivers/net/ethernet/renesas/rswitch.c
+++ b/drivers/net/ethernet/renesas/rswitch.c
@@ -1441,7 +1441,10 @@ static int rswitch_open(struct net_device *ndev)
 	rswitch_enadis_data_irq(rdev->priv, rdev->tx_queue->index, true);
 	rswitch_enadis_data_irq(rdev->priv, rdev->rx_queue->index, true);
 
-	iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE);
+	if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
+		iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDIE);
+
+	bitmap_set(rdev->priv->opened_ports, rdev->port, 1);
 
 	return 0;
 };
@@ -1452,8 +1455,10 @@ static int rswitch_stop(struct net_device *ndev)
 	struct rswitch_gwca_ts_info *ts_info, *ts_info2;
 
 	netif_tx_stop_all_queues(ndev);
+	bitmap_clear(rdev->priv->opened_ports, rdev->port, 1);
 
-	iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID);
+	if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS))
+		iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID);
 
 	list_for_each_entry_safe(ts_info, ts_info2, &rdev->priv->gwca.ts_info_list, list) {
 		if (ts_info->port != rdev->port)
diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h
index 27d3d38c055f0..b3e0411b408ef 100644
--- a/drivers/net/ethernet/renesas/rswitch.h
+++ b/drivers/net/ethernet/renesas/rswitch.h
@@ -998,6 +998,7 @@ struct rswitch_private {
 	struct rcar_gen4_ptp_private *ptp_priv;
 
 	struct rswitch_device *rdev[RSWITCH_NUM_PORTS];
+	DECLARE_BITMAP(opened_ports, RSWITCH_NUM_PORTS);
 
 	struct rswitch_gwca gwca;
 	struct rswitch_etha etha[RSWITCH_NUM_PORTS];

From 9ec7eb60dcbcb6c41076defbc5df7bbd95ceaba5 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Wed, 15 Mar 2023 13:18:40 +0200
Subject: [PATCH 385/411] bonding: restore IFF_MASTER/SLAVE flags on bond
 enslave ether type change

Add bond_ether_setup helper which is used to fix ether_setup() calls in the
bonding driver. It takes care of both IFF_MASTER and IFF_SLAVE flags, the
former is always restored and the latter only if it was set.
If the bond enslaves non-ARPHRD_ETHER device (changes its type), then
releases it and enslaves ARPHRD_ETHER device (changes back) then we
use ether_setup() to restore the bond device type but it also resets its
flags and removes IFF_MASTER and IFF_SLAVE[1]. Use the bond_ether_setup
helper to restore both after such transition.

[1] reproduce (nlmon is non-ARPHRD_ETHER):
 $ ip l add nlmon0 type nlmon
 $ ip l add bond2 type bond mode active-backup
 $ ip l set nlmon0 master bond2
 $ ip l set nlmon0 nomaster
 $ ip l add bond1 type bond
 (we use bond1 as ARPHRD_ETHER device to restore bond2's mode)
 $ ip l set bond1 master bond2
 $ ip l sh dev bond2
 37: bond2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 1000
    link/ether be:d7:c5:40:5b:cc brd ff:ff:ff:ff:ff:ff promiscuity 0 minmtu 68 maxmtu 1500
 (notice bond2's IFF_MASTER is missing)

Fixes: e36b9d16c6a6 ("bonding: clean muticast addresses when device changes type")
Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 00646aa315c30..4bd911f9d3f95 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1775,6 +1775,19 @@ void bond_lower_state_changed(struct slave *slave)
 		slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg);	\
 } while (0)
 
+/* The bonding driver uses ether_setup() to convert a master bond device
+ * to ARPHRD_ETHER, that resets the target netdevice's flags so we always
+ * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE if it was set
+ */
+static void bond_ether_setup(struct net_device *bond_dev)
+{
+	unsigned int slave_flag = bond_dev->flags & IFF_SLAVE;
+
+	ether_setup(bond_dev);
+	bond_dev->flags |= IFF_MASTER | slave_flag;
+	bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+}
+
 /* enslave device <slave> to bond device <master> */
 int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 		 struct netlink_ext_ack *extack)
@@ -1866,10 +1879,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
-			else {
-				ether_setup(bond_dev);
-				bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
-			}
+			else
+				bond_ether_setup(bond_dev);
 
 			call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
 						 bond_dev);

From e667d469098671261d558be0cd93dca4d285ce1e Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Wed, 15 Mar 2023 13:18:41 +0200
Subject: [PATCH 386/411] bonding: restore bond's IFF_SLAVE flag if a non-eth
 dev enslave fails

syzbot reported a warning[1] where the bond device itself is a slave and
we try to enslave a non-ethernet device as the first slave which fails
but then in the error path when ether_setup() restores the bond device
it also clears all flags. In my previous fix[2] I restored the
IFF_MASTER flag, but I didn't consider the case that the bond device
itself might also be a slave with IFF_SLAVE set, so we need to restore
that flag as well. Use the bond_ether_setup helper which does the right
thing and restores the bond's flags properly.

Steps to reproduce using a nlmon dev:
 $ ip l add nlmon0 type nlmon
 $ ip l add bond1 type bond
 $ ip l add bond2 type bond
 $ ip l set bond1 master bond2
 $ ip l set dev nlmon0 master bond1
 $ ip -d l sh dev bond1
 22: bond1: <BROADCAST,MULTICAST,MASTER> mtu 1500 qdisc noqueue master bond2 state DOWN mode DEFAULT group default qlen 1000
 (now bond1's IFF_SLAVE flag is gone and we'll hit a warning[3] if we
  try to delete it)

[1] https://syzkaller.appspot.com/bug?id=391c7b1f6522182899efba27d891f1743e8eb3ef
[2] commit 7d5cd2ce5292 ("bonding: correctly handle bonding type change on enslave failure")
[3] example warning:
 [   27.008664] bond1: (slave nlmon0): The slave device specified does not support setting the MAC address
 [   27.008692] bond1: (slave nlmon0): Error -95 calling set_mac_address
 [   32.464639] bond1 (unregistering): Released all slaves
 [   32.464685] ------------[ cut here ]------------
 [   32.464686] WARNING: CPU: 1 PID: 2004 at net/core/dev.c:10829 unregister_netdevice_many+0x72a/0x780
 [   32.464694] Modules linked in: br_netfilter bridge bonding virtio_net
 [   32.464699] CPU: 1 PID: 2004 Comm: ip Kdump: loaded Not tainted 5.18.0-rc3+ #47
 [   32.464703] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.1-2.fc37 04/01/2014
 [   32.464704] RIP: 0010:unregister_netdevice_many+0x72a/0x780
 [   32.464707] Code: 99 fd ff ff ba 90 1a 00 00 48 c7 c6 f4 02 66 96 48 c7 c7 20 4d 35 96 c6 05 fa c7 2b 02 01 e8 be 6f 4a 00 0f 0b e9 73 fd ff ff <0f> 0b e9 5f fd ff ff 80 3d e3 c7 2b 02 00 0f 85 3b fd ff ff ba 59
 [   32.464710] RSP: 0018:ffffa006422d7820 EFLAGS: 00010206
 [   32.464712] RAX: ffff8f6e077140a0 RBX: ffffa006422d7888 RCX: 0000000000000000
 [   32.464714] RDX: ffff8f6e12edbe58 RSI: 0000000000000296 RDI: ffffffff96d4a520
 [   32.464716] RBP: ffff8f6e07714000 R08: ffffffff96d63600 R09: ffffa006422d7728
 [   32.464717] R10: 0000000000000ec0 R11: ffffffff9698c988 R12: ffff8f6e12edb140
 [   32.464719] R13: dead000000000122 R14: dead000000000100 R15: ffff8f6e12edb140
 [   32.464723] FS:  00007f297c2f1740(0000) GS:ffff8f6e5d900000(0000) knlGS:0000000000000000
 [   32.464725] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 [   32.464726] CR2: 00007f297bf1c800 CR3: 00000000115e8000 CR4: 0000000000350ee0
 [   32.464730] Call Trace:
 [   32.464763]  <TASK>
 [   32.464767]  rtnl_dellink+0x13e/0x380
 [   32.464776]  ? cred_has_capability.isra.0+0x68/0x100
 [   32.464780]  ? __rtnl_unlock+0x33/0x60
 [   32.464783]  ? bpf_lsm_capset+0x10/0x10
 [   32.464786]  ? security_capable+0x36/0x50
 [   32.464790]  rtnetlink_rcv_msg+0x14e/0x3b0
 [   32.464792]  ? _copy_to_iter+0xb1/0x790
 [   32.464796]  ? post_alloc_hook+0xa0/0x160
 [   32.464799]  ? rtnl_calcit.isra.0+0x110/0x110
 [   32.464802]  netlink_rcv_skb+0x50/0xf0
 [   32.464806]  netlink_unicast+0x216/0x340
 [   32.464809]  netlink_sendmsg+0x23f/0x480
 [   32.464812]  sock_sendmsg+0x5e/0x60
 [   32.464815]  ____sys_sendmsg+0x22c/0x270
 [   32.464818]  ? import_iovec+0x17/0x20
 [   32.464821]  ? sendmsg_copy_msghdr+0x59/0x90
 [   32.464823]  ? do_set_pte+0xa0/0xe0
 [   32.464828]  ___sys_sendmsg+0x81/0xc0
 [   32.464832]  ? mod_objcg_state+0xc6/0x300
 [   32.464835]  ? refill_obj_stock+0xa9/0x160
 [   32.464838]  ? memcg_slab_free_hook+0x1a5/0x1f0
 [   32.464842]  __sys_sendmsg+0x49/0x80
 [   32.464847]  do_syscall_64+0x3b/0x90
 [   32.464851]  entry_SYSCALL_64_after_hwframe+0x44/0xae
 [   32.464865] RIP: 0033:0x7f297bf2e5e7
 [   32.464868] Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
 [   32.464869] RSP: 002b:00007ffd96c824c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 [   32.464872] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f297bf2e5e7
 [   32.464874] RDX: 0000000000000000 RSI: 00007ffd96c82540 RDI: 0000000000000003
 [   32.464875] RBP: 00000000640f19de R08: 0000000000000001 R09: 000000000000007c
 [   32.464876] R10: 00007f297bffabe0 R11: 0000000000000246 R12: 0000000000000001
 [   32.464877] R13: 00007ffd96c82d20 R14: 00007ffd96c82610 R15: 000055bfe38a7020
 [   32.464881]  </TASK>
 [   32.464882] ---[ end trace 0000000000000000 ]---

Fixes: 7d5cd2ce5292 ("bonding: correctly handle bonding type change on enslave failure")
Reported-by: syzbot+9dfc3f3348729cc82277@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?id=391c7b1f6522182899efba27d891f1743e8eb3ef
Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4bd911f9d3f95..236e5219c8112 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2300,9 +2300,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
 			eth_hw_addr_random(bond_dev);
 		if (bond_dev->type != ARPHRD_ETHER) {
 			dev_close(bond_dev);
-			ether_setup(bond_dev);
-			bond_dev->flags |= IFF_MASTER;
-			bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+			bond_ether_setup(bond_dev);
 		}
 	}
 

From 222c94ec0ad48b951f0f692a7cf5bcf7a6bcb6b1 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Wed, 15 Mar 2023 13:18:42 +0200
Subject: [PATCH 387/411] selftests: bonding: add tests for ether type changes

Add new network selftests for the bonding device which exercise the ether
type changing call paths. They also test for the recent syzbot bug[1] which
causes a warning and results in wrong device flags (IFF_SLAVE missing).
The test adds three bond devices and a nlmon device, enslaves one of the
bond devices to the other and then uses the nlmon device for successful
and unsuccesful enslaves both of which change the bond ether type. Thus
we can test for both MASTER and SLAVE flags at the same time.

If the flags are properly restored we get:
TEST: Change ether type of an enslaved bond device with unsuccessful enslave   [ OK ]
TEST: Change ether type of an enslaved bond device with successful enslave   [ OK ]

[1] https://syzkaller.appspot.com/bug?id=391c7b1f6522182899efba27d891f1743e8eb3ef

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Reviewed-by: Michal Kubiak <michal.kubiak@intel.com>
Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/bonding/Makefile    |  3 +-
 .../net/bonding/bond-eth-type-change.sh       | 85 +++++++++++++++++++
 2 files changed, 87 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh

diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 8e3b786a748f9..a39bb2560d9bf 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -8,7 +8,8 @@ TEST_PROGS := \
 	dev_addr_lists.sh \
 	mode-1-recovery-updelay.sh \
 	mode-2-recovery-updelay.sh \
-	option_prio.sh
+	option_prio.sh \
+	bond-eth-type-change.sh
 
 TEST_FILES := \
 	lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
new file mode 100755
index 0000000000000..5cdd22048ba70
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device ether type changing
+#
+
+ALL_TESTS="
+	bond_test_unsuccessful_enslave_type_change
+	bond_test_successful_enslave_type_change
+"
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/net_forwarding_lib.sh
+
+bond_check_flags()
+{
+	local bonddev=$1
+
+	ip -d l sh dev "$bonddev" | grep -q "MASTER"
+	check_err $? "MASTER flag is missing from the bond device"
+
+	ip -d l sh dev "$bonddev" | grep -q "SLAVE"
+	check_err $? "SLAVE flag is missing from the bond device"
+}
+
+# test enslaved bond dev type change from ARPHRD_ETHER and back
+# this allows us to test both MASTER and SLAVE flags at once
+bond_test_enslave_type_change()
+{
+	local test_success=$1
+	local devbond0="test-bond0"
+	local devbond1="test-bond1"
+	local devbond2="test-bond2"
+	local nonethdev="test-noneth0"
+
+	# create a non-ARPHRD_ETHER device for testing (e.g. nlmon type)
+	ip link add name "$nonethdev" type nlmon
+	check_err $? "could not create a non-ARPHRD_ETHER device (nlmon)"
+	ip link add name "$devbond0" type bond
+	if [ $test_success -eq 1 ]; then
+		# we need devbond0 in active-backup mode to successfully enslave nonethdev
+		ip link set dev "$devbond0" type bond mode active-backup
+		check_err $? "could not change bond mode to active-backup"
+	fi
+	ip link add name "$devbond1" type bond
+	ip link add name "$devbond2" type bond
+	ip link set dev "$devbond0" master "$devbond1"
+	check_err $? "could not enslave $devbond0 to $devbond1"
+	# change bond type to non-ARPHRD_ETHER
+	ip link set dev "$nonethdev" master "$devbond0" 1>/dev/null 2>/dev/null
+	ip link set dev "$nonethdev" nomaster 1>/dev/null 2>/dev/null
+	# restore ARPHRD_ETHER type by enslaving such device
+	ip link set dev "$devbond2" master "$devbond0"
+	check_err $? "could not enslave $devbond2 to $devbond0"
+	ip link set dev "$devbond1" nomaster
+
+	bond_check_flags "$devbond0"
+
+	# clean up
+	ip link del dev "$devbond0"
+	ip link del dev "$devbond1"
+	ip link del dev "$devbond2"
+	ip link del dev "$nonethdev"
+}
+
+bond_test_unsuccessful_enslave_type_change()
+{
+	RET=0
+
+	bond_test_enslave_type_change 0
+	log_test "Change ether type of an enslaved bond device with unsuccessful enslave"
+}
+
+bond_test_successful_enslave_type_change()
+{
+	RET=0
+
+	bond_test_enslave_type_change 1
+	log_test "Change ether type of an enslaved bond device with successful enslave"
+}
+
+tests_run
+
+exit "$EXIT_STATUS"

From 884a1f956179b561e7e7f1d29fc331cf53233c89 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Thu, 13 Oct 2022 12:42:29 +0200
Subject: [PATCH 388/411] tools/power turbostat: update dump of
 SECONDARY_TURBO_RATIO_LIMIT

cosmetic only (but useful if you copy/paste)

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index aba460410dbd1..7ae3086c5ec0c 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2538,7 +2538,7 @@ static void dump_turbo_ratio_limits(int trl_msr_offset, int family, int model)
 
 	get_msr(base_cpu, trl_msr_offset, &msr);
 	fprintf(outf, "cpu%d: MSR_%sTURBO_RATIO_LIMIT: 0x%08llx\n",
-		base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY" : "", msr);
+		base_cpu, trl_msr_offset == MSR_SECONDARY_TURBO_RATIO_LIMIT ? "SECONDARY_" : "", msr);
 
 	if (has_turbo_ratio_group_limits(family, model)) {
 		get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &core_counts);

From 9c08581728ccadc6b9e4135b7b8d31948e814f6f Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 18 Oct 2022 15:23:37 -0400
Subject: [PATCH 389/411] tools/power turbostat: Provide better debug messages
 for failed capabilities accesses

turbostat reports some capabilities access errors and not others.  Provide
the same debug message for all errors.

[lenb: remove extra quotes]

Cc: David Arcari <darcari@redhat.com>
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 |  2 +-
 tools/power/x86/turbostat/turbostat.c | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index c7b26a3603afe..7dcb4f4f3d55a 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -340,7 +340,7 @@ starts a new interval.
 must be run as root.
 Alternatively, non-root users can be enabled to run turbostat this way:
 
-# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep ./turbostat
+# setcap cap_sys_admin,cap_sys_rawio,cap_sys_nice=+ep path/to/turbostat
 
 # chmod +r /dev/cpu/*/msr
 
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7ae3086c5ec0c..d246ef6153dc9 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -670,7 +670,8 @@ static int perf_instr_count_open(int cpu_num)
 	/* counter for cpu_num, including user + kernel and all processes */
 	fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
 	if (fd == -1) {
-		warn("cpu%d: perf instruction counter", cpu_num);
+		warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"",
+		      progname);
 		BIC_NOT_PRESENT(BIC_IPC);
 	}
 
@@ -3502,9 +3503,6 @@ void msr_sum_record(void)
 /*
  * set_my_sched_priority(pri)
  * return previous
- *
- * if non-root, do this:
- * # /sbin/setcap cap_sys_rawio,cap_sys_nice=+ep /usr/bin/turbostat
  */
 int set_my_sched_priority(int priority)
 {
@@ -3518,7 +3516,8 @@ int set_my_sched_priority(int priority)
 
 	retval = setpriority(PRIO_PROCESS, 0, priority);
 	if (retval)
-		err(retval, "setpriority(%d)", priority);
+		errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"",
+		     progname);
 
 	errno = 0;
 	retval = getpriority(PRIO_PROCESS, 0);
@@ -5476,7 +5475,8 @@ void print_dev_latency(void)
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		warn("fopen %s\n", path);
+		warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"",
+		      progname);
 		return;
 	}
 

From 40aafc7d58d3544f152a863a0e9863014b6d5d8c Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Thu, 15 Dec 2022 10:18:16 -0500
Subject: [PATCH 390/411] tools/power turbostat: Fix /dev/cpu_dma_latency
 warnings

When running as non-root the following error is seen in turbostat:

turbostat: fopen /dev/cpu_dma_latency
: Permission denied

turbostat and the man page have information on how to avoid other
permission errors, so these can be fixed the same way.

Provide better /dev/cpu_dma_latency warnings that provide instructions on
how to avoid the error, and update the man page.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: linux-pm@vger.kernel.org
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.8 | 2 ++
 tools/power/x86/turbostat/turbostat.c | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 7dcb4f4f3d55a..8f08c3fd498d5 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -344,6 +344,8 @@ Alternatively, non-root users can be enabled to run turbostat this way:
 
 # chmod +r /dev/cpu/*/msr
 
+# chmod +r /dev/cpu_dma_latency
+
 .B "turbostat "
 reads hardware counters, but doesn't write them.
 So it will not interfere with the OS or other programs, including
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d246ef6153dc9..9aed2620a2d2d 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5482,7 +5482,7 @@ void print_dev_latency(void)
 
 	retval = read(fd, (void *)&value, sizeof(int));
 	if (retval != sizeof(int)) {
-		warn("read %s\n", path);
+		warn("read failed %s\n", path);
 		close(fd);
 		return;
 	}

From 6cbfedc7afc93a7f06d4a2f7c4d4732b20adfc2b Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 17 Mar 2023 11:25:56 -0400
Subject: [PATCH 391/411] tools/power turbostat: remove stray newlines from
 warn/warnx strings

warn(3) terminates strings with newlines

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 9aed2620a2d2d..649b48e53a310 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5482,7 +5482,7 @@ void print_dev_latency(void)
 
 	retval = read(fd, (void *)&value, sizeof(int));
 	if (retval != sizeof(int)) {
-		warn("read failed %s\n", path);
+		warn("read failed %s", path);
 		close(fd);
 		return;
 	}
@@ -5543,7 +5543,7 @@ void process_cpuid()
 	edx_flags = edx;
 
 	if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
-		warnx("get_msr(UCODE)\n");
+		warnx("get_msr(UCODE)");
 
 	/*
 	 * check max extended function levels of CPUID.

From 93cac4150727dae0ee89f501dd75413b88eedec0 Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Wed, 4 Jan 2023 22:23:53 +0800
Subject: [PATCH 392/411] tools/power turbostat: Introduce support for EMR

Introduce support for EMR.

Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Tested-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 649b48e53a310..eba8a20a4b009 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -5462,6 +5462,9 @@ unsigned int intel_model_duplicates(unsigned int model)
 
 	case INTEL_FAM6_ICELAKE_D:
 		return INTEL_FAM6_ICELAKE_X;
+
+	case INTEL_FAM6_EMERALDRAPIDS_X:
+		return INTEL_FAM6_SAPPHIRERAPIDS_X;
 	}
 	return model;
 }

From 92c25393586ac799b9b7d9e50434f3c44a7622c4 Mon Sep 17 00:00:00 2001
From: Antti Laakso <antti.laakso@intel.com>
Date: Wed, 25 Jan 2023 15:17:50 +0200
Subject: [PATCH 393/411] tools/power turbostat: fix decoding of HWP_STATUS

The "excursion to minimum" information is in bit2
in HWP_STATUS MSR. Fix the bitmask used for
decoding the register.

Signed-off-by: Antti Laakso <antti.laakso@intel.com>
Reviewed-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index eba8a20a4b009..7424c35fe2091 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -4425,7 +4425,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
 
 	fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
 		"(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
-		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
+		cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x4) ? "" : "No-");
 
 	return 0;
 }

From de7839ee02c651335db6bf0af24cf5cac3cc4c72 Mon Sep 17 00:00:00 2001
From: Len Brown <len.brown@intel.com>
Date: Fri, 17 Mar 2023 11:34:10 -0400
Subject: [PATCH 394/411] tools/power turbostat: version 2023.03.17

Happy St. Patrick's Day!

Signed-off-by: Len Brown <len.brown@intel.com>
---
 tools/power/x86/turbostat/turbostat.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 7424c35fe2091..8a36ba5df9f90 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -3,7 +3,7 @@
  * turbostat -- show CPU frequency and C-state residency
  * on modern Intel and AMD processors.
  *
- * Copyright (c) 2022 Intel Corporation.
+ * Copyright (c) 2023 Intel Corporation.
  * Len Brown <len.brown@intel.com>
  */
 
@@ -670,8 +670,7 @@ static int perf_instr_count_open(int cpu_num)
 	/* counter for cpu_num, including user + kernel and all processes */
 	fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
 	if (fd == -1) {
-		warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"",
-		      progname);
+		warnx("capget(CAP_PERFMON) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
 		BIC_NOT_PRESENT(BIC_IPC);
 	}
 
@@ -3516,8 +3515,7 @@ int set_my_sched_priority(int priority)
 
 	retval = setpriority(PRIO_PROCESS, 0, priority);
 	if (retval)
-		errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"",
-		     progname);
+		errx(retval, "capget(CAP_SYS_NICE) failed,try \"# setcap cap_sys_nice=ep %s\"", progname);
 
 	errno = 0;
 	retval = getpriority(PRIO_PROCESS, 0);
@@ -5478,8 +5476,7 @@ void print_dev_latency(void)
 
 	fd = open(path, O_RDONLY);
 	if (fd < 0) {
-		warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"",
-		      progname);
+		warnx("capget(CAP_SYS_ADMIN) failed, try \"# setcap cap_sys_admin=ep %s\"", progname);
 		return;
 	}
 
@@ -6228,7 +6225,7 @@ int get_and_dump_counters(void)
 
 void print_version()
 {
-	fprintf(outf, "turbostat version 2022.10.04 - Len Brown <lenb@kernel.org>\n");
+	fprintf(outf, "turbostat version 2023.03.17 - Len Brown <lenb@kernel.org>\n");
 }
 
 #define COMMAND_LINE_SIZE 2048

From 70e42feab2e20618ddd0cbfc4ab4b08628236ecd Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 17 Mar 2023 21:53:52 -0400
Subject: [PATCH 395/411] ext4: fix possible double unlock when moving a
 directory

Fixes: 0813299c586b ("ext4: Fix possible corruption when moving a directory")
Link: https://lore.kernel.org/r/5efbe1b9-ad8b-4a4f-b422-24824d2b775c@kili.mountain
Reported-by: Dan Carpenter <error27@gmail.com>
Reported-by: syzbot+0c73d1d8b952c5f3d714@syzkaller.appspotmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/namei.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 31e21de56432d..a5010b5b8a8c1 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -3884,10 +3884,8 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
 				goto end_rename;
 		}
 		retval = ext4_rename_dir_prepare(handle, &old);
-		if (retval) {
-			inode_unlock(old.inode);
+		if (retval)
 			goto end_rename;
-		}
 	}
 	/*
 	 * If we're renaming a file within an inline_data dir and adding or

From efbcbb12ee99f750c9f25c873b55ad774871de2a Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 17 Mar 2023 13:51:17 -0700
Subject: [PATCH 396/411] media: m5mols: fix off-by-one loop termination error

The __find_restype() function loops over the m5mols_default_ffmt[]
array, and the termination condition ends up being wrong: instead of
stopping when the iterator becomes the size of the array it traverses,
it stops after it has already overshot the array.

Now, in practice this doesn't likely matter, because the code will
always find the entry it looks for, and will thus return early and never
hit that last extra iteration.

But it turns out that clang will unroll the loop fully, because it has
only two iterations (well, three due to the off-by-one bug), and then
clang will end up just giving up in the middle of the loop unrolling
when it notices that the code walks past the end of the array.

And that made 'objtool' very unhappy indeed, because the generated code
just falls off the edge of the universe, and ends up falling through to
the next function, causing this warning:

   drivers/media/i2c/m5mols/m5mols.o: warning: objtool: m5mols_set_fmt() falls through to next function m5mols_get_frame_desc()

Fix the loop ending condition.

Reported-by: Jens Axboe <axboe@kernel.dk>
Analyzed-by: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Analyzed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://lore.kernel.org/linux-block/CAHk-=wgTSdKYbmB1JYM5vmHMcD9J9UZr0mn7BOYM_LudrP+Xvw@mail.gmail.com/
Fixes: bc125106f8af ("[media] Add support for M-5MOLS 8 Mega Pixel camera ISP")
Cc: HeungJun, Kim <riverful.kim@samsung.com>
Cc: Sylwester Nawrocki <s.nawrocki@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Mauro Carvalho Chehab <mchehab@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/media/i2c/m5mols/m5mols_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/i2c/m5mols/m5mols_core.c b/drivers/media/i2c/m5mols/m5mols_core.c
index 2b01873ba0db5..5c2336f318d9a 100644
--- a/drivers/media/i2c/m5mols/m5mols_core.c
+++ b/drivers/media/i2c/m5mols/m5mols_core.c
@@ -488,7 +488,7 @@ static enum m5mols_restype __find_restype(u32 code)
 	do {
 		if (code == m5mols_default_ffmt[type].code)
 			return type;
-	} while (type++ != SIZE_DEFAULT_FFMT);
+	} while (++type != SIZE_DEFAULT_FFMT);
 
 	return 0;
 }

From bc4f359b3b607daac0290d0038561237a86b38cb Mon Sep 17 00:00:00 2001
From: Anton Gusev <aagusev@ispras.ru>
Date: Tue, 31 Jan 2023 10:58:18 +0300
Subject: [PATCH 397/411] tracing: Fix wrong return in kprobe_event_gen_test.c

Overwriting the error code with the deletion result may cause the
function to return 0 despite encountering an error. Commit b111545d26c0
("tracing: Remove the useless value assignment in
test_create_synth_event()") solves a similar issue by
returning the original error code, so this patch does the same.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Link: https://lore.kernel.org/linux-trace-kernel/20230131075818.5322-1-aagusev@ispras.ru

Signed-off-by: Anton Gusev <aagusev@ispras.ru>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/kprobe_event_gen_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index 4850fdfe27f1c..5a4b722b50451 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -146,7 +146,7 @@ static int __init test_gen_kprobe_cmd(void)
 	if (trace_event_file_is_valid(gen_kprobe_test))
 		gen_kprobe_test = NULL;
 	/* We got an error after creating the event, delete it */
-	ret = kprobe_event_delete("gen_kprobe_test");
+	kprobe_event_delete("gen_kprobe_test");
 	goto out;
 }
 
@@ -211,7 +211,7 @@ static int __init test_gen_kretprobe_cmd(void)
 	if (trace_event_file_is_valid(gen_kretprobe_test))
 		gen_kretprobe_test = NULL;
 	/* We got an error after creating the event, delete it */
-	ret = kprobe_event_delete("gen_kretprobe_test");
+	kprobe_event_delete("gen_kretprobe_test");
 	goto out;
 }
 

From 7a025e066e0f0afd39cc88a089929ccb945ce9e8 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 9 Mar 2023 10:04:14 -0500
Subject: [PATCH 398/411] tracing/osnoise: set several trace_osnoise.c
 variables storage-class-specifier to static

smatch reports several similar warnings
kernel/trace/trace_osnoise.c:220:1: warning:
  symbol '__pcpu_scope_per_cpu_osnoise_var' was not declared. Should it be static?
kernel/trace/trace_osnoise.c:243:1: warning:
  symbol '__pcpu_scope_per_cpu_timerlat_var' was not declared. Should it be static?
kernel/trace/trace_osnoise.c:335:14: warning:
  symbol 'interface_lock' was not declared. Should it be static?
kernel/trace/trace_osnoise.c:2242:5: warning:
  symbol 'timerlat_min_period' was not declared. Should it be static?
kernel/trace/trace_osnoise.c:2243:5: warning:
  symbol 'timerlat_max_period' was not declared. Should it be static?

These variables are only used in trace_osnoise.c, so it should be static

Link: https://lore.kernel.org/linux-trace-kernel/20230309150414.4036764-1-trix@redhat.com

Signed-off-by: Tom Rix <trix@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 04f0fdae19a1c..9176bb7a9bb45 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -217,7 +217,7 @@ struct osnoise_variables {
 /*
  * Per-cpu runtime information.
  */
-DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
+static DEFINE_PER_CPU(struct osnoise_variables, per_cpu_osnoise_var);
 
 /*
  * this_cpu_osn_var - Return the per-cpu osnoise_variables on its relative CPU
@@ -240,7 +240,7 @@ struct timerlat_variables {
 	u64			count;
 };
 
-DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
+static DEFINE_PER_CPU(struct timerlat_variables, per_cpu_timerlat_var);
 
 /*
  * this_cpu_tmr_var - Return the per-cpu timerlat_variables on its relative CPU
@@ -332,7 +332,7 @@ struct timerlat_sample {
 /*
  * Protect the interface.
  */
-struct mutex interface_lock;
+static struct mutex interface_lock;
 
 /*
  * Tracer data.
@@ -2239,8 +2239,8 @@ static struct trace_min_max_param osnoise_print_stack = {
 /*
  * osnoise/timerlat_period: min 100 us, max 1 s
  */
-u64 timerlat_min_period = 100;
-u64 timerlat_max_period = 1000000;
+static u64 timerlat_min_period = 100;
+static u64 timerlat_max_period = 1000000;
 static struct trace_min_max_param timerlat_period = {
 	.lock	= &interface_lock,
 	.val	= &osnoise_data.timerlat_period,

From 4c42f5f0d1dd20bddd9f940beb1e6ccad60c4498 Mon Sep 17 00:00:00 2001
From: Tero Kristo <tero.kristo@linux.intel.com>
Date: Fri, 10 Mar 2023 12:04:50 +0200
Subject: [PATCH 399/411] trace/hwlat: Do not wipe the contents of per-cpu
 thread data

Do not wipe the contents of the per-cpu kthread data when starting the
tracer, as this will completely forget about already running instances
and can later start new additional per-cpu threads.

Link: https://lore.kernel.org/all/20230302113654.2984709-1-tero.kristo@linux.intel.com/
Link: https://lkml.kernel.org/r/20230310100451.3948583-2-tero.kristo@linux.intel.com

Cc: stable@vger.kernel.org
Fixes: f46b16520a087 ("trace/hwlat: Implement the per-cpu mode")
Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_hwlat.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index d440ddd5fd8b2..edc26dc22c3f1 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -584,9 +584,6 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
 	 */
 	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
 
-	for_each_online_cpu(cpu)
-		per_cpu(hwlat_per_cpu_data, cpu).kthread = NULL;
-
 	for_each_cpu(cpu, current_mask) {
 		retval = start_cpu_kthread(cpu);
 		if (retval)

From 08697bca9bbba15f2058fdbd9f970bd5f6a8a2e8 Mon Sep 17 00:00:00 2001
From: Tero Kristo <tero.kristo@linux.intel.com>
Date: Fri, 10 Mar 2023 12:04:51 +0200
Subject: [PATCH 400/411] trace/hwlat: Do not start per-cpu thread if it is
 already running

The hwlatd tracer will end up starting multiple per-cpu threads with
the following script:

    #!/bin/sh
    cd /sys/kernel/debug/tracing
    echo 0 > tracing_on
    echo hwlat > current_tracer
    echo per-cpu > hwlat_detector/mode
    echo 100000 > hwlat_detector/width
    echo 200000 > hwlat_detector/window
    echo 1 > tracing_on

To fix the issue, check if the hwlatd thread for the cpu is already
running, before starting a new one. Along with the previous patch, this
avoids running multiple instances of the same CPU thread on the system.

Link: https://lore.kernel.org/all/20230302113654.2984709-1-tero.kristo@linux.intel.com/
Link: https://lkml.kernel.org/r/20230310100451.3948583-3-tero.kristo@linux.intel.com

Cc: stable@vger.kernel.org
Fixes: f46b16520a087 ("trace/hwlat: Implement the per-cpu mode")
Signed-off-by: Tero Kristo <tero.kristo@linux.intel.com>
Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_hwlat.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index edc26dc22c3f1..c4945f8adc119 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -492,6 +492,10 @@ static int start_cpu_kthread(unsigned int cpu)
 {
 	struct task_struct *kthread;
 
+	/* Do not start a new hwlatd thread if it is already running */
+	if (per_cpu(hwlat_per_cpu_data, cpu).kthread)
+		return 0;
+
 	kthread = kthread_run_on_cpu(kthread_fn, NULL, cpu, "hwlatd/%u");
 	if (IS_ERR(kthread)) {
 		pr_err(BANNER "could not start sampling thread\n");

From 8732565549011cabbea08329a1aefd78a68d96c7 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 11 Mar 2023 08:51:13 -0500
Subject: [PATCH 401/411] ftrace: Set direct_ops storage-class-specifier to
 static

smatch reports this warning
kernel/trace/ftrace.c:2594:19: warning:
  symbol 'direct_ops' was not declared. Should it be static?

The variable direct_ops is only used in ftrace.c, so it should be static

Link: https://lore.kernel.org/linux-trace-kernel/20230311135113.711824-1-trix@redhat.com

Signed-off-by: Tom Rix <trix@redhat.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a47f7d93e32d2..ec2897a760045 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2503,7 +2503,7 @@ static void call_direct_funcs(unsigned long ip, unsigned long pip,
 	arch_ftrace_set_direct_caller(fregs, addr);
 }
 
-struct ftrace_ops direct_ops = {
+static struct ftrace_ops direct_ops = {
 	.func		= call_direct_funcs,
 	.flags		= FTRACE_OPS_FL_DIRECT | FTRACE_OPS_FL_SAVE_REGS
 			  | FTRACE_OPS_FL_PERMANENT,

From e400be674a1a40e9dcb2e95f84d6c1fd2d88f31d Mon Sep 17 00:00:00 2001
From: Sung-hun Kim <sfoon.kim@samsung.com>
Date: Tue, 14 Mar 2023 10:37:07 +0900
Subject: [PATCH 402/411] tracing: Make splice_read available again

Since the commit 36e2c7421f02 ("fs: don't allow splice read/write
without explicit ops") is applied to the kernel, splice() and
sendfile() calls on the trace file (/sys/kernel/debug/tracing
/trace) return EINVAL.

This patch restores these system calls by initializing splice_read
in file_operations of the trace file. This patch only enables such
functionalities for the read case.

Link: https://lore.kernel.org/linux-trace-kernel/20230314013707.28814-1-sfoon.kim@samsung.com

Cc: stable@vger.kernel.org
Fixes: 36e2c7421f02 ("fs: don't allow splice read/write without explicit ops")
Signed-off-by: Sung-hun Kim <sfoon.kim@samsung.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fbb602a8b64b4..4e9a7a9520259 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5164,6 +5164,8 @@ loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
 static const struct file_operations tracing_fops = {
 	.open		= tracing_open,
 	.read		= seq_read,
+	.read_iter	= seq_read_iter,
+	.splice_read	= generic_file_splice_read,
 	.write		= tracing_write_stub,
 	.llseek		= tracing_lseek,
 	.release	= tracing_release,

From a98151ad53b53f010ee364ec2fd06445b328578b Mon Sep 17 00:00:00 2001
From: Vlastimil Babka <vbabka@suse.cz>
Date: Wed, 15 Mar 2023 15:24:46 +0100
Subject: [PATCH 403/411] ring-buffer: remove obsolete comment for
 free_buffer_page()

The comment refers to mm/slob.c which is being removed. It comes from
commit ed56829cb319 ("ring_buffer: reset buffer page when freeing") and
according to Steven the borrowed code was a page mapcount and mapping
reset, which was later removed by commit e4c2ce82ca27 ("ring_buffer:
allocate buffer page pointer"). Thus the comment is not accurate anyway,
remove it.

Link: https://lore.kernel.org/linux-trace-kernel/20230315142446.27040-1-vbabka@suse.cz

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Reported-by: Mike Rapoport <mike.rapoport@gmail.com>
Suggested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Fixes: e4c2ce82ca27 ("ring_buffer: allocate buffer page pointer")
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 071184324d185..3c7cd135333f0 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -354,10 +354,6 @@ static void rb_init_page(struct buffer_data_page *bpage)
 	local_set(&bpage->commit, 0);
 }
 
-/*
- * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing
- * this issue out.
- */
 static void free_buffer_page(struct buffer_page *bpage)
 {
 	free_page((unsigned long)bpage->page);

From 71c7a30442b724717a30d5e7d1662ba4904eb3d4 Mon Sep 17 00:00:00 2001
From: Costa Shulyupin <costa.shul@redhat.com>
Date: Thu, 16 Mar 2023 16:45:35 +0200
Subject: [PATCH 404/411] tracing/hwlat: Replace sched_setaffinity with
 set_cpus_allowed_ptr

There is a problem with the behavior of hwlat in a container,
resulting in incorrect output. A warning message is generated:
"cpumask changed while in round-robin mode, switching to mode none",
and the tracing_cpumask is ignored. This issue arises because
the kernel thread, hwlatd, is not a part of the container, and
the function sched_setaffinity is unable to locate it using its PID.
Additionally, the task_struct of hwlatd is already known.
Ultimately, the function set_cpus_allowed_ptr achieves
the same outcome as sched_setaffinity, but employs task_struct
instead of PID.

Test case:

  # cd /sys/kernel/tracing
  # echo 0 > tracing_on
  # echo round-robin > hwlat_detector/mode
  # echo hwlat > current_tracer
  # unshare --fork --pid bash -c 'echo 1 > tracing_on'
  # dmesg -c

Actual behavior:

[573502.809060] hwlat_detector: cpumask changed while in round-robin mode, switching to mode none

Link: https://lore.kernel.org/linux-trace-kernel/20230316144535.1004952-1-costa.shul@redhat.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Fixes: 0330f7aa8ee63 ("tracing: Have hwlat trace migrate across tracing_cpumask CPUs")
Signed-off-by: Costa Shulyupin <costa.shul@redhat.com>
Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_hwlat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c
index c4945f8adc119..2f37a6e68aa9f 100644
--- a/kernel/trace/trace_hwlat.c
+++ b/kernel/trace/trace_hwlat.c
@@ -339,7 +339,7 @@ static void move_to_next_cpu(void)
 	cpumask_clear(current_mask);
 	cpumask_set_cpu(next_cpu, current_mask);
 
-	sched_setaffinity(0, current_mask);
+	set_cpus_allowed_ptr(current, current_mask);
 	return;
 
  change_mode:
@@ -446,7 +446,7 @@ static int start_single_kthread(struct trace_array *tr)
 
 	}
 
-	sched_setaffinity(kthread->pid, current_mask);
+	set_cpus_allowed_ptr(kthread, current_mask);
 
 	kdata->kthread = kthread;
 	wake_up_process(kthread);

From e8d018dd0257f744ca50a729e3d042cf2ec9da65 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 19 Mar 2023 13:27:55 -0700
Subject: [PATCH 405/411] Linux 6.3-rc3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index b1506df8b8d83..a2c310df21459 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
 VERSION = 6
 PATCHLEVEL = 3
 SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION = -rc3
 NAME = Hurr durr I'ma ninja sloth
 
 # *DOCUMENTATION*

From fbaa38214cd9e150764ccaa82e04ecf42cc1140c Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 11 Mar 2023 15:40:01 +0100
Subject: [PATCH 406/411] cxl/pci: Fix CDAT retrieval on big endian

The CDAT exposed in sysfs differs between little endian and big endian
arches:  On big endian, every 4 bytes are byte-swapped.

PCI Configuration Space is little endian (PCI r3.0 sec 6.1).  Accessors
such as pci_read_config_dword() implicitly swap bytes on big endian.
That way, the macros in include/uapi/linux/pci_regs.h work regardless of
the arch's endianness.  For an example of implicit byte-swapping, see
ppc4xx_pciex_read_config(), which calls in_le32(), which uses lwbrx
(Load Word Byte-Reverse Indexed).

DOE Read/Write Data Mailbox Registers are unlike other registers in
Configuration Space in that they contain or receive a 4 byte portion of
an opaque byte stream (a "Data Object" per PCIe r6.0 sec 7.9.24.5f).
They need to be copied to or from the request/response buffer verbatim.
So amend pci_doe_send_req() and pci_doe_recv_resp() to undo the implicit
byte-swapping.

The CXL_DOE_TABLE_ACCESS_* and PCI_DOE_DATA_OBJECT_DISC_* macros assume
implicit byte-swapping.  Byte-swap requests after constructing them with
those macros and byte-swap responses before parsing them.

Change the request and response type to __le32 to avoid sparse warnings.
Per a request from Jonathan, replace sizeof(u32) with sizeof(__le32) for
consistency.

Fixes: c97006046c79 ("cxl/port: Read CDAT table")
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Cc: stable@vger.kernel.org # v6.0+
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Link: https://lore.kernel.org/r/3051114102f41d19df3debbee123129118fc5e6d.1678543498.git.lukas@wunner.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c  | 26 +++++++++++++-------------
 drivers/pci/doe.c       | 25 ++++++++++++++-----------
 include/linux/pci-doe.h |  8 ++++++--
 3 files changed, 33 insertions(+), 26 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 7328a25524113..49a99a84b6aa6 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -462,7 +462,7 @@ static struct pci_doe_mb *find_cdat_doe(struct device *uport)
 	return NULL;
 }
 
-#define CDAT_DOE_REQ(entry_handle)					\
+#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
 	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
 		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
 	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
@@ -475,8 +475,8 @@ static void cxl_doe_task_complete(struct pci_doe_task *task)
 }
 
 struct cdat_doe_task {
-	u32 request_pl;
-	u32 response_pl[32];
+	__le32 request_pl;
+	__le32 response_pl[32];
 	struct completion c;
 	struct pci_doe_task task;
 };
@@ -510,10 +510,10 @@ static int cxl_cdat_get_length(struct device *dev,
 		return rc;
 	}
 	wait_for_completion(&t.c);
-	if (t.task.rv < sizeof(u32))
+	if (t.task.rv < sizeof(__le32))
 		return -EIO;
 
-	*length = t.response_pl[1];
+	*length = le32_to_cpu(t.response_pl[1]);
 	dev_dbg(dev, "CDAT length %zu\n", *length);
 
 	return 0;
@@ -524,13 +524,13 @@ static int cxl_cdat_read_table(struct device *dev,
 			       struct cxl_cdat *cdat)
 {
 	size_t length = cdat->length;
-	u32 *data = cdat->table;
+	__le32 *data = cdat->table;
 	int entry_handle = 0;
 
 	do {
 		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
 		size_t entry_dw;
-		u32 *entry;
+		__le32 *entry;
 		int rc;
 
 		rc = pci_doe_submit_task(cdat_doe, &t.task);
@@ -540,21 +540,21 @@ static int cxl_cdat_read_table(struct device *dev,
 		}
 		wait_for_completion(&t.c);
 		/* 1 DW header + 1 DW data min */
-		if (t.task.rv < (2 * sizeof(u32)))
+		if (t.task.rv < (2 * sizeof(__le32)))
 			return -EIO;
 
 		/* Get the CXL table access header entry handle */
 		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
-					 t.response_pl[0]);
+					 le32_to_cpu(t.response_pl[0]));
 		entry = t.response_pl + 1;
-		entry_dw = t.task.rv / sizeof(u32);
+		entry_dw = t.task.rv / sizeof(__le32);
 		/* Skip Header */
 		entry_dw -= 1;
-		entry_dw = min(length / sizeof(u32), entry_dw);
+		entry_dw = min(length / sizeof(__le32), entry_dw);
 		/* Prevent length < 1 DW from causing a buffer overflow */
 		if (entry_dw) {
-			memcpy(data, entry, entry_dw * sizeof(u32));
-			length -= entry_dw * sizeof(u32);
+			memcpy(data, entry, entry_dw * sizeof(__le32));
+			length -= entry_dw * sizeof(__le32);
 			data += entry_dw;
 		}
 	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index 66d9ab2886468..6f097932ccbf9 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -128,7 +128,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 		return -EIO;
 
 	/* Length is 2 DW of header + length of payload in DW */
-	length = 2 + task->request_pl_sz / sizeof(u32);
+	length = 2 + task->request_pl_sz / sizeof(__le32);
 	if (length > PCI_DOE_MAX_LENGTH)
 		return -EIO;
 	if (length == PCI_DOE_MAX_LENGTH)
@@ -141,9 +141,9 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 	pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
 			       FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
 					  length));
-	for (i = 0; i < task->request_pl_sz / sizeof(u32); i++)
+	for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
 		pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
-				       task->request_pl[i]);
+				       le32_to_cpu(task->request_pl[i]));
 
 	pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
 
@@ -195,11 +195,11 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
 
 	/* First 2 dwords have already been read */
 	length -= 2;
-	payload_length = min(length, task->response_pl_sz / sizeof(u32));
+	payload_length = min(length, task->response_pl_sz / sizeof(__le32));
 	/* Read the rest of the response payload */
 	for (i = 0; i < payload_length; i++) {
-		pci_read_config_dword(pdev, offset + PCI_DOE_READ,
-				      &task->response_pl[i]);
+		pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
+		task->response_pl[i] = cpu_to_le32(val);
 		/* Prior to the last ack, ensure Data Object Ready */
 		if (i == (payload_length - 1) && !pci_doe_data_obj_ready(doe_mb))
 			return -EIO;
@@ -217,7 +217,7 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
 	if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
 		return -EIO;
 
-	return min(length, task->response_pl_sz / sizeof(u32)) * sizeof(u32);
+	return min(length, task->response_pl_sz / sizeof(__le32)) * sizeof(__le32);
 }
 
 static void signal_task_complete(struct pci_doe_task *task, int rv)
@@ -317,14 +317,16 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
 {
 	u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
 				    *index);
+	__le32 request_pl_le = cpu_to_le32(request_pl);
+	__le32 response_pl_le;
 	u32 response_pl;
 	DECLARE_COMPLETION_ONSTACK(c);
 	struct pci_doe_task task = {
 		.prot.vid = PCI_VENDOR_ID_PCI_SIG,
 		.prot.type = PCI_DOE_PROTOCOL_DISCOVERY,
-		.request_pl = &request_pl,
+		.request_pl = &request_pl_le,
 		.request_pl_sz = sizeof(request_pl),
-		.response_pl = &response_pl,
+		.response_pl = &response_pl_le,
 		.response_pl_sz = sizeof(response_pl),
 		.complete = pci_doe_task_complete,
 		.private = &c,
@@ -340,6 +342,7 @@ static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 *index, u16 *vid,
 	if (task.rv != sizeof(response_pl))
 		return -EIO;
 
+	response_pl = le32_to_cpu(response_pl_le);
 	*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
 	*protocol = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_PROTOCOL,
 			      response_pl);
@@ -533,8 +536,8 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
 	 * DOE requests must be a whole number of DW and the response needs to
 	 * be big enough for at least 1 DW
 	 */
-	if (task->request_pl_sz % sizeof(u32) ||
-	    task->response_pl_sz < sizeof(u32))
+	if (task->request_pl_sz % sizeof(__le32) ||
+	    task->response_pl_sz < sizeof(__le32))
 		return -EINVAL;
 
 	if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
diff --git a/include/linux/pci-doe.h b/include/linux/pci-doe.h
index ed9b4df792b88..43765eaf2342c 100644
--- a/include/linux/pci-doe.h
+++ b/include/linux/pci-doe.h
@@ -34,6 +34,10 @@ struct pci_doe_mb;
  * @work: Used internally by the mailbox
  * @doe_mb: Used internally by the mailbox
  *
+ * Payloads are treated as opaque byte streams which are transmitted verbatim,
+ * without byte-swapping.  If payloads contain little-endian register values,
+ * the caller is responsible for conversion with cpu_to_le32() / le32_to_cpu().
+ *
  * The payload sizes and rv are specified in bytes with the following
  * restrictions concerning the protocol.
  *
@@ -45,9 +49,9 @@ struct pci_doe_mb;
  */
 struct pci_doe_task {
 	struct pci_doe_protocol prot;
-	u32 *request_pl;
+	__le32 *request_pl;
 	size_t request_pl_sz;
-	u32 *response_pl;
+	__le32 *response_pl;
 	size_t response_pl_sz;
 	int rv;
 	void (*complete)(struct pci_doe_task *task);

From 34bafc747c54fb58c1908ec3116fa6137393e596 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 11 Mar 2023 15:40:02 +0100
Subject: [PATCH 407/411] cxl/pci: Handle truncated CDAT header

cxl_cdat_get_length() only checks whether the DOE response size is
sufficient for the Table Access response header (1 dword), but not the
succeeding CDAT header (1 dword length plus other fields).

It thus returns whatever uninitialized memory happens to be on the stack
if a truncated DOE response with only 1 dword was received.  Fix it.

Fixes: c97006046c79 ("cxl/port: Read CDAT table")
Reported-by: Ming Li <ming4.li@intel.com>
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Ming Li <ming4.li@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: stable@vger.kernel.org # v6.0+
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Link: https://lore.kernel.org/r/000e69cd163461c8b1bc2cf4155b6e25402c29c7.1678543498.git.lukas@wunner.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 49a99a84b6aa6..87da8c935185a 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -510,7 +510,7 @@ static int cxl_cdat_get_length(struct device *dev,
 		return rc;
 	}
 	wait_for_completion(&t.c);
-	if (t.task.rv < sizeof(__le32))
+	if (t.task.rv < 2 * sizeof(__le32))
 		return -EIO;
 
 	*length = le32_to_cpu(t.response_pl[1]);

From b56faef2312057db20479b240eb71bd2e51fb51c Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 11 Mar 2023 15:40:03 +0100
Subject: [PATCH 408/411] cxl/pci: Handle truncated CDAT entries

If truncated CDAT entries are received from a device, the concatenation
of those entries constitutes a corrupt CDAT, yet is happily exposed to
user space.

Avoid by verifying response lengths and erroring out if truncation is
detected.

The last CDAT entry may still be truncated despite the checks introduced
herein if the length in the CDAT header is too small.  However, that is
easily detectable by user space because it reaches EOF prematurely.
A subsequent commit which rightsizes the CDAT response allocation closes
that remaining loophole.

The two lines introduced here which exceed 80 chars are shortened to
less than 80 chars by a subsequent commit which migrates to a
synchronous DOE API and replaces "t.task.rv" by "rc".

The existing acpi_cdat_header and acpi_table_cdat struct definitions
provided by ACPICA cannot be used because they do not employ __le16 or
__le32 types.  I believe that cannot be changed because those types are
Linux-specific and ACPI is specified for little endian platforms only,
hence doesn't care about endianness.  So duplicate the structs.

Fixes: c97006046c79 ("cxl/port: Read CDAT table")
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: stable@vger.kernel.org # v6.0+
Link: https://lore.kernel.org/r/bce3aebc0e8e18a1173425a7a865b232c3912963.1678543498.git.lukas@wunner.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c | 13 +++++++++----
 drivers/cxl/cxlpci.h   | 14 ++++++++++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 87da8c935185a..fb600dfbf5a6e 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -529,8 +529,8 @@ static int cxl_cdat_read_table(struct device *dev,
 
 	do {
 		DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
+		struct cdat_entry_header *entry;
 		size_t entry_dw;
-		__le32 *entry;
 		int rc;
 
 		rc = pci_doe_submit_task(cdat_doe, &t.task);
@@ -539,14 +539,19 @@ static int cxl_cdat_read_table(struct device *dev,
 			return rc;
 		}
 		wait_for_completion(&t.c);
-		/* 1 DW header + 1 DW data min */
-		if (t.task.rv < (2 * sizeof(__le32)))
+
+		/* 1 DW Table Access Response Header + CDAT entry */
+		entry = (struct cdat_entry_header *)(t.response_pl + 1);
+		if ((entry_handle == 0 &&
+		     t.task.rv != sizeof(__le32) + sizeof(struct cdat_header)) ||
+		    (entry_handle > 0 &&
+		     (t.task.rv < sizeof(__le32) + sizeof(*entry) ||
+		      t.task.rv != sizeof(__le32) + le16_to_cpu(entry->length))))
 			return -EIO;
 
 		/* Get the CXL table access header entry handle */
 		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
 					 le32_to_cpu(t.response_pl[0]));
-		entry = t.response_pl + 1;
 		entry_dw = t.task.rv / sizeof(__le32);
 		/* Skip Header */
 		entry_dw -= 1;
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index be6a2ef3cce37..0465ef963cd6a 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -68,6 +68,20 @@ enum cxl_regloc_type {
 	CXL_REGLOC_RBI_TYPES
 };
 
+struct cdat_header {
+	__le32 length;
+	u8 revision;
+	u8 checksum;
+	u8 reserved[6];
+	__le32 sequence;
+} __packed;
+
+struct cdat_entry_header {
+	u8 type;
+	u8 reserved;
+	__le16 length;
+} __packed;
+
 int devm_cxl_port_enumerate_dports(struct cxl_port *port);
 struct cxl_dev_state;
 int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,

From 4fe2c13d59d849be3b45371e3913ec5dc77fc0fb Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 11 Mar 2023 15:40:04 +0100
Subject: [PATCH 409/411] cxl/pci: Handle excessive CDAT length

If the length in the CDAT header is larger than the concatenation of the
header and all table entries, then the CDAT exposed to user space
contains trailing null bytes.

Not every consumer may be able to handle that.  Per Postel's robustness
principle, "be liberal in what you accept" and silently reduce the
cached length to avoid exposing those null bytes.

Fixes: c97006046c79 ("cxl/port: Read CDAT table")
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: stable@vger.kernel.org # v6.0+
Link: https://lore.kernel.org/r/6d98b3c7da5343172bd3ccabfabbc1f31c079d74.1678543498.git.lukas@wunner.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/cxl/core/pci.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index fb600dfbf5a6e..523d5b9fd7fcf 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -564,6 +564,9 @@ static int cxl_cdat_read_table(struct device *dev,
 		}
 	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
 
+	/* Length in CDAT header may exceed concatenation of CDAT entries */
+	cdat->length -= length;
+
 	return 0;
 }
 

From 92dc899c3b4927f3cfa23f55bf759171234b5802 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 11 Mar 2023 15:40:05 +0100
Subject: [PATCH 410/411] PCI/DOE: Silence WARN splat with
 CONFIG_DEBUG_OBJECTS=y

Gregory Price reports a WARN splat with CONFIG_DEBUG_OBJECTS=y upon CXL
probing because pci_doe_submit_task() invokes INIT_WORK() instead of
INIT_WORK_ONSTACK() for a work_struct that was allocated on the stack.

All callers of pci_doe_submit_task() allocate the work_struct on the
stack, so replace INIT_WORK() with INIT_WORK_ONSTACK() as a backportable
short-term fix.

The long-term fix implemented by a subsequent commit is to move to a
synchronous API which allocates the work_struct internally in the DOE
library.

Stacktrace for posterity:

WARNING: CPU: 0 PID: 23 at lib/debugobjects.c:545 __debug_object_init.cold+0x18/0x183
CPU: 0 PID: 23 Comm: kworker/u2:1 Not tainted 6.1.0-0.rc1.20221019gitaae703b02f92.17.fc38.x86_64 #1
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
Call Trace:
 pci_doe_submit_task+0x5d/0xd0
 pci_doe_discovery+0xb4/0x100
 pcim_doe_create_mb+0x219/0x290
 cxl_pci_probe+0x192/0x430
 local_pci_probe+0x41/0x80
 pci_device_probe+0xb3/0x220
 really_probe+0xde/0x380
 __driver_probe_device+0x78/0x170
 driver_probe_device+0x1f/0x90
 __driver_attach_async_helper+0x5c/0xe0
 async_run_entry_fn+0x30/0x130
 process_one_work+0x294/0x5b0

Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions")
Link: https://lore.kernel.org/linux-cxl/Y1bOniJliOFszvIK@memverge.com/
Reported-by: Gregory Price <gregory.price@memverge.com>
Tested-by: Ira Weiny <ira.weiny@intel.com>
Tested-by: Gregory Price <gregory.price@memverge.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Gregory Price <gregory.price@memverge.com>
Cc: stable@vger.kernel.org # v6.0+
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/67a9117f463ecdb38a2dbca6a20391ce2f1e7a06.1678543498.git.lukas@wunner.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/pci/doe.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index 6f097932ccbf9..c14ffdf23f870 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -523,6 +523,8 @@ EXPORT_SYMBOL_GPL(pci_doe_supports_prot);
  * task->complete will be called when the state machine is done processing this
  * task.
  *
+ * @task must be allocated on the stack.
+ *
  * Excess data will be discarded.
  *
  * RETURNS: 0 when task has been successfully queued, -ERRNO on error
@@ -544,7 +546,7 @@ int pci_doe_submit_task(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
 		return -EIO;
 
 	task->doe_mb = doe_mb;
-	INIT_WORK(&task->work, doe_statemachine_work);
+	INIT_WORK_ONSTACK(&task->work, doe_statemachine_work);
 	queue_work(doe_mb->work_queue, &task->work);
 	return 0;
 }

From abf04be0e7071f2bcd39bf97ba407e7d4439785e Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sat, 11 Mar 2023 15:40:06 +0100
Subject: [PATCH 411/411] PCI/DOE: Fix memory leak with CONFIG_DEBUG_OBJECTS=y

After a pci_doe_task completes, its work_struct needs to be destroyed
to avoid a memory leak with CONFIG_DEBUG_OBJECTS=y.

Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions")
Tested-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: stable@vger.kernel.org # v6.0+
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Link: https://lore.kernel.org/r/775768b4912531c3b887d405fc51a50e465e1bf9.1678543498.git.lukas@wunner.de
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/pci/doe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index c14ffdf23f870..e5e9b287b9766 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -224,6 +224,7 @@ static void signal_task_complete(struct pci_doe_task *task, int rv)
 {
 	task->rv = rv;
 	task->complete(task);
+	destroy_work_on_stack(&task->work);
 }
 
 static void signal_task_abort(struct pci_doe_task *task, int rv)