From 8c1b21198551d795a44f08ad185f716732b47bbe Mon Sep 17 00:00:00 2001
From: Milan Landaverde <milan@mdaverde.com>
Date: Tue, 22 Mar 2022 10:49:45 -0400
Subject: [PATCH 01/57] bpf/bpftool: Add unprivileged_bpf_disabled check
 against value of 2

In [1], we added a kconfig knob that can set
/proc/sys/kernel/unprivileged_bpf_disabled to 2

We now check against this value in bpftool feature probe

[1] https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net

Signed-off-by: Milan Landaverde <milan@mdaverde.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20220322145012.1315376-1-milan@mdaverde.com
---
 tools/bpf/bpftool/feature.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index c2f43a5d38e01..290998c82de12 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -207,7 +207,10 @@ static void probe_unprivileged_disabled(void)
 			printf("bpf() syscall for unprivileged users is enabled\n");
 			break;
 		case 1:
-			printf("bpf() syscall restricted to privileged users\n");
+			printf("bpf() syscall restricted to privileged users (without recovery)\n");
+			break;
+		case 2:
+			printf("bpf() syscall restricted to privileged users (admin can change)\n");
 			break;
 		case -1:
 			printf("Unable to retrieve required privileges for bpf() syscall\n");

From 9052e4e83762c40d89f6650fec161ad9001e93f3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 23 Mar 2022 16:35:26 +0900
Subject: [PATCH 02/57] fprobe: Fix smatch type mismatch warning

Fix the type mismatching warning of 'rethook_node vs fprobe_rethook_node'
found by Smatch.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/164802092611.1732982.12268174743437084619.stgit@devnote2
---
 kernel/trace/fprobe.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 8b2dd5b9dcd14..63b2321b22a08 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -150,15 +150,15 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
 
 	fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
 	for (i = 0; i < size; i++) {
-		struct rethook_node *node;
+		struct fprobe_rethook_node *node;
 
-		node = kzalloc(sizeof(struct fprobe_rethook_node), GFP_KERNEL);
+		node = kzalloc(sizeof(*node), GFP_KERNEL);
 		if (!node) {
 			rethook_free(fp->rethook);
 			fp->rethook = NULL;
 			return -ENOMEM;
 		}
-		rethook_add_node(fp->rethook, node);
+		rethook_add_node(fp->rethook, &node->node);
 	}
 	return 0;
 }

From 261608f3105ce65e9fd01919f72ead74dcb9f14d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Wed, 23 Mar 2022 16:35:36 +0900
Subject: [PATCH 03/57] fprobe: Fix sparse warning for acccessing __rcu
 ftrace_hash

Since ftrace_ops::local_hash::filter_hash field is an __rcu pointer,
we have to use rcu_access_pointer() to access it.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/164802093635.1732982.4938094876018890866.stgit@devnote2
---
 kernel/trace/fprobe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 63b2321b22a08..89d9f994ebb02 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -215,7 +215,7 @@ int register_fprobe(struct fprobe *fp, const char *filter, const char *notfilter
 	 * correctly calculate the total number of filtered symbols
 	 * from both filter and notfilter.
 	 */
-	hash = fp->ops.local_hash.filter_hash;
+	hash = rcu_access_pointer(fp->ops.local_hash.filter_hash);
 	if (WARN_ON_ONCE(!hash))
 		goto out;
 

From 98870605b3742c38353b50b6dbad33fab9de415f Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Thu, 24 Mar 2022 16:37:32 +0800
Subject: [PATCH 04/57] bpf: Sync comments for bpf_get_stack

Commit ee2a098851bf missed updating the comments for helper bpf_get_stack
in tools/include/uapi/linux/bpf.h. Sync it.

Fixes: ee2a098851bf ("bpf: Adjust BPF stack helper functions to accommodate skip > 0")
Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/ce54617746b7ed5e9ba3b844e55e74cb8a60e0b5.1648110794.git.geliang.tang@suse.com
---
 tools/include/uapi/linux/bpf.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7604e7d5438f8..d14b10b85e51f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3009,8 +3009,8 @@ union bpf_attr {
  *
  * 			# sysctl kernel.perf_event_max_stack=<new value>
  * 	Return
- * 		A non-negative value equal to or less than *size* on success,
- * 		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
  *
  * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
  * 	Description
@@ -4316,8 +4316,8 @@ union bpf_attr {
  *
  *			# sysctl kernel.perf_event_max_stack=<new value>
  *	Return
- *		A non-negative value equal to or less than *size* on success,
- *		or a negative error in case of failure.
+ * 		The non-negative copied *buf* length equal to or less than
+ * 		*size* on success, or a negative error in case of failure.
  *
  * long bpf_load_hdr_opt(struct bpf_sock_ops *skops, void *searchby_res, u32 len, u64 flags)
  *	Description

From c29a4920dfcaa1433b09e2674f605f72767a385c Mon Sep 17 00:00:00 2001
From: Yuntao Wang <ytcoode@gmail.com>
Date: Fri, 25 Mar 2022 00:42:38 +0800
Subject: [PATCH 05/57] bpf: Fix maximum permitted number of arguments check

Since the m->arg_size array can hold up to MAX_BPF_FUNC_ARGS argument
sizes, it's ok that nargs is equal to MAX_BPF_FUNC_ARGS.

Signed-off-by: Yuntao Wang <ytcoode@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20220324164238.1274915-1-ytcoode@gmail.com
---
 kernel/bpf/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 24788ce564a08..0918a39279f6c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -5507,7 +5507,7 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 	}
 	args = (const struct btf_param *)(func + 1);
 	nargs = btf_type_vlen(func);
-	if (nargs >= MAX_BPF_FUNC_ARGS) {
+	if (nargs > MAX_BPF_FUNC_ARGS) {
 		bpf_log(log,
 			"The function %s has %d arguments. Too many.\n",
 			tname, nargs);

From 99dea2c664d7bc7e4f6f6947182d0d365165a998 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 25 Mar 2022 15:56:43 -0700
Subject: [PATCH 06/57] selftests/bpf: fix selftest after random: Urandom_read
 tracepoint removal

14c174633f34 ("random: remove unused tracepoints") removed all the
tracepoints from drivers/char/random.c, one of which,
random:urandom_read, was used by stacktrace_build_id selftest to trigger
stack trace capture.

Fix breakage by switching to kprobing urandom_read() function.

Suggested-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220325225643.2606-1-andrii@kernel.org
---
 .../selftests/bpf/progs/test_stacktrace_build_id.c   | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 36a707e7c7a7a..6c62bfb8bb6fa 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -39,16 +39,8 @@ struct {
 	__type(value, stack_trace_t);
 } stack_amap SEC(".maps");
 
-/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct random_urandom_args {
-	unsigned long long pad;
-	int got_bits;
-	int pool_left;
-	int input_left;
-};
-
-SEC("tracepoint/random/urandom_read")
-int oncpu(struct random_urandom_args *args)
+SEC("kprobe/urandom_read")
+int oncpu(struct pt_regs *args)
 {
 	__u32 max_len = sizeof(struct bpf_stack_build_id)
 			* PERF_MAX_STACK_DEPTH;

From ef8a257b4e499a979364b1f9caf25a325f6ee8b8 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 28 Mar 2022 10:37:03 +0200
Subject: [PATCH 07/57] bpftool: Fix generated code in codegen_asserts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Arnaldo reported perf compilation fail with:

  $ make -k BUILD_BPF_SKEL=1 CORESIGHT=1 PYTHON=python3
  ...
  In file included from util/bpf_counter.c:28:
  /tmp/build/perf//util/bpf_skel/bperf_leader.skel.h: In function ‘bperf_leader_bpf__assert’:
  /tmp/build/perf//util/bpf_skel/bperf_leader.skel.h:351:51: error: unused parameter ‘s’ [-Werror=unused-parameter]
    351 | bperf_leader_bpf__assert(struct bperf_leader_bpf *s)
        |                          ~~~~~~~~~~~~~~~~~~~~~~~~~^
  cc1: all warnings being treated as errors

If there's nothing to generate in the new assert function,
we will get unused 's' warn/error, adding 'unused' attribute to it.

Fixes: 08d4dba6ae77 ("bpftool: Bpf skeletons assert type sizes")
Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: https://lore.kernel.org/bpf/20220328083703.2880079-1-jolsa@kernel.org
---
 tools/bpf/bpftool/gen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 7ba7ff55d2eaa..91af2850b5057 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -477,7 +477,7 @@ static void codegen_asserts(struct bpf_object *obj, const char *obj_name)
 	codegen("\
 		\n\
 		__attribute__((unused)) static void			    \n\
-		%1$s__assert(struct %1$s *s)				    \n\
+		%1$s__assert(struct %1$s *s __attribute__((unused)))	    \n\
 		{							    \n\
 		#ifdef __cplusplus					    \n\
 		#define _Static_assert static_assert			    \n\

From 73f9b911faa74ac5107879de05c9489c419f41bb Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 26 Mar 2022 11:27:05 +0900
Subject: [PATCH 08/57] kprobes: Use rethook for kretprobe if possible

Use rethook for kretprobe function return hooking if the arch sets
CONFIG_HAVE_RETHOOK=y. In this case, CONFIG_KRETPROBE_ON_RETHOOK is
set to 'y' automatically, and the kretprobe internal data fields
switches to use rethook. If not, it continues to use kretprobe
specific function return hooks.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/164826162556.2455864.12255833167233452047.stgit@devnote2
---
 arch/Kconfig                |   8 ++-
 include/linux/kprobes.h     |  51 ++++++++++++++-
 kernel/Makefile             |   1 +
 kernel/kprobes.c            | 124 ++++++++++++++++++++++++++++++------
 kernel/trace/trace_kprobe.c |   4 +-
 5 files changed, 163 insertions(+), 25 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 84bc1de02720b..33e06966f2487 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -164,7 +164,13 @@ config ARCH_USE_BUILTIN_BSWAP
 
 config KRETPROBES
 	def_bool y
-	depends on KPROBES && HAVE_KRETPROBES
+	depends on KPROBES && (HAVE_KRETPROBES || HAVE_RETHOOK)
+
+config KRETPROBE_ON_RETHOOK
+	def_bool y
+	depends on HAVE_RETHOOK
+	depends on KRETPROBES
+	select RETHOOK
 
 config USER_RETURN_NOTIFIER
 	bool
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 312ff997c7436..157168769fc26 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -28,6 +28,7 @@
 #include <linux/ftrace.h>
 #include <linux/refcount.h>
 #include <linux/freelist.h>
+#include <linux/rethook.h>
 #include <asm/kprobes.h>
 
 #ifdef CONFIG_KPROBES
@@ -149,13 +150,20 @@ struct kretprobe {
 	int maxactive;
 	int nmissed;
 	size_t data_size;
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+	struct rethook *rh;
+#else
 	struct freelist_head freelist;
 	struct kretprobe_holder *rph;
+#endif
 };
 
 #define KRETPROBE_MAX_DATA_SIZE	4096
 
 struct kretprobe_instance {
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+	struct rethook_node node;
+#else
 	union {
 		struct freelist_node freelist;
 		struct rcu_head rcu;
@@ -164,6 +172,7 @@ struct kretprobe_instance {
 	struct kretprobe_holder *rph;
 	kprobe_opcode_t *ret_addr;
 	void *fp;
+#endif
 	char data[];
 };
 
@@ -186,10 +195,24 @@ extern void kprobe_busy_begin(void);
 extern void kprobe_busy_end(void);
 
 #ifdef CONFIG_KRETPROBES
-extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
-				   struct pt_regs *regs);
+/* Check whether @p is used for implementing a trampoline. */
 extern int arch_trampoline_kprobe(struct kprobe *p);
 
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
+{
+	RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
+		"Kretprobe is accessed from instance under preemptive context");
+
+	return (struct kretprobe *)READ_ONCE(ri->node.rethook->data);
+}
+static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri)
+{
+	return ri->node.ret_addr;
+}
+#else
+extern void arch_prepare_kretprobe(struct kretprobe_instance *ri,
+				   struct pt_regs *regs);
 void arch_kretprobe_fixup_return(struct pt_regs *regs,
 				 kprobe_opcode_t *correct_ret_addr);
 
@@ -232,6 +255,12 @@ static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance
 	return READ_ONCE(ri->rph->rp);
 }
 
+static nokprobe_inline unsigned long get_kretprobe_retaddr(struct kretprobe_instance *ri)
+{
+	return (unsigned long)ri->ret_addr;
+}
+#endif /* CONFIG_KRETPROBE_ON_RETHOOK */
+
 #else /* !CONFIG_KRETPROBES */
 static inline void arch_prepare_kretprobe(struct kretprobe *rp,
 					struct pt_regs *regs)
@@ -395,7 +424,11 @@ void unregister_kretprobe(struct kretprobe *rp);
 int register_kretprobes(struct kretprobe **rps, int num);
 void unregister_kretprobes(struct kretprobe **rps, int num);
 
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+#define kprobe_flush_task(tk)	do {} while (0)
+#else
 void kprobe_flush_task(struct task_struct *tk);
+#endif
 
 void kprobe_free_init_mem(void);
 
@@ -509,6 +542,19 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr)
 #endif /* !CONFIG_OPTPROBES */
 
 #ifdef CONFIG_KRETPROBES
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr)
+{
+	return is_rethook_trampoline(addr);
+}
+
+static nokprobe_inline
+unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
+				      struct llist_node **cur)
+{
+	return rethook_find_ret_addr(tsk, (unsigned long)fp, cur);
+}
+#else
 static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr)
 {
 	return (void *)addr == kretprobe_trampoline_addr();
@@ -516,6 +562,7 @@ static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr)
 
 unsigned long kretprobe_find_ret_addr(struct task_struct *tsk, void *fp,
 				      struct llist_node **cur);
+#endif
 #else
 static nokprobe_inline bool is_kretprobe_trampoline(unsigned long addr)
 {
diff --git a/kernel/Makefile b/kernel/Makefile
index 56f4ee97f3284..471d71935e90a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -108,6 +108,7 @@ obj-$(CONFIG_TRACING) += trace/
 obj-$(CONFIG_TRACE_CLOCK) += trace/
 obj-$(CONFIG_RING_BUFFER) += trace/
 obj-$(CONFIG_TRACEPOINTS) += trace/
+obj-$(CONFIG_RETHOOK) += trace/
 obj-$(CONFIG_IRQ_WORK) += irq_work.o
 obj-$(CONFIG_CPU_PM) += cpu_pm.o
 obj-$(CONFIG_BPF) += bpf/
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 185badc780b7c..dbe57df2e199e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1237,6 +1237,27 @@ void kprobes_inc_nmissed_count(struct kprobe *p)
 }
 NOKPROBE_SYMBOL(kprobes_inc_nmissed_count);
 
+static struct kprobe kprobe_busy = {
+	.addr = (void *) get_kprobe,
+};
+
+void kprobe_busy_begin(void)
+{
+	struct kprobe_ctlblk *kcb;
+
+	preempt_disable();
+	__this_cpu_write(current_kprobe, &kprobe_busy);
+	kcb = get_kprobe_ctlblk();
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+}
+
+void kprobe_busy_end(void)
+{
+	__this_cpu_write(current_kprobe, NULL);
+	preempt_enable();
+}
+
+#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
 static void free_rp_inst_rcu(struct rcu_head *head)
 {
 	struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
@@ -1258,26 +1279,6 @@ static void recycle_rp_inst(struct kretprobe_instance *ri)
 }
 NOKPROBE_SYMBOL(recycle_rp_inst);
 
-static struct kprobe kprobe_busy = {
-	.addr = (void *) get_kprobe,
-};
-
-void kprobe_busy_begin(void)
-{
-	struct kprobe_ctlblk *kcb;
-
-	preempt_disable();
-	__this_cpu_write(current_kprobe, &kprobe_busy);
-	kcb = get_kprobe_ctlblk();
-	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-}
-
-void kprobe_busy_end(void)
-{
-	__this_cpu_write(current_kprobe, NULL);
-	preempt_enable();
-}
-
 /*
  * This function is called from delayed_put_task_struct() when a task is
  * dead and cleaned up to recycle any kretprobe instances associated with
@@ -1327,6 +1328,7 @@ static inline void free_rp_inst(struct kretprobe *rp)
 		rp->rph = NULL;
 	}
 }
+#endif	/* !CONFIG_KRETPROBE_ON_RETHOOK */
 
 /* Add the new probe to 'ap->list'. */
 static int add_new_kprobe(struct kprobe *ap, struct kprobe *p)
@@ -1925,6 +1927,7 @@ static struct notifier_block kprobe_exceptions_nb = {
 
 #ifdef CONFIG_KRETPROBES
 
+#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
 /* This assumes the 'tsk' is the current task or the is not running. */
 static kprobe_opcode_t *__kretprobe_find_ret_addr(struct task_struct *tsk,
 						  struct llist_node **cur)
@@ -2087,6 +2090,57 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 NOKPROBE_SYMBOL(pre_handler_kretprobe);
+#else /* CONFIG_KRETPROBE_ON_RETHOOK */
+/*
+ * This kprobe pre_handler is registered with every kretprobe. When probe
+ * hits it will set up the return probe.
+ */
+static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
+{
+	struct kretprobe *rp = container_of(p, struct kretprobe, kp);
+	struct kretprobe_instance *ri;
+	struct rethook_node *rhn;
+
+	rhn = rethook_try_get(rp->rh);
+	if (!rhn) {
+		rp->nmissed++;
+		return 0;
+	}
+
+	ri = container_of(rhn, struct kretprobe_instance, node);
+
+	if (rp->entry_handler && rp->entry_handler(ri, regs))
+		rethook_recycle(rhn);
+	else
+		rethook_hook(rhn, regs, kprobe_ftrace(p));
+
+	return 0;
+}
+NOKPROBE_SYMBOL(pre_handler_kretprobe);
+
+static void kretprobe_rethook_handler(struct rethook_node *rh, void *data,
+				      struct pt_regs *regs)
+{
+	struct kretprobe *rp = (struct kretprobe *)data;
+	struct kretprobe_instance *ri;
+	struct kprobe_ctlblk *kcb;
+
+	/* The data must NOT be null. This means rethook data structure is broken. */
+	if (WARN_ON_ONCE(!data))
+		return;
+
+	__this_cpu_write(current_kprobe, &rp->kp);
+	kcb = get_kprobe_ctlblk();
+	kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+	ri = container_of(rh, struct kretprobe_instance, node);
+	rp->handler(ri, regs);
+
+	__this_cpu_write(current_kprobe, NULL);
+}
+NOKPROBE_SYMBOL(kretprobe_rethook_handler);
+
+#endif /* !CONFIG_KRETPROBE_ON_RETHOOK */
 
 /**
  * kprobe_on_func_entry() -- check whether given address is function entry
@@ -2155,6 +2209,29 @@ int register_kretprobe(struct kretprobe *rp)
 		rp->maxactive = num_possible_cpus();
 #endif
 	}
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+	rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler);
+	if (!rp->rh)
+		return -ENOMEM;
+
+	for (i = 0; i < rp->maxactive; i++) {
+		inst = kzalloc(sizeof(struct kretprobe_instance) +
+			       rp->data_size, GFP_KERNEL);
+		if (inst == NULL) {
+			rethook_free(rp->rh);
+			rp->rh = NULL;
+			return -ENOMEM;
+		}
+		rethook_add_node(rp->rh, &inst->node);
+	}
+	rp->nmissed = 0;
+	/* Establish function entry probe point */
+	ret = register_kprobe(&rp->kp);
+	if (ret != 0) {
+		rethook_free(rp->rh);
+		rp->rh = NULL;
+	}
+#else	/* !CONFIG_KRETPROBE_ON_RETHOOK */
 	rp->freelist.head = NULL;
 	rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
 	if (!rp->rph)
@@ -2179,6 +2256,7 @@ int register_kretprobe(struct kretprobe *rp)
 	ret = register_kprobe(&rp->kp);
 	if (ret != 0)
 		free_rp_inst(rp);
+#endif
 	return ret;
 }
 EXPORT_SYMBOL_GPL(register_kretprobe);
@@ -2217,7 +2295,11 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
 	for (i = 0; i < num; i++) {
 		if (__unregister_kprobe_top(&rps[i]->kp) < 0)
 			rps[i]->kp.addr = NULL;
+#ifdef CONFIG_KRETPROBE_ON_RETHOOK
+		rethook_free(rps[i]->rh);
+#else
 		rps[i]->rph->rp = NULL;
+#endif
 	}
 	mutex_unlock(&kprobe_mutex);
 
@@ -2225,7 +2307,9 @@ void unregister_kretprobes(struct kretprobe **rps, int num)
 	for (i = 0; i < num; i++) {
 		if (rps[i]->kp.addr) {
 			__unregister_kprobe_bottom(&rps[i]->kp);
+#ifndef CONFIG_KRETPROBE_ON_RETHOOK
 			free_rp_inst(rps[i]);
+#endif
 		}
 	}
 }
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index b62fd785b599b..47cebef78532c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1433,7 +1433,7 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 	fbuffer.regs = regs;
 	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->func = (unsigned long)tk->rp.kp.addr;
-	entry->ret_ip = (unsigned long)ri->ret_addr;
+	entry->ret_ip = get_kretprobe_retaddr(ri);
 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
 
 	trace_event_buffer_commit(&fbuffer);
@@ -1628,7 +1628,7 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		return;
 
 	entry->func = (unsigned long)tk->rp.kp.addr;
-	entry->ret_ip = (unsigned long)ri->ret_addr;
+	entry->ret_ip = get_kretprobe_retaddr(ri);
 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
 	perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs,
 			      head, NULL);

From f3a112c0c40dd96d53c8bdf3ea8d94d528f3b7b8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 26 Mar 2022 11:27:17 +0900
Subject: [PATCH 09/57] x86,rethook,kprobes: Replace kretprobe with rethook on
 x86

Replaces the kretprobe code with rethook on x86. With this patch,
kretprobe on x86 uses the rethook instead of kretprobe specific
trampoline code.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/164826163692.2455864.13745421016848209527.stgit@devnote2
---
 arch/x86/Kconfig                 |   1 +
 arch/x86/include/asm/unwind.h    |  23 +++---
 arch/x86/kernel/Makefile         |   1 +
 arch/x86/kernel/kprobes/common.h |   1 +
 arch/x86/kernel/kprobes/core.c   | 107 --------------------------
 arch/x86/kernel/rethook.c        | 125 +++++++++++++++++++++++++++++++
 arch/x86/kernel/unwind_orc.c     |  10 +--
 7 files changed, 144 insertions(+), 124 deletions(-)
 create mode 100644 arch/x86/kernel/rethook.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7340d9f01b621..ff45a27fc29ce 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -224,6 +224,7 @@ config X86
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_FUNCTION_ERROR_INJECTION
 	select HAVE_KRETPROBES
+	select HAVE_RETHOOK
 	select HAVE_KVM
 	select HAVE_LIVEPATCH			if X86_64
 	select HAVE_MIXED_BREAKPOINTS_REGS
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 2a1f8734416dc..7cede4dc21f00 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -4,7 +4,7 @@
 
 #include <linux/sched.h>
 #include <linux/ftrace.h>
-#include <linux/kprobes.h>
+#include <linux/rethook.h>
 #include <asm/ptrace.h>
 #include <asm/stacktrace.h>
 
@@ -16,7 +16,7 @@ struct unwind_state {
 	unsigned long stack_mask;
 	struct task_struct *task;
 	int graph_idx;
-#ifdef CONFIG_KRETPROBES
+#if defined(CONFIG_RETHOOK)
 	struct llist_node *kr_cur;
 #endif
 	bool error;
@@ -104,19 +104,18 @@ void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
 #endif
 
 static inline
-unsigned long unwind_recover_kretprobe(struct unwind_state *state,
-				       unsigned long addr, unsigned long *addr_p)
+unsigned long unwind_recover_rethook(struct unwind_state *state,
+				     unsigned long addr, unsigned long *addr_p)
 {
-#ifdef CONFIG_KRETPROBES
-	return is_kretprobe_trampoline(addr) ?
-		kretprobe_find_ret_addr(state->task, addr_p, &state->kr_cur) :
-		addr;
-#else
-	return addr;
+#ifdef CONFIG_RETHOOK
+	if (is_rethook_trampoline(addr))
+		return rethook_find_ret_addr(state->task, (unsigned long)addr_p,
+					     &state->kr_cur);
 #endif
+	return addr;
 }
 
-/* Recover the return address modified by kretprobe and ftrace_graph. */
+/* Recover the return address modified by rethook and ftrace_graph. */
 static inline
 unsigned long unwind_recover_ret_addr(struct unwind_state *state,
 				     unsigned long addr, unsigned long *addr_p)
@@ -125,7 +124,7 @@ unsigned long unwind_recover_ret_addr(struct unwind_state *state,
 
 	ret = ftrace_graph_ret_addr(state->task, &state->graph_idx,
 				    addr, addr_p);
-	return unwind_recover_kretprobe(state, ret, addr_p);
+	return unwind_recover_rethook(state, ret, addr_p);
 }
 
 /*
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 6462e3dd98f49..c41ef42adbe8a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
 obj-$(CONFIG_FTRACE_SYSCALLS)	+= ftrace.o
 obj-$(CONFIG_X86_TSC)		+= trace_clock.o
 obj-$(CONFIG_TRACING)		+= trace.o
+obj-$(CONFIG_RETHOOK)		+= rethook.o
 obj-$(CONFIG_CRASH_CORE)	+= crash_core_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)	+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC_CORE)	+= relocate_kernel_$(BITS).o crash.o
diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h
index 7d3a2e2daf011..c993521d49332 100644
--- a/arch/x86/kernel/kprobes/common.h
+++ b/arch/x86/kernel/kprobes/common.h
@@ -6,6 +6,7 @@
 
 #include <asm/asm.h>
 #include <asm/frame.h>
+#include <asm/insn.h>
 
 #ifdef CONFIG_X86_64
 
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 8ef933c03afac..7c4ab8870da44 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -811,18 +811,6 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 		= (regs->flags & X86_EFLAGS_IF);
 }
 
-void arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs)
-{
-	unsigned long *sara = stack_addr(regs);
-
-	ri->ret_addr = (kprobe_opcode_t *) *sara;
-	ri->fp = sara;
-
-	/* Replace the return addr with trampoline addr */
-	*sara = (unsigned long) &__kretprobe_trampoline;
-}
-NOKPROBE_SYMBOL(arch_prepare_kretprobe);
-
 static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
 			       struct kprobe_ctlblk *kcb)
 {
@@ -1023,101 +1011,6 @@ int kprobe_int3_handler(struct pt_regs *regs)
 }
 NOKPROBE_SYMBOL(kprobe_int3_handler);
 
-/*
- * When a retprobed function returns, this code saves registers and
- * calls trampoline_handler() runs, which calls the kretprobe's handler.
- */
-asm(
-	".text\n"
-	".global __kretprobe_trampoline\n"
-	".type __kretprobe_trampoline, @function\n"
-	"__kretprobe_trampoline:\n"
-#ifdef CONFIG_X86_64
-	ANNOTATE_NOENDBR
-	/* Push a fake return address to tell the unwinder it's a kretprobe. */
-	"	pushq $__kretprobe_trampoline\n"
-	UNWIND_HINT_FUNC
-	/* Save the 'sp - 8', this will be fixed later. */
-	"	pushq %rsp\n"
-	"	pushfq\n"
-	SAVE_REGS_STRING
-	"	movq %rsp, %rdi\n"
-	"	call trampoline_handler\n"
-	RESTORE_REGS_STRING
-	/* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */
-	"	addq $8, %rsp\n"
-	"	popfq\n"
-#else
-	/* Push a fake return address to tell the unwinder it's a kretprobe. */
-	"	pushl $__kretprobe_trampoline\n"
-	UNWIND_HINT_FUNC
-	/* Save the 'sp - 4', this will be fixed later. */
-	"	pushl %esp\n"
-	"	pushfl\n"
-	SAVE_REGS_STRING
-	"	movl %esp, %eax\n"
-	"	call trampoline_handler\n"
-	RESTORE_REGS_STRING
-	/* In trampoline_handler(), 'regs->flags' is copied to 'regs->sp'. */
-	"	addl $4, %esp\n"
-	"	popfl\n"
-#endif
-	ASM_RET
-	".size __kretprobe_trampoline, .-__kretprobe_trampoline\n"
-);
-NOKPROBE_SYMBOL(__kretprobe_trampoline);
-/*
- * __kretprobe_trampoline() skips updating frame pointer. The frame pointer
- * saved in trampoline_handler() points to the real caller function's
- * frame pointer. Thus the __kretprobe_trampoline() doesn't have a
- * standard stack frame with CONFIG_FRAME_POINTER=y.
- * Let's mark it non-standard function. Anyway, FP unwinder can correctly
- * unwind without the hint.
- */
-STACK_FRAME_NON_STANDARD_FP(__kretprobe_trampoline);
-
-/* This is called from kretprobe_trampoline_handler(). */
-void arch_kretprobe_fixup_return(struct pt_regs *regs,
-				 kprobe_opcode_t *correct_ret_addr)
-{
-	unsigned long *frame_pointer = &regs->sp + 1;
-
-	/* Replace fake return address with real one. */
-	*frame_pointer = (unsigned long)correct_ret_addr;
-}
-
-/*
- * Called from __kretprobe_trampoline
- */
-__used __visible void trampoline_handler(struct pt_regs *regs)
-{
-	unsigned long *frame_pointer;
-
-	/* fixup registers */
-	regs->cs = __KERNEL_CS;
-#ifdef CONFIG_X86_32
-	regs->gs = 0;
-#endif
-	regs->ip = (unsigned long)&__kretprobe_trampoline;
-	regs->orig_ax = ~0UL;
-	regs->sp += sizeof(long);
-	frame_pointer = &regs->sp + 1;
-
-	/*
-	 * The return address at 'frame_pointer' is recovered by the
-	 * arch_kretprobe_fixup_return() which called from the
-	 * kretprobe_trampoline_handler().
-	 */
-	kretprobe_trampoline_handler(regs, frame_pointer);
-
-	/*
-	 * Copy FLAGS to 'pt_regs::sp' so that __kretprobe_trapmoline()
-	 * can do RET right after POPF.
-	 */
-	regs->sp = regs->flags;
-}
-NOKPROBE_SYMBOL(trampoline_handler);
-
 int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 {
 	struct kprobe *cur = kprobe_running();
diff --git a/arch/x86/kernel/rethook.c b/arch/x86/kernel/rethook.c
new file mode 100644
index 0000000000000..af7e6713a07a7
--- /dev/null
+++ b/arch/x86/kernel/rethook.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * x86 implementation of rethook. Mostly copied from arch/x86/kernel/kprobes/core.c.
+ */
+#include <linux/bug.h>
+#include <linux/rethook.h>
+#include <linux/kprobes.h>
+#include <linux/objtool.h>
+
+#include "kprobes/common.h"
+
+__visible void arch_rethook_trampoline_callback(struct pt_regs *regs);
+
+#ifndef ANNOTATE_NOENDBR
+#define ANNOTATE_NOENDBR
+#endif
+
+/*
+ * When a target function returns, this code saves registers and calls
+ * arch_rethook_trampoline_callback(), which calls the rethook handler.
+ */
+asm(
+	".text\n"
+	".global arch_rethook_trampoline\n"
+	".type arch_rethook_trampoline, @function\n"
+	"arch_rethook_trampoline:\n"
+#ifdef CONFIG_X86_64
+	ANNOTATE_NOENDBR	/* This is only jumped from ret instruction */
+	/* Push a fake return address to tell the unwinder it's a rethook. */
+	"	pushq $arch_rethook_trampoline\n"
+	UNWIND_HINT_FUNC
+	/* Save the 'sp - 8', this will be fixed later. */
+	"	pushq %rsp\n"
+	"	pushfq\n"
+	SAVE_REGS_STRING
+	"	movq %rsp, %rdi\n"
+	"	call arch_rethook_trampoline_callback\n"
+	RESTORE_REGS_STRING
+	/* In the callback function, 'regs->flags' is copied to 'regs->sp'. */
+	"	addq $8, %rsp\n"
+	"	popfq\n"
+#else
+	/* Push a fake return address to tell the unwinder it's a rethook. */
+	"	pushl $arch_rethook_trampoline\n"
+	UNWIND_HINT_FUNC
+	/* Save the 'sp - 4', this will be fixed later. */
+	"	pushl %esp\n"
+	"	pushfl\n"
+	SAVE_REGS_STRING
+	"	movl %esp, %eax\n"
+	"	call arch_rethook_trampoline_callback\n"
+	RESTORE_REGS_STRING
+	/* In the callback function, 'regs->flags' is copied to 'regs->sp'. */
+	"	addl $4, %esp\n"
+	"	popfl\n"
+#endif
+	ASM_RET
+	".size arch_rethook_trampoline, .-arch_rethook_trampoline\n"
+);
+NOKPROBE_SYMBOL(arch_rethook_trampoline);
+
+/*
+ * Called from arch_rethook_trampoline
+ */
+__used __visible void arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+	unsigned long *frame_pointer;
+
+	/* fixup registers */
+	regs->cs = __KERNEL_CS;
+#ifdef CONFIG_X86_32
+	regs->gs = 0;
+#endif
+	regs->ip = (unsigned long)&arch_rethook_trampoline;
+	regs->orig_ax = ~0UL;
+	regs->sp += sizeof(long);
+	frame_pointer = &regs->sp + 1;
+
+	/*
+	 * The return address at 'frame_pointer' is recovered by the
+	 * arch_rethook_fixup_return() which called from this
+	 * rethook_trampoline_handler().
+	 */
+	rethook_trampoline_handler(regs, (unsigned long)frame_pointer);
+
+	/*
+	 * Copy FLAGS to 'pt_regs::sp' so that arch_rethook_trapmoline()
+	 * can do RET right after POPF.
+	 */
+	regs->sp = regs->flags;
+}
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+/*
+ * arch_rethook_trampoline() skips updating frame pointer. The frame pointer
+ * saved in arch_rethook_trampoline_callback() points to the real caller
+ * function's frame pointer. Thus the arch_rethook_trampoline() doesn't have
+ * a standard stack frame with CONFIG_FRAME_POINTER=y.
+ * Let's mark it non-standard function. Anyway, FP unwinder can correctly
+ * unwind without the hint.
+ */
+STACK_FRAME_NON_STANDARD_FP(arch_rethook_trampoline);
+
+/* This is called from rethook_trampoline_handler(). */
+void arch_rethook_fixup_return(struct pt_regs *regs,
+			       unsigned long correct_ret_addr)
+{
+	unsigned long *frame_pointer = &regs->sp + 1;
+
+	/* Replace fake return address with real one. */
+	*frame_pointer = correct_ret_addr;
+}
+NOKPROBE_SYMBOL(arch_rethook_fixup_return);
+
+void arch_rethook_prepare(struct rethook_node *rh, struct pt_regs *regs, bool mcount)
+{
+	unsigned long *stack = (unsigned long *)regs->sp;
+
+	rh->ret_addr = stack[0];
+	rh->frame = regs->sp;
+
+	/* Replace the return addr with trampoline addr */
+	stack[0] = (unsigned long) arch_rethook_trampoline;
+}
+NOKPROBE_SYMBOL(arch_rethook_prepare);
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 2de3c8c5eba9f..794fdef2501ab 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -550,15 +550,15 @@ bool unwind_next_frame(struct unwind_state *state)
 		}
 		/*
 		 * There is a small chance to interrupt at the entry of
-		 * __kretprobe_trampoline() where the ORC info doesn't exist.
-		 * That point is right after the RET to __kretprobe_trampoline()
+		 * arch_rethook_trampoline() where the ORC info doesn't exist.
+		 * That point is right after the RET to arch_rethook_trampoline()
 		 * which was modified return address.
-		 * At that point, the @addr_p of the unwind_recover_kretprobe()
+		 * At that point, the @addr_p of the unwind_recover_rethook()
 		 * (this has to point the address of the stack entry storing
 		 * the modified return address) must be "SP - (a stack entry)"
 		 * because SP is incremented by the RET.
 		 */
-		state->ip = unwind_recover_kretprobe(state, state->ip,
+		state->ip = unwind_recover_rethook(state, state->ip,
 				(unsigned long *)(state->sp - sizeof(long)));
 		state->regs = (struct pt_regs *)sp;
 		state->prev_regs = NULL;
@@ -573,7 +573,7 @@ bool unwind_next_frame(struct unwind_state *state)
 			goto err;
 		}
 		/* See UNWIND_HINT_TYPE_REGS case comment. */
-		state->ip = unwind_recover_kretprobe(state, state->ip,
+		state->ip = unwind_recover_rethook(state, state->ip,
 				(unsigned long *)(state->sp - sizeof(long)));
 
 		if (state->full_regs)

From 0ef6f5c09371f17e142814e6996d6dfb8741925b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 26 Mar 2022 11:27:28 +0900
Subject: [PATCH 10/57] x86,rethook: Fix arch_rethook_trampoline() to generate
 a complete pt_regs

Currently arch_rethook_trampoline() generates an almost complete
pt_regs on-stack, everything except regs->ss that is, that currently
points to the fake return address, which is not a valid segment
descriptor.

Since interpretation of regs->[sb]p should be done in the context of
regs->ss, and we have code actually doing that (see
arch/x86/lib/insn-eval.c for instance), complete the job by also
pushing ss.

This ensures that anybody who does do look at regs->ss doesn't
mysteriously malfunction, avoiding much future pain.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/bpf/164826164851.2455864.17272661073069737350.stgit@devnote2
---
 arch/x86/kernel/rethook.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/rethook.c b/arch/x86/kernel/rethook.c
index af7e6713a07a7..8a1c0111ae792 100644
--- a/arch/x86/kernel/rethook.c
+++ b/arch/x86/kernel/rethook.c
@@ -29,29 +29,31 @@ asm(
 	/* Push a fake return address to tell the unwinder it's a rethook. */
 	"	pushq $arch_rethook_trampoline\n"
 	UNWIND_HINT_FUNC
-	/* Save the 'sp - 8', this will be fixed later. */
+	"       pushq $" __stringify(__KERNEL_DS) "\n"
+	/* Save the 'sp - 16', this will be fixed later. */
 	"	pushq %rsp\n"
 	"	pushfq\n"
 	SAVE_REGS_STRING
 	"	movq %rsp, %rdi\n"
 	"	call arch_rethook_trampoline_callback\n"
 	RESTORE_REGS_STRING
-	/* In the callback function, 'regs->flags' is copied to 'regs->sp'. */
-	"	addq $8, %rsp\n"
+	/* In the callback function, 'regs->flags' is copied to 'regs->ss'. */
+	"	addq $16, %rsp\n"
 	"	popfq\n"
 #else
 	/* Push a fake return address to tell the unwinder it's a rethook. */
 	"	pushl $arch_rethook_trampoline\n"
 	UNWIND_HINT_FUNC
-	/* Save the 'sp - 4', this will be fixed later. */
+	"	pushl %ss\n"
+	/* Save the 'sp - 8', this will be fixed later. */
 	"	pushl %esp\n"
 	"	pushfl\n"
 	SAVE_REGS_STRING
 	"	movl %esp, %eax\n"
 	"	call arch_rethook_trampoline_callback\n"
 	RESTORE_REGS_STRING
-	/* In the callback function, 'regs->flags' is copied to 'regs->sp'. */
-	"	addl $4, %esp\n"
+	/* In the callback function, 'regs->flags' is copied to 'regs->ss'. */
+	"	addl $8, %esp\n"
 	"	popfl\n"
 #endif
 	ASM_RET
@@ -73,8 +75,8 @@ __used __visible void arch_rethook_trampoline_callback(struct pt_regs *regs)
 #endif
 	regs->ip = (unsigned long)&arch_rethook_trampoline;
 	regs->orig_ax = ~0UL;
-	regs->sp += sizeof(long);
-	frame_pointer = &regs->sp + 1;
+	regs->sp += 2*sizeof(long);
+	frame_pointer = (long *)(regs + 1);
 
 	/*
 	 * The return address at 'frame_pointer' is recovered by the
@@ -84,10 +86,10 @@ __used __visible void arch_rethook_trampoline_callback(struct pt_regs *regs)
 	rethook_trampoline_handler(regs, (unsigned long)frame_pointer);
 
 	/*
-	 * Copy FLAGS to 'pt_regs::sp' so that arch_rethook_trapmoline()
+	 * Copy FLAGS to 'pt_regs::ss' so that arch_rethook_trapmoline()
 	 * can do RET right after POPF.
 	 */
-	regs->sp = regs->flags;
+	*(unsigned long *)&regs->ss = regs->flags;
 }
 NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
 
@@ -105,7 +107,7 @@ STACK_FRAME_NON_STANDARD_FP(arch_rethook_trampoline);
 void arch_rethook_fixup_return(struct pt_regs *regs,
 			       unsigned long correct_ret_addr)
 {
-	unsigned long *frame_pointer = &regs->sp + 1;
+	unsigned long *frame_pointer = (void *)(regs + 1);
 
 	/* Replace fake return address with real one. */
 	*frame_pointer = correct_ret_addr;

From 45c23bf4d1a416d32e509f83719a7399e35bdaf9 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <mhiramat@kernel.org>
Date: Sat, 26 Mar 2022 11:27:40 +0900
Subject: [PATCH 11/57] x86,kprobes: Fix optprobe trampoline to generate
 complete pt_regs

Currently the optprobe trampoline template code ganerate an
almost complete pt_regs on-stack, everything except regs->ss.
The 'regs->ss' points to the top of stack, which is not a
valid segment decriptor.

As same as the rethook does, complete the job by also pushing ss.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/164826166027.2455864.14759128090648961900.stgit@devnote2
---
 arch/x86/kernel/kprobes/opt.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index b4a54a52aa595..e6b8c5362b945 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -106,7 +106,8 @@ asm (
 			".global optprobe_template_entry\n"
 			"optprobe_template_entry:\n"
 #ifdef CONFIG_X86_64
-			/* We don't bother saving the ss register */
+			"       pushq $" __stringify(__KERNEL_DS) "\n"
+			/* Save the 'sp - 8', this will be fixed later. */
 			"	pushq %rsp\n"
 			"	pushfq\n"
 			".global optprobe_template_clac\n"
@@ -121,14 +122,17 @@ asm (
 			".global optprobe_template_call\n"
 			"optprobe_template_call:\n"
 			ASM_NOP5
-			/* Move flags to rsp */
+			/* Copy 'regs->flags' into 'regs->ss'. */
 			"	movq 18*8(%rsp), %rdx\n"
-			"	movq %rdx, 19*8(%rsp)\n"
+			"	movq %rdx, 20*8(%rsp)\n"
 			RESTORE_REGS_STRING
-			/* Skip flags entry */
-			"	addq $8, %rsp\n"
+			/* Skip 'regs->flags' and 'regs->sp'. */
+			"	addq $16, %rsp\n"
+			/* And pop flags register from 'regs->ss'. */
 			"	popfq\n"
 #else /* CONFIG_X86_32 */
+			"	pushl %ss\n"
+			/* Save the 'sp - 4', this will be fixed later. */
 			"	pushl %esp\n"
 			"	pushfl\n"
 			".global optprobe_template_clac\n"
@@ -142,12 +146,13 @@ asm (
 			".global optprobe_template_call\n"
 			"optprobe_template_call:\n"
 			ASM_NOP5
-			/* Move flags into esp */
+			/* Copy 'regs->flags' into 'regs->ss'. */
 			"	movl 14*4(%esp), %edx\n"
-			"	movl %edx, 15*4(%esp)\n"
+			"	movl %edx, 16*4(%esp)\n"
 			RESTORE_REGS_STRING
-			/* Skip flags entry */
-			"	addl $4, %esp\n"
+			/* Skip 'regs->flags' and 'regs->sp'. */
+			"	addl $8, %esp\n"
+			/* And pop flags register from 'regs->ss'. */
 			"	popfl\n"
 #endif
 			".global optprobe_template_end\n"
@@ -179,6 +184,8 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
 		kprobes_inc_nmissed_count(&op->kp);
 	} else {
 		struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+		/* Adjust stack pointer */
+		regs->sp += sizeof(long);
 		/* Save skipped registers */
 		regs->cs = __KERNEL_CS;
 #ifdef CONFIG_X86_32

From a95a4d9b39b0324402569ed7395aae59b8fd2b11 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 28 Mar 2022 16:21:20 +0200
Subject: [PATCH 12/57] xsk: Do not write NULL in SW ring at allocation failure

For the case when xp_alloc_batch() is used but the batched allocation
cannot be used, there is a slow path that uses the non-batched
xp_alloc(). When it fails to allocate an entry, it returns NULL. The
current code wrote this NULL into the entry of the provided results
array (pointer to the driver SW ring usually) and returned. This might
not be what the driver expects and to make things simpler, just write
successfully allocated xdp_buffs into the SW ring,. The driver might
have information in there that is still important after an allocation
failure.

Note that at this point in time, there are no drivers using
xp_alloc_batch() that could trigger this slow path. But one might get
added.

Fixes: 47e4075df300 ("xsk: Batched buffer allocation for the pool")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220328142123.170157-2-maciej.fijalkowski@intel.com
---
 net/xdp/xsk_buff_pool.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index b34fca6ada869..af040ffa14ff3 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -591,9 +591,13 @@ u32 xp_alloc_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp, u32 max)
 	u32 nb_entries1 = 0, nb_entries2;
 
 	if (unlikely(pool->dma_need_sync)) {
+		struct xdp_buff *buff;
+
 		/* Slow path */
-		*xdp = xp_alloc(pool);
-		return !!*xdp;
+		buff = xp_alloc(pool);
+		if (buff)
+			*xdp = buff;
+		return !!buff;
 	}
 
 	if (unlikely(pool->free_list_cnt)) {

From 30d19d57d513821c58de4556e7445982ed22b923 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 28 Mar 2022 16:21:21 +0200
Subject: [PATCH 13/57] ice: xsk: Eliminate unnecessary loop iteration

The NIC Tx ring completion routine cleans entries from the ring in
batches. However, it processes one more batch than it is supposed
to. Note that this does not matter from a functionality point of view
since it will not find a set DD bit for the next batch and just exit
the loop. But from a performance perspective, it is faster to
terminate the loop before and not issue an expensive read over PCIe to
get the DD bit.

Fixes: 126cdfe1007a ("ice: xsk: Improve AF_XDP ZC Tx and use batching API")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220328142123.170157-3-maciej.fijalkowski@intel.com
---
 drivers/net/ethernet/intel/ice/ice_xsk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 88853a6ed9317..51427cb4971a5 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -754,7 +754,7 @@ static u16 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, int napi_budget)
 		next_dd = next_dd + tx_thresh;
 		if (next_dd >= desc_cnt)
 			next_dd = tx_thresh - 1;
-	} while (budget--);
+	} while (--budget);
 
 	xdp_ring->next_dd = next_dd;
 

From 0ec1713009c5cc24244c918def1cd14080be27e3 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 28 Mar 2022 16:21:22 +0200
Subject: [PATCH 14/57] ice: xsk: Stop Rx processing when ntc catches ntu

This can happen with big budget values and some breakage of re-filling
descriptors as we do not clear the entry that ntu is pointing at the end
of ice_alloc_rx_bufs_zc. So if ntc is at ntu then it might be the case
that status_error0 has an old, uncleared value and ntc would go over
with processing which would result in false results.

Break Rx loop when ntc == ntu to avoid broken behavior.

Fixes: 3876ff525de7 ("ice: xsk: Handle SW XDP ring wrap and bump tail more often")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220328142123.170157-4-maciej.fijalkowski@intel.com
---
 drivers/net/ethernet/intel/ice/ice_xsk.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 51427cb4971a5..dfbcaf08520ee 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -608,6 +608,9 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
+		if (unlikely(rx_ring->next_to_clean == rx_ring->next_to_use))
+			break;
+
 		xdp = *ice_xdp_buf(rx_ring, rx_ring->next_to_clean);
 
 		size = le16_to_cpu(rx_desc->wb.pkt_len) &

From 1ac2524de7b366633fc336db6c94062768d0ab03 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Mon, 28 Mar 2022 16:21:23 +0200
Subject: [PATCH 15/57] ice: xsk: Fix indexing in ice_tx_xsk_pool()

Ice driver tries to always create XDP rings array to be
num_possible_cpus() sized, regardless of user's queue count setting that
can be changed via ethtool -L for example.

Currently, ice_tx_xsk_pool() calculates the qid by decrementing the
ring->q_index by the count of XDP queues, but ring->q_index is set to 'i
+ vsi->alloc_txq'.

When user did ethtool -L $IFACE combined 1, alloc_txq is 1, but
vsi->num_xdp_txq is still num_possible_cpus(). Then, ice_tx_xsk_pool()
will do OOB access and in the final result ring would not get xsk_pool
pointer assigned. Then, each ice_xsk_wakeup() call will fail with error
and it will not be possible to get into NAPI and do the processing from
driver side.

Fix this by decrementing vsi->alloc_txq instead of vsi->num_xdp_txq from
ring-q_index in ice_tx_xsk_pool() so the calculation is reflected to the
setting of ring->q_index.

Fixes: 22bf877e528f ("ice: introduce XDP_TX fallback path")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220328142123.170157-5-maciej.fijalkowski@intel.com
---
 drivers/net/ethernet/intel/ice/ice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index b0b27bfcd7a28..d4f1874df7d0b 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -710,7 +710,7 @@ static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring)
 	struct ice_vsi *vsi = ring->vsi;
 	u16 qid;
 
-	qid = ring->q_index - vsi->num_xdp_txq;
+	qid = ring->q_index - vsi->alloc_txq;
 
 	if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps))
 		return NULL;

From ccaff3d56acc47c257a99b2807b7c78a9467cf09 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 25 Mar 2022 13:03:04 -0700
Subject: [PATCH 16/57] selftests/bpf: Fix clang compilation errors

llvm upstream patch ([1]) added to issue warning for code like
  void test() {
    int j = 0;
    for (int i = 0; i < 1000; i++)
            j++;
    return;
  }

This triggered several errors in selftests/bpf build since
compilation flag -Werror is used.
  ...
  test_lpm_map.c:212:15: error: variable 'n_matches' set but not used [-Werror,-Wunused-but-set-variable]
        size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
                     ^
  test_lpm_map.c:212:26: error: variable 'n_matches_after_delete' set but not used [-Werror,-Wunused-but-set-variable]
        size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
                                ^
  ...
  prog_tests/get_stack_raw_tp.c:32:15: error: variable 'cnt' set but not used [-Werror,-Wunused-but-set-variable]
        static __u64 cnt;
                     ^
  ...

  For test_lpm_map.c, 'n_matches'/'n_matches_after_delete' are changed to be volatile
  in order to silent the warning. I didn't remove these two declarations since
  they are referenced in a commented code which might be used by people in certain
  cases. For get_stack_raw_tp.c, the variable 'cnt' is removed.

  [1] https://reviews.llvm.org/D122271

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220325200304.2915588-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c | 3 ---
 tools/testing/selftests/bpf/test_lpm_map.c                | 3 ++-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index e834a01de16ac..16048978a1eff 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -29,11 +29,8 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
 	 */
 	struct get_stack_trace_t e;
 	int i, num_stack;
-	static __u64 cnt;
 	struct ksym *ks;
 
-	cnt++;
-
 	memset(&e, 0, sizeof(e));
 	memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e));
 
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index baa3e3ecae824..aa294612e0a7d 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -209,7 +209,8 @@ static void test_lpm_order(void)
 static void test_lpm_map(int keysize)
 {
 	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
-	size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+	volatile size_t n_matches, n_matches_after_delete;
+	size_t i, j, n_nodes, n_lookups;
 	struct tlpm_node *t, *list = NULL;
 	struct bpf_lpm_trie_key *key;
 	uint8_t *data, *value;

From f19c44452b58a84d95e209b847f5495d91c9983a Mon Sep 17 00:00:00 2001
From: Martin Varghese <martin.varghese@nokia.com>
Date: Mon, 28 Mar 2022 11:11:48 +0530
Subject: [PATCH 17/57] openvswitch: Fixed nd target mask field in the flow
 dump.

IPv6 nd target mask was not getting populated in flow dump.

In the function __ovs_nla_put_key the icmp code mask field was checked
instead of icmp code key field to classify the flow as neighbour discovery.

ufid:bdfbe3e5-60c2-43b0-a5ff-dfcac1c37328, recirc_id(0),dp_hash(0/0),
skb_priority(0/0),in_port(ovs-nm1),skb_mark(0/0),ct_state(0/0),
ct_zone(0/0),ct_mark(0/0),ct_label(0/0),
eth(src=00:00:00:00:00:00/00:00:00:00:00:00,
dst=00:00:00:00:00:00/00:00:00:00:00:00),
eth_type(0x86dd),
ipv6(src=::/::,dst=::/::,label=0/0,proto=58,tclass=0/0,hlimit=0/0,frag=no),
icmpv6(type=135,code=0),
nd(target=2001::2/::,
sll=00:00:00:00:00:00/00:00:00:00:00:00,
tll=00:00:00:00:00:00/00:00:00:00:00:00),
packets:10, bytes:860, used:0.504s, dp:ovs, actions:ovs-nm2

Fixes: e64457191a25 (openvswitch: Restructure datapath.c and flow.c)
Signed-off-by: Martin Varghese <martin.varghese@nokia.com>
Link: https://lore.kernel.org/r/20220328054148.3057-1-martinvarghesenokia@gmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/openvswitch/flow_netlink.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5176f6ccac8ef..cc282a58b75b9 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -2230,8 +2230,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
 			icmpv6_key->icmpv6_type = ntohs(output->tp.src);
 			icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
 
-			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
-			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+			if (swkey->tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+			    swkey->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
 				struct ovs_key_nd *nd_key;
 
 				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));

From 5352a761308397a0e6250fdc629bb3f615b94747 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming@zju.edu.cn>
Date: Mon, 28 Mar 2022 21:00:14 +0800
Subject: [PATCH 18/57] ax25: fix UAF bug in ax25_send_control()

There are UAF bugs in ax25_send_control(), when we call ax25_release()
to deallocate ax25_dev. The possible race condition is shown below:

      (Thread 1)              |     (Thread 2)
ax25_dev_device_up() //(1)    |
                              | ax25_kill_by_device()
ax25_bind()          //(2)    |
ax25_connect()                | ...
 ax25->state = AX25_STATE_1   |
 ...                          | ax25_dev_device_down() //(3)

      (Thread 3)
ax25_release()                |
 ax25_dev_put()  //(4) FREE   |
 case AX25_STATE_1:           |
  ax25_send_control()         |
   alloc_skb()       //USE    |

The refcount of ax25_dev increases in position (1) and (2), and
decreases in position (3) and (4). The ax25_dev will be freed
before dereference sites in ax25_send_control().

The following is part of the report:

[  102.297448] BUG: KASAN: use-after-free in ax25_send_control+0x33/0x210
[  102.297448] Read of size 8 at addr ffff888009e6e408 by task ax25_close/602
[  102.297448] Call Trace:
[  102.303751]  ax25_send_control+0x33/0x210
[  102.303751]  ax25_release+0x356/0x450
[  102.305431]  __sock_release+0x6d/0x120
[  102.305431]  sock_close+0xf/0x20
[  102.305431]  __fput+0x11f/0x420
[  102.305431]  task_work_run+0x86/0xd0
[  102.307130]  get_signal+0x1075/0x1220
[  102.308253]  arch_do_signal_or_restart+0x1df/0xc00
[  102.308253]  exit_to_user_mode_prepare+0x150/0x1e0
[  102.308253]  syscall_exit_to_user_mode+0x19/0x50
[  102.308253]  do_syscall_64+0x48/0x90
[  102.308253]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  102.308253] RIP: 0033:0x405ae7

This patch defers the free operation of ax25_dev and net_device after
all corresponding dereference sites in ax25_release() to avoid UAF.

Fixes: 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ax25/af_ax25.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 992b6e5d85d71..f5686c463bc0d 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -991,10 +991,6 @@ static int ax25_release(struct socket *sock)
 	sock_orphan(sk);
 	ax25 = sk_to_ax25(sk);
 	ax25_dev = ax25->ax25_dev;
-	if (ax25_dev) {
-		dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
-		ax25_dev_put(ax25_dev);
-	}
 
 	if (sk->sk_type == SOCK_SEQPACKET) {
 		switch (ax25->state) {
@@ -1056,6 +1052,10 @@ static int ax25_release(struct socket *sock)
 		sk->sk_state_change(sk);
 		ax25_destroy_socket(ax25);
 	}
+	if (ax25_dev) {
+		dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
+		ax25_dev_put(ax25_dev);
+	}
 
 	sock->sk   = NULL;
 	release_sock(sk);

From 82e31755e55fbcea6a9dfaae5fe4860ade17cbc0 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming@zju.edu.cn>
Date: Mon, 28 Mar 2022 21:00:15 +0800
Subject: [PATCH 19/57] ax25: Fix UAF bugs in ax25 timers

There are race conditions that may lead to UAF bugs in
ax25_heartbeat_expiry(), ax25_t1timer_expiry(), ax25_t2timer_expiry(),
ax25_t3timer_expiry() and ax25_idletimer_expiry(), when we call
ax25_release() to deallocate ax25_dev.

One of the UAF bugs caused by ax25_release() is shown below:

      (Thread 1)                    |      (Thread 2)
ax25_dev_device_up() //(1)          |
...                                 | ax25_kill_by_device()
ax25_bind()          //(2)          |
ax25_connect()                      | ...
 ax25_std_establish_data_link()     |
  ax25_start_t1timer()              | ax25_dev_device_down() //(3)
   mod_timer(&ax25->t1timer,..)     |
                                    | ax25_release()
   (wait a time)                    |  ...
                                    |  ax25_dev_put(ax25_dev) //(4)FREE
   ax25_t1timer_expiry()            |
    ax25->ax25_dev->values[..] //USE|  ...
     ...                            |

We increase the refcount of ax25_dev in position (1) and (2), and
decrease the refcount of ax25_dev in position (3) and (4).
The ax25_dev will be freed in position (4) and be used in
ax25_t1timer_expiry().

The fail log is shown below:
==============================================================

[  106.116942] BUG: KASAN: use-after-free in ax25_t1timer_expiry+0x1c/0x60
[  106.116942] Read of size 8 at addr ffff88800bda9028 by task swapper/0/0
[  106.116942] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-06123-g0905eec574
[  106.116942] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-14
[  106.116942] Call Trace:
...
[  106.116942]  ax25_t1timer_expiry+0x1c/0x60
[  106.116942]  call_timer_fn+0x122/0x3d0
[  106.116942]  __run_timers.part.0+0x3f6/0x520
[  106.116942]  run_timer_softirq+0x4f/0xb0
[  106.116942]  __do_softirq+0x1c2/0x651
...

This patch adds del_timer_sync() in ax25_release(), which could ensure
that all timers stop before we deallocate ax25_dev.

Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/ax25/af_ax25.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index f5686c463bc0d..363d47f945324 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1053,6 +1053,11 @@ static int ax25_release(struct socket *sock)
 		ax25_destroy_socket(ax25);
 	}
 	if (ax25_dev) {
+		del_timer_sync(&ax25->timer);
+		del_timer_sync(&ax25->t1timer);
+		del_timer_sync(&ax25->t2timer);
+		del_timer_sync(&ax25->t3timer);
+		del_timer_sync(&ax25->idletimer);
 		dev_put_track(ax25_dev->dev, &ax25_dev->dev_tracker);
 		ax25_dev_put(ax25_dev);
 	}

From ad7da1ce5749c0eb4f09dd7e5510123be56f10fb Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Tue, 29 Mar 2022 00:03:50 +0200
Subject: [PATCH 20/57] net: lan966x: fix kernel oops on ioctl when I/F is down

ioctls handled by phy_mii_ioctl() will cause a kernel oops when the
interface is down. Fix it by making sure there is a PHY attached.

Fixes: 735fec995b21 ("net: lan966x: Implement SIOCSHWTSTAMP and SIOCGHWTSTAMP")
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Link: https://lore.kernel.org/r/20220328220350.3118969-1-michael@walle.cc
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/microchip/lan966x/lan966x_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
index e1bcb28039dc7..1f8c67f0261bf 100644
--- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
+++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c
@@ -408,6 +408,9 @@ static int lan966x_port_ioctl(struct net_device *dev, struct ifreq *ifr,
 		}
 	}
 
+	if (!dev->phydev)
+		return -ENODEV;
+
 	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 

From 6094e391e643fddd65d4b938c1d499a838dd4907 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Sat, 26 Mar 2022 01:37:31 +0530
Subject: [PATCH 21/57] dt-bindings: net: qcom,ethqos: Document SM8150 SoC
 compatible

SM8150 has an ethernet controller and it needs a different
configuration, so add a new compatible for this.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
[bhsharma: Massage the commit log]
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Link: https://lore.kernel.org/r/20220325200731.1585554-1-bhupesh.sharma@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/qcom,ethqos.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.txt b/Documentation/devicetree/bindings/net/qcom,ethqos.txt
index fcf5035810b57..1f5746849a716 100644
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.txt
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.txt
@@ -7,7 +7,9 @@ This device has following properties:
 
 Required properties:
 
-- compatible: Should be qcom,qcs404-ethqos"
+- compatible: Should be one of:
+		"qcom,qcs404-ethqos"
+		"qcom,sm8150-ethqos"
 
 - reg: Address and length of the register set for the device
 

From 866b7a278cdb51eb158cd8513bc7438fc857804a Mon Sep 17 00:00:00 2001
From: Zheng Yongjun <zhengyongjun3@huawei.com>
Date: Tue, 29 Mar 2022 09:08:00 +0000
Subject: [PATCH 22/57] net: dsa: felix: fix possible NULL pointer dereference

As the possible failure of the allocation, kzalloc() may return NULL
pointer.
Therefore, it should be better to check the 'sgi' in order to prevent
the dereference of NULL pointer.

Fixes: 23ae3a7877718 ("net: dsa: felix: add stream gate settings for psfp").
Signed-off-by: Zheng Yongjun <zhengyongjun3@huawei.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20220329090800.130106-1-zhengyongjun3@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 62d52e0874e9d..8d382b27e6257 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1928,6 +1928,10 @@ static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port,
 		case FLOW_ACTION_GATE:
 			size = struct_size(sgi, entries, a->gate.num_entries);
 			sgi = kzalloc(size, GFP_KERNEL);
+			if (!sgi) {
+				ret = -ENOMEM;
+				goto err;
+			}
 			vsc9959_psfp_parse_gate(a, sgi);
 			ret = vsc9959_psfp_sgi_table_add(ocelot, sgi);
 			if (ret) {

From 8f0588e80e33273c2fa219da4622affab0cdd22f Mon Sep 17 00:00:00 2001
From: Jonathan Lemon <jonathan.lemon@gmail.com>
Date: Tue, 29 Mar 2022 09:03:54 -0700
Subject: [PATCH 23/57] ptp: ocp: handle error from nvmem_device_find

nvmem_device_find returns a valid pointer or IS_ERR().
Handle this properly.

Fixes: 0cfcdd1ebcfe ("ptp: ocp: add nvmem interface for accessing eeprom")
Signed-off-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Link: https://lore.kernel.org/r/20220329160354.4035-1-jonathan.lemon@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/ptp/ptp_ocp.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/ptp/ptp_ocp.c b/drivers/ptp/ptp_ocp.c
index c3d0fcf609e37..0feaa4b453175 100644
--- a/drivers/ptp/ptp_ocp.c
+++ b/drivers/ptp/ptp_ocp.c
@@ -1214,10 +1214,9 @@ ptp_ocp_nvmem_device_get(struct ptp_ocp *bp, const void * const tag)
 static inline void
 ptp_ocp_nvmem_device_put(struct nvmem_device **nvmemp)
 {
-	if (*nvmemp != NULL) {
+	if (!IS_ERR_OR_NULL(*nvmemp))
 		nvmem_device_put(*nvmemp);
-		*nvmemp = NULL;
-	}
+	*nvmemp = NULL;
 }
 
 static void
@@ -1241,13 +1240,15 @@ ptp_ocp_read_eeprom(struct ptp_ocp *bp)
 		}
 		if (!nvmem) {
 			nvmem = ptp_ocp_nvmem_device_get(bp, tag);
-			if (!nvmem)
-				goto out;
+			if (IS_ERR(nvmem)) {
+				ret = PTR_ERR(nvmem);
+				goto fail;
+			}
 		}
 		ret = nvmem_device_read(nvmem, map->off, map->len,
 					BP_MAP_ENTRY_ADDR(bp, map));
 		if (ret != map->len)
-			goto read_fail;
+			goto fail;
 	}
 
 	bp->has_eeprom_data = true;
@@ -1256,7 +1257,7 @@ ptp_ocp_read_eeprom(struct ptp_ocp *bp)
 	ptp_ocp_nvmem_device_put(&nvmem);
 	return;
 
-read_fail:
+fail:
 	dev_err(&bp->pdev->dev, "could not read eeprom: %d\n", ret);
 	goto out;
 }

From c9ad266bbef58dcbb6e74a6dbc5c4c2ed166e9b7 Mon Sep 17 00:00:00 2001
From: Martin Habets <habetsm.xilinx@gmail.com>
Date: Tue, 29 Mar 2022 17:07:49 +0100
Subject: [PATCH 24/57] sfc: Avoid NULL pointer dereference on systems without
 numa awareness
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On such systems cpumask_of_node() returns NULL, which bitmap
operations are not happy with.

Fixes: c265b569a45f ("sfc: default config to 1 channel/core in local NUMA node only")
Fixes: 09a99ab16c60 ("sfc: set affinity hints in local NUMA node only")
Signed-off-by: Martin Habets <habetsm.xilinx@gmail.com>
Reviewed-by: Íñigo Huguet <ihuguet@redhat.com>
Link: https://lore.kernel.org/r/164857006953.8140.3265568858101821256.stgit@palantir17.mph.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/sfc/efx_channels.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c
index d6fdcdc530caf..f9064532beb66 100644
--- a/drivers/net/ethernet/sfc/efx_channels.c
+++ b/drivers/net/ethernet/sfc/efx_channels.c
@@ -91,11 +91,9 @@ static unsigned int count_online_cores(struct efx_nic *efx, bool local_node)
 	}
 
 	cpumask_copy(filter_mask, cpu_online_mask);
-	if (local_node) {
-		int numa_node = pcibus_to_node(efx->pci_dev->bus);
-
-		cpumask_and(filter_mask, filter_mask, cpumask_of_node(numa_node));
-	}
+	if (local_node)
+		cpumask_and(filter_mask, filter_mask,
+			    cpumask_of_pcibus(efx->pci_dev->bus));
 
 	count = 0;
 	for_each_cpu(cpu, filter_mask) {
@@ -386,8 +384,7 @@ int efx_probe_interrupts(struct efx_nic *efx)
 #if defined(CONFIG_SMP)
 void efx_set_interrupt_affinity(struct efx_nic *efx)
 {
-	int numa_node = pcibus_to_node(efx->pci_dev->bus);
-	const struct cpumask *numa_mask = cpumask_of_node(numa_node);
+	const struct cpumask *numa_mask = cpumask_of_pcibus(efx->pci_dev->bus);
 	struct efx_channel *channel;
 	unsigned int cpu;
 

From ec59f128a9bd4255798abb1e06ac3b442f46ef68 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 29 Mar 2022 21:31:24 -0400
Subject: [PATCH 25/57] wireguard: queueing: use CFI-safe ptr_ring cleanup
 function

We make too nuanced use of ptr_ring to entirely move to the skb_array
wrappers, but we at least should avoid the naughty function pointer cast
when cleaning up skbs. Otherwise RAP/CFI will honk at us. This patch
uses the __skb_array_destroy_skb wrapper for the cleanup, rather than
directly providing kfree_skb, which is what other drivers in the same
situation do too.

Reported-by: PaX Team <pageexec@freemail.hu>
Fixes: 886fcee939ad ("wireguard: receive: use ring buffer for incoming handshakes")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wireguard/queueing.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c
index 1de413b19e342..8084e7408c0ae 100644
--- a/drivers/net/wireguard/queueing.c
+++ b/drivers/net/wireguard/queueing.c
@@ -4,6 +4,7 @@
  */
 
 #include "queueing.h"
+#include <linux/skb_array.h>
 
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
@@ -42,7 +43,7 @@ void wg_packet_queue_free(struct crypt_queue *queue, bool purge)
 {
 	free_percpu(queue->worker);
 	WARN_ON(!purge && !__ptr_ring_empty(&queue->ring));
-	ptr_ring_cleanup(&queue->ring, purge ? (void(*)(void*))kfree_skb : NULL);
+	ptr_ring_cleanup(&queue->ring, purge ? __skb_array_destroy_skb : NULL);
 }
 
 #define NEXT(skb) ((skb)->prev)

From ca93ca23409b827b48a2fc0a692496d3f7b67944 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 29 Mar 2022 21:31:25 -0400
Subject: [PATCH 26/57] wireguard: selftests: simplify RNG seeding

The seed_rng() function was written to work across lots of old kernels,
back when WireGuard used a big compatibility layer. Now that things have
evolved, we can vastly simplify this, by just marking the RNG as seeded.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/wireguard/qemu/init.c | 26 +++++--------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index c9698120ac9d8..0b45055d9de0e 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -56,26 +56,14 @@ static void print_banner(void)
 
 static void seed_rng(void)
 {
-	int fd;
-	struct {
-		int entropy_count;
-		int buffer_size;
-		unsigned char buffer[256];
-	} entropy = {
-		.entropy_count = sizeof(entropy.buffer) * 8,
-		.buffer_size = sizeof(entropy.buffer),
-		.buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
-	};
+	int bits = 256, fd;
 
-	if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
-		panic("mknod(/dev/urandom)");
-	fd = open("/dev/urandom", O_WRONLY);
+	pretty_message("[+] Fake seeding RNG...");
+	fd = open("/dev/random", O_WRONLY);
 	if (fd < 0)
-		panic("open(urandom)");
-	for (int i = 0; i < 256; ++i) {
-		if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
-			panic("ioctl(urandom)");
-	}
+		panic("open(random)");
+	if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0)
+		panic("ioctl(RNDADDTOENTCNT)");
 	close(fd);
 }
 
@@ -270,10 +258,10 @@ static void check_leaks(void)
 
 int main(int argc, char *argv[])
 {
-	seed_rng();
 	ensure_console();
 	print_banner();
 	mount_filesystems();
+	seed_rng();
 	kmod_selftests();
 	enable_logging();
 	clear_leaks();

From bbbf962d9460194993ee1943a793a0a0af4a7fbf Mon Sep 17 00:00:00 2001
From: Wang Hai <wanghai38@huawei.com>
Date: Tue, 29 Mar 2022 21:31:26 -0400
Subject: [PATCH 27/57] wireguard: socket: free skb in send6 when ipv6 is
 disabled

I got a memory leak report:

unreferenced object 0xffff8881191fc040 (size 232):
  comm "kworker/u17:0", pid 23193, jiffies 4295238848 (age 3464.870s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<ffffffff814c3ef4>] slab_post_alloc_hook+0x84/0x3b0
    [<ffffffff814c8977>] kmem_cache_alloc_node+0x167/0x340
    [<ffffffff832974fb>] __alloc_skb+0x1db/0x200
    [<ffffffff82612b5d>] wg_socket_send_buffer_to_peer+0x3d/0xc0
    [<ffffffff8260e94a>] wg_packet_send_handshake_initiation+0xfa/0x110
    [<ffffffff8260ec81>] wg_packet_handshake_send_worker+0x21/0x30
    [<ffffffff8119c558>] process_one_work+0x2e8/0x770
    [<ffffffff8119ca2a>] worker_thread+0x4a/0x4b0
    [<ffffffff811a88e0>] kthread+0x120/0x160
    [<ffffffff8100242f>] ret_from_fork+0x1f/0x30

In function wg_socket_send_buffer_as_reply_to_skb() or wg_socket_send_
buffer_to_peer(), the semantics of send6() is required to free skb. But
when CONFIG_IPV6 is disable, kfree_skb() is missing. This patch adds it
to fix this bug.

Signed-off-by: Wang Hai <wanghai38@huawei.com>
Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wireguard/socket.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index 6f07b949cb81d..467eef0e563bf 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -160,6 +160,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
 	rcu_read_unlock_bh();
 	return ret;
 #else
+	kfree_skb(skb);
 	return -EAFNOSUPPORT;
 #endif
 }

From 77fc73ac89be96ec8f39e8efa53885caa7cb3645 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Tue, 29 Mar 2022 21:31:27 -0400
Subject: [PATCH 28/57] wireguard: socket: ignore v6 endpoints when ipv6 is
 disabled

The previous commit fixed a memory leak on the send path in the event
that IPv6 is disabled at compile time, but how did a packet even arrive
there to begin with? It turns out we have previously allowed IPv6
endpoints even when IPv6 support is disabled at compile time. This is
awkward and inconsistent. Instead, let's just ignore all things IPv6,
the same way we do other malformed endpoints, in the case where IPv6 is
disabled.

Fixes: e7096c131e51 ("net: WireGuard secure network tunnel")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wireguard/socket.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireguard/socket.c b/drivers/net/wireguard/socket.c
index 467eef0e563bf..0414d7a6ce741 100644
--- a/drivers/net/wireguard/socket.c
+++ b/drivers/net/wireguard/socket.c
@@ -242,7 +242,7 @@ int wg_socket_endpoint_from_skb(struct endpoint *endpoint,
 		endpoint->addr4.sin_addr.s_addr = ip_hdr(skb)->saddr;
 		endpoint->src4.s_addr = ip_hdr(skb)->daddr;
 		endpoint->src_if4 = skb->skb_iif;
-	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+	} else if (IS_ENABLED(CONFIG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) {
 		endpoint->addr6.sin6_family = AF_INET6;
 		endpoint->addr6.sin6_port = udp_hdr(skb)->source;
 		endpoint->addr6.sin6_addr = ipv6_hdr(skb)->saddr;
@@ -285,7 +285,7 @@ void wg_socket_set_peer_endpoint(struct wg_peer *peer,
 		peer->endpoint.addr4 = endpoint->addr4;
 		peer->endpoint.src4 = endpoint->src4;
 		peer->endpoint.src_if4 = endpoint->src_if4;
-	} else if (endpoint->addr.sa_family == AF_INET6) {
+	} else if (IS_ENABLED(CONFIG_IPV6) && endpoint->addr.sa_family == AF_INET6) {
 		peer->endpoint.addr6 = endpoint->addr6;
 		peer->endpoint.src6 = endpoint->src6;
 	} else {

From f9512d654f62604664251dedd437a22fe484974a Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 29 Mar 2022 18:20:25 -0700
Subject: [PATCH 29/57] net: sparx5: uses, depends on BRIDGE or !BRIDGE

Fix build errors when BRIDGE=m and SPARX5_SWITCH=y:

riscv64-linux-ld: drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.o: in function `.L305':
sparx5_switchdev.c:(.text+0xdb0): undefined reference to `br_vlan_enabled'
riscv64-linux-ld: drivers/net/ethernet/microchip/sparx5/sparx5_switchdev.o: in function `.L283':
sparx5_switchdev.c:(.text+0xee0): undefined reference to `br_vlan_enabled'

Fixes: 3cfa11bac9bb ("net: sparx5: add the basic sparx5 driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Horatiu Vultur <horatiu.vultur@microchip.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Paolo Abeni <pabeni@redhat.com>
Link: https://lore.kernel.org/r/20220330012025.29560-1-rdunlap@infradead.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/microchip/sparx5/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig
index 85b24edb65d51..cc5e48e1bb4c3 100644
--- a/drivers/net/ethernet/microchip/sparx5/Kconfig
+++ b/drivers/net/ethernet/microchip/sparx5/Kconfig
@@ -5,6 +5,7 @@ config SPARX5_SWITCH
 	depends on OF
 	depends on ARCH_SPARX5 || COMPILE_TEST
 	depends on PTP_1588_CLOCK_OPTIONAL
+	depends on BRIDGE || BRIDGE=n
 	select PHYLINK
 	select PHY_SPARX5_SERDES
 	select RESET_CONTROLLER

From e382fea8ae54f5bb62869c6b69b33993d43adeca Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Mon, 28 Mar 2022 13:36:11 +0200
Subject: [PATCH 30/57] can: isotp: restore accidentally removed MSG_PEEK
 feature

In commit 42bf50a1795a ("can: isotp: support MSG_TRUNC flag when
reading from socket") a new check for recvmsg flags has been
introduced that only checked for the flags that are handled in
isotp_recvmsg() itself.

This accidentally removed the MSG_PEEK feature flag which is processed
later in the call chain in __skb_try_recv_from_queue().

Add MSG_PEEK to the set of valid flags to restore the feature.

Fixes: 42bf50a1795a ("can: isotp: support MSG_TRUNC flag when reading from socket")
Link: https://github.com/linux-can/can-utils/issues/347#issuecomment-1079554254
Link: https://lore.kernel.org/all/20220328113611.3691-1-socketcan@hartkopp.net
Reported-by: Derek Will <derekrobertwill@gmail.com>
Suggested-by: Derek Will <derekrobertwill@gmail.com>
Tested-by: Derek Will <derekrobertwill@gmail.com>
Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 net/can/isotp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/can/isotp.c b/net/can/isotp.c
index f6f8ba1f816d4..bafb0fb5f0e0e 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -1050,7 +1050,7 @@ static int isotp_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 	int noblock = flags & MSG_DONTWAIT;
 	int ret = 0;
 
-	if (flags & ~(MSG_DONTWAIT | MSG_TRUNC))
+	if (flags & ~(MSG_DONTWAIT | MSG_TRUNC | MSG_PEEK))
 		return -EINVAL;
 
 	if (!so->bound)

From fa7b514d2b2894e052b8e94c7a29feb98e90093f Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Sat, 19 Mar 2022 08:31:28 -0700
Subject: [PATCH 31/57] can: mcp251xfd: mcp251xfd_register_get_dev_id(): fix
 return of error value

Clang static analysis reports this issue:

| mcp251xfd-core.c:1813:7: warning: The left operand
|   of '&' is a garbage value
|   FIELD_GET(MCP251XFD_REG_DEVID_ID_MASK, dev_id),
|   ^                                      ~~~~~~

dev_id is set in a successful call to mcp251xfd_register_get_dev_id().
Though the status of calls made by mcp251xfd_register_get_dev_id() are
checked and handled, their status' are not returned. So return err.

Fixes: 55e5b97f003e ("can: mcp25xxfd: add driver for Microchip MCP25xxFD SPI CAN")
Link: https://lore.kernel.org/all/20220319153128.2164120-1-trix@redhat.com
Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 325024be7b045..f9dd8fdba12bc 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -1786,7 +1786,7 @@ mcp251xfd_register_get_dev_id(const struct mcp251xfd_priv *priv, u32 *dev_id,
  out_kfree_buf_rx:
 	kfree(buf_rx);
 
-	return 0;
+	return err;
 }
 
 #define MCP251XFD_QUIRK_ACTIVE(quirk) \

From 2e8e79c416aae1de224c0f1860f2e3350fa171f8 Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Thu, 17 Mar 2022 08:57:35 +0100
Subject: [PATCH 32/57] can: m_can: m_can_tx_handler(): fix use after free of
 skb

can_put_echo_skb() will clone skb then free the skb. Move the
can_put_echo_skb() for the m_can version 3.0.x directly before the
start of the xmit in hardware, similar to the 3.1.x branch.

Fixes: 80646733f11c ("can: m_can: update to support CAN FD features")
Link: https://lore.kernel.org/all/20220317081305.739554-1-mkl@pengutronix.de
Cc: stable@vger.kernel.org
Reported-by: Hangyu Hua <hbh25y@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/m_can/m_can.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c
index 1a4b56f6fa8c6..b3b5bc1c803b3 100644
--- a/drivers/net/can/m_can/m_can.c
+++ b/drivers/net/can/m_can/m_can.c
@@ -1637,8 +1637,6 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
 		if (err)
 			goto out_fail;
 
-		can_put_echo_skb(skb, dev, 0, 0);
-
 		if (cdev->can.ctrlmode & CAN_CTRLMODE_FD) {
 			cccr = m_can_read(cdev, M_CAN_CCCR);
 			cccr &= ~CCCR_CMR_MASK;
@@ -1655,6 +1653,9 @@ static netdev_tx_t m_can_tx_handler(struct m_can_classdev *cdev)
 			m_can_write(cdev, M_CAN_CCCR, cccr);
 		}
 		m_can_write(cdev, M_CAN_TXBTIE, 0x1);
+
+		can_put_echo_skb(skb, dev, 0, 0);
+
 		m_can_write(cdev, M_CAN_TXBAR, 0x1);
 		/* End of xmit function for version 3.0.x */
 	} else {

From c70222752228a62135cee3409dccefd494a24646 Mon Sep 17 00:00:00 2001
From: Hangyu Hua <hbh25y@gmail.com>
Date: Mon, 28 Feb 2022 16:36:39 +0800
Subject: [PATCH 33/57] can: ems_usb: ems_usb_start_xmit(): fix double
 dev_kfree_skb() in error path

There is no need to call dev_kfree_skb() when usb_submit_urb() fails
beacause can_put_echo_skb() deletes the original skb and
can_free_echo_skb() deletes the cloned skb.

Link: https://lore.kernel.org/all/20220228083639.38183-1-hbh25y@gmail.com
Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface")
Cc: stable@vger.kernel.org
Cc: Sebastian Haas <haas@ems-wuensche.com>
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/ems_usb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c
index 7bedceffdfa36..bbec3311d8934 100644
--- a/drivers/net/can/usb/ems_usb.c
+++ b/drivers/net/can/usb/ems_usb.c
@@ -819,7 +819,6 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne
 
 		usb_unanchor_urb(urb);
 		usb_free_coherent(dev->udev, size, buf, urb->transfer_dma);
-		dev_kfree_skb(skb);
 
 		atomic_dec(&dev->active_tx_urbs);
 

From 3d3925ff6433f98992685a9679613a2cc97f3ce2 Mon Sep 17 00:00:00 2001
From: Hangyu Hua <hbh25y@gmail.com>
Date: Fri, 11 Mar 2022 16:06:14 +0800
Subject: [PATCH 34/57] can: usb_8dev: usb_8dev_start_xmit(): fix double
 dev_kfree_skb() in error path

There is no need to call dev_kfree_skb() when usb_submit_urb() fails
because can_put_echo_skb() deletes original skb and
can_free_echo_skb() deletes the cloned skb.

Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices")
Link: https://lore.kernel.org/all/20220311080614.45229-1-hbh25y@gmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/usb_8dev.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c
index 431af1ec1e3ca..b638604bf1eef 100644
--- a/drivers/net/can/usb/usb_8dev.c
+++ b/drivers/net/can/usb/usb_8dev.c
@@ -663,9 +663,20 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb,
 	atomic_inc(&priv->active_tx_urbs);
 
 	err = usb_submit_urb(urb, GFP_ATOMIC);
-	if (unlikely(err))
-		goto failed;
-	else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS)
+	if (unlikely(err)) {
+		can_free_echo_skb(netdev, context->echo_index, NULL);
+
+		usb_unanchor_urb(urb);
+		usb_free_coherent(priv->udev, size, buf, urb->transfer_dma);
+
+		atomic_dec(&priv->active_tx_urbs);
+
+		if (err == -ENODEV)
+			netif_device_detach(netdev);
+		else
+			netdev_warn(netdev, "failed tx_urb %d\n", err);
+		stats->tx_dropped++;
+	} else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS)
 		/* Slow down tx path */
 		netif_stop_queue(netdev);
 
@@ -684,19 +695,6 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb,
 
 	return NETDEV_TX_BUSY;
 
-failed:
-	can_free_echo_skb(netdev, context->echo_index, NULL);
-
-	usb_unanchor_urb(urb);
-	usb_free_coherent(priv->udev, size, buf, urb->transfer_dma);
-
-	atomic_dec(&priv->active_tx_urbs);
-
-	if (err == -ENODEV)
-		netif_device_detach(netdev);
-	else
-		netdev_warn(netdev, "failed tx_urb %d\n", err);
-
 nomembuf:
 	usb_free_urb(urb);
 

From 04c9b00ba83594a29813d6b1fb8fdc93a3915174 Mon Sep 17 00:00:00 2001
From: Hangyu Hua <hbh25y@gmail.com>
Date: Fri, 11 Mar 2022 16:02:08 +0800
Subject: [PATCH 35/57] can: mcba_usb: mcba_usb_start_xmit(): fix double
 dev_kfree_skb in error path

There is no need to call dev_kfree_skb() when usb_submit_urb() fails
because can_put_echo_skb() deletes original skb and
can_free_echo_skb() deletes the cloned skb.

Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer")
Link: https://lore.kernel.org/all/20220311080208.45047-1-hbh25y@gmail.com
Signed-off-by: Hangyu Hua <hbh25y@gmail.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 77bddff86252b..7c198eb5bc9c3 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -364,7 +364,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb,
 xmit_failed:
 	can_free_echo_skb(priv->netdev, ctx->ndx, NULL);
 	mcba_usb_free_ctx(ctx);
-	dev_kfree_skb(skb);
 	stats->tx_dropped++;
 
 	return NETDEV_TX_OK;

From 136bed0bfd3bc9c95c88aafff2d22ecb3a919f23 Mon Sep 17 00:00:00 2001
From: Pavel Skripkin <paskripkin@gmail.com>
Date: Sun, 13 Mar 2022 13:09:03 +0300
Subject: [PATCH 36/57] can: mcba_usb: properly check endpoint type

Syzbot reported warning in usb_submit_urb() which is caused by wrong
endpoint type. We should check that in endpoint is actually present to
prevent this warning.

Found pipes are now saved to struct mcba_priv and code uses them
directly instead of making pipes in place.

Fail log:

| usb 5-1: BOGUS urb xfer, pipe 3 != type 1
| WARNING: CPU: 1 PID: 49 at drivers/usb/core/urb.c:502 usb_submit_urb+0xed2/0x18a0 drivers/usb/core/urb.c:502
| Modules linked in:
| CPU: 1 PID: 49 Comm: kworker/1:2 Not tainted 5.17.0-rc6-syzkaller-00184-g38f80f42147f #0
| Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.14.0-2 04/01/2014
| Workqueue: usb_hub_wq hub_event
| RIP: 0010:usb_submit_urb+0xed2/0x18a0 drivers/usb/core/urb.c:502
| ...
| Call Trace:
|  <TASK>
|  mcba_usb_start drivers/net/can/usb/mcba_usb.c:662 [inline]
|  mcba_usb_probe+0x8a3/0xc50 drivers/net/can/usb/mcba_usb.c:858
|  usb_probe_interface+0x315/0x7f0 drivers/usb/core/driver.c:396
|  call_driver_probe drivers/base/dd.c:517 [inline]

Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer")
Link: https://lore.kernel.org/all/20220313100903.10868-1-paskripkin@gmail.com
Reported-and-tested-by: syzbot+3bc1dce0cc0052d60fde@syzkaller.appspotmail.com
Signed-off-by: Pavel Skripkin <paskripkin@gmail.com>
Reviewed-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/mcba_usb.c | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c
index 7c198eb5bc9c3..c45a814e1de2f 100644
--- a/drivers/net/can/usb/mcba_usb.c
+++ b/drivers/net/can/usb/mcba_usb.c
@@ -33,10 +33,6 @@
 #define MCBA_USB_RX_BUFF_SIZE 64
 #define MCBA_USB_TX_BUFF_SIZE (sizeof(struct mcba_usb_msg))
 
-/* MCBA endpoint numbers */
-#define MCBA_USB_EP_IN 1
-#define MCBA_USB_EP_OUT 1
-
 /* Microchip command id */
 #define MBCA_CMD_RECEIVE_MESSAGE 0xE3
 #define MBCA_CMD_I_AM_ALIVE_FROM_CAN 0xF5
@@ -83,6 +79,8 @@ struct mcba_priv {
 	atomic_t free_ctx_cnt;
 	void *rxbuf[MCBA_MAX_RX_URBS];
 	dma_addr_t rxbuf_dma[MCBA_MAX_RX_URBS];
+	int rx_pipe;
+	int tx_pipe;
 };
 
 /* CAN frame */
@@ -268,10 +266,8 @@ static netdev_tx_t mcba_usb_xmit(struct mcba_priv *priv,
 
 	memcpy(buf, usb_msg, MCBA_USB_TX_BUFF_SIZE);
 
-	usb_fill_bulk_urb(urb, priv->udev,
-			  usb_sndbulkpipe(priv->udev, MCBA_USB_EP_OUT), buf,
-			  MCBA_USB_TX_BUFF_SIZE, mcba_usb_write_bulk_callback,
-			  ctx);
+	usb_fill_bulk_urb(urb, priv->udev, priv->tx_pipe, buf, MCBA_USB_TX_BUFF_SIZE,
+			  mcba_usb_write_bulk_callback, ctx);
 
 	urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 	usb_anchor_urb(urb, &priv->tx_submitted);
@@ -607,7 +603,7 @@ static void mcba_usb_read_bulk_callback(struct urb *urb)
 resubmit_urb:
 
 	usb_fill_bulk_urb(urb, priv->udev,
-			  usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_OUT),
+			  priv->rx_pipe,
 			  urb->transfer_buffer, MCBA_USB_RX_BUFF_SIZE,
 			  mcba_usb_read_bulk_callback, priv);
 
@@ -652,7 +648,7 @@ static int mcba_usb_start(struct mcba_priv *priv)
 		urb->transfer_dma = buf_dma;
 
 		usb_fill_bulk_urb(urb, priv->udev,
-				  usb_rcvbulkpipe(priv->udev, MCBA_USB_EP_IN),
+				  priv->rx_pipe,
 				  buf, MCBA_USB_RX_BUFF_SIZE,
 				  mcba_usb_read_bulk_callback, priv);
 		urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
@@ -806,6 +802,13 @@ static int mcba_usb_probe(struct usb_interface *intf,
 	struct mcba_priv *priv;
 	int err;
 	struct usb_device *usbdev = interface_to_usbdev(intf);
+	struct usb_endpoint_descriptor *in, *out;
+
+	err = usb_find_common_endpoints(intf->cur_altsetting, &in, &out, NULL, NULL);
+	if (err) {
+		dev_err(&intf->dev, "Can't find endpoints\n");
+		return err;
+	}
 
 	netdev = alloc_candev(sizeof(struct mcba_priv), MCBA_MAX_TX_URBS);
 	if (!netdev) {
@@ -851,6 +854,9 @@ static int mcba_usb_probe(struct usb_interface *intf,
 		goto cleanup_free_candev;
 	}
 
+	priv->rx_pipe = usb_rcvbulkpipe(priv->udev, in->bEndpointAddress);
+	priv->tx_pipe = usb_sndbulkpipe(priv->udev, out->bEndpointAddress);
+
 	devm_can_led_init(netdev);
 
 	/* Start USB dev only if we have successfully registered CAN device */

From 50d34a0d151dc7abbdbec781bd7f09f2b3cbf01a Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Tue, 29 Mar 2022 21:29:43 +0200
Subject: [PATCH 37/57] can: gs_usb: gs_make_candev(): fix memory leak for
 devices with extended bit timing configuration

Some CAN-FD capable devices offer extended bit timing information for
the data bit timing. The information must be read with an USB control
message. The memory for this message is allocated but not free()ed (in
the non error case). This patch adds the missing free.

Fixes: 6679f4c5e5a6 ("can: gs_usb: add extended bt_const feature")
Link: https://lore.kernel.org/all/20220329193450.659726-1-mkl@pengutronix.de
Reported-by: syzbot+4d0ae90a195b269f102d@syzkaller.appspotmail.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/gs_usb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 67408e3160621..b29ba9138866b 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -1092,6 +1092,8 @@ static struct gs_can *gs_make_candev(unsigned int channel,
 		dev->data_bt_const.brp_inc = le32_to_cpu(bt_const_extended->dbrp_inc);
 
 		dev->can.data_bittiming_const = &dev->data_bt_const;
+
+		kfree(bt_const_extended);
 	}
 
 	SET_NETDEV_DEV(netdev, &intf->dev);

From 50386f7526dd7fd71a390966ea9736c8420aa31f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:52 -0700
Subject: [PATCH 38/57] docs: netdev: replace references to old archives

Most people use (or should use) lore at this point.
Replace the pointers to older archiving systems.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index e26532f497605..25b8a7de737c2 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -16,10 +16,8 @@ Note that some subsystems (e.g. wireless drivers) which have a high
 volume of traffic have their own specific mailing lists.
 
 The netdev list is managed (like many other Linux mailing lists) through
-VGER (http://vger.kernel.org/) and archives can be found below:
-
--  http://marc.info/?l=linux-netdev
--  http://www.spinics.net/lists/netdev/
+VGER (http://vger.kernel.org/) with archives available at
+https://lore.kernel.org/netdev/
 
 Aside from subsystems like that mentioned above, all network-related
 Linux development (i.e. RFC, review, comments, etc.) takes place on

From 30cddd30532a72f857abf9d7c80d8b620fd3e5a1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:53 -0700
Subject: [PATCH 39/57] docs: netdev: minor reword

that -> those

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 25b8a7de737c2..f7e5755e013e4 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -19,7 +19,7 @@ The netdev list is managed (like many other Linux mailing lists) through
 VGER (http://vger.kernel.org/) with archives available at
 https://lore.kernel.org/netdev/
 
-Aside from subsystems like that mentioned above, all network-related
+Aside from subsystems like those mentioned above, all network-related
 Linux development (i.e. RFC, review, comments, etc.) takes place on
 netdev.
 

From c82d90b14f6c23f4e7cd89944f9a89f89223dc58 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:54 -0700
Subject: [PATCH 40/57] docs: netdev: move the patch marking section up

We want people to mark their patches with net and net-next in the subject.
Many miss doing that. Move the FAQ section which points that out up, and
place it after the section which enumerates the trees, that seems like
a pretty logical place for it. Since the two sections are together we
can remove a little bit (not too much) of the repetition.

v2: also remove the text for non-git setups, we want people to use git.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index f7e5755e013e4..fd5f5a1a08467 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -35,6 +35,17 @@ for the future release.  You can find the trees here:
 - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 - https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 
+How do I indicate which tree (net vs. net-next) my patch should be in?
+----------------------------------------------------------------------
+To help maintainers and CI bots you should explicitly mark which tree
+your patch is targeting. Assuming that you use git, use the prefix
+flag::
+
+  git format-patch --subject-prefix='PATCH net-next' start..finish
+
+Use ``net`` instead of ``net-next`` (always lower case) in the above for
+bug-fix ``net`` content.
+
 How often do changes from these trees make it to the mainline Linus tree?
 -------------------------------------------------------------------------
 To understand this, you need to know a bit of background information on
@@ -90,20 +101,6 @@ and note the top of the "tags" section.  If it is rc1, it is early in
 the dev cycle.  If it was tagged rc7 a week ago, then a release is
 probably imminent.
 
-How do I indicate which tree (net vs. net-next) my patch should be in?
-----------------------------------------------------------------------
-Firstly, think whether you have a bug fix or new "next-like" content.
-Then once decided, assuming that you use git, use the prefix flag, i.e.
-::
-
-  git format-patch --subject-prefix='PATCH net-next' start..finish
-
-Use ``net`` instead of ``net-next`` (always lower case) in the above for
-bug-fix ``net`` content.  If you don't use git, then note the only magic
-in the above is just the subject text of the outgoing e-mail, and you
-can manually change it yourself with whatever MUA you are comfortable
-with.
-
 I sent a patch and I'm wondering what happened to it - how can I tell whether it got merged?
 --------------------------------------------------------------------------------------------
 Start by looking at the main patchworks queue for netdev:

From 2fd4c50dbff19eb3d798236074bfa3226b5df4a6 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:55 -0700
Subject: [PATCH 41/57] docs: netdev: turn the net-next closed into a Warning

Use the sphinx Warning box to make the net-next being closed
stand out more.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index fd5f5a1a08467..041993258dda3 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -70,8 +70,9 @@ relating to vX.Y
 An announcement indicating when ``net-next`` has been closed is usually
 sent to netdev, but knowing the above, you can predict that in advance.
 
-IMPORTANT: Do not send new ``net-next`` content to netdev during the
-period during which ``net-next`` tree is closed.
+.. warning::
+  Do not send new ``net-next`` content to netdev during the
+  period during which ``net-next`` tree is closed.
 
 Shortly after the two weeks have passed (and vX.Y-rc1 is released), the
 tree for ``net-next`` reopens to collect content for the next (vX.Y+1)

From 0e242e3fb7a7f6c120195c5a57faf78a321dbf65 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:56 -0700
Subject: [PATCH 42/57] docs: netdev: note that RFC postings are allowed any
 time

Document that RFCs are allowed during the merge window.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 041993258dda3..f4c77efa75d4c 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -74,6 +74,9 @@ sent to netdev, but knowing the above, you can predict that in advance.
   Do not send new ``net-next`` content to netdev during the
   period during which ``net-next`` tree is closed.
 
+RFC patches sent for review only are obviously welcome at any time
+(use ``--subject-prefix='RFC net-next'`` with ``git format-patch``).
+
 Shortly after the two weeks have passed (and vX.Y-rc1 is released), the
 tree for ``net-next`` reopens to collect content for the next (vX.Y+1)
 release.

From 5d84921ac750d2b2e42c7102890b978cb2c2729e Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:57 -0700
Subject: [PATCH 43/57] docs: netdev: shorten the name and mention msgid for
 patch status

Cut down the length of the question so it renders better in docs.
Mention that Message-ID can be used to search patchwork.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index f4c77efa75d4c..e10a8140d6420 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -105,14 +105,16 @@ and note the top of the "tags" section.  If it is rc1, it is early in
 the dev cycle.  If it was tagged rc7 a week ago, then a release is
 probably imminent.
 
-I sent a patch and I'm wondering what happened to it - how can I tell whether it got merged?
---------------------------------------------------------------------------------------------
+How can I tell the status of a patch I've sent?
+-----------------------------------------------
 Start by looking at the main patchworks queue for netdev:
 
   https://patchwork.kernel.org/project/netdevbpf/list/
 
 The "State" field will tell you exactly where things are at with your
-patch.
+patch. Patches are indexed by the ``Message-ID`` header of the emails
+which carried them so if you have trouble finding your patch append
+the value of ``Message-ID`` to the URL above.
 
 The above only says "Under Review".  How can I find out more?
 -------------------------------------------------------------

From 8f785c1bb84f7c8ca07c408dc96e8d8358b02bb3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:58 -0700
Subject: [PATCH 44/57] docs: netdev: rephrase the 'Under review' question

The semantics of "Under review" have shifted. Reword the question
about it a bit and focus it on the response time.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index e10a8140d6420..00ac300ebe6a6 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -116,10 +116,12 @@ patch. Patches are indexed by the ``Message-ID`` header of the emails
 which carried them so if you have trouble finding your patch append
 the value of ``Message-ID`` to the URL above.
 
-The above only says "Under Review".  How can I find out more?
--------------------------------------------------------------
+How long before my patch is accepted?
+-------------------------------------
 Generally speaking, the patches get triaged quickly (in less than
-48h).  So be patient.  Asking the maintainer for status updates on your
+48h). But be patient, if your patch is active in patchwork (i.e. it's
+listed on the project's patch list) the chances it was missed are close to zero.
+Asking the maintainer for status updates on your
 patch is a good way to ensure your patch is ignored or pushed to the
 bottom of the priority list.
 

From 724c1a7443c5ccb7cda9bfb5ccab84bcece2c50c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:24:59 -0700
Subject: [PATCH 45/57] docs: netdev: rephrase the 'should I update patchwork'
 question

Make the question shorter and adjust the start of the answer accordingly.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 00ac300ebe6a6..9c455d08510df 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -125,9 +125,11 @@ Asking the maintainer for status updates on your
 patch is a good way to ensure your patch is ignored or pushed to the
 bottom of the priority list.
 
-I submitted multiple versions of the patch series. Should I directly update patchwork for the previous versions of these patch series?
---------------------------------------------------------------------------------------------------------------------------------------
-No, please don't interfere with the patch status on patchwork, leave
+Should I directly update patchwork state of my own patches?
+-----------------------------------------------------------
+It may be tempting to help the maintainers and update the state of your
+own patches when you post a new version or spot a bug. Please do not do that.
+Interfering with the patch status on patchwork will only cause confusion. Leave
 it to the maintainer to figure out what is the most recent and current
 version that should be applied. If there is any doubt, the maintainer
 will reply and ask what should be done.

From b8ba106378a058711d8abd0889351ff9c81850c1 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:25:00 -0700
Subject: [PATCH 46/57] docs: netdev: add a question about re-posting frequency

We have to tell people to stop reposting to often lately,
or not to repost while the discussion is ongoing.
Document this.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 9c455d08510df..f8b89dc81cab2 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -140,6 +140,17 @@ No, please resend the entire patch series and make sure you do number your
 patches such that it is clear this is the latest and greatest set of patches
 that can be applied.
 
+I have received review feedback, when should I post a revised version of the patches?
+-------------------------------------------------------------------------------------
+Allow at least 24 hours to pass between postings. This will ensure reviewers
+from all geographical locations have a chance to chime in. Do not wait
+too long (weeks) between postings either as it will make it harder for reviewers
+to recall all the context.
+
+Make sure you address all the feedback in your new posting. Do not post a new
+version of the code if the discussion about the previous version is still
+ongoing, unless directly instructed by a reviewer.
+
 I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do?
 ----------------------------------------------------------------------------------------------------------------------------------------
 There is no revert possible, once it is pushed out, it stays like that.

From 3eca381457ca58fbde16f827ddfaaecde3d61127 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:25:01 -0700
Subject: [PATCH 47/57] docs: netdev: make the testing requirement more
 stringent

These days we often ask for selftests so let's update our
testing requirements.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index f8b89dc81cab2..1388f78cfbc57 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -196,11 +196,15 @@ as possible alternative mechanisms.
 
 What level of testing is expected before I submit my change?
 ------------------------------------------------------------
-If your changes are against ``net-next``, the expectation is that you
-have tested by layering your changes on top of ``net-next``.  Ideally
-you will have done run-time testing specific to your change, but at a
-minimum, your changes should survive an ``allyesconfig`` and an
-``allmodconfig`` build without new warnings or failures.
+At the very minimum your changes must survive an ``allyesconfig`` and an
+``allmodconfig`` build with ``W=1`` set without new warnings or failures.
+
+Ideally you will have done run-time testing specific to your change,
+and the patch series contains a set of kernel selftest for
+``tools/testing/selftests/net`` or using the KUnit framework.
+
+You are expected to test your changes on top of the relevant networking
+tree (``net`` or ``net-next``) and not e.g. a stable tree or ``linux-next``.
 
 How do I post corresponding changes to user space components?
 -------------------------------------------------------------

From a300597318771f889136db36f9f0dcfd26b84f18 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:25:02 -0700
Subject: [PATCH 48/57] docs: netdev: add missing back ticks

I think double back ticks are more correct. Add where they are missing.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 1388f78cfbc57..294ad9b0162d9 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -218,7 +218,7 @@ or the user space project is not reviewed on netdev include a link
 to a public repo where user space patches can be seen.
 
 In case user space tooling lives in a separate repository but is
-reviewed on netdev  (e.g. patches to `iproute2` tools) kernel and
+reviewed on netdev  (e.g. patches to ``iproute2`` tools) kernel and
 user space patches should form separate series (threads) when posted
 to the mailing list, e.g.::
 
@@ -251,18 +251,18 @@ traffic if we can help it.
 netdevsim is great, can I extend it for my out-of-tree tests?
 -------------------------------------------------------------
 
-No, `netdevsim` is a test vehicle solely for upstream tests.
-(Please add your tests under tools/testing/selftests/.)
+No, ``netdevsim`` is a test vehicle solely for upstream tests.
+(Please add your tests under ``tools/testing/selftests/``.)
 
-We also give no guarantees that `netdevsim` won't change in the future
+We also give no guarantees that ``netdevsim`` won't change in the future
 in a way which would break what would normally be considered uAPI.
 
 Is netdevsim considered a "user" of an API?
 -------------------------------------------
 
 Linux kernel has a long standing rule that no API should be added unless
-it has a real, in-tree user. Mock-ups and tests based on `netdevsim` are
-strongly encouraged when adding new APIs, but `netdevsim` in itself
+it has a real, in-tree user. Mock-ups and tests based on ``netdevsim`` are
+strongly encouraged when adding new APIs, but ``netdevsim`` in itself
 is **not** considered a use case/user.
 
 Any other tips to help ensure my net/net-next patch gets OK'd?

From 99eba4e5cbd462db47dcb949af6d5474acdac953 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:25:03 -0700
Subject: [PATCH 49/57] docs: netdev: call out the merge window in tag checking

Add the most important case to the question about "where are we
in the cycle" - the case of net-next being closed.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index 294ad9b0162d9..a18e4e671e85a 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -103,7 +103,9 @@ Load the mainline (Linus) page here:
 
 and note the top of the "tags" section.  If it is rc1, it is early in
 the dev cycle.  If it was tagged rc7 a week ago, then a release is
-probably imminent.
+probably imminent. If the most recent tag is a final release tag
+(without an ``-rcN`` suffix) - we are most likely in a merge window
+and ``net-next`` is closed.
 
 How can I tell the status of a patch I've sent?
 -----------------------------------------------

From 08767a26f095909423f963c864f15db50c0ce9a7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:25:04 -0700
Subject: [PATCH 50/57] docs: netdev: broaden the new vs old code formatting
 guidelines

Convert the "should I use new or old comment formatting" to cover
all formatting. This makes the question itself shorter.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/netdev-FAQ.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst
index a18e4e671e85a..c456b5225d664 100644
--- a/Documentation/networking/netdev-FAQ.rst
+++ b/Documentation/networking/netdev-FAQ.rst
@@ -183,10 +183,10 @@ it is requested that you make it look like this::
    * another line of text
    */
 
-I am working in existing code that has the former comment style and not the latter. Should I submit new code in the former style or the latter?
------------------------------------------------------------------------------------------------------------------------------------------------
-Make it the latter style, so that eventually all code in the domain
-of netdev is of this format.
+I am working in existing code which uses non-standard formatting. Which formatting should I use?
+------------------------------------------------------------------------------------------------
+Make your code follow the most recent guidelines, so that eventually all code
+in the domain of netdev is in the preferred format.
 
 I found a bug that might have possible security implications or similar. Should I mail the main netdev maintainer off-list?
 ---------------------------------------------------------------------------------------------------------------------------

From 8df0136376dc9227a45fd6a1420016f58792b5d0 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 29 Mar 2022 21:25:05 -0700
Subject: [PATCH 51/57] docs: netdev: move the netdev-FAQ to the process pages

The documentation for the tip tree is really in quite a similar
spirit to the netdev-FAQ. Move the netdev-FAQ to the process docs
as well.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/bpf/bpf_devel_QA.rst                             | 2 +-
 Documentation/networking/index.rst                             | 3 ++-
 Documentation/process/maintainer-handbooks.rst                 | 1 +
 .../netdev-FAQ.rst => process/maintainer-netdev.rst}           | 0
 MAINTAINERS                                                    | 1 +
 5 files changed, 5 insertions(+), 2 deletions(-)
 rename Documentation/{networking/netdev-FAQ.rst => process/maintainer-netdev.rst} (100%)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 253496af8fef2..761474bd7fe6e 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -658,7 +658,7 @@ when:
 
 .. Links
 .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _netdev-FAQ: ../networking/netdev-FAQ.rst
+.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
 .. _selftests:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
 .. _Documentation/dev-tools/kselftest.rst:
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index ce017136ab053..72cf33579b785 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -1,12 +1,13 @@
 Linux Networking Documentation
 ==============================
 
+Refer to :ref:`netdev-FAQ` for a guide on netdev development process specifics.
+
 Contents:
 
 .. toctree::
    :maxdepth: 2
 
-   netdev-FAQ
    af_xdp
    bareudp
    batman-adv
diff --git a/Documentation/process/maintainer-handbooks.rst b/Documentation/process/maintainer-handbooks.rst
index 6af1abb0da482..d783060b4cc6e 100644
--- a/Documentation/process/maintainer-handbooks.rst
+++ b/Documentation/process/maintainer-handbooks.rst
@@ -16,3 +16,4 @@ Contents:
    :maxdepth: 2
 
    maintainer-tip
+   maintainer-netdev
diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/process/maintainer-netdev.rst
similarity index 100%
rename from Documentation/networking/netdev-FAQ.rst
rename to Documentation/process/maintainer-netdev.rst
diff --git a/MAINTAINERS b/MAINTAINERS
index d91f6c6e3d3b5..00dd58fc8bf3b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13661,6 +13661,7 @@ B:	mailto:netdev@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 F:	Documentation/networking/
+F:	Documentation/process/maintainer-netdev.rst
 F:	include/linux/in.h
 F:	include/linux/net.h
 F:	include/linux/netdevice.h

From 9c9a04212fa380d2e7d1412bb281309955c0a781 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Wed, 30 Mar 2022 21:45:05 +0800
Subject: [PATCH 52/57] net: hns3: fix the concurrency between functions
 reading debugfs

Currently, the debugfs mechanism is that all functions share a
global variable to save the pointer for obtaining data. When
different functions concurrently access the same file node,
repeated release exceptions occur. Therefore, the granularity
of the pointer for storing the obtained data is adjusted to be
private for each function.

Fixes: 5e69ea7ee2a6 ("net: hns3: refactor the debugfs process")
Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h       |  1 +
 .../net/ethernet/hisilicon/hns3/hns3_debugfs.c    | 15 +++++++++++----
 .../net/ethernet/hisilicon/hns3/hns3_debugfs.h    |  1 -
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index d44dd7091fa13..79c64f4e67d2b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -845,6 +845,7 @@ struct hnae3_handle {
 	struct dentry *hnae3_dbgfs;
 	/* protects concurrent contention between debugfs commands */
 	struct mutex dbgfs_lock;
+	char **dbgfs_buf;
 
 	/* Network interface message level enabled bits */
 	u32 msg_enable;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index f726a5b70f9e2..44d9b560b3374 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -1227,7 +1227,7 @@ static ssize_t hns3_dbg_read(struct file *filp, char __user *buffer,
 		return ret;
 
 	mutex_lock(&handle->dbgfs_lock);
-	save_buf = &hns3_dbg_cmd[index].buf;
+	save_buf = &handle->dbgfs_buf[index];
 
 	if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state) ||
 	    test_bit(HNS3_NIC_STATE_RESETTING, &priv->state)) {
@@ -1332,6 +1332,13 @@ int hns3_dbg_init(struct hnae3_handle *handle)
 	int ret;
 	u32 i;
 
+	handle->dbgfs_buf = devm_kcalloc(&handle->pdev->dev,
+					 ARRAY_SIZE(hns3_dbg_cmd),
+					 sizeof(*handle->dbgfs_buf),
+					 GFP_KERNEL);
+	if (!handle->dbgfs_buf)
+		return -ENOMEM;
+
 	hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry =
 				debugfs_create_dir(name, hns3_dbgfs_root);
 	handle->hnae3_dbgfs = hns3_dbg_dentry[HNS3_DBG_DENTRY_COMMON].dentry;
@@ -1380,9 +1387,9 @@ void hns3_dbg_uninit(struct hnae3_handle *handle)
 	u32 i;
 
 	for (i = 0; i < ARRAY_SIZE(hns3_dbg_cmd); i++)
-		if (hns3_dbg_cmd[i].buf) {
-			kvfree(hns3_dbg_cmd[i].buf);
-			hns3_dbg_cmd[i].buf = NULL;
+		if (handle->dbgfs_buf[i]) {
+			kvfree(handle->dbgfs_buf[i]);
+			handle->dbgfs_buf[i] = NULL;
 		}
 
 	mutex_destroy(&handle->dbgfs_lock);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
index 83aa1450ab9fe..97578eabb7d8b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.h
@@ -49,7 +49,6 @@ struct hns3_dbg_cmd_info {
 	enum hnae3_dbg_cmd cmd;
 	enum hns3_dbg_dentry_type dentry;
 	u32 buf_len;
-	char *buf;
 	int (*init)(struct hnae3_handle *handle, unsigned int cmd);
 };
 

From 7ed258f12ec5ce855f15cdfb5710361dc82fe899 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Wed, 30 Mar 2022 21:45:06 +0800
Subject: [PATCH 53/57] net: hns3: fix software vlan talbe of vlan 0
 inconsistent with hardware

When user delete vlan 0, as driver will not delete vlan 0 for hardware in
function hclge_set_vlan_filter_hw(), so vlan 0 in software vlan talbe should
not be deleted.

Fixes: fe4144d47eef ("net: hns3: sync VLAN filter entries when kill VLAN ID failed")
Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 2a5e6a241d527..8cebb180c812f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10323,11 +10323,11 @@ int hclge_set_vlan_filter(struct hnae3_handle *handle, __be16 proto,
 	}
 
 	if (!ret) {
-		if (is_kill)
-			hclge_rm_vport_vlan_table(vport, vlan_id, false);
-		else
+		if (!is_kill)
 			hclge_add_vport_vlan_table(vport, vlan_id,
 						   writen_to_tbl);
+		else if (is_kill && vlan_id != 0)
+			hclge_rm_vport_vlan_table(vport, vlan_id, false);
 	} else if (is_kill) {
 		/* when remove hw vlan filter failed, record the vlan id,
 		 * and try to remove it from hw later, to be consistence

From 4a7f62f91933c8ae5308f9127fd8ea48188b6bc3 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 30 Mar 2022 15:39:16 +0100
Subject: [PATCH 54/57] rxrpc: Fix call timer start racing with call
 destruction

The rxrpc_call struct has a timer used to handle various timed events
relating to a call.  This timer can get started from the packet input
routines that are run in softirq mode with just the RCU read lock held.
Unfortunately, because only the RCU read lock is held - and neither ref or
other lock is taken - the call can start getting destroyed at the same time
a packet comes in addressed to that call.  This causes the timer - which
was already stopped - to get restarted.  Later, the timer dispatch code may
then oops if the timer got deallocated first.

Fix this by trying to take a ref on the rxrpc_call struct and, if
successful, passing that ref along to the timer.  If the timer was already
running, the ref is discarded.

The timer completion routine can then pass the ref along to the call's work
item when it queues it.  If the timer or work item where already
queued/running, the extra ref is discarded.

Fixes: a158bdd3247b ("rxrpc: Fix call timeouts")
Reported-by: Marc Dionne <marc.dionne@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Reviewed-by: Marc Dionne <marc.dionne@auristor.com>
Tested-by: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: http://lists.infradead.org/pipermail/linux-afs/2022-March/005073.html
Link: https://lore.kernel.org/r/164865115696.2943015.11097991776647323586.stgit@warthog.procyon.org.uk
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 include/trace/events/rxrpc.h |  8 +++++++-
 net/rxrpc/ar-internal.h      | 15 +++++++-------
 net/rxrpc/call_event.c       |  2 +-
 net/rxrpc/call_object.c      | 40 +++++++++++++++++++++++++++++++-----
 4 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index e70c90116edae..4a3ab0ed6e062 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -83,12 +83,15 @@ enum rxrpc_call_trace {
 	rxrpc_call_error,
 	rxrpc_call_got,
 	rxrpc_call_got_kernel,
+	rxrpc_call_got_timer,
 	rxrpc_call_got_userid,
 	rxrpc_call_new_client,
 	rxrpc_call_new_service,
 	rxrpc_call_put,
 	rxrpc_call_put_kernel,
 	rxrpc_call_put_noqueue,
+	rxrpc_call_put_notimer,
+	rxrpc_call_put_timer,
 	rxrpc_call_put_userid,
 	rxrpc_call_queued,
 	rxrpc_call_queued_ref,
@@ -278,12 +281,15 @@ enum rxrpc_tx_point {
 	EM(rxrpc_call_error,			"*E*") \
 	EM(rxrpc_call_got,			"GOT") \
 	EM(rxrpc_call_got_kernel,		"Gke") \
+	EM(rxrpc_call_got_timer,		"GTM") \
 	EM(rxrpc_call_got_userid,		"Gus") \
 	EM(rxrpc_call_new_client,		"NWc") \
 	EM(rxrpc_call_new_service,		"NWs") \
 	EM(rxrpc_call_put,			"PUT") \
 	EM(rxrpc_call_put_kernel,		"Pke") \
-	EM(rxrpc_call_put_noqueue,		"PNQ") \
+	EM(rxrpc_call_put_noqueue,		"PnQ") \
+	EM(rxrpc_call_put_notimer,		"PnT") \
+	EM(rxrpc_call_put_timer,		"PTM") \
 	EM(rxrpc_call_put_userid,		"Pus") \
 	EM(rxrpc_call_queued,			"QUE") \
 	EM(rxrpc_call_queued_ref,		"QUR") \
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 7bd6f8a66a3ef..969e532f77a90 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -777,14 +777,12 @@ void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool, bool,
 		       enum rxrpc_propose_ack_trace);
 void rxrpc_process_call(struct work_struct *);
 
-static inline void rxrpc_reduce_call_timer(struct rxrpc_call *call,
-					   unsigned long expire_at,
-					   unsigned long now,
-					   enum rxrpc_timer_trace why)
-{
-	trace_rxrpc_timer(call, why, now);
-	timer_reduce(&call->timer, expire_at);
-}
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+			     unsigned long expire_at,
+			     unsigned long now,
+			     enum rxrpc_timer_trace why);
+
+void rxrpc_delete_call_timer(struct rxrpc_call *call);
 
 /*
  * call_object.c
@@ -808,6 +806,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
 bool __rxrpc_queue_call(struct rxrpc_call *);
 bool rxrpc_queue_call(struct rxrpc_call *);
 void rxrpc_see_call(struct rxrpc_call *);
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op);
 void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
 void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
 void rxrpc_cleanup_call(struct rxrpc_call *);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index df864e6922679..22e05de5d1ca9 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -310,7 +310,7 @@ void rxrpc_process_call(struct work_struct *work)
 	}
 
 	if (call->state == RXRPC_CALL_COMPLETE) {
-		del_timer_sync(&call->timer);
+		rxrpc_delete_call_timer(call);
 		goto out_put;
 	}
 
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 4eb91d958a48d..043508fd8d8a5 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -53,10 +53,30 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
 
 	if (call->state < RXRPC_CALL_COMPLETE) {
 		trace_rxrpc_timer(call, rxrpc_timer_expired, jiffies);
-		rxrpc_queue_call(call);
+		__rxrpc_queue_call(call);
+	} else {
+		rxrpc_put_call(call, rxrpc_call_put);
+	}
+}
+
+void rxrpc_reduce_call_timer(struct rxrpc_call *call,
+			     unsigned long expire_at,
+			     unsigned long now,
+			     enum rxrpc_timer_trace why)
+{
+	if (rxrpc_try_get_call(call, rxrpc_call_got_timer)) {
+		trace_rxrpc_timer(call, why, now);
+		if (timer_reduce(&call->timer, expire_at))
+			rxrpc_put_call(call, rxrpc_call_put_notimer);
 	}
 }
 
+void rxrpc_delete_call_timer(struct rxrpc_call *call)
+{
+	if (del_timer_sync(&call->timer))
+		rxrpc_put_call(call, rxrpc_call_put_timer);
+}
+
 static struct lock_class_key rxrpc_call_user_mutex_lock_class_key;
 
 /*
@@ -463,6 +483,17 @@ void rxrpc_see_call(struct rxrpc_call *call)
 	}
 }
 
+bool rxrpc_try_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
+{
+	const void *here = __builtin_return_address(0);
+	int n = atomic_fetch_add_unless(&call->usage, 1, 0);
+
+	if (n == 0)
+		return false;
+	trace_rxrpc_call(call->debug_id, op, n, here, NULL);
+	return true;
+}
+
 /*
  * Note the addition of a ref on a call.
  */
@@ -510,8 +541,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 	spin_unlock_bh(&call->lock);
 
 	rxrpc_put_call_slot(call);
-
-	del_timer_sync(&call->timer);
+	rxrpc_delete_call_timer(call);
 
 	/* Make sure we don't get any more notifications */
 	write_lock_bh(&rx->recvmsg_lock);
@@ -618,6 +648,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
 	struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor);
 	struct rxrpc_net *rxnet = call->rxnet;
 
+	rxrpc_delete_call_timer(call);
+
 	rxrpc_put_connection(call->conn);
 	rxrpc_put_peer(call->peer);
 	kfree(call->rxtx_buffer);
@@ -652,8 +684,6 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
 
 	memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
 
-	del_timer_sync(&call->timer);
-
 	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
 	ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
 

From ff8376ade4f668130385839cef586a0990f8ef87 Mon Sep 17 00:00:00 2001
From: Xiaolong Huang <butterflyhuangxx@gmail.com>
Date: Wed, 30 Mar 2022 15:22:14 +0100
Subject: [PATCH 55/57] rxrpc: fix some null-ptr-deref bugs in server_key.c

Some function calls are not implemented in rxrpc_no_security, there are
preparse_server_key, free_preparse_server_key and destroy_server_key.
When rxrpc security type is rxrpc_no_security, user can easily trigger a
null-ptr-deref bug via ioctl. So judgment should be added to prevent it

The crash log:
user@syzkaller:~$ ./rxrpc_preparse_s
[   37.956878][T15626] BUG: kernel NULL pointer dereference, address: 0000000000000000
[   37.957645][T15626] #PF: supervisor instruction fetch in kernel mode
[   37.958229][T15626] #PF: error_code(0x0010) - not-present page
[   37.958762][T15626] PGD 4aadf067 P4D 4aadf067 PUD 4aade067 PMD 0
[   37.959321][T15626] Oops: 0010 [#1] PREEMPT SMP
[   37.959739][T15626] CPU: 0 PID: 15626 Comm: rxrpc_preparse_ Not tainted 5.17.0-01442-gb47d5a4f6b8d #43
[   37.960588][T15626] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1 04/01/2014
[   37.961474][T15626] RIP: 0010:0x0
[   37.961787][T15626] Code: Unable to access opcode bytes at RIP 0xffffffffffffffd6.
[   37.962480][T15626] RSP: 0018:ffffc9000d9abdc0 EFLAGS: 00010286
[   37.963018][T15626] RAX: ffffffff84335200 RBX: ffff888012a1ce80 RCX: 0000000000000000
[   37.963727][T15626] RDX: 0000000000000000 RSI: ffffffff84a736dc RDI: ffffc9000d9abe48
[   37.964425][T15626] RBP: ffffc9000d9abe48 R08: 0000000000000000 R09: 0000000000000002
[   37.965118][T15626] R10: 000000000000000a R11: f000000000000000 R12: ffff888013145680
[   37.965836][T15626] R13: 0000000000000000 R14: ffffffffffffffec R15: ffff8880432aba80
[   37.966441][T15626] FS:  00007f2177907700(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
[   37.966979][T15626] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   37.967384][T15626] CR2: ffffffffffffffd6 CR3: 000000004aaf1000 CR4: 00000000000006f0
[   37.967864][T15626] Call Trace:
[   37.968062][T15626]  <TASK>
[   37.968240][T15626]  rxrpc_preparse_s+0x59/0x90
[   37.968541][T15626]  key_create_or_update+0x174/0x510
[   37.968863][T15626]  __x64_sys_add_key+0x139/0x1d0
[   37.969165][T15626]  do_syscall_64+0x35/0xb0
[   37.969451][T15626]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[   37.969824][T15626] RIP: 0033:0x43a1f9

Signed-off-by: Xiaolong Huang <butterflyhuangxx@gmail.com>
Tested-by: Xiaolong Huang <butterflyhuangxx@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: http://lists.infradead.org/pipermail/linux-afs/2022-March/005069.html
Fixes: 12da59fcab5a ("rxrpc: Hand server key parsing off to the security class")
Link: https://lore.kernel.org/r/164865013439.2941502.8966285221215590921.stgit@warthog.procyon.org.uk
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/rxrpc/server_key.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/rxrpc/server_key.c b/net/rxrpc/server_key.c
index ead3471307ee5..ee269e0e6ee87 100644
--- a/net/rxrpc/server_key.c
+++ b/net/rxrpc/server_key.c
@@ -84,6 +84,9 @@ static int rxrpc_preparse_s(struct key_preparsed_payload *prep)
 
 	prep->payload.data[1] = (struct rxrpc_security *)sec;
 
+	if (!sec->preparse_server_key)
+		return -EINVAL;
+
 	return sec->preparse_server_key(prep);
 }
 
@@ -91,7 +94,7 @@ static void rxrpc_free_preparse_s(struct key_preparsed_payload *prep)
 {
 	const struct rxrpc_security *sec = prep->payload.data[1];
 
-	if (sec)
+	if (sec && sec->free_preparse_server_key)
 		sec->free_preparse_server_key(prep);
 }
 
@@ -99,7 +102,7 @@ static void rxrpc_destroy_s(struct key *key)
 {
 	const struct rxrpc_security *sec = key->payload.data[1];
 
-	if (sec)
+	if (sec && sec->destroy_server_key)
 		sec->destroy_server_key(key);
 }
 

From ea07af2e71cdf2c08251d8ead196ce1c9466e38c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?St=C3=A9phane=20Graber?= <stgraber@ubuntu.com>
Date: Wed, 30 Mar 2022 15:42:45 -0400
Subject: [PATCH 56/57] openvswitch: Add recirc_id to recirc warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When hitting the recirculation limit, the kernel would currently log
something like this:

[   58.586597] openvswitch: ovs-system: deferred action limit reached, drop recirc action

Which isn't all that useful to debug as we only have the interface name
to go on but can't track it down to a specific flow.

With this change, we now instead get:

[   58.586597] openvswitch: ovs-system: deferred action limit reached, drop recirc action (recirc_id=0x9e)

Which can now be correlated with the flow entries from OVS.

Suggested-by: Frode Nordahl <frode.nordahl@canonical.com>
Signed-off-by: Stéphane Graber <stgraber@ubuntu.com>
Tested-by: Stephane Graber <stgraber@ubuntu.com>
Acked-by: Eelco Chaudron <echaudro@redhat.com>
Link: https://lore.kernel.org/r/20220330194244.3476544-1-stgraber@ubuntu.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/openvswitch/actions.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 780d9e2246f39..7056cb1b8ba0f 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1539,8 +1539,8 @@ static int clone_execute(struct datapath *dp, struct sk_buff *skb,
 				pr_warn("%s: deferred action limit reached, drop sample action\n",
 					ovs_dp_name(dp));
 			} else {  /* Recirc action */
-				pr_warn("%s: deferred action limit reached, drop recirc action\n",
-					ovs_dp_name(dp));
+				pr_warn("%s: deferred action limit reached, drop recirc action (recirc_id=%#x)\n",
+					ovs_dp_name(dp), recirc_id);
 			}
 		}
 	}

From 9d570741aec1e1ebd37823b34a2958f24809ff24 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 30 Mar 2022 12:46:43 -0700
Subject: [PATCH 57/57] vxlan: do not feed vxlan_vnifilter_dump_dev with non
 vxlan devices

vxlan_vnifilter_dump_dev() assumes it is called only
for vxlan devices. Make sure it is the case.

BUG: KASAN: slab-out-of-bounds in vxlan_vnifilter_dump_dev+0x9a0/0xb40 drivers/net/vxlan/vxlan_vnifilter.c:349
Read of size 4 at addr ffff888060d1ce70 by task syz-executor.3/17662

CPU: 0 PID: 17662 Comm: syz-executor.3 Tainted: G        W         5.17.0-syzkaller-12888-g77c9387c0c5b #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 <TASK>
 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
 print_address_description.constprop.0.cold+0xeb/0x495 mm/kasan/report.c:313
 print_report mm/kasan/report.c:429 [inline]
 kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
 vxlan_vnifilter_dump_dev+0x9a0/0xb40 drivers/net/vxlan/vxlan_vnifilter.c:349
 vxlan_vnifilter_dump+0x3ff/0x650 drivers/net/vxlan/vxlan_vnifilter.c:428
 netlink_dump+0x4b5/0xb70 net/netlink/af_netlink.c:2270
 __netlink_dump_start+0x647/0x900 net/netlink/af_netlink.c:2375
 netlink_dump_start include/linux/netlink.h:245 [inline]
 rtnetlink_rcv_msg+0x70c/0xb80 net/core/rtnetlink.c:5953
 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2496
 netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
 netlink_unicast+0x543/0x7f0 net/netlink/af_netlink.c:1345
 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1921
 sock_sendmsg_nosec net/socket.c:705 [inline]
 sock_sendmsg+0xcf/0x120 net/socket.c:725
 ____sys_sendmsg+0x6e2/0x800 net/socket.c:2413
 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467
 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0x80 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x44/0xae
RIP: 0033:0x7f87b8e89049

Fixes: f9c4bb0b245c ("vxlan: vni filtering support on collect metadata device")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: syzbot <syzkaller@googlegroups.com>
Acked-by: Roopa Prabhu <roopa@nvidia.com>
Link: https://lore.kernel.org/r/20220330194643.2706132-1-eric.dumazet@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/vxlan/vxlan_vnifilter.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index 9f28d0b6a6b26..3e04af4c5daa1 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -425,6 +425,12 @@ static int vxlan_vnifilter_dump(struct sk_buff *skb, struct netlink_callback *cb
 			err = -ENODEV;
 			goto out_err;
 		}
+		if (!netif_is_vxlan(dev)) {
+			NL_SET_ERR_MSG(cb->extack,
+				       "The device is not a vxlan device");
+			err = -EINVAL;
+			goto out_err;
+		}
 		err = vxlan_vnifilter_dump_dev(dev, skb, cb);
 		/* if the dump completed without an error we return 0 here */
 		if (err != -EMSGSIZE)