From dea450c90f463de57d7f351711a6ac7e89090843 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:07 -0400
Subject: [PATCH 01/32] fs: dlm: remove obsolete INBUF define

This patch removes an obsolete define for some length for an temporary
buffer which is not being used anymore. The use of this define is not
necessary anymore since commit 4798cbbfbd00 ("fs: dlm: rework receive
handling").

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 5f57538b5d450..44a5c67b52134 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -41,12 +41,6 @@
 #include <linux/dlm.h>
 #include "config.h"
 
-/* Size of the temp buffer midcomms allocates on the stack.
-   We try to make this large enough so most messages fit.
-   FIXME: should sctp make this unnecessary? */
-
-#define DLM_INBUF_LEN		148
-
 struct dlm_ls;
 struct dlm_lkb;
 struct dlm_rsb;

From bb6866a5bdc5ff0236147c01394f6a264978a16c Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:08 -0400
Subject: [PATCH 02/32] fs: dlm: fix small lockspace typo

This patch fixes a typo from lockspace to lockspace.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lockspace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 10eddfa6c3d7b..b90566502a813 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -868,7 +868,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
  * until this returns.
  *
  * Force has 4 possible values:
- * 0 - don't destroy locksapce if it has any LKBs
+ * 0 - don't destroy lockspace if it has any LKBs
  * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs
  * 2 - destroy lockspace regardless of LKBs
  * 3 - destroy lockspace as part of a forced shutdown

From 1aafd9c231919dea9b10e654107e24d5c553c60d Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:09 -0400
Subject: [PATCH 03/32] fs: dlm: debug improvements print nodeid

This patch improves the debug output for midcomms layer by also printing
out the nodeid where users counter belongs to.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 7ae39ec8d9b0a..008078f06813d 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1231,7 +1231,7 @@ void dlm_midcomms_add_member(int nodeid)
 	}
 
 	node->users++;
-	pr_debug("users inc count %d\n", node->users);
+	pr_debug("node %d users inc count %d\n", nodeid, node->users);
 	spin_unlock(&node->state_lock);
 
 	srcu_read_unlock(&nodes_srcu, idx);
@@ -1254,7 +1254,7 @@ void dlm_midcomms_remove_member(int nodeid)
 
 	spin_lock(&node->state_lock);
 	node->users--;
-	pr_debug("users dec count %d\n", node->users);
+	pr_debug("node %d users dec count %d\n", nodeid, node->users);
 
 	/* hitting users count to zero means the
 	 * other side is running dlm_midcomms_stop()

From fe93367541bcedaba1dd5cb9cf138eec0267ea56 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:10 -0400
Subject: [PATCH 04/32] fs: dlm: remove check SCTP is loaded message

Since commit 764ff4011424 ("fs: dlm: auto load sctp module") we try
load the sctp module before we try to create a sctp kernel socket. That
a socket creation fails now has more likely other reasons. This patch
removes the part of error to load the sctp module and instead printout
the error code.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 8f715c620e1f8..bee3757eb4c73 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1775,7 +1775,7 @@ static int dlm_listen_for_all(void)
 	result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family,
 				  SOCK_STREAM, dlm_proto_ops->proto, &sock);
 	if (result < 0) {
-		log_print("Can't create comms socket, check SCTP is loaded");
+		log_print("Can't create comms socket: %d", result);
 		goto out;
 	}
 

From 658bd576f95ed597e519cdadf1c86ac87c17aea5 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:11 -0400
Subject: [PATCH 05/32] fs: dlm: move version conversion to compile time

This patch moves version conversion to little endian from a runtime
variable to compile time constant.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 008078f06813d..76bdc3a9dc61d 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -909,11 +909,11 @@ int dlm_process_incoming_buffer(int nodeid, unsigned char *buf, int len)
 		if (msglen > len)
 			break;
 
-		switch (le32_to_cpu(hd->h_version)) {
-		case DLM_VERSION_3_1:
+		switch (hd->h_version) {
+		case cpu_to_le32(DLM_VERSION_3_1):
 			dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid);
 			break;
-		case DLM_VERSION_3_2:
+		case cpu_to_le32(DLM_VERSION_3_2):
 			dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid);
 			break;
 		default:

From 3e9736713d0cb2877b11ec7185b231bba7b21936 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:12 -0400
Subject: [PATCH 06/32] fs: dlm: use dlm_recovery_stopped instead of test_bit

This patch will change to use dlm_recovery_stopped() which is the dlm way
to check if the LSFL_RECOVER_STOP flag in ls_flags by using the helper.
It is an atomic operation but the check is still as before to fetch the
value if ls_recover_lock is held. There might be more further
investigations if the value can be changed afterwards and if it has any
side effects.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/rcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6cba86470278a..5821b777a1a74 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -601,7 +601,7 @@ void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 
 	spin_lock(&ls->ls_recover_lock);
 	status = ls->ls_recover_status;
-	stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
+	stop = dlm_recovery_stopped(ls);
 	seq = ls->ls_recover_seq;
 	spin_unlock(&ls->ls_recover_lock);
 

From e10249b1902d3b0b71e99f518a695c2c39ab4fe6 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:13 -0400
Subject: [PATCH 07/32] fs: dlm: use dlm_recovery_stopped in condition

This patch will change to evaluate the dlm_recovery_stopped() in the
condition of the if branch instead fetch it before evaluating the
condition. As this is an atomic test-set operation it should be
evaluated in the condition itself.

Reported-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dir.c      | 3 +--
 fs/dlm/member.c   | 3 +--
 fs/dlm/recoverd.c | 3 +--
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
index 45ebbe602bbf0..b6692f81ec83e 100644
--- a/fs/dlm/dir.c
+++ b/fs/dlm/dir.c
@@ -84,8 +84,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
 
 		for (;;) {
 			int left;
-			error = dlm_recovery_stopped(ls);
-			if (error) {
+			if (dlm_recovery_stopped(ls)) {
 				error = -EINTR;
 				goto out_free;
 			}
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 731d489aa323e..61f906e705db8 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -442,8 +442,7 @@ static int ping_members(struct dlm_ls *ls)
 	int error = 0;
 
 	list_for_each_entry(memb, &ls->ls_nodes, list) {
-		error = dlm_recovery_stopped(ls);
-		if (error) {
+		if (dlm_recovery_stopped(ls)) {
 			error = -EINTR;
 			break;
 		}
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 97d052cea5a92..a55dfce705dd2 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -124,8 +124,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_recover_waiters_pre(ls);
 
-	error = dlm_recovery_stopped(ls);
-	if (error) {
+	if (dlm_recovery_stopped(ls)) {
 		error = -EINTR;
 		goto fail;
 	}

From 2f05ec4327ffaa34877de67fc5bb5eb3ab3767f0 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:14 -0400
Subject: [PATCH 08/32] fs: dlm: make dlm_callback_resume quite

This patch makes dlm_callback_resume info printout less noisy by
accumulate all callback queues into one printout not in 25 times steps.
It seems this printout became lately quite noisy in relationship with
gfs2.

Before:

[241767.849302] dlm: bin: dlm_callback_resume 25
[241767.854846] dlm: bin: dlm_callback_resume 25
[241767.860373] dlm: bin: dlm_callback_resume 25
...
[241767.865920] dlm: bin: dlm_callback_resume 25
[241767.871352] dlm: bin: dlm_callback_resume 25
[241767.876733] dlm: bin: dlm_callback_resume 25

After the patch:

[  385.485728] dlm: gfs2: dlm_callback_resume 175

if zero it will not be printed out.

Reported-by: Barry Marson <bmarson@redhat.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/ast.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 283c7b94eddad..6600930497ccc 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -295,7 +295,7 @@ void dlm_callback_suspend(struct dlm_ls *ls)
 void dlm_callback_resume(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb, *safe;
-	int count = 0;
+	int count = 0, sum = 0;
 
 	clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
 
@@ -313,12 +313,14 @@ void dlm_callback_resume(struct dlm_ls *ls)
 	}
 	mutex_unlock(&ls->ls_cb_mutex);
 
-	if (count)
-		log_rinfo(ls, "dlm_callback_resume %d", count);
+	sum += count;
 	if (count == MAX_CB_QUEUE) {
 		count = 0;
 		cond_resched();
 		goto more;
 	}
+
+	if (sum)
+		log_rinfo(ls, "%s %d", __func__, sum);
 }
 

From f1d3b8f91d965c4fd900ac5dd06240cc9df0c7a7 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:15 -0400
Subject: [PATCH 09/32] fs: dlm: initial support for tracepoints

This patch adds initial support for dlm tracepoints. It will introduce
tracepoints to dlm main functionality dlm_lock()/dlm_unlock() and their
complete ast() callback or blocking bast() callback.

The lock/unlock functionality has a start and end tracepoint, this is
because there exists a race in case if would have a tracepoint at the
end position only the complete/blocking callbacks could occur before. To
work with eBPF tracing and using their lookup hash functionality there
could be problems that an entry was not inserted yet. However use the
start functionality for hash insert and check again in end functionality
if there was an dlm internal error so there is no ast callback. In further
it might also that locks with local masters will occur those callbacks
immediately so we must have such functionality.

I did not make everything accessible yet, although it seems eBPF can be
used to access a lot of internal datastructures if it's aware of the
struct definitions of the running kernel instance. We still can change
it, if you do eBPF experiments e.g. time measurements between lock and
callback functionality you can simple use the local lkb_id field as hash
value in combination with the lockspace id if you have multiple
lockspaces. Otherwise you can simple use trace-cmd for some functionality,
e.g. `trace-cmd record -e dlm` and `trace-cmd report` afterwards.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/ast.c               |   4 +
 fs/dlm/lock.c              |  10 ++
 fs/dlm/main.c              |   3 +
 include/trace/events/dlm.h | 220 +++++++++++++++++++++++++++++++++++++
 4 files changed, 237 insertions(+)
 create mode 100644 include/trace/events/dlm.h

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 6600930497ccc..27bae7d4a477a 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -9,6 +9,8 @@
 *******************************************************************************
 ******************************************************************************/
 
+#include <trace/events/dlm.h>
+
 #include "dlm_internal.h"
 #include "lock.h"
 #include "user.h"
@@ -254,10 +256,12 @@ void dlm_callback_work(struct work_struct *work)
 			continue;
 		} else if (callbacks[i].flags & DLM_CB_BAST) {
 			bastfn(lkb->lkb_astparam, callbacks[i].mode);
+			trace_dlm_bast(ls, lkb, callbacks[i].mode);
 		} else if (callbacks[i].flags & DLM_CB_CAST) {
 			lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
 			lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
 			castfn(lkb->lkb_astparam);
+			trace_dlm_ast(ls, lkb, lkb->lkb_lksb);
 		}
 	}
 
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index c502c065d0075..feb2e94f5879e 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -53,6 +53,8 @@
                                    R: do_xxxx()
    L: receive_xxxx_reply()     <-  R: send_xxxx_reply()
 */
+#include <trace/events/dlm.h>
+
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <linux/slab.h>
@@ -3437,6 +3439,8 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (error)
 		goto out;
 
+	trace_dlm_lock_start(ls, lkb, mode, flags);
+
 	error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
 			      astarg, bast, &args);
 	if (error)
@@ -3450,6 +3454,8 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (error == -EINPROGRESS)
 		error = 0;
  out_put:
+	trace_dlm_lock_end(ls, lkb, mode, flags, error);
+
 	if (convert || error)
 		__put_lkb(ls, lkb);
 	if (error == -EAGAIN || error == -EDEADLK)
@@ -3481,6 +3487,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 	if (error)
 		goto out;
 
+	trace_dlm_unlock_start(ls, lkb, flags);
+
 	error = set_unlock_args(flags, astarg, &args);
 	if (error)
 		goto out_put;
@@ -3495,6 +3503,8 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 	if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK)))
 		error = 0;
  out_put:
+	trace_dlm_unlock_end(ls, lkb, flags, error);
+
 	dlm_put_lkb(lkb);
  out:
 	dlm_unlock_recovery(ls);
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index afc66a1346d3d..1c5be4b70ac1b 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -19,6 +19,9 @@
 #include "config.h"
 #include "lowcomms.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/dlm.h>
+
 static int __init init_dlm(void)
 {
 	int error;
diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
new file mode 100644
index 0000000000000..c97b4c163c3e9
--- /dev/null
+++ b/include/trace/events/dlm.h
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dlm
+
+#if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DLM_H
+
+#include <linux/dlm.h>
+#include <linux/dlmconstants.h>
+#include <linux/tracepoint.h>
+
+#include "../../../fs/dlm/dlm_internal.h"
+
+#define show_lock_flags(flags) __print_flags(flags, "|",	\
+	{ DLM_LKF_NOQUEUE,	"NOQUEUE" },			\
+	{ DLM_LKF_CANCEL,	"CANCEL" },			\
+	{ DLM_LKF_CONVERT,	"CONVERT" },			\
+	{ DLM_LKF_VALBLK,	"VALBLK" },			\
+	{ DLM_LKF_QUECVT,	"QUECVT" },			\
+	{ DLM_LKF_IVVALBLK,	"IVVALBLK" },			\
+	{ DLM_LKF_CONVDEADLK,	"CONVDEADLK" },			\
+	{ DLM_LKF_PERSISTENT,	"PERSISTENT" },			\
+	{ DLM_LKF_NODLCKWT,	"NODLCKWT" },			\
+	{ DLM_LKF_NODLCKBLK,	"NODLCKBLK" },			\
+	{ DLM_LKF_EXPEDITE,	"EXPEDITE" },			\
+	{ DLM_LKF_NOQUEUEBAST,	"NOQUEUEBAST" },		\
+	{ DLM_LKF_HEADQUE,	"HEADQUE" },			\
+	{ DLM_LKF_NOORDER,	"NOORDER" },			\
+	{ DLM_LKF_ORPHAN,	"ORPHAN" },			\
+	{ DLM_LKF_ALTPR,	"ALTPR" },			\
+	{ DLM_LKF_ALTCW,	"ALTCW" },			\
+	{ DLM_LKF_FORCEUNLOCK,	"FORCEUNLOCK" },		\
+	{ DLM_LKF_TIMEOUT,	"TIMEOUT" })
+
+#define show_lock_mode(mode) __print_symbolic(mode,		\
+	{ DLM_LOCK_IV,		"IV"},				\
+	{ DLM_LOCK_NL,		"NL"},				\
+	{ DLM_LOCK_CR,		"CR"},				\
+	{ DLM_LOCK_CW,		"CW"},				\
+	{ DLM_LOCK_PR,		"PR"},				\
+	{ DLM_LOCK_PW,		"PW"},				\
+	{ DLM_LOCK_EX,		"EX"})
+
+#define show_dlm_sb_flags(flags) __print_flags(flags, "|",	\
+	{ DLM_SBF_DEMOTED,	"DEMOTED" },			\
+	{ DLM_SBF_VALNOTVALID,	"VALNOTVALID" },		\
+	{ DLM_SBF_ALTMODE,	"ALTMODE" })
+
+/* note: we begin tracing dlm_lock_start() only if ls and lkb are found */
+TRACE_EVENT(dlm_lock_start,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode,
+		 __u32 flags),
+
+	TP_ARGS(ls, lkb, mode, flags),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(int, mode)
+		__field(__u32, flags)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->mode = mode;
+		__entry->flags = flags;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_mode(__entry->mode),
+		  show_lock_flags(__entry->flags))
+
+);
+
+TRACE_EVENT(dlm_lock_end,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags,
+		 int error),
+
+	TP_ARGS(ls, lkb, mode, flags, error),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(int, mode)
+		__field(__u32, flags)
+		__field(int, error)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->mode = mode;
+		__entry->flags = flags;
+
+		/* return value will be zeroed in those cases by dlm_lock()
+		 * we do it here again to not introduce more overhead if
+		 * trace isn't running and error reflects the return value.
+		 */
+		if (error == -EAGAIN || error == -EDEADLK)
+			__entry->error = 0;
+		else
+			__entry->error = error;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_mode(__entry->mode),
+		  show_lock_flags(__entry->flags), __entry->error)
+
+);
+
+TRACE_EVENT(dlm_bast,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode),
+
+	TP_ARGS(ls, lkb, mode),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(int, mode)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->mode = mode;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id,
+		  __entry->lkb_id, show_lock_mode(__entry->mode))
+
+);
+
+TRACE_EVENT(dlm_ast,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb),
+
+	TP_ARGS(ls, lkb, lksb),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(u8, sb_flags)
+		__field(int, sb_status)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->sb_flags = lksb->sb_flags;
+		__entry->sb_status = lksb->sb_status;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status)
+
+);
+
+/* note: we begin tracing dlm_unlock_start() only if ls and lkb are found */
+TRACE_EVENT(dlm_unlock_start,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags),
+
+	TP_ARGS(ls, lkb, flags),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(__u32, flags)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->flags = flags;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x flags=%s",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_flags(__entry->flags))
+
+);
+
+TRACE_EVENT(dlm_unlock_end,
+
+	TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags,
+		 int error),
+
+	TP_ARGS(ls, lkb, flags, error),
+
+	TP_STRUCT__entry(
+		__field(__u32, ls_id)
+		__field(__u32, lkb_id)
+		__field(__u32, flags)
+		__field(int, error)
+	),
+
+	TP_fast_assign(
+		__entry->ls_id = ls->ls_global_id;
+		__entry->lkb_id = lkb->lkb_id;
+		__entry->flags = flags;
+		__entry->error = error;
+	),
+
+	TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d",
+		  __entry->ls_id, __entry->lkb_id,
+		  show_lock_flags(__entry->flags), __entry->error)
+
+);
+
+#endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

From 92732376fd29462b502f41486bcef55f49c5713e Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:16 -0400
Subject: [PATCH 10/32] fs: dlm: trace socket handling

This patch adds tracepoints for dlm socket receive and send
functionality. We can use it to track how much data was send or received
to or from a specific nodeid.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c          |  4 ++++
 include/trace/events/dlm.h | 40 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index bee3757eb4c73..6d6dcf0d5ba97 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -53,6 +53,8 @@
 #include <net/sctp/sctp.h>
 #include <net/ipv6.h>
 
+#include <trace/events/dlm.h>
+
 #include "dlm_internal.h"
 #include "lowcomms.h"
 #include "midcomms.h"
@@ -925,6 +927,7 @@ static int receive_from_sock(struct connection *con)
 		msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;
 		ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len,
 				     msg.msg_flags);
+		trace_dlm_recv(con->nodeid, ret);
 		if (ret == -EAGAIN)
 			break;
 		else if (ret <= 0)
@@ -1411,6 +1414,7 @@ static void send_to_sock(struct connection *con)
 
 		ret = kernel_sendpage(con->sock, e->page, offset, len,
 				      msg_flags);
+		trace_dlm_send(con->nodeid, ret);
 		if (ret == -EAGAIN || ret == 0) {
 			if (ret == -EAGAIN &&
 			    test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) &&
diff --git a/include/trace/events/dlm.h b/include/trace/events/dlm.h
index c97b4c163c3e9..32088c6032445 100644
--- a/include/trace/events/dlm.h
+++ b/include/trace/events/dlm.h
@@ -214,6 +214,46 @@ TRACE_EVENT(dlm_unlock_end,
 
 );
 
+TRACE_EVENT(dlm_send,
+
+	TP_PROTO(int nodeid, int ret),
+
+	TP_ARGS(nodeid, ret),
+
+	TP_STRUCT__entry(
+		__field(int, nodeid)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->nodeid = nodeid;
+		__entry->ret = ret;
+	),
+
+	TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret)
+
+);
+
+TRACE_EVENT(dlm_recv,
+
+	TP_PROTO(int nodeid, int ret),
+
+	TP_ARGS(nodeid, ret),
+
+	TP_STRUCT__entry(
+		__field(int, nodeid)
+		__field(int, ret)
+	),
+
+	TP_fast_assign(
+		__entry->nodeid = nodeid;
+		__entry->ret = ret;
+	),
+
+	TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret)
+
+);
+
 #endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */
 
 /* This part must be outside protection */

From 164d88abd7608e869b7617d5ff8893344fdda759 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:17 -0400
Subject: [PATCH 11/32] fs: dlm: requestqueue busy wait to event based wait

This patch changes the requestqueue busy waiting algorithm to use
atomic counter values and wait_event() to wait until the requestqueue is
empty. It will slightly reduce the number of holding ls_requestqueue_mutex
mutex.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h |  2 ++
 fs/dlm/lockspace.c    |  2 ++
 fs/dlm/requestqueue.c | 15 +++++++--------
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 44a5c67b52134..fd1c7a8c44855 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -626,6 +626,8 @@ struct dlm_ls {
 	struct rw_semaphore	ls_in_recovery;	/* block local requests */
 	struct rw_semaphore	ls_recv_active;	/* block dlm_recv */
 	struct list_head	ls_requestqueue;/* queue remote requests */
+	atomic_t		ls_requestqueue_cnt;
+	wait_queue_head_t	ls_requestqueue_wait;
 	struct mutex		ls_requestqueue_mutex;
 	struct dlm_rcom		*ls_recover_buf;
 	int			ls_recover_nodeid; /* for debugging */
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index b90566502a813..4e4181304ca16 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -564,6 +564,8 @@ static int new_lockspace(const char *name, const char *cluster,
 	init_rwsem(&ls->ls_in_recovery);
 	init_rwsem(&ls->ls_recv_active);
 	INIT_LIST_HEAD(&ls->ls_requestqueue);
+	atomic_set(&ls->ls_requestqueue_cnt, 0);
+	init_waitqueue_head(&ls->ls_requestqueue_wait);
 	mutex_init(&ls->ls_requestqueue_mutex);
 	mutex_init(&ls->ls_clear_proc_locks);
 
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index e89e0ff8bfa3a..d0cf68570dcf6 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -44,6 +44,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
 	e->nodeid = nodeid;
 	memcpy(&e->request, ms, ms->m_header.h_length);
 
+	atomic_inc(&ls->ls_requestqueue_cnt);
 	mutex_lock(&ls->ls_requestqueue_mutex);
 	list_add_tail(&e->list, &ls->ls_requestqueue);
 	mutex_unlock(&ls->ls_requestqueue_mutex);
@@ -89,6 +90,8 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
 
 		mutex_lock(&ls->ls_requestqueue_mutex);
 		list_del(&e->list);
+		if (atomic_dec_and_test(&ls->ls_requestqueue_cnt))
+			wake_up(&ls->ls_requestqueue_wait);
 		kfree(e);
 
 		if (dlm_locking_stopped(ls)) {
@@ -115,14 +118,8 @@ int dlm_process_requestqueue(struct dlm_ls *ls)
 
 void dlm_wait_requestqueue(struct dlm_ls *ls)
 {
-	for (;;) {
-		mutex_lock(&ls->ls_requestqueue_mutex);
-		if (list_empty(&ls->ls_requestqueue))
-			break;
-		mutex_unlock(&ls->ls_requestqueue_mutex);
-		schedule();
-	}
-	mutex_unlock(&ls->ls_requestqueue_mutex);
+	wait_event(ls->ls_requestqueue_wait,
+		   atomic_read(&ls->ls_requestqueue_cnt) == 0);
 }
 
 static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
@@ -161,6 +158,8 @@ void dlm_purge_requestqueue(struct dlm_ls *ls)
 
 		if (purge_request(ls, ms, e->nodeid)) {
 			list_del(&e->list);
+			if (atomic_dec_and_test(&ls->ls_requestqueue_cnt))
+				wake_up(&ls->ls_requestqueue_wait);
 			kfree(e);
 		}
 	}

From 3cb5977c5214c219b2859f926ed547480d53fdde Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:18 -0400
Subject: [PATCH 12/32] fs: dlm: ls_count busy wait to event based wait

This patch changes the ls_count busy wait to use atomic counter values
and wait_event() to wait until ls_count reach zero. It will slightly
reduce the number of holding lslist_lock. At remove lockspace we need to
retry the wait because it a lockspace get could interefere between
wait_event() and holding the lock which deletes the lockspace list entry.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h |  3 ++-
 fs/dlm/lockspace.c    | 33 +++++++++++++++++----------------
 fs/dlm/requestqueue.c |  2 +-
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index fd1c7a8c44855..019931804af9b 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -548,8 +548,9 @@ struct dlm_ls {
 	uint32_t		ls_generation;
 	uint32_t		ls_exflags;
 	int			ls_lvblen;
-	int			ls_count;	/* refcount of processes in
+	atomic_t		ls_count;	/* refcount of processes in
 						   the dlm using this ls */
+	wait_queue_head_t	ls_count_wait;
 	int			ls_create_count; /* create/release refcount */
 	unsigned long		ls_flags;	/* LSFL_ */
 	unsigned long		ls_scan_time;
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 4e4181304ca16..2e51bd2bdacce 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -314,7 +314,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id)
 
 	list_for_each_entry(ls, &lslist, ls_list) {
 		if (ls->ls_global_id == id) {
-			ls->ls_count++;
+			atomic_inc(&ls->ls_count);
 			goto out;
 		}
 	}
@@ -331,7 +331,7 @@ struct dlm_ls *dlm_find_lockspace_local(dlm_lockspace_t *lockspace)
 	spin_lock(&lslist_lock);
 	list_for_each_entry(ls, &lslist, ls_list) {
 		if (ls->ls_local_handle == lockspace) {
-			ls->ls_count++;
+			atomic_inc(&ls->ls_count);
 			goto out;
 		}
 	}
@@ -348,7 +348,7 @@ struct dlm_ls *dlm_find_lockspace_device(int minor)
 	spin_lock(&lslist_lock);
 	list_for_each_entry(ls, &lslist, ls_list) {
 		if (ls->ls_device.minor == minor) {
-			ls->ls_count++;
+			atomic_inc(&ls->ls_count);
 			goto out;
 		}
 	}
@@ -360,24 +360,24 @@ struct dlm_ls *dlm_find_lockspace_device(int minor)
 
 void dlm_put_lockspace(struct dlm_ls *ls)
 {
-	spin_lock(&lslist_lock);
-	ls->ls_count--;
-	spin_unlock(&lslist_lock);
+	if (atomic_dec_and_test(&ls->ls_count))
+		wake_up(&ls->ls_count_wait);
 }
 
 static void remove_lockspace(struct dlm_ls *ls)
 {
-	for (;;) {
-		spin_lock(&lslist_lock);
-		if (ls->ls_count == 0) {
-			WARN_ON(ls->ls_create_count != 0);
-			list_del(&ls->ls_list);
-			spin_unlock(&lslist_lock);
-			return;
-		}
+retry:
+	wait_event(ls->ls_count_wait, atomic_read(&ls->ls_count) == 0);
+
+	spin_lock(&lslist_lock);
+	if (atomic_read(&ls->ls_count) != 0) {
 		spin_unlock(&lslist_lock);
-		ssleep(1);
+		goto retry;
 	}
+
+	WARN_ON(ls->ls_create_count != 0);
+	list_del(&ls->ls_list);
+	spin_unlock(&lslist_lock);
 }
 
 static int threads_start(void)
@@ -481,7 +481,8 @@ static int new_lockspace(const char *name, const char *cluster,
 	memcpy(ls->ls_name, name, namelen);
 	ls->ls_namelen = namelen;
 	ls->ls_lvblen = lvblen;
-	ls->ls_count = 0;
+	atomic_set(&ls->ls_count, 0);
+	init_waitqueue_head(&ls->ls_count_wait);
 	ls->ls_flags = 0;
 	ls->ls_scan_time = jiffies;
 
diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
index d0cf68570dcf6..ccb5307c21e90 100644
--- a/fs/dlm/requestqueue.c
+++ b/fs/dlm/requestqueue.c
@@ -127,7 +127,7 @@ static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid)
 	uint32_t type = ms->m_type;
 
 	/* the ls is being cleaned up and freed by release_lockspace */
-	if (!ls->ls_count)
+	if (!atomic_read(&ls->ls_count))
 		return 1;
 
 	if (dlm_is_removed(ls, nodeid))

From 5c16febbc19bb463bfb8e80cb5b24ec6ff1a439f Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:19 -0400
Subject: [PATCH 13/32] fs: dlm: let handle callback data as void

This patch changes the dlm_lowcomms_new_msg() function pointer private data
from "struct mhandle *" to "void *" to provide different structures than
just "struct mhandle".

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 19 +++++++++----------
 fs/dlm/lowcomms.h |  4 ++--
 fs/dlm/midcomms.c |  4 +++-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 6d6dcf0d5ba97..3f8b015ba7990 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1205,8 +1205,7 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con,
 
 static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 					     gfp_t allocation, char **ppc,
-					     void (*cb)(struct dlm_mhandle *mh),
-					     struct dlm_mhandle *mh)
+					     void (*cb)(void *data), void *data)
 {
 	struct writequeue_entry *e;
 
@@ -1218,7 +1217,7 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 
 			*ppc = page_address(e->page) + e->end;
 			if (cb)
-				cb(mh);
+				cb(data);
 
 			e->end += len;
 			e->users++;
@@ -1240,7 +1239,7 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 
 	spin_lock(&con->writequeue_lock);
 	if (cb)
-		cb(mh);
+		cb(data);
 
 	list_add_tail(&e->list, &con->writequeue);
 	spin_unlock(&con->writequeue_lock);
@@ -1250,8 +1249,8 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 
 static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 						gfp_t allocation, char **ppc,
-						void (*cb)(struct dlm_mhandle *mh),
-						struct dlm_mhandle *mh)
+						void (*cb)(void *data),
+						void *data)
 {
 	struct writequeue_entry *e;
 	struct dlm_msg *msg;
@@ -1274,7 +1273,7 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 
 	kref_init(&msg->ref);
 
-	e = new_wq_entry(con, len, allocation, ppc, cb, mh);
+	e = new_wq_entry(con, len, allocation, ppc, cb, data);
 	if (!e) {
 		if (sleepable)
 			mutex_unlock(&con->wq_alloc);
@@ -1294,8 +1293,8 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 }
 
 struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
-				     char **ppc, void (*cb)(struct dlm_mhandle *mh),
-				     struct dlm_mhandle *mh)
+				     char **ppc, void (*cb)(void *data),
+				     void *data)
 {
 	struct connection *con;
 	struct dlm_msg *msg;
@@ -1316,7 +1315,7 @@ struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
 		return NULL;
 	}
 
-	msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh);
+	msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, data);
 	if (!msg) {
 		srcu_read_unlock(&connections_srcu, idx);
 		return NULL;
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 4ccae07cf0058..8108ea24ec301 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -38,8 +38,8 @@ void dlm_lowcomms_stop(void);
 void dlm_lowcomms_exit(void);
 int dlm_lowcomms_close(int nodeid);
 struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation,
-				     char **ppc, void (*cb)(struct dlm_mhandle *mh),
-				     struct dlm_mhandle *mh);
+				     char **ppc, void (*cb)(void *data),
+				     void *data);
 void dlm_lowcomms_commit_msg(struct dlm_msg *msg);
 void dlm_lowcomms_put_msg(struct dlm_msg *msg);
 int dlm_lowcomms_resend_msg(struct dlm_msg *msg);
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 76bdc3a9dc61d..95a5643a950e3 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1020,8 +1020,10 @@ static void dlm_fill_opts_header(struct dlm_opts *opts, uint16_t inner_len,
 	header_out(&opts->o_header);
 }
 
-static void midcomms_new_msg_cb(struct dlm_mhandle *mh)
+static void midcomms_new_msg_cb(void *data)
 {
+	struct dlm_mhandle *mh = data;
+
 	atomic_inc(&mh->node->send_queue_cnt);
 
 	spin_lock(&mh->node->send_queue_lock);

From 9af5b8f0ead7cd90161b0555ed8e85ee38f79fa5 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:20 -0400
Subject: [PATCH 14/32] fs: dlm: add debugfs rawmsg send functionality

This patch adds a dlm functionality to send a raw dlm message to a
specific cluster node. This raw message can be build by user space and
send out by writing the message to "rawmsg" dlm debugfs file.

There is a in progress scapy dlm module which provides a easy build of
DLM messages in user space. For example:

DLM(h_cmd=3, o_nextcmd=1, h_nodeid=1, h_lockspace=0xe4f48a18, ...)

The goal is to provide an easy reproducable state to crash DLM or to
fuzz the DLM kernel stack if there are possible ways to crash it.

Note: that if the sequence number is zero and dlm version is not set to
3.1 the kernel will automatic will set a right sequence number, otherwise
DLM stack testing is not possible.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/debug_fs.c | 37 ++++++++++++++++++++++++++++++++++++
 fs/dlm/midcomms.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/midcomms.h |  2 ++
 3 files changed, 87 insertions(+)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 47e9d57e4cae3..555904eeea8ea 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -768,6 +768,42 @@ static int dlm_version_show(struct seq_file *file, void *offset)
 }
 DEFINE_SHOW_ATTRIBUTE(dlm_version);
 
+static ssize_t dlm_rawmsg_write(struct file *fp, const char __user *user_buf,
+				size_t count, loff_t *ppos)
+{
+	void *buf;
+	int ret;
+
+	if (count > PAGE_SIZE || count < sizeof(struct dlm_header))
+		return -EINVAL;
+
+	buf = kmalloc(PAGE_SIZE, GFP_NOFS);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, user_buf, count)) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	ret = dlm_midcomms_rawmsg_send(fp->private_data, buf, count);
+	if (ret)
+		goto out;
+
+	kfree(buf);
+	return count;
+
+out:
+	kfree(buf);
+	return ret;
+}
+
+static const struct file_operations dlm_rawmsg_fops = {
+	.open	= simple_open,
+	.write	= dlm_rawmsg_write,
+	.llseek	= no_llseek,
+};
+
 void *dlm_create_debug_comms_file(int nodeid, void *data)
 {
 	struct dentry *d_node;
@@ -782,6 +818,7 @@ void *dlm_create_debug_comms_file(int nodeid, void *data)
 	debugfs_create_file("send_queue_count", 0444, d_node, data,
 			    &dlm_send_queue_cnt_fops);
 	debugfs_create_file("version", 0444, d_node, data, &dlm_version_fops);
+	debugfs_create_file("rawmsg", 0200, d_node, data, &dlm_rawmsg_fops);
 
 	return d_node;
 }
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 95a5643a950e3..0b9bce6f04e14 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -1427,3 +1427,51 @@ int dlm_midcomms_close(int nodeid)
 
 	return ret;
 }
+
+/* debug functionality to send raw dlm msg from user space */
+struct dlm_rawmsg_data {
+	struct midcomms_node *node;
+	void *buf;
+};
+
+static void midcomms_new_rawmsg_cb(void *data)
+{
+	struct dlm_rawmsg_data *rd = data;
+	struct dlm_header *h = rd->buf;
+
+	switch (h->h_version) {
+	case cpu_to_le32(DLM_VERSION_3_1):
+		break;
+	default:
+		switch (h->h_cmd) {
+		case DLM_OPTS:
+			if (!h->u.h_seq)
+				h->u.h_seq = rd->node->seq_send++;
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+}
+
+int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf,
+			     int buflen)
+{
+	struct dlm_rawmsg_data rd;
+	struct dlm_msg *msg;
+	char *msgbuf;
+
+	rd.node = node;
+	rd.buf = buf;
+
+	msg = dlm_lowcomms_new_msg(node->nodeid, buflen, GFP_NOFS,
+				   &msgbuf, midcomms_new_rawmsg_cb, &rd);
+	if (!msg)
+		return -ENOMEM;
+
+	memcpy(msgbuf, buf, buflen);
+	dlm_lowcomms_commit_msg(msg);
+	return 0;
+}
+
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index 579abc6929be2..bc63cf73aa872 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -28,6 +28,8 @@ const char *dlm_midcomms_state(struct midcomms_node *node);
 unsigned long dlm_midcomms_flags(struct midcomms_node *node);
 int dlm_midcomms_send_queue_cnt(struct midcomms_node *node);
 uint32_t dlm_midcomms_version(struct midcomms_node *node);
+int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf,
+			     int buflen);
 
 #endif				/* __MIDCOMMS_DOT_H__ */
 

From 75d25ffe380a01b88cb3bf604a6b8dc5a562a2e5 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:21 -0400
Subject: [PATCH 15/32] fs: dlm: allow create lkb with specific id range

This patch adds functionality to add a lkb with a specific id range.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lock.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index feb2e94f5879e..8b30c9d9e545d 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1180,7 +1180,8 @@ static void detach_lkb(struct dlm_lkb *lkb)
 	}
 }
 
-static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
+static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret,
+		       int start, int end)
 {
 	struct dlm_lkb *lkb;
 	int rv;
@@ -1201,7 +1202,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 
 	idr_preload(GFP_NOFS);
 	spin_lock(&ls->ls_lkbidr_spin);
-	rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT);
+	rv = idr_alloc(&ls->ls_lkbidr, lkb, start, end, GFP_NOWAIT);
 	if (rv >= 0)
 		lkb->lkb_id = rv;
 	spin_unlock(&ls->ls_lkbidr_spin);
@@ -1217,6 +1218,11 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 	return 0;
 }
 
+static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
+{
+	return _create_lkb(ls, lkb_ret, 1, 0);
+}
+
 static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
 {
 	struct dlm_lkb *lkb;

From 5054e79de99984b4f39a073534526bc7c827b1e0 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:22 -0400
Subject: [PATCH 16/32] fs: dlm: add lkb debugfs functionality

This patch adds functionality to add an lkb during runtime. This is a
highly debugging feature only, wrong input can crash the kernel. It is a
early state feature as well. The goal is to provide a user interface for
manipulate dlm state and combine it with the rawmsg feature. It is
debugfs functionality, we don't care about UAPI breakage. Even it's
possible to add lkb's/rsb's which could never be exists in such wat by
using normal DLM operation. The user of this interface always need to
think before using this feature, not every crash which happens can really
occur during normal dlm operation.

Future there should be more functionality to add a more realistic lkb
which reflects normal DLM state inside the kernel. For now this is
enough.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/debug_fs.c | 32 +++++++++++++++++++++++++++++++-
 fs/dlm/lock.c     | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/dlm/lock.h     |  2 ++
 3 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 555904eeea8ea..2ead4751d6556 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -635,6 +635,35 @@ static int table_open2(struct inode *inode, struct file *file)
 	return 0;
 }
 
+static ssize_t table_write2(struct file *file, const char __user *user_buf,
+			    size_t count, loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	int n, len, lkb_nodeid, lkb_status, error;
+	char name[DLM_RESNAME_MAXLEN] = {};
+	struct dlm_ls *ls = seq->private;
+	unsigned int lkb_flags;
+	char buf[256] = {};
+	uint32_t lkb_id;
+
+	if (copy_from_user(buf, user_buf,
+			   min_t(size_t, sizeof(buf) - 1, count)))
+		return -EFAULT;
+
+	n = sscanf(buf, "%x %" __stringify(DLM_RESNAME_MAXLEN) "s %x %d %d",
+		   &lkb_id, name, &lkb_flags, &lkb_nodeid, &lkb_status);
+	if (n != 5)
+		return -EINVAL;
+
+	len = strnlen(name, DLM_RESNAME_MAXLEN);
+	error = dlm_debug_add_lkb(ls, lkb_id, name, len, lkb_flags,
+				  lkb_nodeid, lkb_status);
+	if (error)
+		return error;
+
+	return count;
+}
+
 static int table_open3(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
@@ -675,6 +704,7 @@ static const struct file_operations format2_fops = {
 	.owner   = THIS_MODULE,
 	.open    = table_open2,
 	.read    = seq_read,
+	.write   = table_write2,
 	.llseek  = seq_lseek,
 	.release = seq_release
 };
@@ -846,7 +876,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_locks", ls->ls_name);
 
 	ls->ls_debug_locks_dentry = debugfs_create_file(name,
-							S_IFREG | S_IRUGO,
+							0644,
 							dlm_root,
 							ls,
 							&format2_fops);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 8b30c9d9e545d..aeb793693d8c5 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -6317,3 +6317,49 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	return error;
 }
 
+/* debug functionality */
+int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
+		      int lkb_nodeid, unsigned int lkb_flags, int lkb_status)
+{
+	struct dlm_lksb *lksb;
+	struct dlm_lkb *lkb;
+	struct dlm_rsb *r;
+	int error;
+
+	/* we currently can't set a valid user lock */
+	if (lkb_flags & DLM_IFL_USER)
+		return -EOPNOTSUPP;
+
+	lksb = kzalloc(sizeof(*lksb), GFP_NOFS);
+	if (!lksb)
+		return -ENOMEM;
+
+	error = _create_lkb(ls, &lkb, lkb_id, lkb_id + 1);
+	if (error) {
+		kfree(lksb);
+		return error;
+	}
+
+	lkb->lkb_flags = lkb_flags;
+	lkb->lkb_nodeid = lkb_nodeid;
+	lkb->lkb_lksb = lksb;
+	/* user specific pointer, just don't have it NULL for kernel locks */
+	if (~lkb_flags & DLM_IFL_USER)
+		lkb->lkb_astparam = (void *)0xDEADBEEF;
+
+	error = find_rsb(ls, name, len, 0, R_REQUEST, &r);
+	if (error) {
+		kfree(lksb);
+		__put_lkb(ls, lkb);
+		return error;
+	}
+
+	lock_rsb(r);
+	attach_lkb(r, lkb);
+	add_lkb(r, lkb, lkb_status);
+	unlock_rsb(r);
+	put_rsb(r);
+
+	return 0;
+}
+
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 456c6ec3ef6f4..863a66e128a22 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -58,6 +58,8 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	int nodeid, int pid);
 int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
+int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
+		      int lkb_nodeid, unsigned int lkb_flags, int lkb_status);
 
 static inline int is_master(struct dlm_rsb *r)
 {

From 63eab2b00bcff620682e8570367458c9619a9970 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:23 -0400
Subject: [PATCH 17/32] fs: dlm: add lkb waiters debugfs functionality

This patch adds functionality to put a lkb to the waiters state. It can
be useful to combine this feature with the "rawmsg" debugfs
functionality. It will bring the DLM lkb into a state that a message
will be parsed by the kernel.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/debug_fs.c | 27 ++++++++++++++++++++++++++-
 fs/dlm/lock.c     | 15 +++++++++++++++
 fs/dlm/lock.h     |  2 ++
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 2ead4751d6556..df6f3f107be4f 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -754,10 +754,35 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf,
 	return rv;
 }
 
+static ssize_t waiters_write(struct file *file, const char __user *user_buf,
+			     size_t count, loff_t *ppos)
+{
+	struct dlm_ls *ls = file->private_data;
+	int mstype, to_nodeid;
+	char buf[128] = {};
+	uint32_t lkb_id;
+	int n, error;
+
+	if (copy_from_user(buf, user_buf,
+			   min_t(size_t, sizeof(buf) - 1, count)))
+		return -EFAULT;
+
+	n = sscanf(buf, "%x %d %d", &lkb_id, &mstype, &to_nodeid);
+	if (n != 3)
+		return -EINVAL;
+
+	error = dlm_debug_add_lkb_to_waiters(ls, lkb_id, mstype, to_nodeid);
+	if (error)
+		return error;
+
+	return count;
+}
+
 static const struct file_operations waiters_fops = {
 	.owner   = THIS_MODULE,
 	.open    = simple_open,
 	.read    = waiters_read,
+	.write   = waiters_write,
 	.llseek  = default_llseek,
 };
 
@@ -907,7 +932,7 @@ void dlm_create_debug_file(struct dlm_ls *ls)
 	snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name);
 
 	ls->ls_debug_waiters_dentry = debugfs_create_file(name,
-							  S_IFREG | S_IRUGO,
+							  0644,
 							  dlm_root,
 							  ls,
 							  &waiters_fops);
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index aeb793693d8c5..0dbe273566c0b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -6363,3 +6363,18 @@ int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
 	return 0;
 }
 
+int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id,
+				 int mstype, int to_nodeid)
+{
+	struct dlm_lkb *lkb;
+	int error;
+
+	error = find_lkb(ls, lkb_id, &lkb);
+	if (error)
+		return error;
+
+	error = add_to_waiters(lkb, mstype, to_nodeid);
+	dlm_put_lkb(lkb);
+	return error;
+}
+
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 863a66e128a22..252a5898f9081 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -60,6 +60,8 @@ int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len,
 		      int lkb_nodeid, unsigned int lkb_flags, int lkb_status);
+int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id,
+				 int mstype, int to_nodeid);
 
 static inline int is_master(struct dlm_rsb *r)
 {

From 6c2e3bf68f3e5e5a647aa52be246d5f552d7496d Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 2 Nov 2021 15:17:24 -0400
Subject: [PATCH 18/32] fs: dlm: filter user dlm messages for kernel locks

This patch fixes the following crash by receiving a invalid message:

[  160.672220] ==================================================================
[  160.676206] BUG: KASAN: user-memory-access in dlm_user_add_ast+0xc3/0x370
[  160.679659] Read of size 8 at addr 00000000deadbeef by task kworker/u32:13/319
[  160.681447]
[  160.681824] CPU: 10 PID: 319 Comm: kworker/u32:13 Not tainted 5.14.0-rc2+ #399
[  160.683472] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.14.0-1.module+el8.6.0+12648+6ede71a5 04/01/2014
[  160.685574] Workqueue: dlm_recv process_recv_sockets
[  160.686721] Call Trace:
[  160.687310]  dump_stack_lvl+0x56/0x6f
[  160.688169]  ? dlm_user_add_ast+0xc3/0x370
[  160.689116]  kasan_report.cold.14+0x116/0x11b
[  160.690138]  ? dlm_user_add_ast+0xc3/0x370
[  160.690832]  dlm_user_add_ast+0xc3/0x370
[  160.691502]  _receive_unlock_reply+0x103/0x170
[  160.692241]  _receive_message+0x11df/0x1ec0
[  160.692926]  ? rcu_read_lock_sched_held+0xa1/0xd0
[  160.693700]  ? rcu_read_lock_bh_held+0xb0/0xb0
[  160.694427]  ? lock_acquire+0x175/0x400
[  160.695058]  ? do_purge.isra.51+0x200/0x200
[  160.695744]  ? lock_acquired+0x360/0x5d0
[  160.696400]  ? lock_contended+0x6a0/0x6a0
[  160.697055]  ? lock_release+0x21d/0x5e0
[  160.697686]  ? lock_is_held_type+0xe0/0x110
[  160.698352]  ? lock_is_held_type+0xe0/0x110
[  160.699026]  ? ___might_sleep+0x1cc/0x1e0
[  160.699698]  ? dlm_wait_requestqueue+0x94/0x140
[  160.700451]  ? dlm_process_requestqueue+0x240/0x240
[  160.701249]  ? down_write_killable+0x2b0/0x2b0
[  160.701988]  ? do_raw_spin_unlock+0xa2/0x130
[  160.702690]  dlm_receive_buffer+0x1a5/0x210
[  160.703385]  dlm_process_incoming_buffer+0x726/0x9f0
[  160.704210]  receive_from_sock+0x1c0/0x3b0
[  160.704886]  ? dlm_tcp_shutdown+0x30/0x30
[  160.705561]  ? lock_acquire+0x175/0x400
[  160.706197]  ? rcu_read_lock_sched_held+0xa1/0xd0
[  160.706941]  ? rcu_read_lock_bh_held+0xb0/0xb0
[  160.707681]  process_recv_sockets+0x32/0x40
[  160.708366]  process_one_work+0x55e/0xad0
[  160.709045]  ? pwq_dec_nr_in_flight+0x110/0x110
[  160.709820]  worker_thread+0x65/0x5e0
[  160.710423]  ? process_one_work+0xad0/0xad0
[  160.711087]  kthread+0x1ed/0x220
[  160.711628]  ? set_kthread_struct+0x80/0x80
[  160.712314]  ret_from_fork+0x22/0x30

The issue is that we received a DLM message for a user lock but the
destination lock is a kernel lock. Note that the address which is trying
to derefence is 00000000deadbeef, which is in a kernel lock
lkb->lkb_astparam, this field should never be derefenced by the DLM
kernel stack. In case of a user lock lkb->lkb_astparam is lkb->lkb_ua
(memory is shared by a union field). The struct lkb_ua will be handled
by the DLM kernel stack but on a kernel lock it will contain invalid
data and ends in most likely crashing the kernel.

It can be reproduced with two cluster nodes.

node 2:
dlm_tool join test
echo "862 fooobaar 1 2 1" > /sys/kernel/debug/dlm/test_locks
echo "862 3 1" > /sys/kernel/debug/dlm/test_waiters

node 1:
dlm_tool join test

python:
foo = DLM(h_cmd=3, o_nextcmd=1, h_nodeid=1, h_lockspace=0x77222027, \
          m_type=7, m_flags=0x1, m_remid=0x862, m_result=0xFFFEFFFE)
newFile = open("/sys/kernel/debug/dlm/comms/2/rawmsg", "wb")
newFile.write(bytes(foo))

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lock.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 0dbe273566c0b..54705d367076b 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -3989,6 +3989,14 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
 	int from = ms->m_header.h_nodeid;
 	int error = 0;
 
+	/* currently mixing of user/kernel locks are not supported */
+	if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) {
+		log_error(lkb->lkb_resource->res_ls,
+			  "got user dlm message for a kernel lock");
+		error = -EINVAL;
+		goto out;
+	}
+
 	switch (ms->m_type) {
 	case DLM_MSG_CONVERT:
 	case DLM_MSG_UNLOCK:
@@ -4017,6 +4025,7 @@ static int validate_message(struct dlm_lkb *lkb, struct dlm_message *ms)
 		error = -EINVAL;
 	}
 
+out:
 	if (error)
 		log_error(lkb->lkb_resource->res_ls,
 			  "ignore invalid message %d from %d %x %x %x %d",

From b87b1883efe385e56384ff48e6f3108a33fde508 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 3 Nov 2021 17:04:18 -0400
Subject: [PATCH 19/32] fs: dlm: remove double list_first_entry call

This patch removes a list_first_entry() call which is already done by
the previous con_next_wq() call.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3f8b015ba7990..2f070514b3eed 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1405,7 +1405,6 @@ static void send_to_sock(struct connection *con)
 		if (!e)
 			break;
 
-		e = list_first_entry(&con->writequeue, struct writequeue_entry, list);
 		len = e->len;
 		offset = e->offset;
 		BUG_ON(len == 0 && e->users == 0);

From c8b9f34e223fcad1e9980f343587f38624331bbc Mon Sep 17 00:00:00 2001
From: Zhang Mingyu <zhang.mingyu@zte.com.cn>
Date: Fri, 5 Nov 2021 01:43:20 +0000
Subject: [PATCH 20/32] fs: dlm:Remove unneeded semicolon

Eliminate the following coccinelle check warning:
fs/dlm/midcomms.c:972:2-3

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Zhang Mingyu <zhang.mingyu@zte.com.cn>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/midcomms.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 0b9bce6f04e14..74b4308b912cf 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -969,7 +969,7 @@ void dlm_midcomms_receive_done(int nodeid)
 		spin_unlock(&node->state_lock);
 		/* do nothing FIN has it's own ack send */
 		break;
-	};
+	}
 	srcu_read_unlock(&nodes_srcu, idx);
 }
 

From 6a628fa43810f861da50c593c69f2ead1c829231 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Fri, 12 Nov 2021 10:08:01 -0500
Subject: [PATCH 21/32] fs: dlm: fix potential buffer overflow

This patch fixes an potential overflow in sscanf and the maximum
declared string parsing length which seems to be excluding the null
termination symbol. This patch will just add one byte to be prepared on
a string with length of DLM_RESNAME_MAXLEN including the null
termination symbol.

Fixes: 5054e79de999 ("fs: dlm: add lkb debugfs functionality")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/debug_fs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index df6f3f107be4f..8fb04ebbafb5d 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -640,7 +640,7 @@ static ssize_t table_write2(struct file *file, const char __user *user_buf,
 {
 	struct seq_file *seq = file->private_data;
 	int n, len, lkb_nodeid, lkb_status, error;
-	char name[DLM_RESNAME_MAXLEN] = {};
+	char name[DLM_RESNAME_MAXLEN + 1] = {};
 	struct dlm_ls *ls = seq->private;
 	unsigned int lkb_flags;
 	char buf[256] = {};

From 4c3d90570bcc2b338f70f61f01110268e281ca3c Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Mon, 15 Nov 2021 08:57:05 -0500
Subject: [PATCH 22/32] fs: dlm: don't call kernel_getpeername() in
 error_report()

In some cases kernel_getpeername() will held the socket lock which is
already held when the socket layer calls error_report() callback. Since
commit 9dfc685e0262 ("inet: remove races in inet{6}_getname()") this
problem becomes more likely because the socket lock will be held always.
You will see something like:

bob9-u5 login: [  562.316860] BUG: spinlock recursion on CPU#7, swapper/7/0
[  562.318562]  lock: 0xffff8f2284720088, .magic: dead4ead, .owner: swapper/7/0, .owner_cpu: 7
[  562.319522] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.15.0+ #135
[  562.320346] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.13.0-2.module+el8.3.0+7353+9de0a3cc 04/01/2014
[  562.321277] Call Trace:
[  562.321529]  <IRQ>
[  562.321734]  dump_stack_lvl+0x33/0x42
[  562.322282]  do_raw_spin_lock+0x8b/0xc0
[  562.322674]  lock_sock_nested+0x1e/0x50
[  562.323057]  inet_getname+0x39/0x110
[  562.323425]  ? sock_def_readable+0x80/0x80
[  562.323838]  lowcomms_error_report+0x63/0x260 [dlm]
[  562.324338]  ? wait_for_completion_interruptible_timeout+0xd2/0x120
[  562.324949]  ? lock_timer_base+0x67/0x80
[  562.325330]  ? do_raw_spin_unlock+0x49/0xc0
[  562.325735]  ? _raw_spin_unlock_irqrestore+0x1e/0x40
[  562.326218]  ? del_timer+0x54/0x80
[  562.326549]  sk_error_report+0x12/0x70
[  562.326919]  tcp_validate_incoming+0x3c8/0x530
[  562.327347]  ? kvm_clock_read+0x14/0x30
[  562.327718]  ? ktime_get+0x3b/0xa0
[  562.328055]  tcp_rcv_established+0x121/0x660
[  562.328466]  tcp_v4_do_rcv+0x132/0x260
[  562.328835]  tcp_v4_rcv+0xcea/0xe20
[  562.329173]  ip_protocol_deliver_rcu+0x35/0x1f0
[  562.329615]  ip_local_deliver_finish+0x54/0x60
[  562.330050]  ip_local_deliver+0xf7/0x110
[  562.330431]  ? inet_rtm_getroute+0x211/0x840
[  562.330848]  ? ip_protocol_deliver_rcu+0x1f0/0x1f0
[  562.331310]  ip_rcv+0xe1/0xf0
[  562.331603]  ? ip_local_deliver+0x110/0x110
[  562.332011]  __netif_receive_skb_core+0x46a/0x1040
[  562.332476]  ? inet_gro_receive+0x263/0x2e0
[  562.332885]  __netif_receive_skb_list_core+0x13b/0x2c0
[  562.333383]  netif_receive_skb_list_internal+0x1c8/0x2f0
[  562.333896]  ? update_load_avg+0x7e/0x5e0
[  562.334285]  gro_normal_list.part.149+0x19/0x40
[  562.334722]  napi_complete_done+0x67/0x160
[  562.335134]  virtnet_poll+0x2ad/0x408 [virtio_net]
[  562.335644]  __napi_poll+0x28/0x140
[  562.336012]  net_rx_action+0x23d/0x300
[  562.336414]  __do_softirq+0xf2/0x2ea
[  562.336803]  irq_exit_rcu+0xc1/0xf0
[  562.337173]  common_interrupt+0xb9/0xd0

It is and was always forbidden to call kernel_getpeername() in context
of error_report(). To get rid of the problem we access the destination
address for the peer over the socket structure. While on it we fix to
print out the destination port of the inet socket.

Fixes: 1a31833d085a ("DLM: Replace nodeid_to_addr with kernel_getpeername")
Reported-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 42 ++++++++++++++++++++----------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 2f070514b3eed..c7750849c4954 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -594,8 +594,8 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark)
 static void lowcomms_error_report(struct sock *sk)
 {
 	struct connection *con;
-	struct sockaddr_storage saddr;
 	void (*orig_report)(struct sock *) = NULL;
+	struct inet_sock *inet;
 
 	read_lock_bh(&sk->sk_callback_lock);
 	con = sock2con(sk);
@@ -603,33 +603,31 @@ static void lowcomms_error_report(struct sock *sk)
 		goto out;
 
 	orig_report = listen_sock.sk_error_report;
-	if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
-		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
-				   "sending to node %d, port %d, "
-				   "sk_err=%d/%d\n", dlm_our_nodeid(),
-				   con->nodeid, dlm_config.ci_tcp_port,
-				   sk->sk_err, sk->sk_err_soft);
-	} else if (saddr.ss_family == AF_INET) {
-		struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
 
+	inet = inet_sk(sk);
+	switch (sk->sk_family) {
+	case AF_INET:
 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
-				   "sending to node %d at %pI4, port %d, "
+				   "sending to node %d at %pI4, dport %d, "
 				   "sk_err=%d/%d\n", dlm_our_nodeid(),
-				   con->nodeid, &sin4->sin_addr.s_addr,
-				   dlm_config.ci_tcp_port, sk->sk_err,
+				   con->nodeid, &inet->inet_daddr,
+				   ntohs(inet->inet_dport), sk->sk_err,
 				   sk->sk_err_soft);
-	} else {
-		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
-
+		break;
+	case AF_INET6:
 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
-				   "sending to node %d at %u.%u.%u.%u, "
-				   "port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
-				   con->nodeid, sin6->sin6_addr.s6_addr32[0],
-				   sin6->sin6_addr.s6_addr32[1],
-				   sin6->sin6_addr.s6_addr32[2],
-				   sin6->sin6_addr.s6_addr32[3],
-				   dlm_config.ci_tcp_port, sk->sk_err,
+				   "sending to node %d at %pI6c, "
+				   "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
+				   con->nodeid, &sk->sk_v6_daddr,
+				   ntohs(inet->inet_dport), sk->sk_err,
 				   sk->sk_err_soft);
+		break;
+	default:
+		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
+				   "invalid socket family %d set, "
+				   "sk_err=%d/%d\n", dlm_our_nodeid(),
+				   sk->sk_family, sk->sk_err, sk->sk_err_soft);
+		goto out;
 	}
 
 	/* below sendcon only handling */

From 92c44605381418b01af44c63fd27185cac368866 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Mon, 15 Nov 2021 08:57:06 -0500
Subject: [PATCH 23/32] fs: dlm: replace use of socket sk_callback_lock with
 sock_lock

This patch will replace the use of socket sk_callback_lock lock and uses
socket lock instead. Some users like sunrpc, see commit ea9afca88bbe
("SUNRPC: Replace use of socket sk_callback_lock with sock_lock") moving
from sk_callback_lock to sock_lock which seems to be held when the socket
callbacks are called.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index c7750849c4954..2034701890111 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -488,11 +488,9 @@ static void lowcomms_data_ready(struct sock *sk)
 {
 	struct connection *con;
 
-	read_lock_bh(&sk->sk_callback_lock);
 	con = sock2con(sk);
 	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
 		queue_work(recv_workqueue, &con->rwork);
-	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void lowcomms_listen_data_ready(struct sock *sk)
@@ -507,15 +505,14 @@ static void lowcomms_write_space(struct sock *sk)
 {
 	struct connection *con;
 
-	read_lock_bh(&sk->sk_callback_lock);
 	con = sock2con(sk);
 	if (!con)
-		goto out;
+		return;
 
 	if (!test_and_set_bit(CF_CONNECTED, &con->flags)) {
 		log_print("successful connected to node %d", con->nodeid);
 		queue_work(send_workqueue, &con->swork);
-		goto out;
+		return;
 	}
 
 	clear_bit(SOCK_NOSPACE, &con->sock->flags);
@@ -526,8 +523,6 @@ static void lowcomms_write_space(struct sock *sk)
 	}
 
 	queue_work(send_workqueue, &con->swork);
-out:
-	read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void lowcomms_connect_sock(struct connection *con)
@@ -597,7 +592,6 @@ static void lowcomms_error_report(struct sock *sk)
 	void (*orig_report)(struct sock *) = NULL;
 	struct inet_sock *inet;
 
-	read_lock_bh(&sk->sk_callback_lock);
 	con = sock2con(sk);
 	if (con == NULL)
 		goto out;
@@ -646,7 +640,6 @@ static void lowcomms_error_report(struct sock *sk)
 		queue_work(send_workqueue, &con->swork);
 
 out:
-	read_unlock_bh(&sk->sk_callback_lock);
 	if (orig_report)
 		orig_report(sk);
 }
@@ -666,20 +659,20 @@ static void restore_callbacks(struct socket *sock)
 {
 	struct sock *sk = sock->sk;
 
-	write_lock_bh(&sk->sk_callback_lock);
+	lock_sock(sk);
 	sk->sk_user_data = NULL;
 	sk->sk_data_ready = listen_sock.sk_data_ready;
 	sk->sk_state_change = listen_sock.sk_state_change;
 	sk->sk_write_space = listen_sock.sk_write_space;
 	sk->sk_error_report = listen_sock.sk_error_report;
-	write_unlock_bh(&sk->sk_callback_lock);
+	release_sock(sk);
 }
 
 static void add_listen_sock(struct socket *sock, struct listen_connection *con)
 {
 	struct sock *sk = sock->sk;
 
-	write_lock_bh(&sk->sk_callback_lock);
+	lock_sock(sk);
 	save_listen_callbacks(sock);
 	con->sock = sock;
 
@@ -687,7 +680,7 @@ static void add_listen_sock(struct socket *sock, struct listen_connection *con)
 	sk->sk_allocation = GFP_NOFS;
 	/* Install a data_ready callback */
 	sk->sk_data_ready = lowcomms_listen_data_ready;
-	write_unlock_bh(&sk->sk_callback_lock);
+	release_sock(sk);
 }
 
 /* Make a socket active */
@@ -695,7 +688,7 @@ static void add_sock(struct socket *sock, struct connection *con)
 {
 	struct sock *sk = sock->sk;
 
-	write_lock_bh(&sk->sk_callback_lock);
+	lock_sock(sk);
 	con->sock = sock;
 
 	sk->sk_user_data = con;
@@ -705,7 +698,7 @@ static void add_sock(struct socket *sock, struct connection *con)
 	sk->sk_state_change = lowcomms_state_change;
 	sk->sk_allocation = GFP_NOFS;
 	sk->sk_error_report = lowcomms_error_report;
-	write_unlock_bh(&sk->sk_callback_lock);
+	release_sock(sk);
 }
 
 /* Add the port number to an IPv6 or 4 sockaddr and return the address
@@ -1680,9 +1673,9 @@ static void _stop_conn(struct connection *con, bool and_other)
 	set_bit(CF_READ_PENDING, &con->flags);
 	set_bit(CF_WRITE_PENDING, &con->flags);
 	if (con->sock && con->sock->sk) {
-		write_lock_bh(&con->sock->sk->sk_callback_lock);
+		lock_sock(con->sock->sk);
 		con->sock->sk->sk_user_data = NULL;
-		write_unlock_bh(&con->sock->sk->sk_callback_lock);
+		release_sock(con->sock->sk);
 	}
 	if (con->othercon && and_other)
 		_stop_conn(con->othercon, false);

From 1b9beda83e27a0c2cd75d1cb743c297c7b36c844 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Wed, 17 Nov 2021 09:20:43 -0500
Subject: [PATCH 24/32] fs: dlm: fix build with CONFIG_IPV6 disabled

This patch will surround the AF_INET6 case in sk_error_report() of dlm
with a #if IS_ENABLED(CONFIG_IPV6). The field sk->sk_v6_daddr is not
defined when CONFIG_IPV6 is disabled. If CONFIG_IPV6 is disabled, the
socket creation with AF_INET6 should already fail because a runtime
check if AF_INET6 is registered. However if there is the possibility
that AF_INET6 is set as sk_family the sk_error_report() callback will
print then an invalid family type error.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 4c3d90570bcc ("fs: dlm: don't call kernel_getpeername() in error_report()")
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 2034701890111..f7fc1ac76ce83 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -608,6 +608,7 @@ static void lowcomms_error_report(struct sock *sk)
 				   ntohs(inet->inet_dport), sk->sk_err,
 				   sk->sk_err_soft);
 		break;
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
 				   "sending to node %d at %pI6c, "
@@ -616,6 +617,7 @@ static void lowcomms_error_report(struct sock *sk)
 				   ntohs(inet->inet_dport), sk->sk_err,
 				   sk->sk_err_soft);
 		break;
+#endif
 	default:
 		printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
 				   "invalid socket family %d set, "

From f70813d6a5fce7bde411272cfe1ab565a4254266 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:14 -0500
Subject: [PATCH 25/32] fs: dlm: use list_empty() to check last iteration

This patch will use list_empty(&ls->ls_cb_delay) to check for last list
iteration. In case of a multiply count of MAX_CB_QUEUE and the list is
empty we do a extra goto more which we can avoid by checking on
list_empty().

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/ast.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index 27bae7d4a477a..bfac462dd3e8f 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -300,6 +300,7 @@ void dlm_callback_resume(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb, *safe;
 	int count = 0, sum = 0;
+	bool empty;
 
 	clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
 
@@ -315,10 +316,11 @@ void dlm_callback_resume(struct dlm_ls *ls)
 		if (count == MAX_CB_QUEUE)
 			break;
 	}
+	empty = list_empty(&ls->ls_cb_delay);
 	mutex_unlock(&ls->ls_cb_mutex);
 
 	sum += count;
-	if (count == MAX_CB_QUEUE) {
+	if (!empty) {
 		count = 0;
 		cond_resched();
 		goto more;

From bcbfea41e1f9d516faed1faf0f2d390c000bf0d9 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:15 -0500
Subject: [PATCH 26/32] fs: dlm: check for pending users filling buffers

Currently we don't care if the DLM application stack is filling buffers
(not committed yet) while we transmit some already committed buffers.
By checking on active writequeue users before dequeue a writequeue entry
we know there is coming more data and do nothing. We wait until the send
worker will be triggered again if the writequeue entry users hit zero.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index f7fc1ac76ce83..6d500ebc61453 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -201,7 +201,10 @@ static struct writequeue_entry *con_next_wq(struct connection *con)
 
 	e = list_first_entry(&con->writequeue, struct writequeue_entry,
 			     list);
-	if (e->len == 0)
+	/* if len is zero nothing is to send, if there are users filling
+	 * buffers we wait until the users are done so we can send more.
+	 */
+	if (e->users || e->len == 0)
 		return NULL;
 
 	return e;

From 21d9ac1a5376d949199398848006f6b14649f533 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:16 -0500
Subject: [PATCH 27/32] fs: dlm: use event based wait for pending remove

This patch will use an event based waitqueue to wait for a possible clash
with the ls_remove_name field of dlm_ls instead of doing busy waiting.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/dlm_internal.h |  1 +
 fs/dlm/lock.c         | 19 ++++++++++++-------
 fs/dlm/lockspace.c    |  1 +
 3 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 019931804af9b..74a9590a4dd5b 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -576,6 +576,7 @@ struct dlm_ls {
 	struct list_head	ls_new_rsb;	/* new rsb structs */
 
 	spinlock_t		ls_remove_spin;
+	wait_queue_head_t	ls_remove_wait;
 	char			ls_remove_name[DLM_RESNAME_MAXLEN+1];
 	char			*ls_remove_names[DLM_REMOVE_NAMES_MAX];
 	int			ls_remove_len;
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 54705d367076b..bdb51d209ba25 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -1626,21 +1626,24 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
 }
 
 /* If there's an rsb for the same resource being removed, ensure
-   that the remove message is sent before the new lookup message.
-   It should be rare to need a delay here, but if not, then it may
-   be worthwhile to add a proper wait mechanism rather than a delay. */
+ * that the remove message is sent before the new lookup message.
+ */
+
+#define DLM_WAIT_PENDING_COND(ls, r)		\
+	(ls->ls_remove_len &&			\
+	 !rsb_cmp(r, ls->ls_remove_name,	\
+		  ls->ls_remove_len))
 
 static void wait_pending_remove(struct dlm_rsb *r)
 {
 	struct dlm_ls *ls = r->res_ls;
  restart:
 	spin_lock(&ls->ls_remove_spin);
-	if (ls->ls_remove_len &&
-	    !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) {
+	if (DLM_WAIT_PENDING_COND(ls, r)) {
 		log_debug(ls, "delay lookup for remove dir %d %s",
-		  	  r->res_dir_nodeid, r->res_name);
+			  r->res_dir_nodeid, r->res_name);
 		spin_unlock(&ls->ls_remove_spin);
-		msleep(1);
+		wait_event(ls->ls_remove_wait, !DLM_WAIT_PENDING_COND(ls, r));
 		goto restart;
 	}
 	spin_unlock(&ls->ls_remove_spin);
@@ -1792,6 +1795,7 @@ static void shrink_bucket(struct dlm_ls *ls, int b)
 		memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
 		spin_unlock(&ls->ls_remove_spin);
 		spin_unlock(&ls->ls_rsbtbl[b].lock);
+		wake_up(&ls->ls_remove_wait);
 
 		send_remove(r);
 
@@ -4075,6 +4079,7 @@ static void send_repeat_remove(struct dlm_ls *ls, char *ms_name, int len)
 	memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN);
 	spin_unlock(&ls->ls_remove_spin);
 	spin_unlock(&ls->ls_rsbtbl[b].lock);
+	wake_up(&ls->ls_remove_wait);
 
 	rv = _create_message(ls, sizeof(struct dlm_message) + len,
 			     dir_nodeid, DLM_MSG_REMOVE, &ms, &mh);
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 2e51bd2bdacce..31384e7d6f90a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -512,6 +512,7 @@ static int new_lockspace(const char *name, const char *cluster,
 	}
 
 	spin_lock_init(&ls->ls_remove_spin);
+	init_waitqueue_head(&ls->ls_remove_wait);
 
 	for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) {
 		ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1,

From be3b0400edbf68556cd390125e2c868988616391 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:17 -0500
Subject: [PATCH 28/32] fs: dlm: remove wq_alloc mutex

This patch cleanups the code for allocating a new buffer in the dlm
writequeue mechanism. There was a possible tuneup to allow scheduling
while a new writequeue entry needs to be allocated because either no
sending page is available or are full. To avoid multiple concurrent
users checking at the same time if an entry is available or full
alloc_wq was introduce that those are waiting if there is currently a
new writequeue entry in process to be queued so possible further users
will check on the new allocated writequeue entry if it's full.

To simplify the code we just remove this mutex and switch that the
already introduced spin lock will be held during writequeue check,
allocation and queueing. So other users can never check on available
writequeues while there is a new one in process but not queued yet.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 48 +++++++++++------------------------------------
 1 file changed, 11 insertions(+), 37 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 6d500ebc61453..4919faf797097 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -86,7 +86,6 @@ struct connection {
 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
 	spinlock_t writequeue_lock;
 	atomic_t writequeue_cnt;
-	struct mutex wq_alloc;
 	int retries;
 #define MAX_CONNECT_RETRIES 3
 	struct hlist_node list;
@@ -270,8 +269,6 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc)
 		return NULL;
 	}
 
-	mutex_init(&con->wq_alloc);
-
 	spin_lock(&connections_lock);
 	/* Because multiple workqueues/threads calls this function it can
 	 * race on multiple cpu's. Instead of locking hot path __find_con()
@@ -1176,16 +1173,15 @@ static void deinit_local(void)
 		kfree(dlm_local_addr[i]);
 }
 
-static struct writequeue_entry *new_writequeue_entry(struct connection *con,
-						     gfp_t allocation)
+static struct writequeue_entry *new_writequeue_entry(struct connection *con)
 {
 	struct writequeue_entry *entry;
 
-	entry = kzalloc(sizeof(*entry), allocation);
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
 	if (!entry)
 		return NULL;
 
-	entry->page = alloc_page(allocation | __GFP_ZERO);
+	entry->page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
 	if (!entry->page) {
 		kfree(entry);
 		return NULL;
@@ -1200,8 +1196,8 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con,
 }
 
 static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
-					     gfp_t allocation, char **ppc,
-					     void (*cb)(void *data), void *data)
+					     char **ppc, void (*cb)(void *data),
+					     void *data)
 {
 	struct writequeue_entry *e;
 
@@ -1217,29 +1213,25 @@ static struct writequeue_entry *new_wq_entry(struct connection *con, int len,
 
 			e->end += len;
 			e->users++;
-			spin_unlock(&con->writequeue_lock);
-
-			return e;
+			goto out;
 		}
 	}
-	spin_unlock(&con->writequeue_lock);
 
-	e = new_writequeue_entry(con, allocation);
+	e = new_writequeue_entry(con);
 	if (!e)
-		return NULL;
+		goto out;
 
 	kref_get(&e->ref);
 	*ppc = page_address(e->page);
 	e->end += len;
 	atomic_inc(&con->writequeue_cnt);
-
-	spin_lock(&con->writequeue_lock);
 	if (cb)
 		cb(data);
 
 	list_add_tail(&e->list, &con->writequeue);
-	spin_unlock(&con->writequeue_lock);
 
+out:
+	spin_unlock(&con->writequeue_lock);
 	return e;
 };
 
@@ -1250,37 +1242,19 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 {
 	struct writequeue_entry *e;
 	struct dlm_msg *msg;
-	bool sleepable;
 
 	msg = kzalloc(sizeof(*msg), allocation);
 	if (!msg)
 		return NULL;
 
-	/* this mutex is being used as a wait to avoid multiple "fast"
-	 * new writequeue page list entry allocs in new_wq_entry in
-	 * normal operation which is sleepable context. Without it
-	 * we could end in multiple writequeue entries with one
-	 * dlm message because multiple callers were waiting at
-	 * the writequeue_lock in new_wq_entry().
-	 */
-	sleepable = gfpflags_normal_context(allocation);
-	if (sleepable)
-		mutex_lock(&con->wq_alloc);
-
 	kref_init(&msg->ref);
 
-	e = new_wq_entry(con, len, allocation, ppc, cb, data);
+	e = new_wq_entry(con, len, ppc, cb, data);
 	if (!e) {
-		if (sleepable)
-			mutex_unlock(&con->wq_alloc);
-
 		kfree(msg);
 		return NULL;
 	}
 
-	if (sleepable)
-		mutex_unlock(&con->wq_alloc);
-
 	msg->ppc = *ppc;
 	msg->len = len;
 	msg->entry = e;

From 6c547f264077ffeb56390f42ed2a07749dd619b2 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:18 -0500
Subject: [PATCH 29/32] fs: dlm: memory cache for midcomms hotpath

This patch will introduce a kmem cache for allocating message handles
which are needed for midcomms layer to take track of lowcomms messages.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/memory.c   | 31 ++++++++++++++++++++++++++-----
 fs/dlm/memory.h   |  2 ++
 fs/dlm/midcomms.c | 21 +++++++++++++++------
 fs/dlm/midcomms.h |  1 +
 4 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 5918f4d395869..8996c6453ad5c 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -10,32 +10,44 @@
 ******************************************************************************/
 
 #include "dlm_internal.h"
+#include "midcomms.h"
 #include "config.h"
 #include "memory.h"
 
+static struct kmem_cache *mhandle_cache;
 static struct kmem_cache *lkb_cache;
 static struct kmem_cache *rsb_cache;
 
 
 int __init dlm_memory_init(void)
 {
+	mhandle_cache = dlm_midcomms_cache_create();
+	if (!mhandle_cache)
+		goto out;
+
 	lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
 				__alignof__(struct dlm_lkb), 0, NULL);
 	if (!lkb_cache)
-		return -ENOMEM;
+		goto lkb;
 
 	rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
 				__alignof__(struct dlm_rsb), 0, NULL);
-	if (!rsb_cache) {
-		kmem_cache_destroy(lkb_cache);
-		return -ENOMEM;
-	}
+	if (!rsb_cache)
+		goto rsb;
 
 	return 0;
+
+rsb:
+	kmem_cache_destroy(lkb_cache);
+lkb:
+	kmem_cache_destroy(mhandle_cache);
+out:
+	return -ENOMEM;
 }
 
 void dlm_memory_exit(void)
 {
+	kmem_cache_destroy(mhandle_cache);
 	kmem_cache_destroy(lkb_cache);
 	kmem_cache_destroy(rsb_cache);
 }
@@ -89,3 +101,12 @@ void dlm_free_lkb(struct dlm_lkb *lkb)
 	kmem_cache_free(lkb_cache, lkb);
 }
 
+struct dlm_mhandle *dlm_allocate_mhandle(void)
+{
+	return kmem_cache_alloc(mhandle_cache, GFP_NOFS);
+}
+
+void dlm_free_mhandle(struct dlm_mhandle *mhandle)
+{
+	kmem_cache_free(mhandle_cache, mhandle);
+}
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 4f218ea4b187d..c4d46be778a29 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -20,6 +20,8 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
 void dlm_free_lkb(struct dlm_lkb *l);
 char *dlm_allocate_lvb(struct dlm_ls *ls);
 void dlm_free_lvb(char *l);
+struct dlm_mhandle *dlm_allocate_mhandle(void);
+void dlm_free_mhandle(struct dlm_mhandle *mhandle);
 
 #endif		/* __MEMORY_DOT_H__ */
 
diff --git a/fs/dlm/midcomms.c b/fs/dlm/midcomms.c
index 74b4308b912cf..3635e42b06696 100644
--- a/fs/dlm/midcomms.c
+++ b/fs/dlm/midcomms.c
@@ -137,6 +137,7 @@
 #include "dlm_internal.h"
 #include "lowcomms.h"
 #include "config.h"
+#include "memory.h"
 #include "lock.h"
 #include "util.h"
 #include "midcomms.h"
@@ -220,6 +221,12 @@ DEFINE_STATIC_SRCU(nodes_srcu);
  */
 static DEFINE_MUTEX(close_lock);
 
+struct kmem_cache *dlm_midcomms_cache_create(void)
+{
+	return kmem_cache_create("dlm_mhandle", sizeof(struct dlm_mhandle),
+				 0, 0, NULL);
+}
+
 static inline const char *dlm_state_str(int state)
 {
 	switch (state) {
@@ -279,7 +286,7 @@ static void dlm_mhandle_release(struct rcu_head *rcu)
 	struct dlm_mhandle *mh = container_of(rcu, struct dlm_mhandle, rcu);
 
 	dlm_lowcomms_put_msg(mh->msg);
-	kfree(mh);
+	dlm_free_mhandle(mh);
 }
 
 static void dlm_mhandle_delete(struct midcomms_node *node,
@@ -1073,10 +1080,12 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 	/* this is a bug, however we going on and hope it will be resolved */
 	WARN_ON(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags));
 
-	mh = kzalloc(sizeof(*mh), GFP_NOFS);
+	mh = dlm_allocate_mhandle();
 	if (!mh)
 		goto err;
 
+	mh->committed = false;
+	mh->ack_rcv = NULL;
 	mh->idx = idx;
 	mh->node = node;
 
@@ -1085,7 +1094,7 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 		msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc,
 					   NULL, NULL);
 		if (!msg) {
-			kfree(mh);
+			dlm_free_mhandle(mh);
 			goto err;
 		}
 
@@ -1094,13 +1103,13 @@ struct dlm_mhandle *dlm_midcomms_get_mhandle(int nodeid, int len,
 		msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation,
 					       ppc);
 		if (!msg) {
-			kfree(mh);
+			dlm_free_mhandle(mh);
 			goto err;
 		}
 
 		break;
 	default:
-		kfree(mh);
+		dlm_free_mhandle(mh);
 		WARN_ON(1);
 		goto err;
 	}
@@ -1136,7 +1145,7 @@ void dlm_midcomms_commit_mhandle(struct dlm_mhandle *mh)
 		dlm_lowcomms_commit_msg(mh->msg);
 		dlm_lowcomms_put_msg(mh->msg);
 		/* mh is not part of rcu list in this case */
-		kfree(mh);
+		dlm_free_mhandle(mh);
 		break;
 	case DLM_VERSION_3_2:
 		dlm_midcomms_commit_msg_3_2(mh);
diff --git a/fs/dlm/midcomms.h b/fs/dlm/midcomms.h
index bc63cf73aa872..82bcd96619228 100644
--- a/fs/dlm/midcomms.h
+++ b/fs/dlm/midcomms.h
@@ -30,6 +30,7 @@ int dlm_midcomms_send_queue_cnt(struct midcomms_node *node);
 uint32_t dlm_midcomms_version(struct midcomms_node *node);
 int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf,
 			     int buflen);
+struct kmem_cache *dlm_midcomms_cache_create(void);
 
 #endif				/* __MIDCOMMS_DOT_H__ */
 

From 3af2326ca0a13cf84aeb75e001e757ff3cefeae9 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:19 -0500
Subject: [PATCH 30/32] fs: dlm: memory cache for writequeue_entry

This patch introduces a kmem cache for writequeue entry. A writequeue
entry get quite a lot allocated if dlm transmit messages.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 26 +++++++++++++++++++++-----
 fs/dlm/lowcomms.h |  1 +
 fs/dlm/memory.c   | 21 ++++++++++++++++++++-
 fs/dlm/memory.h   |  2 ++
 4 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 4919faf797097..300f44c5d1326 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -58,6 +58,7 @@
 #include "dlm_internal.h"
 #include "lowcomms.h"
 #include "midcomms.h"
+#include "memory.h"
 #include "config.h"
 
 #define NEEDED_RMEM (4*1024*1024)
@@ -190,6 +191,19 @@ static const struct dlm_proto_ops *dlm_proto_ops;
 static void process_recv_sockets(struct work_struct *work);
 static void process_send_sockets(struct work_struct *work);
 
+static void writequeue_entry_ctor(void *data)
+{
+	struct writequeue_entry *entry = data;
+
+	INIT_LIST_HEAD(&entry->msgs);
+}
+
+struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void)
+{
+	return kmem_cache_create("dlm_writequeue", sizeof(struct writequeue_entry),
+				 0, 0, writequeue_entry_ctor);
+}
+
 /* need to held writequeue_lock */
 static struct writequeue_entry *con_next_wq(struct connection *con)
 {
@@ -728,7 +742,7 @@ static void dlm_page_release(struct kref *kref)
 						  ref);
 
 	__free_page(e->page);
-	kfree(e);
+	dlm_free_writequeue(e);
 }
 
 static void dlm_msg_release(struct kref *kref)
@@ -1177,21 +1191,23 @@ static struct writequeue_entry *new_writequeue_entry(struct connection *con)
 {
 	struct writequeue_entry *entry;
 
-	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	entry = dlm_allocate_writequeue();
 	if (!entry)
 		return NULL;
 
 	entry->page = alloc_page(GFP_ATOMIC | __GFP_ZERO);
 	if (!entry->page) {
-		kfree(entry);
+		dlm_free_writequeue(entry);
 		return NULL;
 	}
 
+	entry->offset = 0;
+	entry->len = 0;
+	entry->end = 0;
+	entry->dirty = false;
 	entry->con = con;
 	entry->users = 1;
 	kref_init(&entry->ref);
-	INIT_LIST_HEAD(&entry->msgs);
-
 	return entry;
 }
 
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 8108ea24ec301..6c8f4ce457f05 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -47,6 +47,7 @@ int dlm_lowcomms_connect_node(int nodeid);
 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
 void dlm_midcomms_receive_done(int nodeid);
+struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 8996c6453ad5c..94af986e83c6d 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -11,9 +11,11 @@
 
 #include "dlm_internal.h"
 #include "midcomms.h"
+#include "lowcomms.h"
 #include "config.h"
 #include "memory.h"
 
+static struct kmem_cache *writequeue_cache;
 static struct kmem_cache *mhandle_cache;
 static struct kmem_cache *lkb_cache;
 static struct kmem_cache *rsb_cache;
@@ -21,9 +23,13 @@ static struct kmem_cache *rsb_cache;
 
 int __init dlm_memory_init(void)
 {
+	writequeue_cache = dlm_lowcomms_writequeue_cache_create();
+	if (!writequeue_cache)
+		goto out;
+
 	mhandle_cache = dlm_midcomms_cache_create();
 	if (!mhandle_cache)
-		goto out;
+		goto mhandle;
 
 	lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb),
 				__alignof__(struct dlm_lkb), 0, NULL);
@@ -41,12 +47,15 @@ int __init dlm_memory_init(void)
 	kmem_cache_destroy(lkb_cache);
 lkb:
 	kmem_cache_destroy(mhandle_cache);
+mhandle:
+	kmem_cache_destroy(writequeue_cache);
 out:
 	return -ENOMEM;
 }
 
 void dlm_memory_exit(void)
 {
+	kmem_cache_destroy(writequeue_cache);
 	kmem_cache_destroy(mhandle_cache);
 	kmem_cache_destroy(lkb_cache);
 	kmem_cache_destroy(rsb_cache);
@@ -110,3 +119,13 @@ void dlm_free_mhandle(struct dlm_mhandle *mhandle)
 {
 	kmem_cache_free(mhandle_cache, mhandle);
 }
+
+struct writequeue_entry *dlm_allocate_writequeue(void)
+{
+	return kmem_cache_alloc(writequeue_cache, GFP_ATOMIC);
+}
+
+void dlm_free_writequeue(struct writequeue_entry *writequeue)
+{
+	kmem_cache_free(writequeue_cache, writequeue);
+}
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index c4d46be778a29..854269eacd445 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -22,6 +22,8 @@ char *dlm_allocate_lvb(struct dlm_ls *ls);
 void dlm_free_lvb(char *l);
 struct dlm_mhandle *dlm_allocate_mhandle(void);
 void dlm_free_mhandle(struct dlm_mhandle *mhandle);
+struct writequeue_entry *dlm_allocate_writequeue(void);
+void dlm_free_writequeue(struct writequeue_entry *writequeue);
 
 #endif		/* __MEMORY_DOT_H__ */
 

From e4dc81ed5a8069b8ae56116058ebbad77ff559ec Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 30 Nov 2021 14:47:20 -0500
Subject: [PATCH 31/32] fs: dlm: memory cache for lowcomms hotpath

This patch introduces a kmem cache for dlm_msg handles which are used
always if dlm sends a message out. Even if their are covered by midcomms
layer or not.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 13 ++++++++++---
 fs/dlm/lowcomms.h |  1 +
 fs/dlm/memory.c   | 18 ++++++++++++++++++
 fs/dlm/memory.h   |  2 ++
 4 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 300f44c5d1326..23a1ff6907252 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -204,6 +204,11 @@ struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void)
 				 0, 0, writequeue_entry_ctor);
 }
 
+struct kmem_cache *dlm_lowcomms_msg_cache_create(void)
+{
+	return kmem_cache_create("dlm_msg", sizeof(struct dlm_msg), 0, 0, NULL);
+}
+
 /* need to held writequeue_lock */
 static struct writequeue_entry *con_next_wq(struct connection *con)
 {
@@ -750,7 +755,7 @@ static void dlm_msg_release(struct kref *kref)
 	struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref);
 
 	kref_put(&msg->entry->ref, dlm_page_release);
-	kfree(msg);
+	dlm_free_msg(msg);
 }
 
 static void free_entry(struct writequeue_entry *e)
@@ -1259,7 +1264,7 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 	struct writequeue_entry *e;
 	struct dlm_msg *msg;
 
-	msg = kzalloc(sizeof(*msg), allocation);
+	msg = dlm_allocate_msg(allocation);
 	if (!msg)
 		return NULL;
 
@@ -1267,10 +1272,12 @@ static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len,
 
 	e = new_wq_entry(con, len, ppc, cb, data);
 	if (!e) {
-		kfree(msg);
+		dlm_free_msg(msg);
 		return NULL;
 	}
 
+	msg->retransmit = false;
+	msg->orig_msg = NULL;
 	msg->ppc = *ppc;
 	msg->len = len;
 	msg->entry = e;
diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h
index 6c8f4ce457f05..29369feea9916 100644
--- a/fs/dlm/lowcomms.h
+++ b/fs/dlm/lowcomms.h
@@ -48,6 +48,7 @@ int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark);
 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len);
 void dlm_midcomms_receive_done(int nodeid);
 struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void);
+struct kmem_cache *dlm_lowcomms_msg_cache_create(void);
 
 #endif				/* __LOWCOMMS_DOT_H__ */
 
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 94af986e83c6d..ce35c3c19aeb5 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -17,6 +17,7 @@
 
 static struct kmem_cache *writequeue_cache;
 static struct kmem_cache *mhandle_cache;
+static struct kmem_cache *msg_cache;
 static struct kmem_cache *lkb_cache;
 static struct kmem_cache *rsb_cache;
 
@@ -36,6 +37,10 @@ int __init dlm_memory_init(void)
 	if (!lkb_cache)
 		goto lkb;
 
+	msg_cache = dlm_lowcomms_msg_cache_create();
+	if (!msg_cache)
+		goto msg;
+
 	rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
 				__alignof__(struct dlm_rsb), 0, NULL);
 	if (!rsb_cache)
@@ -44,6 +49,8 @@ int __init dlm_memory_init(void)
 	return 0;
 
 rsb:
+	kmem_cache_destroy(msg_cache);
+msg:
 	kmem_cache_destroy(lkb_cache);
 lkb:
 	kmem_cache_destroy(mhandle_cache);
@@ -57,6 +64,7 @@ void dlm_memory_exit(void)
 {
 	kmem_cache_destroy(writequeue_cache);
 	kmem_cache_destroy(mhandle_cache);
+	kmem_cache_destroy(msg_cache);
 	kmem_cache_destroy(lkb_cache);
 	kmem_cache_destroy(rsb_cache);
 }
@@ -129,3 +137,13 @@ void dlm_free_writequeue(struct writequeue_entry *writequeue)
 {
 	kmem_cache_free(writequeue_cache, writequeue);
 }
+
+struct dlm_msg *dlm_allocate_msg(gfp_t allocation)
+{
+	return kmem_cache_alloc(msg_cache, allocation);
+}
+
+void dlm_free_msg(struct dlm_msg *msg)
+{
+	kmem_cache_free(msg_cache, msg);
+}
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 854269eacd445..7bd3f1a391ca7 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -24,6 +24,8 @@ struct dlm_mhandle *dlm_allocate_mhandle(void);
 void dlm_free_mhandle(struct dlm_mhandle *mhandle);
 struct writequeue_entry *dlm_allocate_writequeue(void);
 void dlm_free_writequeue(struct writequeue_entry *writequeue);
+struct dlm_msg *dlm_allocate_msg(gfp_t allocation);
+void dlm_free_msg(struct dlm_msg *msg);
 
 #endif		/* __MEMORY_DOT_H__ */
 

From feae43f8aa88309224b27bbe3a59fcb9aefab6f5 Mon Sep 17 00:00:00 2001
From: Alexander Aring <aahringo@redhat.com>
Date: Tue, 4 Jan 2022 09:09:47 -0500
Subject: [PATCH 32/32] fs: dlm: print cluster addr if non-cluster node
 connects

This patch prints the cluster node address if a non-cluster node
(according to the dlm config setting) tries to connect. The current
hexdump call will print in a different loglevel and only available if
dynamic debug is enabled. Additional we using the ip address format
strings to print an IETF ip4/6 string represenation.

Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
---
 fs/dlm/lowcomms.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 23a1ff6907252..e284d696c1fdc 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1028,10 +1028,28 @@ static int accept_from_sock(struct listen_connection *con)
 	/* Get the new node's NODEID */
 	make_sockaddr(&peeraddr, 0, &len);
 	if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) {
-		unsigned char *b=(unsigned char *)&peeraddr;
-		log_print("connect from non cluster node");
-		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 
-				     b, sizeof(struct sockaddr_storage));
+		switch (peeraddr.ss_family) {
+		case AF_INET: {
+			struct sockaddr_in *sin = (struct sockaddr_in *)&peeraddr;
+
+			log_print("connect from non cluster IPv4 node %pI4",
+				  &sin->sin_addr);
+			break;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+		case AF_INET6: {
+			struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&peeraddr;
+
+			log_print("connect from non cluster IPv6 node %pI6c",
+				  &sin6->sin6_addr);
+			break;
+		}
+#endif
+		default:
+			log_print("invalid family from non cluster node");
+			break;
+		}
+
 		sock_release(newsock);
 		return -1;
 	}