From 5cbec208dc994de860ae72d3340bc54f14e71b39 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:16 -0500 Subject: [PATCH 01/13] fs: dlm: fix proper srcu api call This patch will use call_srcu() instead of call_rcu() because the related datastructure resource are handled under srcu context. I assume the current code is fine anyway since free_conn() must be called when the related resource are not in use otherwise. However it will correct the overall handling in a srcu context. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 79f56f16bc2ce..77382c2ce6da2 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1616,10 +1616,11 @@ static void free_conn(struct connection *con) spin_unlock(&connections_lock); if (con->othercon) { clean_one_writequeue(con->othercon); - call_rcu(&con->othercon->rcu, connection_release); + call_srcu(&connections_srcu, &con->othercon->rcu, + connection_release); } clean_one_writequeue(con); - call_rcu(&con->rcu, connection_release); + call_srcu(&connections_srcu, &con->rcu, connection_release); } static void work_flush(void) From 9f8f9c774ad10aa1c15952c36f580d7e3711a100 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:17 -0500 Subject: [PATCH 02/13] fs: dlm: define max send buffer This patch will set the maximum transmit buffer size for rcom messages with "names" to 4096 bytes. It's a leftover change of commit 4798cbbfbd00 ("fs: dlm: rework receive handling"). Fact is that we cannot allocate a contiguous transmit buffer length above of 4096 bytes. It seems at some places the upper layer protocol will calculate according to dlm_config.ci_buffer_size the possible payload of a dlm recovery message. As compiler setting we will use now the maximum possible message which dlm can send out. Commit 4e192ee68e5af ("fs: dlm: disallow buffer size below default") disallow a buffer setting smaller than the 4096 bytes and above 4096 bytes is definitely wrong because we will then write out of buffer space as we cannot allocate a contiguous buffer above 4096 bytes. The ci_buffer_size is still there to define the possible maximum receive buffer size of a recvmsg() which should be at least the maximum possible dlm message size. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lockspace.c | 2 +- fs/dlm/lowcomms.h | 2 ++ fs/dlm/member.c | 2 +- fs/dlm/rcom.c | 6 +++--- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 624617c12250a..561dcad08ad6e 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c @@ -572,7 +572,7 @@ static int new_lockspace(const char *name, const char *cluster, mutex_init(&ls->ls_requestqueue_mutex); mutex_init(&ls->ls_clear_proc_locks); - ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS); + ls->ls_recover_buf = kmalloc(LOWCOMMS_MAX_TX_BUFFER_LEN, GFP_NOFS); if (!ls->ls_recover_buf) goto out_lkbidr; diff --git a/fs/dlm/lowcomms.h b/fs/dlm/lowcomms.h index 687b2894e469a..0918f9376489f 100644 --- a/fs/dlm/lowcomms.h +++ b/fs/dlm/lowcomms.h @@ -12,6 +12,8 @@ #ifndef __LOWCOMMS_DOT_H__ #define __LOWCOMMS_DOT_H__ +#define LOWCOMMS_MAX_TX_BUFFER_LEN 4096 + int dlm_lowcomms_start(void); void dlm_lowcomms_stop(void); void dlm_lowcomms_exit(void); diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 7ad83deb45053..ceef3f2074ffd 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c @@ -270,7 +270,7 @@ int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, log_slots(ls, gen, num, NULL, array, array_size); - max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - + max_slots = (LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom) - sizeof(struct rcom_config)) / sizeof(struct rcom_slot); if (num > max_slots) { diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c index 4daf5dc2b51c0..73ddee5159d79 100644 --- a/fs/dlm/rcom.c +++ b/fs/dlm/rcom.c @@ -162,7 +162,7 @@ int dlm_rcom_status(struct dlm_ls *ls, int nodeid, uint32_t status_flags) set_rcom_status(ls, (struct rcom_status *)rc->rc_buf, status_flags); allow_sync_reply(ls, &rc->rc_id); - memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); + memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN); send_rcom(ls, mh, rc); @@ -284,7 +284,7 @@ int dlm_rcom_names(struct dlm_ls *ls, int nodeid, char *last_name, int last_len) memcpy(rc->rc_buf, last_name, last_len); allow_sync_reply(ls, &rc->rc_id); - memset(ls->ls_recover_buf, 0, dlm_config.ci_buffer_size); + memset(ls->ls_recover_buf, 0, LOWCOMMS_MAX_TX_BUFFER_LEN); send_rcom(ls, mh, rc); @@ -304,7 +304,7 @@ static void receive_rcom_names(struct dlm_ls *ls, struct dlm_rcom *rc_in) nodeid = rc_in->rc_header.h_nodeid; inlen = rc_in->rc_header.h_length - sizeof(struct dlm_rcom); - outlen = dlm_config.ci_buffer_size - sizeof(struct dlm_rcom); + outlen = LOWCOMMS_MAX_TX_BUFFER_LEN - sizeof(struct dlm_rcom); error = create_rcom(ls, nodeid, DLM_RCOM_NAMES_REPLY, outlen, &rc, &mh); if (error) From 692f51c8cbe752cb16ea2a75016ea0a497d00b1c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:18 -0500 Subject: [PATCH 03/13] fs: dlm: add get buffer error handling This patch adds an error handling to the get buffer functionality if the user is requesting a buffer length which is more than possible of the internal buffer allocator. This should never happen because specific handling decided by compile time, but will warn if somebody forget about to handle this limitation right. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 77382c2ce6da2..620eca3979d54 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1352,6 +1352,12 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) struct writequeue_entry *e; int offset = 0; + if (len > LOWCOMMS_MAX_TX_BUFFER_LEN) { + BUILD_BUG_ON(PAGE_SIZE < LOWCOMMS_MAX_TX_BUFFER_LEN); + log_print("failed to allocate a buffer of size %d", len); + return NULL; + } + con = nodeid2con(nodeid, allocation); if (!con) return NULL; From 53a5edaa05c1073e47668f167ec9788383c780e1 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:19 -0500 Subject: [PATCH 04/13] fs: dlm: flush othercon at close This patch ensures we also flush the othercon writequeue when a lowcomms close occurs. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 620eca3979d54..c0c688aac223f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1512,6 +1512,8 @@ int dlm_lowcomms_close(int nodeid) set_bit(CF_CLOSE, &con->flags); close_connection(con, true, true, true); clean_one_writequeue(con); + if (con->othercon) + clean_one_writequeue(con->othercon); } spin_lock(&dlm_node_addrs_spin); From 19633c7e204b99fe9b952c70b712778b24a8d137 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:20 -0500 Subject: [PATCH 05/13] fs: dlm: handle non blocked connect event The manpage of connect shows that in non blocked mode a writeability indicates successful connection event. This patch is handling this event inside the writeability callback. In case of SCTP we use blocking connect functionality which indicates a successful connect when the function returns with a successful return value. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index c0c688aac223f..193a4c91c4d8c 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -78,6 +78,7 @@ struct connection { #define CF_APP_LIMITED 7 #define CF_CLOSING 8 #define CF_SHUTDOWN 9 +#define CF_CONNECTED 10 struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; int (*rx_action) (struct connection *); /* What to do when active */ @@ -419,6 +420,12 @@ static void lowcomms_write_space(struct sock *sk) if (!con) goto out; + if (!test_and_set_bit(CF_CONNECTED, &con->flags)) { + log_print("successful connected to node %d", con->nodeid); + queue_work(send_workqueue, &con->swork); + goto out; + } + clear_bit(SOCK_NOSPACE, &con->sock->flags); if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { @@ -604,6 +611,7 @@ static void close_connection(struct connection *con, bool and_other, con->rx_leftover = 0; con->retries = 0; + clear_bit(CF_CONNECTED, &con->flags); mutex_unlock(&con->sock_mutex); clear_bit(CF_CLOSING, &con->flags); } @@ -1027,8 +1035,11 @@ static void sctp_connect_to_sock(struct connection *con) if (result == -EINPROGRESS) result = 0; - if (result == 0) + if (result == 0) { + if (!test_and_set_bit(CF_CONNECTED, &con->flags)) + log_print("successful connected to node %d", con->nodeid); goto out; + } bind_err: con->sock = NULL; From 6cde210a975879a6da74b5755065c7ea3ccbcb90 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:21 -0500 Subject: [PATCH 06/13] fs: dlm: add helper for init connection This patch will move the connection structure initialization into an own function. This avoids cases to update the othercon initialization. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 67 ++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 35 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 193a4c91c4d8c..30cd53fa2f770 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -170,29 +170,12 @@ static struct connection *__find_con(int nodeid) return NULL; } -/* - * If 'allocation' is zero then we don't attempt to create a new - * connection structure for this node. - */ -static struct connection *nodeid2con(int nodeid, gfp_t alloc) +static int dlm_con_init(struct connection *con, int nodeid) { - struct connection *con, *tmp; - int r; - - con = __find_con(nodeid); - if (con || !alloc) - return con; - - con = kzalloc(sizeof(*con), alloc); - if (!con) - return NULL; - con->rx_buflen = dlm_config.ci_buffer_size; con->rx_buf = kmalloc(con->rx_buflen, GFP_NOFS); - if (!con->rx_buf) { - kfree(con); - return NULL; - } + if (!con->rx_buf) + return -ENOMEM; con->nodeid = nodeid; mutex_init(&con->sock_mutex); @@ -211,6 +194,32 @@ static struct connection *nodeid2con(int nodeid, gfp_t alloc) con->rx_action = zerocon->rx_action; } + return 0; +} + +/* + * If 'allocation' is zero then we don't attempt to create a new + * connection structure for this node. + */ +static struct connection *nodeid2con(int nodeid, gfp_t alloc) +{ + struct connection *con, *tmp; + int r, ret; + + con = __find_con(nodeid); + if (con || !alloc) + return con; + + con = kzalloc(sizeof(*con), alloc); + if (!con) + return NULL; + + ret = dlm_con_init(con, nodeid); + if (ret) { + kfree(con); + return NULL; + } + r = nodeid_hash(nodeid); spin_lock(&connections_lock); @@ -849,32 +858,20 @@ static int accept_from_sock(struct connection *con) goto accept_err; } - othercon->rx_buflen = dlm_config.ci_buffer_size; - othercon->rx_buf = kmalloc(othercon->rx_buflen, GFP_NOFS); - if (!othercon->rx_buf) { - mutex_unlock(&newcon->sock_mutex); + result = dlm_con_init(othercon, nodeid); + if (result < 0) { kfree(othercon); - log_print("failed to allocate incoming socket receive buffer"); - result = -ENOMEM; goto accept_err; } - othercon->nodeid = nodeid; - othercon->rx_action = receive_from_sock; - mutex_init(&othercon->sock_mutex); - INIT_LIST_HEAD(&othercon->writequeue); - spin_lock_init(&othercon->writequeue_lock); - INIT_WORK(&othercon->swork, process_send_sockets); - INIT_WORK(&othercon->rwork, process_recv_sockets); - init_waitqueue_head(&othercon->shutdown_wait); set_bit(CF_IS_OTHERCON, &othercon->flags); + newcon->othercon = othercon; } else { /* close other sock con if we have something new */ close_connection(othercon, false, true, false); } mutex_lock_nested(&othercon->sock_mutex, 2); - newcon->othercon = othercon; add_sock(newsock, othercon); addcon = othercon; mutex_unlock(&othercon->sock_mutex); From 0672c3c280efd33b8037863fc2bbc3510670a7d3 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:22 -0500 Subject: [PATCH 07/13] fs: dlm: move connect callback in node creation This patch moves the assignment for the connect callback to the node creation instead of assign some dummy functionality. The assignment which connect functionality will be used will be detected according to the configfs setting. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 30cd53fa2f770..30a101de0a0c6 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -142,6 +142,8 @@ DEFINE_STATIC_SRCU(connections_srcu); static void process_recv_sockets(struct work_struct *work); static void process_send_sockets(struct work_struct *work); +static void sctp_connect_to_sock(struct connection *con); +static void tcp_connect_to_sock(struct connection *con); /* This is deliberately very simple because most clusters have simple sequential nodeids, so we should be able to go straight to a connection @@ -185,14 +187,10 @@ static int dlm_con_init(struct connection *con, int nodeid) INIT_WORK(&con->rwork, process_recv_sockets); init_waitqueue_head(&con->shutdown_wait); - /* Setup action pointers for child sockets */ - if (con->nodeid) { - struct connection *zerocon = __find_con(0); - - con->connect_action = zerocon->connect_action; - if (!con->rx_action) - con->rx_action = zerocon->rx_action; - } + if (dlm_config.ci_protocol == 0) + con->connect_action = tcp_connect_to_sock; + else + con->connect_action = sctp_connect_to_sock; return 0; } @@ -1006,7 +1004,6 @@ static void sctp_connect_to_sock(struct connection *con) sock_set_mark(sock->sk, mark); con->rx_action = receive_from_sock; - con->connect_action = sctp_connect_to_sock; add_sock(sock, con); /* Bind to all addresses. */ @@ -1104,7 +1101,6 @@ static void tcp_connect_to_sock(struct connection *con) } con->rx_action = receive_from_sock; - con->connect_action = tcp_connect_to_sock; con->shutdown_action = dlm_tcp_shutdown; add_sock(sock, con); @@ -1192,7 +1188,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con, sock->sk->sk_user_data = con; save_listen_callbacks(sock); con->rx_action = accept_from_sock; - con->connect_action = tcp_connect_to_sock; write_unlock_bh(&sock->sk->sk_callback_lock); /* Bind to our port */ @@ -1275,7 +1270,6 @@ static int sctp_listen_for_all(void) con->sock = sock; con->sock->sk->sk_data_ready = lowcomms_data_ready; con->rx_action = accept_from_sock; - con->connect_action = sctp_connect_to_sock; write_unlock_bh(&sock->sk->sk_callback_lock); From 42873c903bd712b40d827c2bed100ccefa66fce8 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:23 -0500 Subject: [PATCH 08/13] fs: dlm: move shutdown action to node creation This patch move the assignment for the shutdown action callback to the node creation functionality. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 30a101de0a0c6..9723df4e67b8d 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -144,6 +144,7 @@ static void process_send_sockets(struct work_struct *work); static void sctp_connect_to_sock(struct connection *con); static void tcp_connect_to_sock(struct connection *con); +static void dlm_tcp_shutdown(struct connection *con); /* This is deliberately very simple because most clusters have simple sequential nodeids, so we should be able to go straight to a connection @@ -187,10 +188,12 @@ static int dlm_con_init(struct connection *con, int nodeid) INIT_WORK(&con->rwork, process_recv_sockets); init_waitqueue_head(&con->shutdown_wait); - if (dlm_config.ci_protocol == 0) + if (dlm_config.ci_protocol == 0) { con->connect_action = tcp_connect_to_sock; - else + con->shutdown_action = dlm_tcp_shutdown; + } else { con->connect_action = sctp_connect_to_sock; + } return 0; } @@ -1101,7 +1104,6 @@ static void tcp_connect_to_sock(struct connection *con) } con->rx_action = receive_from_sock; - con->shutdown_action = dlm_tcp_shutdown; add_sock(sock, con); /* Bind to our cluster-known address connecting to avoid From 13004e8afedcaab5a2e4c1fac4fbeafa629bca07 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:24 -0500 Subject: [PATCH 09/13] fs: dlm: refactor sctp sock parameter This patch refactors sctp_bind_addrs() to work with a socket parameter instead of a connection parameter. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 9723df4e67b8d..889ac5e8ad0a1 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -936,7 +936,7 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed) /* * sctp_bind_addrs - bind a SCTP socket to all our addresses */ -static int sctp_bind_addrs(struct connection *con, uint16_t port) +static int sctp_bind_addrs(struct socket *sock, uint16_t port) { struct sockaddr_storage localaddr; struct sockaddr *addr = (struct sockaddr *)&localaddr; @@ -947,9 +947,9 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port) make_sockaddr(&localaddr, port, &addr_len); if (!i) - result = kernel_bind(con->sock, addr, addr_len); + result = kernel_bind(sock, addr, addr_len); else - result = sock_bind_add(con->sock->sk, addr, addr_len); + result = sock_bind_add(sock->sk, addr, addr_len); if (result < 0) { log_print("Can't bind to %d addr number %d, %d.\n", @@ -1010,7 +1010,7 @@ static void sctp_connect_to_sock(struct connection *con) add_sock(sock, con); /* Bind to all addresses. */ - if (sctp_bind_addrs(con, 0)) + if (sctp_bind_addrs(con->sock, 0)) goto bind_err; make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len); @@ -1276,7 +1276,7 @@ static int sctp_listen_for_all(void) write_unlock_bh(&sock->sk->sk_callback_lock); /* Bind to all addresses. */ - if (sctp_bind_addrs(con, dlm_config.ci_tcp_port)) + if (sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port)) goto create_delsock; result = sock->ops->listen(sock, 5); From d11ccd451b655617388ace167ab2440b4b4cc95b Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:25 -0500 Subject: [PATCH 10/13] fs: dlm: listen socket out of connection hash This patch introduces a own connection structure for the listen socket handling instead of handling the listen socket as normal connection structure in the connection hash. We can remove some nodeid equals zero validation checks, because this nodeid should not exists anymore inside the node hash. This patch also removes the sock mutex in accept_from_sock() function because this function can't occur in another parallel context if it's scheduled on only one workqueue. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 167 ++++++++++++++++++++-------------------------- 1 file changed, 74 insertions(+), 93 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 889ac5e8ad0a1..b042ef56eba6e 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -81,7 +81,6 @@ struct connection { #define CF_CONNECTED 10 struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; - int (*rx_action) (struct connection *); /* What to do when active */ void (*connect_action) (struct connection *); /* What to do to connect */ void (*shutdown_action)(struct connection *con); /* What to do to shutdown */ int retries; @@ -98,6 +97,11 @@ struct connection { }; #define sock2con(x) ((struct connection *)(x)->sk_user_data) +struct listen_connection { + struct socket *sock; + struct work_struct rwork; +}; + /* An entry waiting to be sent */ struct writequeue_entry { struct list_head list; @@ -127,6 +131,7 @@ static struct listen_sock_callbacks { static LIST_HEAD(dlm_node_addrs); static DEFINE_SPINLOCK(dlm_node_addrs_spin); +static struct listen_connection listen_con; static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; static int dlm_allow_conn; @@ -421,6 +426,11 @@ static void lowcomms_data_ready(struct sock *sk) read_unlock_bh(&sk->sk_callback_lock); } +static void lowcomms_listen_data_ready(struct sock *sk) +{ + queue_work(recv_workqueue, &listen_con.rwork); +} + static void lowcomms_write_space(struct sock *sk) { struct connection *con; @@ -556,6 +566,21 @@ static void restore_callbacks(struct socket *sock) write_unlock_bh(&sk->sk_callback_lock); } +static void add_listen_sock(struct socket *sock, struct listen_connection *con) +{ + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + save_listen_callbacks(sock); + con->sock = sock; + + sk->sk_user_data = con; + sk->sk_allocation = GFP_NOFS; + /* Install a data_ready callback */ + sk->sk_data_ready = lowcomms_listen_data_ready; + write_unlock_bh(&sk->sk_callback_lock); +} + /* Make a socket active */ static void add_sock(struct socket *sock, struct connection *con) { @@ -593,6 +618,15 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port, memset((char *)saddr + *addr_len, 0, sizeof(struct sockaddr_storage) - *addr_len); } +static void dlm_close_sock(struct socket **sock) +{ + if (*sock) { + restore_callbacks(*sock); + sock_release(*sock); + *sock = NULL; + } +} + /* Close a remote connection and tidy up */ static void close_connection(struct connection *con, bool and_other, bool tx, bool rx) @@ -609,11 +643,8 @@ static void close_connection(struct connection *con, bool and_other, } mutex_lock(&con->sock_mutex); - if (con->sock) { - restore_callbacks(con->sock); - sock_release(con->sock); - con->sock = NULL; - } + dlm_close_sock(&con->sock); + if (con->othercon && and_other) { /* Will only re-enter once. */ close_connection(con->othercon, false, true, true); @@ -709,11 +740,6 @@ static int receive_from_sock(struct connection *con) goto out_close; } - if (con->nodeid == 0) { - ret = -EINVAL; - goto out_close; - } - /* realloc if we get new buffer size to read out */ buflen = dlm_config.ci_buffer_size; if (con->rx_buflen != buflen && con->rx_leftover <= buflen) { @@ -785,7 +811,7 @@ static int receive_from_sock(struct connection *con) } /* Listening socket is busy, accept a connection */ -static int accept_from_sock(struct connection *con) +static int accept_from_sock(struct listen_connection *con) { int result; struct sockaddr_storage peeraddr; @@ -800,12 +826,8 @@ static int accept_from_sock(struct connection *con) return -1; } - mutex_lock_nested(&con->sock_mutex, 0); - - if (!con->sock) { - mutex_unlock(&con->sock_mutex); + if (!con->sock) return -ENOTCONN; - } result = kernel_accept(con->sock, &newsock, O_NONBLOCK); if (result < 0) @@ -827,7 +849,6 @@ static int accept_from_sock(struct connection *con) print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, b, sizeof(struct sockaddr_storage)); sock_release(newsock); - mutex_unlock(&con->sock_mutex); return -1; } @@ -846,7 +867,8 @@ static int accept_from_sock(struct connection *con) result = -ENOMEM; goto accept_err; } - mutex_lock_nested(&newcon->sock_mutex, 1); + + mutex_lock(&newcon->sock_mutex); if (newcon->sock) { struct connection *othercon = newcon->othercon; @@ -865,20 +887,18 @@ static int accept_from_sock(struct connection *con) goto accept_err; } - set_bit(CF_IS_OTHERCON, &othercon->flags); newcon->othercon = othercon; } else { /* close other sock con if we have something new */ close_connection(othercon, false, true, false); } - mutex_lock_nested(&othercon->sock_mutex, 2); + mutex_lock_nested(&othercon->sock_mutex, 1); add_sock(newsock, othercon); addcon = othercon; mutex_unlock(&othercon->sock_mutex); } else { - newcon->rx_action = receive_from_sock; /* accept copies the sk after we've saved the callbacks, so we don't want to save them a second time or comm errors will result in calling sk_error_report recursively. */ @@ -895,12 +915,10 @@ static int accept_from_sock(struct connection *con) */ if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) queue_work(recv_workqueue, &addcon->rwork); - mutex_unlock(&con->sock_mutex); return 0; accept_err: - mutex_unlock(&con->sock_mutex); if (newsock) sock_release(newsock); @@ -973,11 +991,6 @@ static void sctp_connect_to_sock(struct connection *con) struct socket *sock; unsigned int mark; - if (con->nodeid == 0) { - log_print("attempt to connect sock 0 foiled"); - return; - } - dlm_comm_mark(con->nodeid, &mark); mutex_lock(&con->sock_mutex); @@ -1006,7 +1019,6 @@ static void sctp_connect_to_sock(struct connection *con) sock_set_mark(sock->sk, mark); - con->rx_action = receive_from_sock; add_sock(sock, con); /* Bind to all addresses. */ @@ -1073,11 +1085,6 @@ static void tcp_connect_to_sock(struct connection *con) unsigned int mark; int result; - if (con->nodeid == 0) { - log_print("attempt to connect sock 0 foiled"); - return; - } - dlm_comm_mark(con->nodeid, &mark); mutex_lock(&con->sock_mutex); @@ -1103,7 +1110,6 @@ static void tcp_connect_to_sock(struct connection *con) goto out_err; } - con->rx_action = receive_from_sock; add_sock(sock, con); /* Bind to our cluster-known address connecting to avoid @@ -1159,8 +1165,11 @@ static void tcp_connect_to_sock(struct connection *con) return; } -static struct socket *tcp_create_listen_sock(struct connection *con, - struct sockaddr_storage *saddr) +/* On error caller must run dlm_close_sock() for the + * listen connection socket. + */ +static int tcp_create_listen_sock(struct listen_connection *con, + struct sockaddr_storage *saddr) { struct socket *sock = NULL; int result = 0; @@ -1186,20 +1195,13 @@ static struct socket *tcp_create_listen_sock(struct connection *con, sock_set_reuseaddr(sock->sk); - write_lock_bh(&sock->sk->sk_callback_lock); - sock->sk->sk_user_data = con; - save_listen_callbacks(sock); - con->rx_action = accept_from_sock; - write_unlock_bh(&sock->sk->sk_callback_lock); + add_listen_sock(sock, con); /* Bind to our port */ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); result = sock->ops->bind(sock, (struct sockaddr *) saddr, addr_len); if (result < 0) { log_print("Can't bind to port %d", dlm_config.ci_tcp_port); - sock_release(sock); - sock = NULL; - con->sock = NULL; goto create_out; } sock_set_keepalive(sock->sk); @@ -1207,13 +1209,13 @@ static struct socket *tcp_create_listen_sock(struct connection *con, result = sock->ops->listen(sock, 5); if (result < 0) { log_print("Can't listen on port %d", dlm_config.ci_tcp_port); - sock_release(sock); - sock = NULL; goto create_out; } + return 0; + create_out: - return sock; + return result; } /* Get local addresses */ @@ -1242,15 +1244,14 @@ static void deinit_local(void) kfree(dlm_local_addr[i]); } -/* Initialise SCTP socket and bind to all interfaces */ -static int sctp_listen_for_all(void) +/* Initialise SCTP socket and bind to all interfaces + * On error caller must run dlm_close_sock() for the + * listen connection socket. + */ +static int sctp_listen_for_all(struct listen_connection *con) { struct socket *sock = NULL; int result = -EINVAL; - struct connection *con = nodeid2con(0, GFP_NOFS); - - if (!con) - return -ENOMEM; log_print("Using SCTP for communications"); @@ -1265,44 +1266,27 @@ static int sctp_listen_for_all(void) sock_set_mark(sock->sk, dlm_config.ci_mark); sctp_sock_set_nodelay(sock->sk); - write_lock_bh(&sock->sk->sk_callback_lock); - /* Init con struct */ - sock->sk->sk_user_data = con; - save_listen_callbacks(sock); - con->sock = sock; - con->sock->sk->sk_data_ready = lowcomms_data_ready; - con->rx_action = accept_from_sock; - - write_unlock_bh(&sock->sk->sk_callback_lock); + add_listen_sock(sock, con); /* Bind to all addresses. */ - if (sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port)) - goto create_delsock; + result = sctp_bind_addrs(con->sock, dlm_config.ci_tcp_port); + if (result < 0) + goto out; result = sock->ops->listen(sock, 5); if (result < 0) { log_print("Can't set socket listening"); - goto create_delsock; + goto out; } return 0; -create_delsock: - sock_release(sock); - con->sock = NULL; out: return result; } static int tcp_listen_for_all(void) { - struct socket *sock = NULL; - struct connection *con = nodeid2con(0, GFP_NOFS); - int result = -EINVAL; - - if (!con) - return -ENOMEM; - /* We don't support multi-homed hosts */ if (dlm_local_addr[1] != NULL) { log_print("TCP protocol can't handle multi-homed hosts, " @@ -1312,16 +1296,7 @@ static int tcp_listen_for_all(void) log_print("Using TCP for communications"); - sock = tcp_create_listen_sock(con, dlm_local_addr[0]); - if (sock) { - add_sock(sock, con); - result = 0; - } - else { - result = -EADDRINUSE; - } - - return result; + return tcp_create_listen_sock(&listen_con, dlm_local_addr[0]); } @@ -1541,10 +1516,15 @@ static void process_recv_sockets(struct work_struct *work) clear_bit(CF_READ_PENDING, &con->flags); do { - err = con->rx_action(con); + err = receive_from_sock(con); } while (!err); } +static void process_listen_recv_socket(struct work_struct *work) +{ + accept_from_sock(&listen_con); +} + /* Send workqueue function */ static void process_send_sockets(struct work_struct *work) { @@ -1678,6 +1658,8 @@ void dlm_lowcomms_stop(void) if (send_workqueue) flush_workqueue(send_workqueue); + dlm_close_sock(&listen_con.sock); + foreach_conn(shutdown_conn); work_flush(); foreach_conn(free_conn); @@ -1688,7 +1670,6 @@ void dlm_lowcomms_stop(void) int dlm_lowcomms_start(void) { int error = -EINVAL; - struct connection *con; int i; for (i = 0; i < CONN_HASH_SIZE; i++) @@ -1701,6 +1682,8 @@ int dlm_lowcomms_start(void) goto fail; } + INIT_WORK(&listen_con.rwork, process_listen_recv_socket); + error = work_start(); if (error) goto fail; @@ -1711,7 +1694,7 @@ int dlm_lowcomms_start(void) if (dlm_config.ci_protocol == 0) error = tcp_listen_for_all(); else - error = sctp_listen_for_all(); + error = sctp_listen_for_all(&listen_con); if (error) goto fail_unlisten; @@ -1719,9 +1702,7 @@ int dlm_lowcomms_start(void) fail_unlisten: dlm_allow_conn = 0; - con = nodeid2con(0,0); - if (con) - free_conn(con); + dlm_close_sock(&listen_con.sock); fail: return error; } From 1a26bfafbce0f2ec8cfe04d9cdcaead0e6dd58ec Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:26 -0500 Subject: [PATCH 11/13] fs: dlm: fix check for multi-homed hosts This patch will use the runtime array size dlm_local_count variable to check the actual size of the dlm_local_addr array. There exists currently a cleanup bug, because the tcp_listen_for_all() functionality might check on a dangled pointer. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index b042ef56eba6e..f7e86791a0825 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1288,7 +1288,7 @@ static int sctp_listen_for_all(struct listen_connection *con) static int tcp_listen_for_all(void) { /* We don't support multi-homed hosts */ - if (dlm_local_addr[1] != NULL) { + if (dlm_local_count > 1) { log_print("TCP protocol can't handle multi-homed hosts, " "try SCTP"); return -EINVAL; From 40c6b83e5a07d1dc3952a5ad040eb1c7c966c4fe Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:27 -0500 Subject: [PATCH 12/13] fs: dlm: constify addr_compare This patch just constify some function parameter which should be have a read access only. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index f7e86791a0825..7f85594b663a8 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -274,7 +274,8 @@ static struct dlm_node_addr *find_node_addr(int nodeid) return NULL; } -static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) +static int addr_compare(const struct sockaddr_storage *x, + const struct sockaddr_storage *y) { switch (x->ss_family) { case AF_INET: { From 4f19d071f9bee1bb2040ae73436314a5ec9ede44 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 2 Nov 2020 20:04:28 -0500 Subject: [PATCH 13/13] fs: dlm: check on existing node address This patch checks if we add twice the same address to a per node address array. This should never be the case and we report -EEXIST to the user space. Signed-off-by: Alexander Aring Signed-off-by: David Teigland --- fs/dlm/lowcomms.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7f85594b663a8..372c34ff8594f 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -374,10 +374,25 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) return rv; } +/* caller need to held dlm_node_addrs_spin lock */ +static bool dlm_lowcomms_na_has_addr(const struct dlm_node_addr *na, + const struct sockaddr_storage *addr) +{ + int i; + + for (i = 0; i < na->addr_count; i++) { + if (addr_compare(na->addr[i], addr)) + return true; + } + + return false; +} + int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) { struct sockaddr_storage *new_addr; struct dlm_node_addr *new_node, *na; + bool ret; new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); if (!new_node) @@ -402,6 +417,14 @@ int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) return 0; } + ret = dlm_lowcomms_na_has_addr(na, addr); + if (ret) { + spin_unlock(&dlm_node_addrs_spin); + kfree(new_addr); + kfree(new_node); + return -EEXIST; + } + if (na->addr_count >= DLM_MAX_ADDR_COUNT) { spin_unlock(&dlm_node_addrs_spin); kfree(new_addr);