From 263ffaeef174cc3adb51c87d6bb383d7af0199e7 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:34 +0100 Subject: [PATCH 01/10] net/smc: cleanup tcp_listen_worker initialization The tcp_listen_worker is already initialized when socket is created (in smc_sock_alloc()). Get rid of the duplicate initialization in smc_listen(). No functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 80e2119f1c701..d9b1a0e4446cd 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1354,7 +1354,6 @@ static int smc_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = SMC_LISTEN; - INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); sock_hold(sk); /* sock_hold in tcp_listen_worker */ if (!schedule_work(&smc->tcp_listen_work)) sock_put(sk); From 3f3f0e364eb8ca18366e462f65dfe303f7449a6f Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:35 +0100 Subject: [PATCH 02/10] net/smc: make smc_lgr_free() static smc_lgr_free() is just called inside smc_core.c. Make it static. Just cleanup, no functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 4 +++- net/smc/smc_core.h | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 18daebcef1813..4812ca30f1dc1 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -149,6 +149,8 @@ static int smc_link_send_delete(struct smc_link *lnk) return -ENOTCONN; } +static void smc_lgr_free(struct smc_link_group *lgr); + static void smc_lgr_free_work(struct work_struct *work) { struct smc_link_group *lgr = container_of(to_delayed_work(work), @@ -408,7 +410,7 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr) } /* remove a link group */ -void smc_lgr_free(struct smc_link_group *lgr) +static void smc_lgr_free(struct smc_link_group *lgr) { smc_lgr_free_bufs(lgr); if (lgr->is_smcd) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c156674733c9d..07364c0b41a17 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -243,7 +243,6 @@ struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; -void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_forget(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport); From 6ae36bff3f511d8b24ebbc126e3f1f23ac202ef4 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:36 +0100 Subject: [PATCH 03/10] net/smc: remove sock_error detour in clc-functions There is no need to store the return value in sk_err, if it is afterwards cleared again with sock_error(). This patch sets the return value directly. Just cleanup, no functional change. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_clc.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 89c3a8c7859a3..7278ec0cfa582 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -374,10 +374,8 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(struct smc_clc_msg_decline)); if (len < sizeof(struct smc_clc_msg_decline)) - smc->sk.sk_err = EPROTO; - if (len < 0) - smc->sk.sk_err = -len; - return sock_error(&smc->sk); + len = -EPROTO; + return len > 0 ? 0 : len; } /* send CLC PROPOSAL message across internal TCP socket */ @@ -536,7 +534,6 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) struct smc_link *link; struct msghdr msg; struct kvec vec; - int rc = 0; int len; memset(&aclc, 0, sizeof(aclc)); @@ -589,13 +586,8 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) vec.iov_len = ntohs(aclc.hdr.length); len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, ntohs(aclc.hdr.length)); - if (len < ntohs(aclc.hdr.length)) { - if (len >= 0) - new_smc->sk.sk_err = EPROTO; - else - new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err; - rc = sock_error(&new_smc->sk); - } + if (len < ntohs(aclc.hdr.length)) + len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; - return rc; + return len > 0 ? 0 : len; } From 9ed28556a388fdb894bdf9bd64c05cf6e7783ba3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:37 +0100 Subject: [PATCH 04/10] net/smc: allow fallback after clc timeouts If connection initialization fails for the LLC CONFIRM LINK or the LLC ADD LINK step, fallback to TCP should be enabled. Thus the negative return code -EAGAIN should switch to a positive timeout reason code in these cases, and the internal CLC socket should not have a set sk_err. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 8 ++++---- net/smc/smc_clc.c | 9 +++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d9b1a0e4446cd..66836cfbc5876 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -336,7 +336,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); - return rc; + return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } if (link->llc_confirm_rc) @@ -364,7 +364,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); - return rc; + return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc; } /* send add link reject message, only one link supported for now */ @@ -966,7 +966,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); - return rc; + return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } if (link->llc_confirm_resp_rc) @@ -987,7 +987,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), SMC_CLC_DECLINE); - return rc; + return rc == -EAGAIN ? 
SMC_CLC_DECL_TIMEOUT_AL : rc; } smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 7278ec0cfa582..62043d69e3a31 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -297,7 +297,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } if (clc_sk->sk_err) { reason_code = -clc_sk->sk_err; - smc->sk.sk_err = clc_sk->sk_err; + if (clc_sk->sk_err == EAGAIN && + expected_type == SMC_CLC_DECLINE) + clc_sk->sk_err = 0; /* reset for fallback usage */ + else + smc->sk.sk_err = clc_sk->sk_err; goto out; } if (!len) { /* peer has performed orderly shutdown */ @@ -306,7 +310,8 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, goto out; } if (len < 0) { - smc->sk.sk_err = -len; + if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE) + smc->sk.sk_err = -len; reason_code = len; goto out; } From 90d8b29cb4b251cd874aa00a50d11b28a7322986 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:38 +0100 Subject: [PATCH 05/10] net/smc: no link delete for a never active link If a link is terminated that has never reached the active state, there is no need to trigger an LLC DELETE LINK. Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 4812ca30f1dc1..ec7a7ed3b968f 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -173,8 +173,11 @@ static void smc_lgr_free_work(struct work_struct *work) spin_unlock_bh(&smc_lgr_list.lock); if (!lgr->is_smcd && !lgr->terminating) { + struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + /* try to send del link msg, on error free lgr immediately */ - if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) { + if (lnk->state == SMC_LNK_ACTIVE && + !smc_link_send_delete(lnk)) { /* reschedule in case we never receive a response */ smc_lgr_schedule_free_work(lgr); return; From 2b59f58e34e78abec2bccd2413ae9e7ea509a855 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:39 +0100 Subject: [PATCH 06/10] net/smc: short wait for late smc_clc_wait_msg After sending one of the initial LLC messages CONFIRM LINK or ADD LINK, there is already a wait for the LLC response. It does not make sense to wait another long time for a CLC DECLINE. Thus this patch introduces a shorter wait time for these cases. Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 15 ++++++++------- net/smc/smc_clc.c | 6 +++--- net/smc/smc_clc.h | 3 ++- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 66836cfbc5876..93f7ff8f6e88a 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -335,7 +335,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) struct smc_clc_msg_decline dclc; rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), - SMC_CLC_DECLINE); + SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? 
SMC_CLC_DECL_TIMEOUT_CL : rc; } @@ -363,7 +363,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) struct smc_clc_msg_decline dclc; rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), - SMC_CLC_DECLINE); + SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc; } @@ -533,7 +533,8 @@ static int smc_connect_clc(struct smc_sock *smc, int smc_type, if (rc) return rc; /* receive SMC Accept CLC message */ - return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT); + return smc_clc_wait_msg(smc, aclc, sizeof(*aclc), SMC_CLC_ACCEPT, + CLC_WAIT_TIME); } /* setup for RDMA connection of client */ @@ -965,7 +966,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) struct smc_clc_msg_decline dclc; rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), - SMC_CLC_DECLINE); + SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc; } @@ -986,7 +987,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) struct smc_clc_msg_decline dclc; rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc), - SMC_CLC_DECLINE); + SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT); return rc == -EAGAIN ? 
SMC_CLC_DECL_TIMEOUT_AL : rc; } @@ -1222,7 +1223,7 @@ static void smc_listen_work(struct work_struct *work) */ pclc = (struct smc_clc_msg_proposal *)&buf; reason_code = smc_clc_wait_msg(new_smc, pclc, SMC_CLC_MAX_LEN, - SMC_CLC_PROPOSAL); + SMC_CLC_PROPOSAL, CLC_WAIT_TIME); if (reason_code) { smc_listen_decline(new_smc, reason_code, 0); return; @@ -1272,7 +1273,7 @@ static void smc_listen_work(struct work_struct *work) /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), - SMC_CLC_CONFIRM); + SMC_CLC_CONFIRM, CLC_WAIT_TIME); if (reason_code) { mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 62043d69e3a31..776e9dfc915dd 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -265,7 +265,7 @@ int smc_clc_prfx_match(struct socket *clcsock, * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. */ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, - u8 expected_type) + u8 expected_type, unsigned long timeout) { long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo; struct sock *clc_sk = smc->clcsock->sk; @@ -285,7 +285,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, * sizeof(struct smc_clc_msg_hdr) */ krflags = MSG_PEEK | MSG_WAITALL; - smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME; + clc_sk->sk_rcvtimeo = timeout; iov_iter_kvec(&msg.msg_iter, READ, &vec, 1, sizeof(struct smc_clc_msg_hdr)); len = sock_recvmsg(smc->clcsock, &msg, krflags); @@ -351,7 +351,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } out: - smc->clcsock->sk->sk_rcvtimeo = rcvtimeo; + clc_sk->sk_rcvtimeo = rcvtimeo; return reason_code; } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 18da89b681c2d..24658e8c0de42 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -27,6 +27,7 @@ #define SMC_TYPE_D 1 /* SMC-D only */ #define SMC_TYPE_B 3 /* SMC-R and SMC-D */ #define CLC_WAIT_TIME (6 
* HZ) /* max. wait time on clcsock */ +#define CLC_WAIT_TIME_SHORT HZ /* short wait time on clcsock */ #define SMC_CLC_DECL_MEM 0x01010000 /* insufficient memory resources */ #define SMC_CLC_DECL_TIMEOUT_CL 0x02010000 /* timeout w4 QP confirm link */ #define SMC_CLC_DECL_TIMEOUT_AL 0x02020000 /* timeout w4 QP add link */ @@ -182,7 +183,7 @@ struct smcd_dev; int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop); int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, - u8 expected_type); + u8 expected_type, unsigned long timeout); int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); int smc_clc_send_proposal(struct smc_sock *smc, int smc_type, struct smc_ib_device *smcibdev, u8 ibport, u8 gid[], From 587e41dcea357a1ac15e3b31d800900e1c585d7e Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Thu, 22 Nov 2018 10:26:40 +0100 Subject: [PATCH 07/10] net/smc: cleanup listen worker mutex unlocking For easier reading, move the unlock of mutex smc_create_lgr_pending into smc_listen_work(), i.e. into the function in which the mutex is locked. No functional change. Reported-by: Dan Carpenter Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/af_smc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 93f7ff8f6e88a..7657e249f526c 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1182,7 +1182,6 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc, return 0; decline: - mutex_unlock(&smc_create_lgr_pending); smc_listen_decline(new_smc, reason_code, local_contact); return reason_code; } @@ -1282,8 +1281,10 @@ static void smc_listen_work(struct work_struct *work) /* finish worker */ if (!ism_supported) { - if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) + if (smc_listen_rdma_finish(new_smc, &cclc, local_contact)) { + mutex_unlock(&smc_create_lgr_pending); return; + } } smc_conn_save_peer_info(new_smc, &cclc); mutex_unlock(&smc_create_lgr_pending); From 4600cfc302f70f2b4d89818b4d4c79bd19e5ab87 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 22 Nov 2018 10:26:41 +0100 Subject: [PATCH 08/10] net/smc: avoid a delay by waiting for nothing When sending the CONFIRM RKEY message fails, do not start waiting for a response in smc_llc_do_confirm_rkey(); return the send error immediately instead. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. 
Miller --- net/smc/smc_llc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 9c916c709ca71..132c6a8e49f83 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -651,7 +651,9 @@ int smc_llc_do_confirm_rkey(struct smc_link *link, int rc; reinit_completion(&link->llc_confirm_rkey); - smc_llc_send_confirm_rkey(link, rmb_desc); + rc = smc_llc_send_confirm_rkey(link, rmb_desc); + if (rc) + return rc; /* receive CONFIRM RKEY response from server over RoCE fabric */ rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey, SMC_LLC_WAIT_TIME); From 60e03c62c5db22c5eb63bcb6ce226cf05f4ee47c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 22 Nov 2018 10:26:42 +0100 Subject: [PATCH 09/10] net/smc: add infrastructure to send delete rkey messages Add the infrastructure to send LLC messages of type DELETE RKEY to unregister a shared memory region at the peer. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/smc_core.h | 3 +++ net/smc/smc_llc.c | 53 +++++++++++++++++++++++++++++++++++++++++++++- net/smc/smc_llc.h | 2 ++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 07364c0b41a17..bce39d6df45a4 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -109,6 +109,9 @@ struct smc_link { int llc_testlink_time; /* testlink interval */ struct completion llc_confirm_rkey; /* wait 4 rx of cnf rkey */ int llc_confirm_rkey_rc; /* rc from cnf rkey msg */ + struct completion llc_delete_rkey; /* wait 4 rx of del rkey */ + int llc_delete_rkey_rc; /* rc from del rkey msg */ + struct mutex llc_delete_rkey_mutex; /* serialize usage */ }; /* For now we just allow one parallel link per link group. 
The SMC protocol diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 132c6a8e49f83..a6d3623d06f42 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -238,6 +238,29 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link, return rc; } +/* send LLC delete rkey request */ +static int smc_llc_send_delete_rkey(struct smc_link *link, + struct smc_buf_desc *rmb_desc) +{ + struct smc_llc_msg_delete_rkey *rkeyllc; + struct smc_wr_tx_pend_priv *pend; + struct smc_wr_buf *wr_buf; + int rc; + + rc = smc_llc_add_pending_send(link, &wr_buf, &pend); + if (rc) + return rc; + rkeyllc = (struct smc_llc_msg_delete_rkey *)wr_buf; + memset(rkeyllc, 0, sizeof(*rkeyllc)); + rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY; + rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey); + rkeyllc->num_rkeys = 1; + rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); + /* send llc message */ + rc = smc_wr_tx_send(link, pend); + return rc; +} + /* prepare an add link message */ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc, struct smc_link *link, u8 mac[], u8 gid[], @@ -509,7 +532,9 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link, int i, max; if (llc->hd.flags & SMC_LLC_FLAG_RESP) { - /* unused as long as we don't send this type of msg */ + link->llc_delete_rkey_rc = llc->hd.flags & + SMC_LLC_FLAG_RKEY_NEG; + complete(&link->llc_delete_rkey); } else { max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX); for (i = 0; i < max; i++) { @@ -610,6 +635,8 @@ int smc_llc_link_init(struct smc_link *link) init_completion(&link->llc_add); init_completion(&link->llc_add_resp); init_completion(&link->llc_confirm_rkey); + init_completion(&link->llc_delete_rkey); + mutex_init(&link->llc_delete_rkey_mutex); init_completion(&link->llc_testlink_resp); INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work); return 0; @@ -650,6 +677,7 @@ int smc_llc_do_confirm_rkey(struct smc_link *link, { int rc; + /* protected by mutex 
smc_create_lgr_pending */ reinit_completion(&link->llc_confirm_rkey); rc = smc_llc_send_confirm_rkey(link, rmb_desc); if (rc) @@ -662,6 +690,29 @@ int smc_llc_do_confirm_rkey(struct smc_link *link, return 0; } +/* unregister an rtoken at the remote peer */ +int smc_llc_do_delete_rkey(struct smc_link *link, + struct smc_buf_desc *rmb_desc) +{ + int rc; + + mutex_lock(&link->llc_delete_rkey_mutex); + reinit_completion(&link->llc_delete_rkey); + rc = smc_llc_send_delete_rkey(link, rmb_desc); + if (rc) + goto out; + /* receive DELETE RKEY response from server over RoCE fabric */ + rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey, + SMC_LLC_WAIT_TIME); + if (rc <= 0 || link->llc_delete_rkey_rc) + rc = -EFAULT; + else + rc = 0; +out: + mutex_unlock(&link->llc_delete_rkey_mutex); + return rc; +} + /***************************** init, exit, misc ******************************/ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = { diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 9e2ff088e3018..461c0c3ef76ef 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -49,6 +49,8 @@ void smc_llc_link_inactive(struct smc_link *link); void smc_llc_link_clear(struct smc_link *link); int smc_llc_do_confirm_rkey(struct smc_link *link, struct smc_buf_desc *rmb_desc); +int smc_llc_do_delete_rkey(struct smc_link *link, + struct smc_buf_desc *rmb_desc); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ From c7674c001b1143a5bc6b36efc7adc4bdd8ff5e76 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 22 Nov 2018 10:26:43 +0100 Subject: [PATCH 10/10] net/smc: unregister rkeys of unused buffer When an rmb is no longer in use by a connection, unregister its rkey at the remote peer with an LLC DELETE RKEY message. With this change, unused buffers held in the buffer pool are no longer registered at the remote peer. They are registered before the buffer is actually used and unregistered when they are no longer used by a connection. 
Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 22 +++++++++++----------- net/smc/smc_core.c | 7 ++++++- net/smc/smc_core.h | 2 +- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 7657e249f526c..4b865250e238e 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -299,14 +299,17 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } -/* register a new rmb, optionally send confirm_rkey msg to register with peer */ +/* register a new rmb, send confirm_rkey msg to register with peer */ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc, bool conf_rkey) { - /* register memory region for new rmb */ - if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) { - rmb_desc->regerr = 1; - return -EFAULT; + if (!rmb_desc->wr_reg) { + /* register memory region for new rmb */ + if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) { + rmb_desc->regerr = 1; + return -EFAULT; + } + rmb_desc->wr_reg = 1; } if (!conf_rkey) return 0; @@ -581,8 +584,7 @@ static int smc_connect_rdma(struct smc_sock *smc, return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK, local_contact); } else { - if (!smc->conn.rmb_desc->reused && - smc_reg_rmb(link, smc->conn.rmb_desc, true)) + if (smc_reg_rmb(link, smc->conn.rmb_desc, true)) return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB, local_contact); } @@ -1143,10 +1145,8 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact) struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; if (local_contact != SMC_FIRST_CONTACT) { - if (!new_smc->conn.rmb_desc->reused) { - if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) - return SMC_CLC_DECL_ERR_REGRMB; - } + if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true)) + return SMC_CLC_DECL_ERR_REGRMB; } 
smc_rmb_sync_sg_for_device(&new_smc->conn); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ec7a7ed3b968f..1382ddae591e9 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -298,8 +298,13 @@ static void smc_buf_unuse(struct smc_connection *conn, conn->sndbuf_desc->used = 0; if (conn->rmb_desc) { if (!conn->rmb_desc->regerr) { - conn->rmb_desc->reused = 1; conn->rmb_desc->used = 0; + if (!lgr->is_smcd) { + /* unregister rmb with peer */ + smc_llc_do_delete_rkey( + &lgr->lnk[SMC_SINGLE_LINK], + conn->rmb_desc); + } } else { /* buf registration failed, reuse not possible */ write_lock_bh(&lgr->rmbs_lock); diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index bce39d6df45a4..e177c6675038c 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -130,7 +130,7 @@ struct smc_buf_desc { struct page *pages; int len; /* length of buffer */ u32 used; /* currently used / unused */ - u8 reused : 1; /* new created / reused */ + u8 wr_reg : 1; /* mem region registered */ u8 regerr : 1; /* err during registration */ union { struct { /* SMC-R */