diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 05cbcd3a6f607..cf0e11978b66c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -377,6 +377,15 @@ static void smc_link_save_peer_info(struct smc_link *link,
 	link->peer_mtu = clc->qp_mtu;
 }
 
+static void smc_lgr_forget(struct smc_link_group *lgr)
+{
+	spin_lock_bh(&smc_lgr_list.lock);
+	/* do not use this link group for new connections */
+	if (!list_empty(&lgr->list))
+		list_del_init(&lgr->list);
+	spin_unlock_bh(&smc_lgr_list.lock);
+}
+
 /* setup for RDMA connection of client */
 static int smc_connect_rdma(struct smc_sock *smc)
 {
@@ -513,6 +522,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	return rc ? rc : local_contact;
 
 decline_rdma_unlock:
+	if (local_contact == SMC_FIRST_CONTACT)
+		smc_lgr_forget(smc->conn.lgr);
 	mutex_unlock(&smc_create_lgr_pending);
 	smc_conn_free(&smc->conn);
 decline_rdma:
@@ -526,6 +537,8 @@ static int smc_connect_rdma(struct smc_sock *smc)
 	goto out_connected;
 
 out_err_unlock:
+	if (local_contact == SMC_FIRST_CONTACT)
+		smc_lgr_forget(smc->conn.lgr);
 	mutex_unlock(&smc_create_lgr_pending);
 	smc_conn_free(&smc->conn);
 out_err:
@@ -906,6 +919,8 @@ static void smc_listen_work(struct work_struct *work)
 	return;
 
 decline_rdma_unlock:
+	if (local_contact == SMC_FIRST_CONTACT)
+		smc_lgr_forget(new_smc->conn.lgr);
 	mutex_unlock(&smc_create_lgr_pending);
 decline_rdma:
 	/* RDMA setup failed, switch back to TCP */
@@ -918,6 +933,8 @@ static void smc_listen_work(struct work_struct *work)
 	goto out_connected;
 
 out_err_unlock:
+	if (local_contact == SMC_FIRST_CONTACT)
+		smc_lgr_forget(new_smc->conn.lgr);
 	mutex_unlock(&smc_create_lgr_pending);
 out_err:
 	newsmcsk->sk_state = SMC_CLOSED;
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 51805334e0013..6e8f5fbe0f091 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
 			  struct smc_cdc_tx_pend **pend)
 {
 	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+	int rc;
 
-	return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
-				       (struct smc_wr_tx_pend_priv **)pend);
+	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
+				     (struct smc_wr_tx_pend_priv **)pend);
+	if (!conn->alert_token_local)
+		/* abnormal termination */
+		rc = -EPIPE;
+	return rc;
 }
 
 static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 1468a2a3cdf40..babe05d385e71 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -89,7 +89,7 @@ static int smc_close_abort(struct smc_connection *conn)
 }
 
 /* terminate smc socket abnormally - active abort
- * RDMA communication no longer possible
+ * link group is terminated, i.e. RDMA communication no longer possible
  */
 static void smc_close_active_abort(struct smc_sock *smc)
 {
@@ -107,21 +107,25 @@ static void smc_close_active_abort(struct smc_sock *smc)
 	case SMC_INIT:
 	case SMC_ACTIVE:
 		sk->sk_state = SMC_PEERABORTWAIT;
+		release_sock(sk);
+		cancel_delayed_work_sync(&smc->conn.tx_work);
+		lock_sock(sk);
 		break;
 	case SMC_APPCLOSEWAIT1:
 	case SMC_APPCLOSEWAIT2:
-		txflags->peer_conn_abort = 1;
 		sock_release(smc->clcsock);
 		if (!smc_cdc_rxed_any_close(&smc->conn))
 			sk->sk_state = SMC_PEERABORTWAIT;
 		else
 			sk->sk_state = SMC_CLOSED;
+		release_sock(sk);
+		cancel_delayed_work_sync(&smc->conn.tx_work);
+		lock_sock(sk);
 		break;
 	case SMC_PEERCLOSEWAIT1:
 	case SMC_PEERCLOSEWAIT2:
 		if (!txflags->peer_conn_closed) {
 			sk->sk_state = SMC_PEERABORTWAIT;
-			txflags->peer_conn_abort = 1;
 			sock_release(smc->clcsock);
 		} else {
 			sk->sk_state = SMC_CLOSED;
@@ -129,10 +133,8 @@ static void smc_close_active_abort(struct smc_sock *smc)
 		break;
 	case SMC_PROCESSABORT:
 	case SMC_APPFINCLOSEWAIT:
-		if (!txflags->peer_conn_closed) {
-			txflags->peer_conn_abort = 1;
+		if (!txflags->peer_conn_closed)
 			sock_release(smc->clcsock);
-		}
 		sk->sk_state = SMC_CLOSED;
 		break;
 	case SMC_PEERFINCLOSEWAIT:
@@ -249,9 +251,6 @@ int smc_close_active(struct smc_sock *smc)
 		/* peer sending PeerConnectionClosed will cause transition */
 		break;
 	case SMC_PROCESSABORT:
-		release_sock(sk);
-		cancel_delayed_work_sync(&conn->tx_work);
-		lock_sock(sk);
 		smc_close_abort(conn);
 		sk->sk_state = SMC_CLOSED;
 		break;
@@ -277,7 +276,6 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
 	case SMC_APPFINCLOSEWAIT:
 	case SMC_APPCLOSEWAIT1:
 	case SMC_APPCLOSEWAIT2:
-		smc_close_abort(&smc->conn);
 		sk->sk_state = SMC_PROCESSABORT;
 		break;
 	case SMC_PEERCLOSEWAIT1:
@@ -285,7 +283,6 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
 		if (txflags->peer_done_writing &&
 		    !smc_close_sent_any_close(&smc->conn)) {
 			/* just shutdown, but not yet closed locally */
-			smc_close_abort(&smc->conn);
 			sk->sk_state = SMC_PROCESSABORT;
 		} else {
 			sk->sk_state = SMC_CLOSED;
@@ -302,8 +299,9 @@ static void smc_close_passive_abort_received(struct smc_sock *smc)
 	}
 }
 
-/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
- * or peer_done_writing.
+/* Either some kind of closing has been received: peer_conn_closed,
+ * peer_conn_abort, or peer_done_writing
+ * or the link group of the connection terminates abnormally.
  */
 static void smc_close_passive_work(struct work_struct *work)
 {
@@ -326,7 +324,11 @@ static void smc_close_passive_work(struct work_struct *work)
 
 	rxflags = &conn->local_rx_ctrl.conn_state_flags;
 	if (rxflags->peer_conn_abort) {
+		/* peer has not received all data */
 		smc_close_passive_abort_received(smc);
+		release_sock(&smc->sk);
+		cancel_delayed_work_sync(&conn->tx_work);
+		lock_sock(&smc->sk);
 		goto wakeup;
 	}
 
@@ -348,7 +350,6 @@ static void smc_close_passive_work(struct work_struct *work)
 			sk->sk_state = SMC_PEERCLOSEWAIT2;
 		/* fall through */ /* to check for closing */
 	case SMC_PEERCLOSEWAIT2:
-	case SMC_PEERFINCLOSEWAIT:
 		if (!smc_cdc_rxed_any_close(conn))
 			break;
 		if (sock_flag(sk, SOCK_DEAD) &&
@@ -360,6 +361,10 @@ static void smc_close_passive_work(struct work_struct *work)
 			sk->sk_state = SMC_APPFINCLOSEWAIT;
 		}
 		break;
+	case SMC_PEERFINCLOSEWAIT:
+		if (smc_cdc_rxed_any_close(conn))
+			sk->sk_state = SMC_CLOSED;
+		break;
 	case SMC_APPCLOSEWAIT1:
 	case SMC_APPCLOSEWAIT2:
 	case SMC_APPFINCLOSEWAIT:
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 94f21116dac5e..ed5b46d1fe41a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -128,6 +128,8 @@ static void smc_lgr_free_work(struct work_struct *work)
 	bool conns;
 
 	spin_lock_bh(&smc_lgr_list.lock);
+	if (list_empty(&lgr->list))
+		goto free;
 	read_lock_bh(&lgr->conns_lock);
 	conns = RB_EMPTY_ROOT(&lgr->conns_all);
 	read_unlock_bh(&lgr->conns_lock);
@@ -136,6 +138,7 @@ static void smc_lgr_free_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&lgr->list); /* remove from smc_lgr_list */
+free:
 	spin_unlock_bh(&smc_lgr_list.lock);
 	smc_lgr_free(lgr);
 }
@@ -231,9 +234,7 @@ static void smc_buf_unuse(struct smc_connection *conn)
 /* remove a finished connection from its link group */
 void smc_conn_free(struct smc_connection *conn)
 {
-	struct smc_link_group *lgr = conn->lgr;
-
-	if (!lgr)
+	if (!conn->lgr)
 		return;
 	smc_cdc_tx_dismiss_slots(conn);
 	smc_lgr_unregister_conn(conn);
@@ -328,12 +329,16 @@ void smc_lgr_terminate(struct smc_link_group *lgr)
 		conn = rb_entry(node, struct smc_connection, alert_node);
 		smc = container_of(conn, struct smc_sock, conn);
 		sock_hold(&smc->sk);
+		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
 		__smc_lgr_unregister_conn(conn);
+		write_unlock_bh(&lgr->conns_lock);
 		schedule_work(&conn->close_work);
+		write_lock_bh(&lgr->conns_lock);
 		sock_put(&smc->sk);
 		node = rb_first(&lgr->conns_all);
 	}
 	write_unlock_bh(&lgr->conns_lock);
+	wake_up(&lgr->lnk[SMC_SINGLE_LINK].wr_reg_wait);
 }
 
 /* Determine vlan of internal TCP socket.
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index d2d01cf702244..427b91c1c964f 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 	if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
 		goto errout;
 
-	if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
+	if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
+	    smc->conn.alert_token_local) {
 		struct smc_connection *conn = &smc->conn;
 		struct smc_diag_conninfo cinfo = {
 			.token = conn->alert_token_local,
@@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 			goto errout;
 	}
 
-	if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
+	if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
+	    !list_empty(&smc->conn.lgr->list)) {
 		struct smc_diag_lgrinfo linfo = {
 			.role = smc->conn.lgr->role,
 			.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index fea6482233a61..838bce20c3610 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -248,8 +248,10 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
 		peer_rmbe_offset;
 	rdma_wr.rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
 	rc = ib_post_send(link->roce_qp, &rdma_wr.wr, &failed_wr);
-	if (rc)
+	if (rc) {
 		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+		smc_lgr_terminate(lgr);
+	}
 	return rc;
 }
 
@@ -406,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
 			goto out_unlock;
 		}
 		rc = 0;
-		schedule_delayed_work(&conn->tx_work,
-				      SMC_TX_WORK_DELAY);
+		if (conn->alert_token_local) /* connection healthy */
+			schedule_delayed_work(&conn->tx_work,
+					      SMC_TX_WORK_DELAY);
 	}
 	goto out_unlock;
 }
@@ -438,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
 	int rc;
 
 	lock_sock(&smc->sk);
+	if (smc->sk.sk_err ||
+	    !conn->alert_token_local ||
+	    conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
+		goto out;
+
 	rc = smc_tx_sndbuf_nonempty(conn);
 	if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
 	    !atomic_read(&conn->bytes_to_rcv))
 		conn->local_rx_ctrl.prod_flags.write_blocked = 0;
+
+out:
 	release_sock(&smc->sk);
 }
 
@@ -462,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
 	    ((to_confirm > conn->rmbe_update_limit) &&
 	     ((to_confirm > (conn->rmbe_size / 2)) ||
 	      conn->local_rx_ctrl.prod_flags.write_blocked))) {
-		if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
+		if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
+		    conn->alert_token_local) { /* connection healthy */
 			schedule_delayed_work(&conn->tx_work,
 					      SMC_TX_WORK_DELAY);
 			return;
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 5ed94109d1d6f..1b8af23e6e2b9 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
 			    struct smc_wr_tx_pend_priv **wr_pend_priv)
 {
 	struct smc_wr_tx_pend *wr_pend;
+	u32 idx = link->wr_tx_cnt;
 	struct ib_send_wr *wr_ib;
 	u64 wr_id;
-	u32 idx;
 	int rc;
 
 	*wr_buf = NULL;
@@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
 		if (rc)
 			return rc;
 	} else {
+		struct smc_link_group *lgr;
+
+		lgr = container_of(link, struct smc_link_group,
+				   lnk[SMC_SINGLE_LINK]);
 		rc = wait_event_timeout(
 			link->wr_tx_wait,
+			list_empty(&lgr->list) || /* lgr terminated */
 			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
 			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
 		if (!rc) {
 			/* timeout - terminate connections */
-			struct smc_link_group *lgr;
-
-			lgr = container_of(link, struct smc_link_group,
-					   lnk[SMC_SINGLE_LINK]);
 			smc_lgr_terminate(lgr);
 			return -EPIPE;
 		}
@@ -248,8 +249,14 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	pend = container_of(priv, struct smc_wr_tx_pend, priv);
 	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx],
 			  &failed_wr);
-	if (rc)
+	if (rc) {
+		struct smc_link_group *lgr =
+			container_of(link, struct smc_link_group,
+				     lnk[SMC_SINGLE_LINK]);
+
 		smc_wr_tx_put_slot(link, priv);
+		smc_lgr_terminate(lgr);
+	}
 	return rc;
 }
 
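Editorial note (not part of the patch): the common thread of the af_smc.c and smc_core.c hunks is that a link group which hit a setup error or was terminated is unlinked from the global list with list_del_init(), so new connections can no longer pick it up, while the regular free path still releases it later. Below is a minimal, stand-alone user-space sketch of that "forget" pattern; it is not kernel code, and the names (lgr, lgr_list, lgr_forget) and the hand-rolled list are illustrative stand-ins for struct smc_link_group, smc_lgr_list and smc_lgr_forget().

/* forget_sketch.c - illustration only; build with: cc -pthread forget_sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct lgr {
	struct lgr *prev, *next;	/* doubly linked, like struct list_head */
	int id;
};

static struct lgr lgr_list = { &lgr_list, &lgr_list, 0 };	/* global list head */
static pthread_mutex_t lgr_list_lock = PTHREAD_MUTEX_INITIALIZER;

static bool lgr_linked(struct lgr *lgr)
{
	return lgr->next != lgr;	/* !list_empty() analogue for a node */
}

static void lgr_add(struct lgr *lgr)
{
	pthread_mutex_lock(&lgr_list_lock);
	lgr->next = lgr_list.next;
	lgr->prev = &lgr_list;
	lgr_list.next->prev = lgr;
	lgr_list.next = lgr;
	pthread_mutex_unlock(&lgr_list_lock);
}

/* analogue of smc_lgr_forget(): unlink so the group is not reused for new
 * connections, but do not free it here - a separate free path does that later
 */
static void lgr_forget(struct lgr *lgr)
{
	pthread_mutex_lock(&lgr_list_lock);
	if (lgr_linked(lgr)) {
		lgr->prev->next = lgr->next;
		lgr->next->prev = lgr->prev;
		lgr->next = lgr;	/* list_del_init() analogue */
		lgr->prev = lgr;
	}
	pthread_mutex_unlock(&lgr_list_lock);
}

int main(void)
{
	struct lgr *lgr = calloc(1, sizeof(*lgr));

	lgr->id = 1;
	lgr->next = lgr->prev = lgr;
	lgr_add(lgr);
	printf("linked after add: %d\n", lgr_linked(lgr));	/* prints 1 */
	lgr_forget(lgr);	/* e.g. first contact failed */
	printf("linked after forget: %d\n", lgr_linked(lgr));	/* prints 0 */
	free(lgr);		/* separate free path, as smc_lgr_free() does */
	return 0;
}

The same "off the list means do not reuse" reasoning is behind the new list_empty() checks in smc_lgr_free_work(), in smc_diag.c, and in the wait condition of smc_wr_tx_get_free_slot().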