Skip to content

Commit

Permalink
net/smc: check for healthy link group and connections
Browse files Browse the repository at this point in the history
If a problem is detected on at least one connection of a link group,
the whole link group and all of its connections are terminated.
This patch adds a check for a healthy link group when trying to reserve
a work request, and checks for a healthy connection before starting
a tx worker.

Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Ursula Braun authored and David S. Miller committed Jan 25, 2018
1 parent 732720f commit 1a0a04c
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 12 deletions.
9 changes: 7 additions & 2 deletions net/smc/smc_cdc.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,14 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
struct smc_cdc_tx_pend **pend)
{
struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
int rc;

return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
(struct smc_wr_tx_pend_priv **)pend);
if (!conn->alert_token_local)
/* abnormal termination */
rc = -EPIPE;
return rc;
}

static inline void smc_cdc_add_pending_send(struct smc_connection *conn,
Expand Down
6 changes: 4 additions & 2 deletions net/smc/smc_diag.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
goto errout;

if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) && smc->conn.lgr) {
if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
smc->conn.alert_token_local) {
struct smc_connection *conn = &smc->conn;
struct smc_diag_conninfo cinfo = {
.token = conn->alert_token_local,
Expand Down Expand Up @@ -124,7 +125,8 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
goto errout;
}

if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr) {
if ((req->diag_ext & (1 << (SMC_DIAG_LGRINFO - 1))) && smc->conn.lgr &&
!list_empty(&smc->conn.lgr->list)) {
struct smc_diag_lgrinfo linfo = {
.role = smc->conn.lgr->role,
.lnk[0].ibport = smc->conn.lgr->lnk[0].ibport,
Expand Down
15 changes: 12 additions & 3 deletions net/smc/smc_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -408,8 +408,9 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn)
goto out_unlock;
}
rc = 0;
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
if (conn->alert_token_local) /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
}
goto out_unlock;
}
Expand Down Expand Up @@ -440,10 +441,17 @@ static void smc_tx_work(struct work_struct *work)
int rc;

lock_sock(&smc->sk);
if (smc->sk.sk_err ||
!conn->alert_token_local ||
conn->local_rx_ctrl.conn_state_flags.peer_conn_abort)
goto out;

rc = smc_tx_sndbuf_nonempty(conn);
if (!rc && conn->local_rx_ctrl.prod_flags.write_blocked &&
!atomic_read(&conn->bytes_to_rcv))
conn->local_rx_ctrl.prod_flags.write_blocked = 0;

out:
release_sock(&smc->sk);
}

Expand All @@ -464,7 +472,8 @@ void smc_tx_consumer_update(struct smc_connection *conn)
((to_confirm > conn->rmbe_update_limit) &&
((to_confirm > (conn->rmbe_size / 2)) ||
conn->local_rx_ctrl.prod_flags.write_blocked))) {
if (smc_cdc_get_slot_and_msg_send(conn) < 0) {
if ((smc_cdc_get_slot_and_msg_send(conn) < 0) &&
conn->alert_token_local) { /* connection healthy */
schedule_delayed_work(&conn->tx_work,
SMC_TX_WORK_DELAY);
return;
Expand Down
11 changes: 6 additions & 5 deletions net/smc/smc_wr.c
Original file line number Diff line number Diff line change
Expand Up @@ -174,9 +174,9 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
struct smc_wr_tx_pend_priv **wr_pend_priv)
{
struct smc_wr_tx_pend *wr_pend;
u32 idx = link->wr_tx_cnt;
struct ib_send_wr *wr_ib;
u64 wr_id;
u32 idx;
int rc;

*wr_buf = NULL;
Expand All @@ -186,16 +186,17 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
if (rc)
return rc;
} else {
struct smc_link_group *lgr;

lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
rc = wait_event_timeout(
link->wr_tx_wait,
list_empty(&lgr->list) || /* lgr terminated */
(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
SMC_WR_TX_WAIT_FREE_SLOT_TIME);
if (!rc) {
/* timeout - terminate connections */
struct smc_link_group *lgr;

lgr = container_of(link, struct smc_link_group,
lnk[SMC_SINGLE_LINK]);
smc_lgr_terminate(lgr);
return -EPIPE;
}
Expand Down

0 comments on commit 1a0a04c

Please sign in to comment.