Skip to content

Commit

Permalink
dlm: fix race while closing connections
Browse files Browse the repository at this point in the history
When a connection has issues, DLM may need to close it.  Therefore we
should also cancel any pending work items for such a connection at that
time, and not just when DLM is no longer willing to use the connection.

Also, if we don't clear the CF_CONNECT_PENDING flag, the error handling
routines won't be able to reconnect, because lowcomms_connect_sock()
checks for it.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David Teigland <teigland@redhat.com>
  • Loading branch information
Marcelo Ricardo Leitner authored and David Teigland committed Aug 17, 2015
1 parent 28926a0 commit 0d737a8
Showing 1 changed file with 15 additions and 14 deletions.
29 changes: 15 additions & 14 deletions fs/dlm/lowcomms.c
Original file line number Diff line number Diff line change
Expand Up @@ -514,17 +514,24 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
}

/* Close a remote connection and tidy up */
static void close_connection(struct connection *con, bool and_other)
static void close_connection(struct connection *con, bool and_other,
bool tx, bool rx)
{
mutex_lock(&con->sock_mutex);
clear_bit(CF_CONNECT_PENDING, &con->flags);
clear_bit(CF_WRITE_PENDING, &con->flags);
if (tx && cancel_work_sync(&con->swork))
log_print("canceled swork for node %d", con->nodeid);
if (rx && cancel_work_sync(&con->rwork))
log_print("canceled rwork for node %d", con->nodeid);

mutex_lock(&con->sock_mutex);
if (con->sock) {
sock_release(con->sock);
con->sock = NULL;
}
if (con->othercon && and_other) {
/* Will only re-enter once. */
close_connection(con->othercon, false);
close_connection(con->othercon, false, true, true);
}
if (con->rx_page) {
__free_page(con->rx_page);
Expand Down Expand Up @@ -902,7 +909,7 @@ static int receive_from_sock(struct connection *con)
out_close:
mutex_unlock(&con->sock_mutex);
if (ret != -EAGAIN) {
close_connection(con, false);
close_connection(con, false, true, false);
/* Reconnect when there is something to send */
}
/* Don't return success if we really got EOF */
Expand Down Expand Up @@ -1622,7 +1629,7 @@ static void send_to_sock(struct connection *con)

send_error:
mutex_unlock(&con->sock_mutex);
close_connection(con, false);
close_connection(con, false, false, true);
lowcomms_connect_sock(con);
return;

Expand Down Expand Up @@ -1654,15 +1661,9 @@ int dlm_lowcomms_close(int nodeid)
log_print("closing connection to node %d", nodeid);
con = nodeid2con(nodeid, 0);
if (con) {
clear_bit(CF_CONNECT_PENDING, &con->flags);
clear_bit(CF_WRITE_PENDING, &con->flags);
set_bit(CF_CLOSE, &con->flags);
if (cancel_work_sync(&con->swork))
log_print("canceled swork for node %d", nodeid);
if (cancel_work_sync(&con->rwork))
log_print("canceled rwork for node %d", nodeid);
close_connection(con, true, true, true);
clean_one_writequeue(con);
close_connection(con, true);
}

spin_lock(&dlm_node_addrs_spin);
Expand Down Expand Up @@ -1745,7 +1746,7 @@ static void stop_conn(struct connection *con)

static void free_conn(struct connection *con)
{
close_connection(con, true);
close_connection(con, true, true, true);
if (con->othercon)
kmem_cache_free(con_cache, con->othercon);
hlist_del(&con->list);
Expand Down Expand Up @@ -1816,7 +1817,7 @@ int dlm_lowcomms_start(void)
dlm_allow_conn = 0;
con = nodeid2con(0,0);
if (con) {
close_connection(con, false);
close_connection(con, false, true, true);
kmem_cache_free(con_cache, con);
}
fail_destroy:
Expand Down

0 comments on commit 0d737a8

Please sign in to comment.