Skip to content

Commit

Permalink
[CIFS] cifs: reconnect unresponsive servers
Browse files Browse the repository at this point in the history
If the server isn't responding to echoes, we don't want to leave tasks
hung waiting for it to reply. At that point, we'll want to reconnect
so that soft mounts can return an error to userspace quickly.

If the client hasn't received a reply after a specified number of echo
intervals, assume that the transport is down and attempt to reconnect
the socket.

The number of echo_intervals to wait before attempting to reconnect is
tunable via a module parameter. Setting it to 0, means that the client
will never attempt to reconnect. The default is 5.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
  • Loading branch information
Steve French committed Jan 20, 2011
1 parent c74093b commit fda3594
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
6 changes: 5 additions & 1 deletion fs/cifs/cifsfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,11 @@ unsigned int cifs_max_pending = CIFS_MAX_REQ;
module_param(cifs_max_pending, int, 0);
MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server. "
"Default: 50 Range: 2 to 256");

unsigned short echo_retries = 5;
module_param(echo_retries, ushort, 0644);
MODULE_PARM_DESC(echo_retries, "Number of echo attempts before giving up and "
"reconnecting server. Default: 5. 0 means "
"never reconnect.");
extern mempool_t *cifs_sm_req_poolp;
extern mempool_t *cifs_req_poolp;
extern mempool_t *cifs_mid_poolp;
Expand Down
3 changes: 3 additions & 0 deletions fs/cifs/cifsglob.h
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,9 @@ GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
GLOBAL_EXTERN unsigned int cifs_max_pending; /* MAX requests at once to server*/

/* reconnect after this many failed echo attempts */
GLOBAL_EXTERN unsigned short echo_retries;

void cifs_oplock_break(struct work_struct *work);
void cifs_oplock_break_get(struct cifsFileInfo *cfile);
void cifs_oplock_break_put(struct cifsFileInfo *cfile);
Expand Down
21 changes: 17 additions & 4 deletions fs/cifs/connect.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
kfree(server->session_key.response);
server->session_key.response = NULL;
server->session_key.len = 0;
server->lstrp = jiffies;
mutex_unlock(&server->srv_mutex);

/* mark submitted MIDs for retry and issue callback */
Expand Down Expand Up @@ -420,7 +421,20 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
smb_msg.msg_control = NULL;
smb_msg.msg_controllen = 0;
pdu_length = 4; /* enough to get RFC1001 header */

incomplete_rcv:
if (echo_retries > 0 &&
time_after(jiffies, server->lstrp +
(echo_retries * SMB_ECHO_INTERVAL))) {
cERROR(1, "Server %s has not responded in %d seconds. "
"Reconnecting...", server->hostname,
(echo_retries * SMB_ECHO_INTERVAL / HZ));
cifs_reconnect(server);
csocket = server->ssocket;
wake_up(&server->response_q);
continue;
}

length =
kernel_recvmsg(csocket, &smb_msg,
&iov, 1, pdu_length, 0 /* BB other flags? */);
Expand Down Expand Up @@ -581,6 +595,8 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
}

mid_entry = NULL;
server->lstrp = jiffies;

spin_lock(&GlobalMid_Lock);
list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
Expand Down Expand Up @@ -629,10 +645,6 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server)
#ifdef CONFIG_CIFS_STATS2
mid_entry->when_received = jiffies;
#endif
/* so we do not time out requests to server
which is still responding (since server could
be busy but not dead) */
server->lstrp = jiffies;
break;
}
mid_entry = NULL;
Expand Down Expand Up @@ -1685,6 +1697,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
tcp_ses->session_estab = false;
tcp_ses->sequence_number = 0;
tcp_ses->lstrp = jiffies;
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
Expand Down

0 comments on commit fda3594

Please sign in to comment.