Skip to content

Commit

Permalink
NLM: Fix reclaim races
Browse files Browse the repository at this point in the history
Currently it is possible for a task to remove its locks at the same time as
the NLM recovery thread is trying to recover them. This quickly leads to an
Oops.
Protect the locks using an rw semaphore while they are being recovered.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
  • Loading branch information
Trond Myklebust authored and Trond Myklebust committed Jun 9, 2006
1 parent 5046791 commit 28df955
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 15 deletions.
39 changes: 25 additions & 14 deletions fs/lockd/clntlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,10 @@ u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock)
* Someone has sent us an SM_NOTIFY. Ensure we bind to the new port number,
* that we mark locks for reclaiming, and that we bump the pseudo NSM state.
*/
static inline
void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
static void nlmclnt_prepare_reclaim(struct nlm_host *host)
{
down_write(&host->h_rwsem);
host->h_monitored = 0;
host->h_nsmstate = newstate;
host->h_state++;
host->h_nextrebind = 0;
nlm_rebind_host(host);
Expand All @@ -164,19 +163,24 @@ void nlmclnt_prepare_reclaim(struct nlm_host *host, u32 newstate)
dprintk("NLM: reclaiming locks for host %s", host->h_name);
}

/*
 * Finish a lock-reclaim pass for @host.
 *
 * Resets host->h_reclaiming to 0 and then releases the write side of
 * host->h_rwsem, which nlmclnt_prepare_reclaim() acquired with
 * down_write(). The counter is reset before the semaphore is dropped.
 * A debug message naming the host is emitted afterwards.
 */
static void nlmclnt_finish_reclaim(struct nlm_host *host)
{
host->h_reclaiming = 0;
up_write(&host->h_rwsem);
/*
 * NOTE(review): the format string has no trailing newline, like the
 * matching message in nlmclnt_prepare_reclaim() - confirm this is intended.
 */
dprintk("NLM: done reclaiming locks for host %s", host->h_name);
}

/*
* Reclaim all locks on server host. We do this by spawning a separate
* reclaimer thread.
*/
void
nlmclnt_recovery(struct nlm_host *host, u32 newstate)
{
if (host->h_reclaiming++) {
if (host->h_nsmstate == newstate)
return;
nlmclnt_prepare_reclaim(host, newstate);
} else {
nlmclnt_prepare_reclaim(host, newstate);
if (host->h_nsmstate == newstate)
return;
host->h_nsmstate = newstate;
if (!host->h_reclaiming++) {
nlm_get_host(host);
__module_get(THIS_MODULE);
if (kernel_thread(reclaimer, host, CLONE_KERNEL) < 0)
Expand All @@ -190,6 +194,7 @@ reclaimer(void *ptr)
struct nlm_host *host = (struct nlm_host *) ptr;
struct nlm_wait *block;
struct file_lock *fl, *next;
u32 nsmstate;

daemonize("%s-reclaim", host->h_name);
allow_signal(SIGKILL);
Expand All @@ -199,19 +204,25 @@ reclaimer(void *ptr)
lock_kernel();
lockd_up();

nlmclnt_prepare_reclaim(host);
/* First, reclaim all locks that have been marked. */
restart:
nsmstate = host->h_nsmstate;
list_for_each_entry_safe(fl, next, &host->h_reclaim, fl_u.nfs_fl.list) {
list_del_init(&fl->fl_u.nfs_fl.list);

if (signalled())
continue;
if (nlmclnt_reclaim(host, fl) == 0)
list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
goto restart;
if (nlmclnt_reclaim(host, fl) != 0)
continue;
list_add_tail(&fl->fl_u.nfs_fl.list, &host->h_granted);
if (host->h_nsmstate != nsmstate) {
/* Argh! The server rebooted again! */
list_splice_init(&host->h_granted, &host->h_reclaim);
goto restart;
}
}

host->h_reclaiming = 0;
nlmclnt_finish_reclaim(host);

/* Now, wake up all processes that sleep on a blocked lock */
list_for_each_entry(block, &nlm_blocked, b_list) {
Expand Down
14 changes: 13 additions & 1 deletion fs/lockd/clntproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
}

block = nlmclnt_prepare_block(host, fl);
again:
for(;;) {
/* Reboot protection */
fl->fl_u.nfs_fl.state = host->h_state;
status = nlmclnt_call(req, NLMPROC_LOCK);
if (status < 0)
goto out_unblock;
Expand All @@ -531,10 +534,16 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
}

if (resp->status == NLM_LCK_GRANTED) {
fl->fl_u.nfs_fl.state = host->h_state;
down_read(&host->h_rwsem);
/* Check whether or not the server has rebooted */
if (fl->fl_u.nfs_fl.state != host->h_state) {
up_read(&host->h_rwsem);
goto again;
}
fl->fl_flags |= FL_SLEEP;
/* Ensure the resulting lock will get added to granted list */
do_vfs_lock(fl);
up_read(&host->h_rwsem);
}
status = nlm_stat_to_errno(resp->status);
out_unblock:
Expand Down Expand Up @@ -596,6 +605,7 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
static int
nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
{
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
int status;

Expand All @@ -604,7 +614,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
* request, or to deny it with NLM_LCK_DENIED_GRACE_PERIOD. In either
* case, we want to unlock.
*/
down_read(&host->h_rwsem);
do_vfs_lock(fl);
up_read(&host->h_rwsem);

if (req->a_flags & RPC_TASK_ASYNC)
return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
Expand Down
1 change: 1 addition & 0 deletions fs/lockd/host.c
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ nlm_lookup_host(int server, struct sockaddr_in *sin,
host->h_expires = jiffies + NLM_HOST_EXPIRE;
atomic_set(&host->h_count, 1);
init_waitqueue_head(&host->h_gracewait);
init_rwsem(&host->h_rwsem);
host->h_state = 0; /* pseudo NSM state */
host->h_nsmstate = 0; /* real NSM state */
host->h_server = server;
Expand Down
1 change: 1 addition & 0 deletions include/linux/lockd/lockd.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ struct nlm_host {
h_killed : 1,
h_monitored : 1;
wait_queue_head_t h_gracewait; /* wait while reclaiming */
struct rw_semaphore h_rwsem; /* Reboot recovery lock */
u32 h_state; /* pseudo-state counter */
u32 h_nsmstate; /* true remote NSM state */
u32 h_pidcount; /* Pseudopids */
Expand Down

0 comments on commit 28df955

Please sign in to comment.