From 031fd3aa20fcf6d1862ea7814ee8b2caf36c0d78 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 6 Feb 2008 11:34:10 -0500
Subject: [PATCH 1/6] NLM: set RPC_CLNT_CREATE_NOPING for NLM RPC clients

It's currently possible for an unresponsive NLM client to completely
lock up a server's lockd. The scenario is something like this:

1) client1 (or a process on the server) takes a lock on a file
2) client2 tries to take a blocking lock on the same file and
   awaits the callback
3) client2 goes unresponsive (plug pulled, network partition, etc)
4) client1 releases the lock

...at that point the server's lockd will try to queue up a GRANT_MSG
callback for client2, but first it requeues the block with a timeout of
30s. nlm_async_call will attempt to bind the RPC client to client2 and
will call rpc_ping. rpc_ping entails a sync RPC call and if client2 is
unresponsive it will take around 60s for that to time out. Once it times
out, it's already time to retry the block and the whole process repeats.

Once in this situation, nlmsvc_retry_blocked will never return until
the host starts responding again. lockd won't service new calls.

Fix this by skipping the RPC ping on NLM RPC clients. This makes
nlm_async_call return quickly when called.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index ca6b16fc3101a..00063ee0b55c6 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -244,6 +244,7 @@ nlm_bind_host(struct nlm_host *host)
 			.version	= host->h_version,
 			.authflavor	= RPC_AUTH_UNIX,
 			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
+					   RPC_CLNT_CREATE_NOPING |
 					   RPC_CLNT_CREATE_AUTOBIND),
 		};
 

From 90bd17c87821fe0e055e0f9a7446c2875f31eb4c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 6 Feb 2008 11:34:11 -0500
Subject: [PATCH 2/6] NLM: have server-side RPC clients default to soft RPC
 tasks

Now that it no longer does an RPC ping, lockd always ends up queueing
an RPC task for the GRANT_MSG callback. But, it also requeues the block
for later attempts. Since these are hard RPC tasks, if the client we're
calling back goes unresponsive the GRANT_MSG callbacks can stack up in
the RPC queue.

Fix this by making server-side RPC clients default to soft RPC tasks.
lockd requeues the block anyway, so this should be OK.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/host.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 00063ee0b55c6..f1ef49fff118e 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -243,11 +243,18 @@ nlm_bind_host(struct nlm_host *host)
 			.program	= &nlm_program,
 			.version	= host->h_version,
 			.authflavor	= RPC_AUTH_UNIX,
-			.flags		= (RPC_CLNT_CREATE_HARDRTRY |
-					   RPC_CLNT_CREATE_NOPING |
+			.flags		= (RPC_CLNT_CREATE_NOPING |
 					   RPC_CLNT_CREATE_AUTOBIND),
 		};
 
+		/*
+		 * lockd retries server side blocks automatically so we want
+		 * those to be soft RPC calls. Client side calls need to be
+		 * hard RPC tasks.
+		 */
+		if (!host->h_server)
+			args.flags |= RPC_CLNT_CREATE_HARDRTRY;
+
 		clnt = rpc_create(&args);
 		if (!IS_ERR(clnt))
 			host->h_rpcclnt = clnt;

From 9706501e43a80ce48b319214a0a9e562deded35b Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 6 Feb 2008 11:34:12 -0500
Subject: [PATCH 3/6] NLM: don't reattempt GRANT_MSG when there is already an
 RPC in flight

With the current scheme in nlmsvc_grant_blocked, we can end up with more
than one GRANT_MSG callback for a block in flight. Right now, we requeue
the block unconditionally so that a GRANT_MSG callback is done again in
30s. If the client is unresponsive, it can take more than 30s for the
call already in flight to time out.

There's no benefit to having more than one GRANT_MSG RPC queued up at a
time, so put it on the list with a timeout of NLM_NEVER before doing the
RPC call. If the RPC call submission fails, we requeue it with a short
timeout. If it works, then nlmsvc_grant_callback will end up requeueing
it with a shorter timeout after it completes.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/svclock.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 2f4d8fa666892..82db7b323b83c 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -763,11 +763,20 @@ nlmsvc_grant_blocked(struct nlm_block *block)
 	dprintk("lockd: GRANTing blocked lock.\n");
 	block->b_granted = 1;
 
-	/* Schedule next grant callback in 30 seconds */
-	nlmsvc_insert_block(block, 30 * HZ);
+	/* keep block on the list, but don't reattempt until the RPC
+	 * completes or the submission fails
+	 */
+	nlmsvc_insert_block(block, NLM_NEVER);
 
-	/* Call the client */
-	nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG, &nlmsvc_grant_ops);
+	/* Call the client -- use a soft RPC task since nlmsvc_retry_blocked
+	 * will queue up a new one if this one times out
+	 */
+	error = nlm_async_call(block->b_call, NLMPROC_GRANTED_MSG,
+				&nlmsvc_grant_ops);
+
+	/* RPC submission failed, wait a bit and retry */
+	if (error < 0)
+		nlmsvc_insert_block(block, 10 * HZ);
 }
 
 /*

From c64e80d55db81df22a7f25b75ab4ba4c55db4749 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Wed, 6 Feb 2008 11:34:13 -0500
Subject: [PATCH 4/6] NLM: don't requeue block if it was invalidated while
 GRANT_MSG was in flight

It's possible for lockd to catch a SIGKILL while a GRANT_MSG callback
is in flight. If this happens we don't want lockd to insert the block
back into the nlm_blocked list.

This helps that situation, but there's still a possible race. Fixing
that will mean adding real locking for nlm_blocked.

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 fs/lockd/svclock.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 82db7b323b83c..fe9bdb4a220cd 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -795,6 +795,17 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 
 	dprintk("lockd: GRANT_MSG RPC callback\n");
 
+	/* if the block is not on a list at this point then it has
+	 * been invalidated. Don't try to requeue it.
+	 *
+	 * FIXME: it's possible that the block is removed from the list
+	 * after this check but before the nlmsvc_insert_block. In that
+	 * case it will be added back. Perhaps we need better locking
+	 * for nlm_blocked?
+	 */
+	if (list_empty(&block->b_list))
+		return;
+
 	/* Technically, we should down the file semaphore here. Since we
 	 * move the block towards the head of the queue only, no harm
 	 * can be done, though. */

From fbb7878c1a2ee40a1e983bf20f3dd3a80255dcf2 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 7 Feb 2008 23:10:21 -0500
Subject: [PATCH 5/6] nfsd: clean up svc_reserve_auth()

This is a void function attempting to return the return value from
another void function, which seems harmless but extremely weird, and
apparently makes some compilers complain.

While we're there, clean up a little (e.g. the switch statement had a
minor style problem and seemed overkill as long as there's only one
case).

Thanks to Trond for noticing this.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Cc: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/svc.h | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 64c771056187a..64c97552964a4 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -409,16 +409,13 @@ char *		   svc_print_addr(struct svc_rqst *, char *, size_t);
  * for all cases without actually generating the checksum, so we just use a
  * static value.
  */
-static inline void
-svc_reserve_auth(struct svc_rqst *rqstp, int space)
+static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
 {
-	int			added_space = 0;
+	int added_space = 0;
 
-	switch(rqstp->rq_authop->flavour) {
-		case RPC_AUTH_GSS:
-			added_space = RPC_MAX_AUTH_SIZE;
-	}
-	return svc_reserve(rqstp, space + added_space);
+	if (rqstp->rq_authop->flavour)
+		added_space = RPC_MAX_AUTH_SIZE;
+	svc_reserve(rqstp, space + added_space);
 }
 
 #endif /* SUNRPC_SVC_H */

From bb50c8012cbd85b8e105584b32e4d5a2d335dcef Mon Sep 17 00:00:00 2001
From: Roland Dreier <rdreier@cisco.com>
Date: Fri, 8 Feb 2008 16:02:04 -0800
Subject: [PATCH 6/6] SUNPRC: Fix printk format warning

net/sunrpc/xprtrdma/svc_rdma_sendto.c:160: warning: format '%llx'
expects type 'long long unsigned int', but argument 3 has type 'u64'

Signed-off-by: Roland Dreier <rolandd@cisco.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 net/sunrpc/xprtrdma/svc_rdma_sendto.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 3e321949e1dc5..0598b229c11d3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -159,7 +159,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
 	BUG_ON(sge_count >= 32);
 	dprintk("svcrdma: RDMA_WRITE rmr=%x, to=%llx, xdr_off=%d, "
 		"write_len=%d, xdr_sge=%p, sge_count=%d\n",
-		rmr, to, xdr_off, write_len, xdr_sge, sge_count);
+		rmr, (unsigned long long)to, xdr_off,
+		write_len, xdr_sge, sge_count);
 
 	ctxt = svc_rdma_get_context(xprt);
 	ctxt->count = 0;