From fccca7fc6aab4e6b519e2d606ef34632e4f50e33 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 26 Jan 2008 17:37:47 -0500
Subject: [PATCH 001/118] NFS: Fix a sillyrename race...

Ensure that readdir revalidates its data cache after blocking on
sillyrename.

Also fix a typo in nfs_do_call_unlink(): swap the ^= for an |=. The result
is the same, since we've already checked that the flag is unset, but it
makes the code more readable.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c    | 11 +++++------
 fs/nfs/unlink.c |  2 +-
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f697b5c74b7c3..d9abdb1d6a2a6 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -537,12 +537,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 
 	lock_kernel();
 
-	res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
-	if (res < 0) {
-		unlock_kernel();
-		return res;
-	}
-
 	/*
 	 * filp->f_pos points to the dirent entry number.
 	 * *desc->dir_cookie has the cookie for the next entry. We have
@@ -564,6 +558,10 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	desc->entry = &my_entry;
 
 	nfs_block_sillyrename(dentry);
+	res = nfs_revalidate_mapping_nolock(inode, filp->f_mapping);
+	if (res < 0)
+		goto out;
+
 	while(!desc->entry->eof) {
 		res = readdir_search_pagecache(desc);
 
@@ -594,6 +592,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			break;
 		}
 	}
+out:
 	nfs_unblock_sillyrename(dentry);
 	unlock_kernel();
 	if (res > 0)
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 233ad38161f92..c5fa6d8001f18 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -138,7 +138,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		spin_lock(&alias->d_lock);
 		if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
 			alias->d_fsdata = data;
-			alias->d_flags ^= DCACHE_NFSFS_RENAMED;
+			alias->d_flags |= DCACHE_NFSFS_RENAMED;
 			ret = 1;
 		}
 		spin_unlock(&alias->d_lock);

From 609005c319bc6062b95ed82e132884ed7e22cdb9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 28 Jan 2008 19:42:59 -0500
Subject: [PATCH 002/118] NFS: Sillyrename: in the case of a race, check
 aliases are really positive

In nfs_do_call_unlink() we check that we haven't raced, and that lookup()
hasn't created an aliased dentry to our sillydeleted dentry. If somebody
has deleted the file on the server and the lookup() resulted in a negative
dentry, then ignore...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/unlink.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index c5fa6d8001f18..431981d0265f9 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -130,13 +130,15 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 	alias = d_lookup(parent, &data->args.name);
 	if (alias != NULL) {
 		int ret = 0;
+
 		/*
 		 * Hey, we raced with lookup... See if we need to transfer
 		 * the sillyrename information to the aliased dentry.
 		 */
 		nfs_free_dname(data);
 		spin_lock(&alias->d_lock);
-		if (!(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
+		if (alias->d_inode != NULL &&
+		    !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
 			alias->d_fsdata = data;
 			alias->d_flags |= DCACHE_NFSFS_RENAMED;
 			ret = 1;

From d45b9d8baf41acb177abbbe6746b1dea094b8a28 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 28 Jan 2008 19:43:18 -0500
Subject: [PATCH 003/118] NFS: Handle -ENOENT errors in
 unlink()/rmdir()/rename()

If the server returns an ENOENT error, we still need to do a d_delete() in
order to ensure that the dentry is deleted.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d9abdb1d6a2a6..06f26d40b4fe4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1267,6 +1267,12 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return error;
 }
 
+static void nfs_dentry_handle_enoent(struct dentry *dentry)
+{
+	if (dentry->d_inode != NULL && !d_unhashed(dentry))
+		d_delete(dentry);
+}
+
 static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 {
 	int error;
@@ -1279,6 +1285,8 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
 	/* Ensure the VFS deletes this inode */
 	if (error == 0 && dentry->d_inode != NULL)
 		clear_nlink(dentry->d_inode);
+	else if (error == -ENOENT)
+		nfs_dentry_handle_enoent(dentry);
 	unlock_kernel();
 
 	return error;
@@ -1385,6 +1393,8 @@ static int nfs_safe_remove(struct dentry *dentry)
 		nfs_mark_for_revalidate(inode);
 	} else
 		error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+	if (error == -ENOENT)
+		nfs_dentry_handle_enoent(dentry);
 out:
 	return error;
 }
@@ -1421,7 +1431,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
 	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	error = nfs_safe_remove(dentry);
-	if (!error) {
+	if (!error || error == -ENOENT) {
 		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
 	} else if (need_rehash)
 		d_rehash(dentry);
@@ -1634,7 +1644,8 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		d_move(old_dentry, new_dentry);
 		nfs_set_verifier(new_dentry,
 					nfs_save_change_attribute(new_dir));
-	}
+	} else if (error == -ENOENT)
+		nfs_dentry_handle_enoent(old_dentry);
 
 	/* new dentry created? */
 	if (dentry)

From 77f111929d024165e736e919187cff017279bebe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 28 Jan 2008 19:43:19 -0500
Subject: [PATCH 004/118] NFS: Ensure that we eject stale inodes as soon as
 possible

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 06f26d40b4fe4..32c666c612a19 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -826,6 +826,10 @@ static int nfs_dentry_delete(struct dentry *dentry)
 		dentry->d_parent->d_name.name, dentry->d_name.name,
 		dentry->d_flags);
 
+	/* Unhash any dentry with a stale inode */
+	if (dentry->d_inode != NULL && NFS_STALE(dentry->d_inode))
+		return 1;
+
 	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
 		/* Unhash it, so that ->d_iput() would be called */
 		return 1;

From 8b1f9ee56e21e505a3d5d3e33f823006d1abdbaf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Jan 2008 17:13:06 -0500
Subject: [PATCH 005/118] NFS: Optimise nfs_vm_page_mkwrite()

The current model locks the page twice for no good reason. Optimise by
inlining the parts of nfs_write_begin()/nfs_write_end() that we care about.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 36 ++++++++++++++----------------------
 1 file changed, 14 insertions(+), 22 deletions(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index b3bb89f7d5d28..4560fc2ddb4af 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -392,35 +392,27 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
 	struct file *filp = vma->vm_file;
 	unsigned pagelen;
 	int ret = -EINVAL;
-	void *fsdata;
 	struct address_space *mapping;
-	loff_t offset;
 
 	lock_page(page);
 	mapping = page->mapping;
-	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) {
-		unlock_page(page);
-		return -EINVAL;
-	}
+	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping)
+		goto out_unlock;
+
+	ret = 0;
 	pagelen = nfs_page_length(page);
-	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-	unlock_page(page);
+	if (pagelen == 0)
+		goto out_unlock;
 
-	/*
-	 * we can use mapping after releasing the page lock, because:
-	 * we hold mmap_sem on the fault path, which should pin the vma
-	 * which should pin the file, which pins the dentry which should
-	 * hold a reference on inode.
-	 */
+	ret = nfs_flush_incompatible(filp, page);
+	if (ret != 0)
+		goto out_unlock;
 
-	if (pagelen) {
-		struct page *page2 = NULL;
-		ret = nfs_write_begin(filp, mapping, offset, pagelen,
-			       	0, &page2, &fsdata);
-		if (!ret)
-			ret = nfs_write_end(filp, mapping, offset, pagelen,
-				       	pagelen, page2, fsdata);
-	}
+	ret = nfs_updatepage(filp, page, 0, pagelen);
+	if (ret == 0)
+		ret = pagelen;
+out_unlock:
+	unlock_page(page);
 	return ret;
 }
 

From acee478afc6ff7e1b8852d9a4dca1ff36021414d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 22 Jan 2008 17:13:07 -0500
Subject: [PATCH 006/118] NFS: Clean up the write request locking.

Ensure that we set/clear NFS_PAGE_TAG_LOCKED when the nfs_page is hashed.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/pagelist.c        | 13 ++++++++-----
 fs/nfs/write.c           | 16 +++++++---------
 include/linux/nfs_page.h | 13 +------------
 3 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 345bb9b4765b6..3b3dbb94393de 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -111,13 +111,14 @@ void nfs_unlock_request(struct nfs_page *req)
  * nfs_set_page_tag_locked - Tag a request as locked
  * @req:
  */
-static int nfs_set_page_tag_locked(struct nfs_page *req)
+int nfs_set_page_tag_locked(struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(req->wb_context->path.dentry->d_inode);
 
-	if (!nfs_lock_request(req))
+	if (!nfs_lock_request_dontget(req))
 		return 0;
-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+	if (req->wb_page != NULL)
+		radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
 
@@ -132,9 +133,10 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
 	if (req->wb_page != NULL) {
 		spin_lock(&inode->i_lock);
 		radix_tree_tag_clear(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
+		nfs_unlock_request(req);
 		spin_unlock(&inode->i_lock);
-	}
-	nfs_unlock_request(req);
+	} else
+		nfs_unlock_request(req);
 }
 
 /**
@@ -421,6 +423,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
 				goto out;
 			idx_start = req->wb_index + 1;
 			if (nfs_set_page_tag_locked(req)) {
+				kref_get(&req->wb_kref);
 				nfs_list_remove_request(req);
 				radix_tree_tag_clear(&nfsi->nfs_page_tree,
 						req->wb_index, tag);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 51cc1bd6a1166..092e79c6d9623 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -196,7 +196,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
 	}
 	/* Update file length */
 	nfs_grow_file(page, offset, count);
-	nfs_unlock_request(req);
+	nfs_clear_page_tag_locked(req);
 	return 0;
 }
 
@@ -252,7 +252,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 				struct page *page)
 {
 	struct inode *inode = page->mapping->host;
-	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page *req;
 	int ret;
 
@@ -263,10 +262,10 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 			spin_unlock(&inode->i_lock);
 			return 0;
 		}
-		if (nfs_lock_request_dontget(req))
+		if (nfs_set_page_tag_locked(req))
 			break;
 		/* Note: If we hold the page lock, as is the case in nfs_writepage,
-		 *	 then the call to nfs_lock_request_dontget() will always
+		 *	 then the call to nfs_set_page_tag_locked() will always
 		 *	 succeed provided that someone hasn't already marked the
 		 *	 request as dirty (in which case we don't care).
 		 */
@@ -280,7 +279,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) {
 		/* This request is marked for commit */
 		spin_unlock(&inode->i_lock);
-		nfs_unlock_request(req);
+		nfs_clear_page_tag_locked(req);
 		nfs_pageio_complete(pgio);
 		return 0;
 	}
@@ -288,8 +287,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 		spin_unlock(&inode->i_lock);
 		BUG();
 	}
-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
-			NFS_PAGE_TAG_LOCKED);
 	spin_unlock(&inode->i_lock);
 	nfs_pageio_add_request(pgio, req);
 	return 0;
@@ -381,6 +378,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
 	kref_get(&req->wb_kref);
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 0;
 }
 
@@ -596,7 +594,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		spin_lock(&inode->i_lock);
 		req = nfs_page_find_request_locked(page);
 		if (req) {
-			if (!nfs_lock_request_dontget(req)) {
+			if (!nfs_set_page_tag_locked(req)) {
 				int error;
 
 				spin_unlock(&inode->i_lock);
@@ -646,7 +644,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 	    || req->wb_page != page
 	    || !nfs_dirty_request(req)
 	    || offset > rqend || end < req->wb_offset) {
-		nfs_unlock_request(req);
+		nfs_clear_page_tag_locked(req);
 		return ERR_PTR(-EBUSY);
 	}
 
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 30dbcc185e697..a1676e19e4917 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -83,6 +83,7 @@ extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
+extern	int nfs_set_page_tag_locked(struct nfs_page *req);
 extern  void nfs_clear_page_tag_locked(struct nfs_page *req);
 
 
@@ -95,18 +96,6 @@ nfs_lock_request_dontget(struct nfs_page *req)
 	return !test_and_set_bit(PG_BUSY, &req->wb_flags);
 }
 
-/*
- * Lock the page of an asynchronous request and take a reference
- */
-static inline int
-nfs_lock_request(struct nfs_page *req)
-{
-	if (test_and_set_bit(PG_BUSY, &req->wb_flags))
-		return 0;
-	kref_get(&req->wb_kref);
-	return 1;
-}
-
 /**
  * nfs_list_add_request - Insert a request into a list
  * @req: request

From 2f74c0a05612b9c2014b5b67833dba9b9f523948 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 8 Jan 2008 17:56:07 -0500
Subject: [PATCH 007/118] NFSv4: Clean up the OPEN/CLOSE serialisation code

Reduce the time spent locking the rpc_sequence structure by queuing the
nfs_seqid only when we are ready to take the lock (when calling
nfs_wait_on_sequence).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c  | 30 +++++++++++-------------------
 fs/nfs/nfs4state.c | 32 ++++++++++++++++----------------
 2 files changed, 27 insertions(+), 35 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9e2e1c7291dbf..a51a7537f3f6e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3331,15 +3331,12 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 
 	p->arg.fh = NFS_FH(inode);
 	p->arg.fl = &p->fl;
-	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
-		p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
-		if (p->arg.open_seqid == NULL)
-			goto out_free;
-
-	}
+	p->arg.open_seqid = nfs_alloc_seqid(&lsp->ls_state->owner->so_seqid);
+	if (p->arg.open_seqid == NULL)
+		goto out_free;
 	p->arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
 	if (p->arg.lock_seqid == NULL)
-		goto out_free;
+		goto out_free_seqid;
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id.id;
@@ -3348,9 +3345,9 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->ctx = get_nfs_open_context(ctx);
 	memcpy(&p->fl, fl, sizeof(p->fl));
 	return p;
+out_free_seqid:
+	nfs_free_seqid(p->arg.open_seqid);
 out_free:
-	if (p->arg.open_seqid != NULL)
-		nfs_free_seqid(p->arg.open_seqid);
 	kfree(p);
 	return NULL;
 }
@@ -3368,20 +3365,16 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	};
 
 	dprintk("%s: begin!\n", __FUNCTION__);
+	if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
+		return;
 	/* Do we need to do an open_to_lock_owner? */
 	if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)) {
 		if (nfs_wait_on_sequence(data->arg.open_seqid, task) != 0)
 			return;
 		data->arg.open_stateid = &state->stateid;
 		data->arg.new_lock_owner = 1;
-		/* Retest in case we raced... */
-		if (!(data->arg.lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED))
-			goto do_rpc;
-	}
-	if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
-		return;
-	data->arg.new_lock_owner = 0;
-do_rpc:	
+	} else
+		data->arg.new_lock_owner = 0;
 	data->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
 	dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
@@ -3419,6 +3412,7 @@ static void nfs4_lock_release(void *calldata)
 	struct nfs4_lockdata *data = calldata;
 
 	dprintk("%s: begin!\n", __FUNCTION__);
+	nfs_free_seqid(data->arg.open_seqid);
 	if (data->cancelled != 0) {
 		struct rpc_task *task;
 		task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
@@ -3428,8 +3422,6 @@ static void nfs4_lock_release(void *calldata)
 		dprintk("%s: cancelling lock!\n", __FUNCTION__);
 	} else
 		nfs_free_seqid(data->arg.lock_seqid);
-	if (data->arg.open_seqid != NULL)
-		nfs_free_seqid(data->arg.open_seqid);
 	nfs4_put_lock_state(data->lsp);
 	put_nfs_open_context(data->ctx);
 	kfree(data);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 5a39c6f78acf9..bf94c6e0a503e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -644,27 +644,26 @@ void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t f
 
 struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
 {
-	struct rpc_sequence *sequence = counter->sequence;
 	struct nfs_seqid *new;
 
 	new = kmalloc(sizeof(*new), GFP_KERNEL);
 	if (new != NULL) {
 		new->sequence = counter;
-		spin_lock(&sequence->lock);
-		list_add_tail(&new->list, &sequence->list);
-		spin_unlock(&sequence->lock);
+		INIT_LIST_HEAD(&new->list);
 	}
 	return new;
 }
 
 void nfs_free_seqid(struct nfs_seqid *seqid)
 {
-	struct rpc_sequence *sequence = seqid->sequence->sequence;
+	if (!list_empty(&seqid->list)) {
+		struct rpc_sequence *sequence = seqid->sequence->sequence;
 
-	spin_lock(&sequence->lock);
-	list_del(&seqid->list);
-	spin_unlock(&sequence->lock);
-	rpc_wake_up(&sequence->wait);
+		spin_lock(&sequence->lock);
+		list_del(&seqid->list);
+		spin_unlock(&sequence->lock);
+		rpc_wake_up(&sequence->wait);
+	}
 	kfree(seqid);
 }
 
@@ -675,6 +674,7 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
  */
 static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
+	BUG_ON(list_first_entry(&seqid->sequence->sequence->list, struct nfs_seqid, list) != seqid);
 	switch (status) {
 		case 0:
 			break;
@@ -726,15 +726,15 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 	struct rpc_sequence *sequence = seqid->sequence->sequence;
 	int status = 0;
 
-	if (sequence->list.next == &seqid->list)
-		goto out;
 	spin_lock(&sequence->lock);
-	if (sequence->list.next != &seqid->list) {
-		rpc_sleep_on(&sequence->wait, task, NULL, NULL);
-		status = -EAGAIN;
-	}
+	if (list_empty(&seqid->list))
+		list_add_tail(&seqid->list, &sequence->list);
+	if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
+		goto unlock;
+	rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+	status = -EAGAIN;
+unlock:
 	spin_unlock(&sequence->lock);
-out:
 	return status;
 }
 

From ef818a28fac9bd214e676986d8301db0582b92a9 Mon Sep 17 00:00:00 2001
From: Steve Dickson <SteveD@redhat.com>
Date: Thu, 8 Nov 2007 04:05:04 -0500
Subject: [PATCH 008/118] NFS: Stop sillyname renames and unmounts from racing

Added an active/deactive mechanism to the nfs_server structure
allowing async operations to hold off umount until the
operations are done.

Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           |  3 +++
 fs/nfs/internal.h         |  2 ++
 fs/nfs/super.c            | 24 ++++++++++++++++++++++++
 fs/nfs/unlink.c           |  4 ++++
 include/linux/nfs_fs_sb.h |  6 ++++++
 5 files changed, 39 insertions(+)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index a6f6254976120..c3740f5ab9783 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -729,6 +729,9 @@ static struct nfs_server *nfs_alloc_server(void)
 	INIT_LIST_HEAD(&server->client_link);
 	INIT_LIST_HEAD(&server->master_link);
 
+	init_waitqueue_head(&server->active_wq);
+	atomic_set(&server->active, 0);
+
 	server->io_stats = nfs_alloc_iostats();
 	if (!server->io_stats) {
 		kfree(server);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index f3acf48412be7..75793794aefe4 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -160,6 +160,8 @@ extern struct rpc_stat nfs_rpcstat;
 
 extern int __init register_nfs_fs(void);
 extern void __exit unregister_nfs_fs(void);
+extern void nfs_sb_active(struct nfs_server *server);
+extern void nfs_sb_deactive(struct nfs_server *server);
 
 /* namespace.c */
 extern char *nfs_path(const char *base,
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0b0c72a072ffd..fda1635dd1336 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -202,6 +202,7 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru
 static int nfs_xdev_get_sb(struct file_system_type *fs_type,
 		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
 static void nfs_kill_super(struct super_block *);
+static void nfs_put_super(struct super_block *);
 
 static struct file_system_type nfs_fs_type = {
 	.owner		= THIS_MODULE,
@@ -223,6 +224,7 @@ static const struct super_operations nfs_sops = {
 	.alloc_inode	= nfs_alloc_inode,
 	.destroy_inode	= nfs_destroy_inode,
 	.write_inode	= nfs_write_inode,
+	.put_super	= nfs_put_super,
 	.statfs		= nfs_statfs,
 	.clear_inode	= nfs_clear_inode,
 	.umount_begin	= nfs_umount_begin,
@@ -325,6 +327,28 @@ void __exit unregister_nfs_fs(void)
 	unregister_filesystem(&nfs_fs_type);
 }
 
+void nfs_sb_active(struct nfs_server *server)
+{
+	atomic_inc(&server->active);
+}
+
+void nfs_sb_deactive(struct nfs_server *server)
+{
+	if (atomic_dec_and_test(&server->active))
+		wake_up(&server->active_wq);
+}
+
+static void nfs_put_super(struct super_block *sb)
+{
+	struct nfs_server *server = NFS_SB(sb);
+	/*
+	 * Make sure there are no outstanding ops to this server.
+	 * If so, wait for them to finish before allowing the
+	 * unmount to continue.
+	 */
+	wait_event(server->active_wq, atomic_read(&server->active) == 0);
+}
+
 /*
  * Deliver file system statistics to userspace
  */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 431981d0265f9..8e5428e0b86f2 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -14,6 +14,8 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 
+#include "internal.h"
+
 struct nfs_unlinkdata {
 	struct hlist_node list;
 	struct nfs_removeargs args;
@@ -113,6 +115,7 @@ static void nfs_async_unlink_release(void *calldata)
 	struct nfs_unlinkdata	*data = calldata;
 
 	nfs_dec_sillycount(data->dir);
+	nfs_sb_deactive(NFS_SERVER(data->dir));
 	nfs_free_unlinkdata(data);
 }
 
@@ -153,6 +156,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 		nfs_dec_sillycount(dir);
 		return 0;
 	}
+	nfs_sb_active(NFS_SERVER(dir));
 	data->args.fh = NFS_FH(dir);
 	nfs_fattr_init(&data->res.dir_attr);
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 0cac49bc09553..9f949b5876849 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -3,6 +3,9 @@
 
 #include <linux/list.h>
 #include <linux/backing-dev.h>
+#include <linux/wait.h>
+
+#include <asm/atomic.h>
 
 struct nfs_iostats;
 
@@ -110,6 +113,9 @@ struct nfs_server {
 						   filesystem */
 #endif
 	void (*destroy)(struct nfs_server *);
+
+	atomic_t active; /* Keep trace of any activity to this server */
+	wait_queue_head_t active_wq;  /* Wait for any activity to stop  */
 };
 
 /* Server capabilities */

From 66af1e558538137080615e7ad6d1f2f80862de01 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 6 Nov 2007 10:18:36 -0500
Subject: [PATCH 009/118] SUNRPC: Fix a race in xs_tcp_state_change()

When scheduling the autoclose RPC call, we want to ensure that we don't
race against the test_bit() call in xprt_clear_locked().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/xprt.c           | 20 ++++++++++++++++++++
 net/sunrpc/xprtsock.c       |  5 +----
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 30b17b3bc1a9b..6f524a9e7fd01 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -246,6 +246,7 @@ struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_release_rqst_cong(struct rpc_task *task);
 void			xprt_disconnect(struct rpc_xprt *xprt);
+void			xprt_force_disconnect(struct rpc_xprt *xprt);
 
 /*
  * Reserved bit positions in xprt->state
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index fb92f51405c5d..a3af02168af1d 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -570,6 +570,7 @@ static void xprt_autoclose(struct work_struct *work)
 
 	xprt_disconnect(xprt);
 	xprt->ops->close(xprt);
+	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	xprt_release_write(xprt, NULL);
 }
 
@@ -588,6 +589,25 @@ void xprt_disconnect(struct rpc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(xprt_disconnect);
 
+/**
+ * xprt_force_disconnect - force a transport to disconnect
+ * @xprt: transport to disconnect
+ *
+ */
+void xprt_force_disconnect(struct rpc_xprt *xprt)
+{
+	/* Don't race with the test_bit() in xprt_clear_locked() */
+	spin_lock_bh(&xprt->transport_lock);
+	set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+	/* Try to schedule an autoclose RPC call */
+	if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+		queue_work(rpciod_workqueue, &xprt->task_cleanup);
+	else if (xprt->snd_task != NULL)
+		rpc_wake_up_task(xprt->snd_task);
+	spin_unlock_bh(&xprt->transport_lock);
+}
+EXPORT_SYMBOL_GPL(xprt_force_disconnect);
+
 static void
 xprt_init_autodisconnect(unsigned long data)
 {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6fa52f44de0fd..abb40c1407381 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1122,10 +1122,7 @@ static void xs_tcp_state_change(struct sock *sk)
 	case TCP_SYN_RECV:
 		break;
 	case TCP_CLOSE_WAIT:
-		/* Try to schedule an autoclose RPC calls */
-		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-		if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
-			queue_work(rpciod_workqueue, &xprt->task_cleanup);
+		xprt_force_disconnect(xprt);
 	default:
 		xprt_disconnect(xprt);
 	}

From 67a391d72ca7efb387c30ec761a487e50a3ff085 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 5 Nov 2007 17:40:58 -0500
Subject: [PATCH 010/118] SUNRPC: Fix TCP rebinding logic

Currently the TCP rebinding logic assumes that if we're not using a
reserved port, then we don't need to reconnect on the same port if a
disconnection event occurs. This breaks most RPC duplicate reply cache
implementations.

Also take into account the fact that xprt_min_resvport and
xprt_max_resvport may change while we're reconnecting, since the user may
change them at any time via the sysctls. Ensure that we check the port
boundaries every time we loop in xs_bind4/xs_bind6. Also ensure that if the
boundaries change, we only scan the ports a maximum of 2 times.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 59 ++++++++++++++++++++++++++++---------------
 1 file changed, 38 insertions(+), 21 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index abb40c1407381..b5544514ee57d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1276,34 +1276,53 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
 	}
 }
 
+static unsigned short xs_get_srcport(struct sock_xprt *transport, struct socket *sock)
+{
+	unsigned short port = transport->port;
+
+	if (port == 0 && transport->xprt.resvport)
+		port = xs_get_random_port();
+	return port;
+}
+
+static unsigned short xs_next_srcport(struct sock_xprt *transport, struct socket *sock, unsigned short port)
+{
+	if (transport->port != 0)
+		transport->port = 0;
+	if (!transport->xprt.resvport)
+		return 0;
+	if (port <= xprt_min_resvport || port > xprt_max_resvport)
+		return xprt_max_resvport;
+	return --port;
+}
+
 static int xs_bind4(struct sock_xprt *transport, struct socket *sock)
 {
 	struct sockaddr_in myaddr = {
 		.sin_family = AF_INET,
 	};
 	struct sockaddr_in *sa;
-	int err;
-	unsigned short port = transport->port;
+	int err, nloop = 0;
+	unsigned short port = xs_get_srcport(transport, sock);
+	unsigned short last;
 
-	if (!transport->xprt.resvport)
-		port = 0;
 	sa = (struct sockaddr_in *)&transport->addr;
 	myaddr.sin_addr = sa->sin_addr;
 	do {
 		myaddr.sin_port = htons(port);
 		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
-		if (!transport->xprt.resvport)
+		if (port == 0)
 			break;
 		if (err == 0) {
 			transport->port = port;
 			break;
 		}
-		if (port <= xprt_min_resvport)
-			port = xprt_max_resvport;
-		else
-			port--;
-	} while (err == -EADDRINUSE && port != transport->port);
+		last = port;
+		port = xs_next_srcport(transport, sock, port);
+		if (port > last)
+			nloop++;
+	} while (err == -EADDRINUSE && nloop != 2);
 	dprintk("RPC:       %s "NIPQUAD_FMT":%u: %s (%d)\n",
 			__FUNCTION__, NIPQUAD(myaddr.sin_addr),
 			port, err ? "failed" : "ok", err);
@@ -1316,28 +1335,27 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock)
 		.sin6_family = AF_INET6,
 	};
 	struct sockaddr_in6 *sa;
-	int err;
-	unsigned short port = transport->port;
+	int err, nloop = 0;
+	unsigned short port = xs_get_srcport(transport, sock);
+	unsigned short last;
 
-	if (!transport->xprt.resvport)
-		port = 0;
 	sa = (struct sockaddr_in6 *)&transport->addr;
 	myaddr.sin6_addr = sa->sin6_addr;
 	do {
 		myaddr.sin6_port = htons(port);
 		err = kernel_bind(sock, (struct sockaddr *) &myaddr,
 						sizeof(myaddr));
-		if (!transport->xprt.resvport)
+		if (port == 0)
 			break;
 		if (err == 0) {
 			transport->port = port;
 			break;
 		}
-		if (port <= xprt_min_resvport)
-			port = xprt_max_resvport;
-		else
-			port--;
-	} while (err == -EADDRINUSE && port != transport->port);
+		last = port;
+		port = xs_next_srcport(transport, sock, port);
+		if (port > last)
+			nloop++;
+	} while (err == -EADDRINUSE && nloop != 2);
 	dprintk("RPC:       xs_bind6 "NIP6_FMT":%u: %s (%d)\n",
 		NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err);
 	return err;
@@ -1819,7 +1837,6 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 	xprt->addrlen = args->addrlen;
 	if (args->srcaddr)
 		memcpy(&new->addr, args->srcaddr, args->addrlen);
-	new->port = xs_get_random_port();
 
 	return xprt;
 }

From 3b948ae5be5e22532584113e2e02029519bbad8f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 5 Nov 2007 17:42:39 -0500
Subject: [PATCH 011/118] SUNRPC: Allow the client to detect if the TCP
 connection is closed

Add an xprt->state bit to enable the TCP ->state_change() method to signal
whether or not the TCP connection is in the process of closing down.
This will to be used by the reconnection logic in a separate patch.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/xprtsock.c       | 24 +++++++++++++++++++++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6f524a9e7fd01..afb9e6ad7fe07 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -257,6 +257,7 @@ void			xprt_force_disconnect(struct rpc_xprt *xprt);
 #define XPRT_CLOSE_WAIT		(3)
 #define XPRT_BOUND		(4)
 #define XPRT_BINDING		(5)
+#define XPRT_CLOSING		(6)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b5544514ee57d..721c5419765ec 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -758,7 +758,9 @@ static void xs_close(struct rpc_xprt *xprt)
 	sock_release(sock);
 clear_close_wait:
 	smp_mb__before_clear_bit();
+	clear_bit(XPRT_CONNECTED, &xprt->state);
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+	clear_bit(XPRT_CLOSING, &xprt->state);
 	smp_mb__after_clear_bit();
 }
 
@@ -1118,12 +1120,28 @@ static void xs_tcp_state_change(struct sock *sk)
 		}
 		spin_unlock_bh(&xprt->transport_lock);
 		break;
-	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:
+	case TCP_FIN_WAIT1:
+		/* The client initiated a shutdown of the socket */
+		set_bit(XPRT_CLOSING, &xprt->state);
+		smp_mb__before_clear_bit();
+		clear_bit(XPRT_CONNECTED, &xprt->state);
+		smp_mb__after_clear_bit();
 		break;
 	case TCP_CLOSE_WAIT:
+		/* The server initiated a shutdown of the socket */
+		set_bit(XPRT_CLOSING, &xprt->state);
 		xprt_force_disconnect(xprt);
-	default:
+		break;
+	case TCP_LAST_ACK:
+		smp_mb__before_clear_bit();
+		clear_bit(XPRT_CONNECTED, &xprt->state);
+		smp_mb__after_clear_bit();
+		break;
+	case TCP_CLOSE:
+		smp_mb__before_clear_bit();
+		clear_bit(XPRT_CLOSING, &xprt->state);
+		smp_mb__after_clear_bit();
+		/* Mark transport as closed and wake up all pending tasks */
 		xprt_disconnect(xprt);
 	}
  out:

From ef80367071dce7d2533e79ae8f3c84ec42708dc8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 31 Dec 2007 16:19:17 -0500
Subject: [PATCH 012/118] SUNRPC: TCP clear XPRT_CLOSE_WAIT when the socket is
 closed for writes

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 721c5419765ec..25aae8c526e4c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1125,6 +1125,7 @@ static void xs_tcp_state_change(struct sock *sk)
 		set_bit(XPRT_CLOSING, &xprt->state);
 		smp_mb__before_clear_bit();
 		clear_bit(XPRT_CONNECTED, &xprt->state);
+		clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 		smp_mb__after_clear_bit();
 		break;
 	case TCP_CLOSE_WAIT:
@@ -1139,6 +1140,7 @@ static void xs_tcp_state_change(struct sock *sk)
 		break;
 	case TCP_CLOSE:
 		smp_mb__before_clear_bit();
+		clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 		clear_bit(XPRT_CLOSING, &xprt->state);
 		smp_mb__after_clear_bit();
 		/* Mark transport as closed and wake up all pending tasks */

From e06799f958bf7f9f8fae15f0c6f519953fb0257c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 5 Nov 2007 15:44:12 -0500
Subject: [PATCH 013/118] SUNRPC: Use shutdown() instead of close() when
 disconnecting a TCP socket

By using shutdown() rather than close() we allow the RPC client to wait
for the TCP close handshake to complete before we start trying to reconnect
using the same port.
We use shutdown(SHUT_WR) only instead of shutting down both directions,
however we wait until the server has closed the connection on its side.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 25aae8c526e4c..a4c06900f398d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -614,6 +614,22 @@ static int xs_udp_send_request(struct rpc_task *task)
 	return status;
 }
 
+/**
+ * xs_tcp_shutdown - gracefully shut down a TCP socket
+ * @xprt: transport
+ *
+ * Initiates a graceful shutdown of the TCP socket by calling the
+ * equivalent of shutdown(SHUT_WR);
+ */
+static void xs_tcp_shutdown(struct rpc_xprt *xprt)
+{
+	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+	struct socket *sock = transport->sock;
+
+	if (sock != NULL)
+		kernel_sock_shutdown(sock, SHUT_WR);
+}
+
 static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
 {
 	u32 reclen = buf->len - sizeof(rpc_fraghdr);
@@ -691,7 +707,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 	default:
 		dprintk("RPC:       sendmsg returned unrecognized error %d\n",
 			-status);
-		xprt_disconnect(xprt);
+		xs_tcp_shutdown(xprt);
 		break;
 	}
 
@@ -1637,8 +1653,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work)
 				break;
 			default:
 				/* get rid of existing socket, and retry */
-				xs_close(xprt);
-				break;
+				xs_tcp_shutdown(xprt);
 		}
 	}
 out:
@@ -1697,8 +1712,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work)
 				break;
 			default:
 				/* get rid of existing socket, and retry */
-				xs_close(xprt);
-				break;
+				xs_tcp_shutdown(xprt);
 		}
 	}
 out:
@@ -1745,6 +1759,19 @@ static void xs_connect(struct rpc_task *task)
 	}
 }
 
+static void xs_tcp_connect(struct rpc_task *task)
+{
+	struct rpc_xprt *xprt = task->tk_xprt;
+
+	/* Initiate graceful shutdown of the socket if not already done */
+	if (test_bit(XPRT_CONNECTED, &xprt->state))
+		xs_tcp_shutdown(xprt);
+	/* Exit if we need to wait for socket shutdown to complete */
+	if (test_bit(XPRT_CLOSING, &xprt->state))
+		return;
+	xs_connect(task);
+}
+
 /**
  * xs_udp_print_stats - display UDP socket-specifc stats
  * @xprt: rpc_xprt struct containing statistics
@@ -1815,12 +1842,12 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.release_xprt		= xs_tcp_release_xprt,
 	.rpcbind		= rpcb_getport_async,
 	.set_port		= xs_set_port,
-	.connect		= xs_connect,
+	.connect		= xs_tcp_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
-	.close			= xs_close,
+	.close			= xs_tcp_shutdown,
 	.destroy		= xs_destroy,
 	.print_stats		= xs_tcp_print_stats,
 };

From 7272dcd31d56580dee7693c21e369fd167e137fe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 6 Nov 2007 18:28:53 -0500
Subject: [PATCH 014/118] SUNRPC: xprt_autoclose() should not call
 xprt_disconnect()

The transport layer should do that itself whenever appropriate.

Note that the RDMA transport already assumes that it needs to call
xprt_disconnect in xprt_rdma_close().
For TCP sockets, we want to call xprt_disconnect() only after the
connection has been closed by both ends.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprt.c     | 1 -
 net/sunrpc/xprtsock.c | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index a3af02168af1d..80742995e02e2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -568,7 +568,6 @@ static void xprt_autoclose(struct work_struct *work)
 	struct rpc_xprt *xprt =
 		container_of(work, struct rpc_xprt, task_cleanup);
 
-	xprt_disconnect(xprt);
 	xprt->ops->close(xprt);
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	xprt_release_write(xprt, NULL);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a4c06900f398d..9b06b8b2a1122 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -774,10 +774,10 @@ static void xs_close(struct rpc_xprt *xprt)
 	sock_release(sock);
 clear_close_wait:
 	smp_mb__before_clear_bit();
-	clear_bit(XPRT_CONNECTED, &xprt->state);
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	clear_bit(XPRT_CLOSING, &xprt->state);
 	smp_mb__after_clear_bit();
+	xprt_disconnect(xprt);
 }
 
 /**
@@ -793,7 +793,6 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
 	cancel_rearming_delayed_work(&transport->connect_worker);
 
-	xprt_disconnect(xprt);
 	xs_close(xprt);
 	xs_free_peer_addresses(xprt);
 	kfree(xprt->slot);

From 3ebb067d92ebe5bcfd282acf12bade891d334d07 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 6 Nov 2007 18:40:12 -0500
Subject: [PATCH 015/118] SUNRPC: Make call_status()/call_decode() call
 xprt_force_disconnect()

Move the calls to xprt_disconnect() over to xprt_force_disconnect() in
order to enable the transport layer to manage the state of the
XPRT_CONNECTED flag.
Ditto in xs_tcp_read_fraghdr().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c     | 4 ++--
 net/sunrpc/xprtsock.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 76be83ee4b04a..046d8f68483b3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1137,7 +1137,7 @@ call_status(struct rpc_task *task)
 	case -ETIMEDOUT:
 		task->tk_action = call_timeout;
 		if (task->tk_client->cl_discrtry)
-			xprt_disconnect(task->tk_xprt);
+			xprt_force_disconnect(task->tk_xprt);
 		break;
 	case -ECONNREFUSED:
 	case -ENOTCONN:
@@ -1260,7 +1260,7 @@ call_decode(struct rpc_task *task)
 	req->rq_received = req->rq_private_buf.len = 0;
 	task->tk_status = 0;
 	if (task->tk_client->cl_discrtry)
-		xprt_disconnect(task->tk_xprt);
+		xprt_force_disconnect(task->tk_xprt);
 }
 
 /*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9b06b8b2a1122..8c9af3d92c674 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -903,7 +903,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
 	/* Sanity check of the record length */
 	if (unlikely(transport->tcp_reclen < 4)) {
 		dprintk("RPC:       invalid TCP record fragment length\n");
-		xprt_disconnect(xprt);
+		xprt_force_disconnect(xprt);
 		return;
 	}
 	dprintk("RPC:       reading TCP record fragment of length %d\n",

From 62da3b24880bccd4ffc32cf8d9a7e23fab475bdd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 6 Nov 2007 18:44:20 -0500
Subject: [PATCH 016/118] SUNRPC: Rename xprt_disconnect()

xprt_disconnect() should really only be called when the transport shutdown
is completed, and it is time to wake up any pending tasks. Rename it to
xprt_disconnect_done() in order to reflect the semantical change.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h     | 2 +-
 net/sunrpc/xprt.c               | 6 +++---
 net/sunrpc/xprtrdma/transport.c | 4 ++--
 net/sunrpc/xprtsock.c           | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index afb9e6ad7fe07..2554cd2b016fb 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -245,7 +245,7 @@ void			xprt_adjust_cwnd(struct rpc_task *task, int result);
 struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void			xprt_complete_rqst(struct rpc_task *task, int copied);
 void			xprt_release_rqst_cong(struct rpc_task *task);
-void			xprt_disconnect(struct rpc_xprt *xprt);
+void			xprt_disconnect_done(struct rpc_xprt *xprt);
 void			xprt_force_disconnect(struct rpc_xprt *xprt);
 
 /*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 80742995e02e2..592c2ee63e0f0 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -574,11 +574,11 @@ static void xprt_autoclose(struct work_struct *work)
 }
 
 /**
- * xprt_disconnect - mark a transport as disconnected
+ * xprt_disconnect_done - mark a transport as disconnected
  * @xprt: transport to flag for disconnect
  *
  */
-void xprt_disconnect(struct rpc_xprt *xprt)
+void xprt_disconnect_done(struct rpc_xprt *xprt)
 {
 	dprintk("RPC:       disconnected transport %p\n", xprt);
 	spin_lock_bh(&xprt->transport_lock);
@@ -586,7 +586,7 @@ void xprt_disconnect(struct rpc_xprt *xprt)
 	xprt_wake_pending_tasks(xprt, -ENOTCONN);
 	spin_unlock_bh(&xprt->transport_lock);
 }
-EXPORT_SYMBOL_GPL(xprt_disconnect);
+EXPORT_SYMBOL_GPL(xprt_disconnect_done);
 
 /**
  * xprt_force_disconnect - force a transport to disconnect
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 6f2112dd9f786..73033d824eed4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -449,7 +449,7 @@ xprt_rdma_close(struct rpc_xprt *xprt)
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
 
 	dprintk("RPC:       %s: closing\n", __func__);
-	xprt_disconnect(xprt);
+	xprt_disconnect_done(xprt);
 	(void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia);
 }
 
@@ -682,7 +682,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 	}
 
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) {
-		xprt_disconnect(xprt);
+		xprt_disconnect_done(xprt);
 		return -ENOTCONN;	/* implies disconnect */
 	}
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8c9af3d92c674..741ab8ad1f3ab 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -777,7 +777,7 @@ static void xs_close(struct rpc_xprt *xprt)
 	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
 	clear_bit(XPRT_CLOSING, &xprt->state);
 	smp_mb__after_clear_bit();
-	xprt_disconnect(xprt);
+	xprt_disconnect_done(xprt);
 }
 
 /**
@@ -1159,7 +1159,7 @@ static void xs_tcp_state_change(struct sock *sk)
 		clear_bit(XPRT_CLOSING, &xprt->state);
 		smp_mb__after_clear_bit();
 		/* Mark transport as closed and wake up all pending tasks */
-		xprt_disconnect(xprt);
+		xprt_disconnect_done(xprt);
 	}
  out:
 	read_unlock(&sk->sk_callback_lock);

From 663b8858dddbc8e634476960cc1c5f69dadba9b0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 Jan 2008 18:42:12 -0500
Subject: [PATCH 017/118] SUNRPC: Reconnect immediately whenever the server
 isn't refusing it.

If we've disconnected from the server, rather than the other way round,
then it makes little sense to wait 3 seconds before reconnecting.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtsock.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 741ab8ad1f3ab..a4cfdc5b2648f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1130,13 +1130,13 @@ static void xs_tcp_state_change(struct sock *sk)
 			transport->tcp_flags =
 				TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
 
-			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
 			xprt_wake_pending_tasks(xprt, 0);
 		}
 		spin_unlock_bh(&xprt->transport_lock);
 		break;
 	case TCP_FIN_WAIT1:
 		/* The client initiated a shutdown of the socket */
+		xprt->reestablish_timeout = 0;
 		set_bit(XPRT_CLOSING, &xprt->state);
 		smp_mb__before_clear_bit();
 		clear_bit(XPRT_CONNECTED, &xprt->state);
@@ -1147,6 +1147,14 @@ static void xs_tcp_state_change(struct sock *sk)
 		/* The server initiated a shutdown of the socket */
 		set_bit(XPRT_CLOSING, &xprt->state);
 		xprt_force_disconnect(xprt);
+	case TCP_SYN_SENT:
+	case TCP_CLOSING:
+		/*
+		 * If the server closed down the connection, make sure that
+		 * we back off before reconnecting
+		 */
+		if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+			xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
 		break;
 	case TCP_LAST_ACK:
 		smp_mb__before_clear_bit();

From 93a44a75b97b9d8a03dd3d3f3247c3d0ec46aa4c Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 6 Nov 2007 13:06:03 -0500
Subject: [PATCH 018/118] sunrpc: document the rpc_pipefs kernel api

Add kerneldoc comments for the rpc_pipefs.c functions that are exported.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 52 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index c59f3ca2b41b0..5364e2e52e071 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -76,6 +76,16 @@ rpc_timeout_upcall_queue(struct work_struct *work)
 	rpc_purge_list(rpci, &free_list, destroy_msg, -ETIMEDOUT);
 }
 
+/**
+ * rpc_queue_upcall
+ * @inode: inode of upcall pipe on which to queue given message
+ * @msg: message to queue
+ *
+ * Call with an @inode created by rpc_mkpipe() to queue an upcall.
+ * A userspace process may then later read the upcall by performing a
+ * read on an open file for this inode.  It is up to the caller to
+ * initialize the fields of @msg (other than @msg->list) appropriately.
+ */
 int
 rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
 {
@@ -663,7 +673,16 @@ rpc_lookup_negative(char *path, struct nameidata *nd)
 	return dentry;
 }
 
-
+/**
+ * rpc_mkdir - Create a new directory in rpc_pipefs
+ * @path: path from the rpc_pipefs root to the new directory
+ * @rpc_clnt: rpc client to associate with this directory
+ *
+ * This creates a directory at the given @path associated with
+ * @rpc_clnt, which will contain a file named "info" with some basic
+ * information about the client, together with any "pipes" that may
+ * later be created using rpc_mkpipe().
+ */
 struct dentry *
 rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
 {
@@ -699,6 +718,10 @@ rpc_mkdir(char *path, struct rpc_clnt *rpc_client)
 	goto out;
 }
 
+/**
+ * rpc_rmdir - Remove a directory created with rpc_mkdir()
+ * @dentry: directory to remove
+ */
 int
 rpc_rmdir(struct dentry *dentry)
 {
@@ -717,6 +740,25 @@ rpc_rmdir(struct dentry *dentry)
 	return error;
 }
 
+/**
+ * rpc_mkpipe - make an rpc_pipefs file for kernel<->userspace communication
+ * @parent: dentry of directory to create new "pipe" in
+ * @name: name of pipe
+ * @private: private data to associate with the pipe, for the caller's use
+ * @ops: operations defining the behavior of the pipe: upcall, downcall,
+ *	release_pipe, and destroy_msg.
+ *
+ * Data is made available for userspace to read by calls to
+ * rpc_queue_upcall().  The actual reads will result in calls to
+ * @ops->upcall, which will be called with the file pointer,
+ * message, and userspace buffer to copy to.
+ *
+ * Writes can come at any time, and do not necessarily have to be
+ * responses to upcalls.  They will result in calls to @msg->downcall.
+ *
+ * The @private argument passed here will be available to all these methods
+ * from the file pointer, via RPC_I(file->f_dentry->d_inode)->private.
+ */
 struct dentry *
 rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pipe_ops *ops, int flags)
 {
@@ -764,6 +806,14 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
 	goto out;
 }
 
+/**
+ * rpc_unlink - remove a pipe
+ * @dentry: dentry for the pipe, as returned from rpc_mkpipe
+ *
+ * After this call, lookups will no longer find the pipe, and any
+ * attempts to read or write using preexisting opens of the pipe will
+ * return -EPIPE.
+ */
 int
 rpc_unlink(struct dentry *dentry)
 {

From a6eaf8bdf9308b51ec84e358915fc65400029519 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:39:58 -0400
Subject: [PATCH 019/118] SUNRPC: Move exported declarations to the function
 declarations

Do this for all RPC client related functions and XDR functions.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth.c        |  8 +++++
 net/sunrpc/clnt.c        | 10 ++++++
 net/sunrpc/rpc_pipe.c    |  3 ++
 net/sunrpc/sched.c       |  9 ++++++
 net/sunrpc/stats.c       |  2 ++
 net/sunrpc/sunrpc_syms.c | 66 ----------------------------------------
 net/sunrpc/sysctl.c      |  7 +++++
 net/sunrpc/xdr.c         | 16 +++++++++-
 net/sunrpc/xprt.c        |  1 +
 9 files changed, 55 insertions(+), 67 deletions(-)

diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 1ea27559b1deb..1025a0ea9bb79 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -51,6 +51,7 @@ rpcauth_register(const struct rpc_authops *ops)
 	spin_unlock(&rpc_authflavor_lock);
 	return ret;
 }
+EXPORT_SYMBOL(rpcauth_register);
 
 int
 rpcauth_unregister(const struct rpc_authops *ops)
@@ -68,6 +69,7 @@ rpcauth_unregister(const struct rpc_authops *ops)
 	spin_unlock(&rpc_authflavor_lock);
 	return ret;
 }
+EXPORT_SYMBOL(rpcauth_unregister);
 
 struct rpc_auth *
 rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
@@ -102,6 +104,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
 out:
 	return auth;
 }
+EXPORT_SYMBOL(rpcauth_create);
 
 void
 rpcauth_release(struct rpc_auth *auth)
@@ -151,6 +154,7 @@ rpcauth_init_credcache(struct rpc_auth *auth)
 	auth->au_credcache = new;
 	return 0;
 }
+EXPORT_SYMBOL(rpcauth_init_credcache);
 
 /*
  * Destroy a list of credentials
@@ -213,6 +217,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth)
 		kfree(cache);
 	}
 }
+EXPORT_SYMBOL(rpcauth_destroy_credcache);
 
 /*
  * Remove stale credentials. Avoid sleeping inside the loop.
@@ -332,6 +337,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 out:
 	return cred;
 }
+EXPORT_SYMBOL(rpcauth_lookup_credcache);
 
 struct rpc_cred *
 rpcauth_lookupcred(struct rpc_auth *auth, int flags)
@@ -350,6 +356,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
 	put_group_info(acred.group_info);
 	return ret;
 }
+EXPORT_SYMBOL(rpcauth_lookupcred);
 
 void
 rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
@@ -435,6 +442,7 @@ put_rpccred(struct rpc_cred *cred)
 out_destroy:
 	cred->cr_ops->crdestroy(cred);
 }
+EXPORT_SYMBOL(put_rpccred);
 
 void
 rpcauth_unbindcred(struct rpc_task *task)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 046d8f68483b3..5b561f9b6a172 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -345,6 +345,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	dprintk("RPC:       %s: returned error %d\n", __FUNCTION__, err);
 	return ERR_PTR(err);
 }
+EXPORT_SYMBOL(rpc_clone_client);
 
 /*
  * Properly shut down an RPC client, terminating all outstanding
@@ -363,6 +364,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
 
 	rpc_release_client(clnt);
 }
+EXPORT_SYMBOL(rpc_shutdown_client);
 
 /*
  * Free an RPC client
@@ -467,6 +469,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
 out:
 	return clnt;
 }
+EXPORT_SYMBOL(rpc_bind_new_program);
 
 /*
  * Default callback for async RPC calls
@@ -512,11 +515,13 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
 {
 	rpc_save_sigmask(oldset, clnt->cl_intr);
 }
+EXPORT_SYMBOL(rpc_clnt_sigmask);
 
 void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
 {
 	rpc_restore_sigmask(oldset);
 }
+EXPORT_SYMBOL(rpc_clnt_sigunmask);
 
 static
 struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
@@ -572,6 +577,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	rpc_put_task(task);
 	return status;
 }
+EXPORT_SYMBOL(rpc_call_sync);
 
 /**
  * rpc_call_async - Perform an asynchronous RPC call
@@ -593,6 +599,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 	rpc_put_task(task);
 	return 0;
 }
+EXPORT_SYMBOL(rpc_call_async);
 
 /**
  * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
@@ -625,6 +632,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 	else
 		task->tk_action = rpc_exit_task;
 }
+EXPORT_SYMBOL(rpc_call_setup);
 
 /**
  * rpc_peeraddr - extract remote peer address from clnt's xprt
@@ -671,6 +679,7 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
 	if (xprt->ops->set_buffer_size)
 		xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
 }
+EXPORT_SYMBOL(rpc_setbufsize);
 
 /*
  * Return size of largest payload RPC client can support, in bytes
@@ -710,6 +719,7 @@ rpc_restart_call(struct rpc_task *task)
 
 	task->tk_action = call_start;
 }
+EXPORT_SYMBOL(rpc_restart_call);
 
 /*
  * 0.  Initial state
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 5364e2e52e071..19b44e53e4210 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -113,6 +113,7 @@ rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg)
 	wake_up(&rpci->waitq);
 	return res;
 }
+EXPORT_SYMBOL(rpc_queue_upcall);
 
 static inline void
 rpc_inode_setowner(struct inode *inode, void *private)
@@ -805,6 +806,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi
 			-ENOMEM);
 	goto out;
 }
+EXPORT_SYMBOL(rpc_mkpipe);
 
 /**
  * rpc_unlink - remove a pipe
@@ -835,6 +837,7 @@ rpc_unlink(struct dentry *dentry)
 	dput(parent);
 	return error;
 }
+EXPORT_SYMBOL(rpc_unlink);
 
 /*
  * populate the filesystem
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index eed5dd9819cde..aff8ac6737c3c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -373,6 +373,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 	__rpc_sleep_on(q, task, action, timer);
 	spin_unlock_bh(&q->lock);
 }
+EXPORT_SYMBOL(rpc_sleep_on);
 
 /**
  * __rpc_do_wake_up_task - wake up a single rpc_task
@@ -444,6 +445,7 @@ void rpc_wake_up_task(struct rpc_task *task)
 	}
 	rcu_read_unlock_bh();
 }
+EXPORT_SYMBOL(rpc_wake_up_task);
 
 /*
  * Wake up the next task on a priority queue.
@@ -519,6 +521,7 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
 
 	return task;
 }
+EXPORT_SYMBOL(rpc_wake_up_next);
 
 /**
  * rpc_wake_up - wake up all rpc_tasks
@@ -544,6 +547,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
 	spin_unlock(&queue->lock);
 	rcu_read_unlock_bh();
 }
+EXPORT_SYMBOL(rpc_wake_up);
 
 /**
  * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
@@ -572,6 +576,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 	spin_unlock(&queue->lock);
 	rcu_read_unlock_bh();
 }
+EXPORT_SYMBOL(rpc_wake_up_status);
 
 static void __rpc_atrun(struct rpc_task *task)
 {
@@ -586,6 +591,7 @@ void rpc_delay(struct rpc_task *task, unsigned long delay)
 	task->tk_timeout = delay;
 	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
 }
+EXPORT_SYMBOL(rpc_delay);
 
 /*
  * Helper to call task->tk_ops->rpc_call_prepare
@@ -731,6 +737,7 @@ void rpc_execute(struct rpc_task *task)
 	rpc_set_running(task);
 	__rpc_execute(task);
 }
+EXPORT_SYMBOL(rpc_execute);
 
 static void rpc_async_schedule(struct work_struct *work)
 {
@@ -848,6 +855,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 	dprintk("RPC:       new task initialized, procpid %u\n",
 				task_pid_nr(current));
 }
+EXPORT_SYMBOL(rpc_init_task);
 
 static struct rpc_task *
 rpc_alloc_task(void)
@@ -959,6 +967,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
 	}
 	spin_unlock(&clnt->cl_lock);
 }
+EXPORT_SYMBOL(rpc_killall_tasks);
 
 int rpciod_up(void)
 {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 4d4f3738b6887..fd97a49a96d38 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -241,12 +241,14 @@ rpc_proc_register(struct rpc_stat *statp)
 {
 	return do_register(statp->program->name, statp, &rpc_proc_fops);
 }
+EXPORT_SYMBOL(rpc_proc_register);
 
 void
 rpc_proc_unregister(const char *name)
 {
 	remove_proc_entry(name, proc_net_rpc);
 }
+EXPORT_SYMBOL(rpc_proc_unregister);
 
 struct proc_dir_entry *
 svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c
index 33d89e842c850..1a7e309d008bc 100644
--- a/net/sunrpc/sunrpc_syms.c
+++ b/net/sunrpc/sunrpc_syms.c
@@ -22,45 +22,6 @@
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/sunrpc/xprtsock.h>
 
-/* RPC scheduler */
-EXPORT_SYMBOL(rpc_execute);
-EXPORT_SYMBOL(rpc_init_task);
-EXPORT_SYMBOL(rpc_sleep_on);
-EXPORT_SYMBOL(rpc_wake_up_next);
-EXPORT_SYMBOL(rpc_wake_up_task);
-EXPORT_SYMBOL(rpc_wake_up_status);
-
-/* RPC client functions */
-EXPORT_SYMBOL(rpc_clone_client);
-EXPORT_SYMBOL(rpc_bind_new_program);
-EXPORT_SYMBOL(rpc_shutdown_client);
-EXPORT_SYMBOL(rpc_killall_tasks);
-EXPORT_SYMBOL(rpc_call_sync);
-EXPORT_SYMBOL(rpc_call_async);
-EXPORT_SYMBOL(rpc_call_setup);
-EXPORT_SYMBOL(rpc_clnt_sigmask);
-EXPORT_SYMBOL(rpc_clnt_sigunmask);
-EXPORT_SYMBOL(rpc_delay);
-EXPORT_SYMBOL(rpc_restart_call);
-EXPORT_SYMBOL(rpc_setbufsize);
-EXPORT_SYMBOL(rpc_unlink);
-EXPORT_SYMBOL(rpc_wake_up);
-EXPORT_SYMBOL(rpc_queue_upcall);
-EXPORT_SYMBOL(rpc_mkpipe);
-
-/* Client transport */
-EXPORT_SYMBOL(xprt_set_timeout);
-
-/* Client credential cache */
-EXPORT_SYMBOL(rpcauth_register);
-EXPORT_SYMBOL(rpcauth_unregister);
-EXPORT_SYMBOL(rpcauth_create);
-EXPORT_SYMBOL(rpcauth_lookupcred);
-EXPORT_SYMBOL(rpcauth_lookup_credcache);
-EXPORT_SYMBOL(rpcauth_destroy_credcache);
-EXPORT_SYMBOL(rpcauth_init_credcache);
-EXPORT_SYMBOL(put_rpccred);
-
 /* RPC server stuff */
 EXPORT_SYMBOL(svc_create);
 EXPORT_SYMBOL(svc_create_thread);
@@ -81,8 +42,6 @@ EXPORT_SYMBOL(svc_set_client);
 
 /* RPC statistics */
 #ifdef CONFIG_PROC_FS
-EXPORT_SYMBOL(rpc_proc_register);
-EXPORT_SYMBOL(rpc_proc_unregister);
 EXPORT_SYMBOL(svc_proc_register);
 EXPORT_SYMBOL(svc_proc_unregister);
 EXPORT_SYMBOL(svc_seq_show);
@@ -105,31 +64,6 @@ EXPORT_SYMBOL(qword_get);
 EXPORT_SYMBOL(svcauth_unix_purge);
 EXPORT_SYMBOL(unix_domain_find);
 
-/* Generic XDR */
-EXPORT_SYMBOL(xdr_encode_string);
-EXPORT_SYMBOL(xdr_decode_string_inplace);
-EXPORT_SYMBOL(xdr_decode_netobj);
-EXPORT_SYMBOL(xdr_encode_netobj);
-EXPORT_SYMBOL(xdr_encode_pages);
-EXPORT_SYMBOL(xdr_inline_pages);
-EXPORT_SYMBOL(xdr_shift_buf);
-EXPORT_SYMBOL(xdr_encode_word);
-EXPORT_SYMBOL(xdr_decode_word);
-EXPORT_SYMBOL(xdr_encode_array2);
-EXPORT_SYMBOL(xdr_decode_array2);
-EXPORT_SYMBOL(xdr_buf_from_iov);
-EXPORT_SYMBOL(xdr_buf_subsegment);
-EXPORT_SYMBOL(xdr_buf_read_netobj);
-EXPORT_SYMBOL(read_bytes_from_xdr_buf);
-
-/* Debugging symbols */
-#ifdef RPC_DEBUG
-EXPORT_SYMBOL(rpc_debug);
-EXPORT_SYMBOL(nfs_debug);
-EXPORT_SYMBOL(nfsd_debug);
-EXPORT_SYMBOL(nlm_debug);
-#endif
-
 extern struct cache_detail ip_map_cache, unix_gid_cache;
 
 static int __init
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index 2be714e9b3827..c879732ae9d04 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -23,9 +23,16 @@
  * Declare the debug flags here
  */
 unsigned int	rpc_debug;
+EXPORT_SYMBOL(rpc_debug);
+
 unsigned int	nfs_debug;
+EXPORT_SYMBOL(nfs_debug);
+
 unsigned int	nfsd_debug;
+EXPORT_SYMBOL(nfsd_debug);
+
 unsigned int	nlm_debug;
+EXPORT_SYMBOL(nlm_debug);
 
 #ifdef RPC_DEBUG
 
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index fdc5e6d7562b7..54264062ea695 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -28,6 +28,7 @@ xdr_encode_netobj(__be32 *p, const struct xdr_netobj *obj)
 	memcpy(p, obj->data, obj->len);
 	return p + XDR_QUADLEN(obj->len);
 }
+EXPORT_SYMBOL(xdr_encode_netobj);
 
 __be32 *
 xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
@@ -40,6 +41,7 @@ xdr_decode_netobj(__be32 *p, struct xdr_netobj *obj)
 	obj->data = (u8 *) p;
 	return p + XDR_QUADLEN(len);
 }
+EXPORT_SYMBOL(xdr_decode_netobj);
 
 /**
  * xdr_encode_opaque_fixed - Encode fixed length opaque data
@@ -91,6 +93,7 @@ xdr_encode_string(__be32 *p, const char *string)
 {
 	return xdr_encode_array(p, string, strlen(string));
 }
+EXPORT_SYMBOL(xdr_encode_string);
 
 __be32 *
 xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen)
@@ -103,6 +106,7 @@ xdr_decode_string_inplace(__be32 *p, char **sp, int *lenp, int maxlen)
 	*sp = (char *) p;
 	return p + XDR_QUADLEN(len);
 }
+EXPORT_SYMBOL(xdr_decode_string_inplace);
 
 void
 xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
@@ -130,6 +134,7 @@ xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
 	xdr->buflen += len;
 	xdr->len += len;
 }
+EXPORT_SYMBOL(xdr_encode_pages);
 
 void
 xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
@@ -151,7 +156,7 @@ xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
 
 	xdr->buflen += len;
 }
-
+EXPORT_SYMBOL(xdr_inline_pages);
 
 /*
  * Helper routines for doing 'memmove' like operations on a struct xdr_buf
@@ -418,6 +423,7 @@ xdr_shift_buf(struct xdr_buf *buf, size_t len)
 {
 	xdr_shrink_bufhead(buf, len);
 }
+EXPORT_SYMBOL(xdr_shift_buf);
 
 /**
  * xdr_init_encode - Initialize a struct xdr_stream for sending data.
@@ -639,6 +645,7 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
 	buf->page_len = 0;
 	buf->buflen = buf->len = iov->iov_len;
 }
+EXPORT_SYMBOL(xdr_buf_from_iov);
 
 /* Sets subbuf to the portion of buf of length len beginning base bytes
  * from the start of buf. Returns -1 if base of length are out of bounds. */
@@ -687,6 +694,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
 		return -1;
 	return 0;
 }
+EXPORT_SYMBOL(xdr_buf_subsegment);
 
 static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
@@ -717,6 +725,7 @@ int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, u
 	__read_bytes_from_xdr_buf(&subbuf, obj, len);
 	return 0;
 }
+EXPORT_SYMBOL(read_bytes_from_xdr_buf);
 
 static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
@@ -760,6 +769,7 @@ xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
 	*obj = ntohl(raw);
 	return 0;
 }
+EXPORT_SYMBOL(xdr_decode_word);
 
 int
 xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
@@ -768,6 +778,7 @@ xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
 
 	return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj));
 }
+EXPORT_SYMBOL(xdr_encode_word);
 
 /* If the netobj starting offset bytes from the start of xdr_buf is contained
  * entirely in the head or the tail, set object to point to it; otherwise
@@ -805,6 +816,7 @@ int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned in
 	__read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
 	return 0;
 }
+EXPORT_SYMBOL(xdr_buf_read_netobj);
 
 /* Returns 0 on success, or else a negative error code. */
 static int
@@ -1010,6 +1022,7 @@ xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
 
 	return xdr_xcode_array2(buf, base, desc, 0);
 }
+EXPORT_SYMBOL(xdr_decode_array2);
 
 int
 xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
@@ -1021,6 +1034,7 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
 
 	return xdr_xcode_array2(buf, base, desc, 1);
 }
+EXPORT_SYMBOL(xdr_encode_array2);
 
 int
 xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 592c2ee63e0f0..1fd47f918bb97 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -992,6 +992,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
 	to->to_retries   = retr;
 	to->to_exponential = 0;
 }
+EXPORT_SYMBOL(xprt_set_timeout);
 
 /**
  * xprt_create_transport - create an RPC transport

From e8914c65f7f8d4e8701b8e78a12b714872ea0402 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:39:59 -0400
Subject: [PATCH 020/118] SUNRPC: Restrict sunrpc client exports

The sunrpc client exports are not meant to be part of any official kernel
API: they can change at the drop of a hat. Mark them as internal functions
using EXPORT_SYMBOL_GPL.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth.c   | 18 +++++++++---------
 net/sunrpc/clnt.c   | 24 ++++++++++++------------
 net/sunrpc/sched.c  | 26 +++++++++++++-------------
 net/sunrpc/stats.c  | 10 +++++-----
 net/sunrpc/sysctl.c |  8 ++++----
 net/sunrpc/xprt.c   |  2 +-
 6 files changed, 44 insertions(+), 44 deletions(-)

diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 1025a0ea9bb79..eca941ce298b6 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -51,7 +51,7 @@ rpcauth_register(const struct rpc_authops *ops)
 	spin_unlock(&rpc_authflavor_lock);
 	return ret;
 }
-EXPORT_SYMBOL(rpcauth_register);
+EXPORT_SYMBOL_GPL(rpcauth_register);
 
 int
 rpcauth_unregister(const struct rpc_authops *ops)
@@ -69,7 +69,7 @@ rpcauth_unregister(const struct rpc_authops *ops)
 	spin_unlock(&rpc_authflavor_lock);
 	return ret;
 }
-EXPORT_SYMBOL(rpcauth_unregister);
+EXPORT_SYMBOL_GPL(rpcauth_unregister);
 
 struct rpc_auth *
 rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
@@ -104,7 +104,7 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt)
 out:
 	return auth;
 }
-EXPORT_SYMBOL(rpcauth_create);
+EXPORT_SYMBOL_GPL(rpcauth_create);
 
 void
 rpcauth_release(struct rpc_auth *auth)
@@ -154,7 +154,7 @@ rpcauth_init_credcache(struct rpc_auth *auth)
 	auth->au_credcache = new;
 	return 0;
 }
-EXPORT_SYMBOL(rpcauth_init_credcache);
+EXPORT_SYMBOL_GPL(rpcauth_init_credcache);
 
 /*
  * Destroy a list of credentials
@@ -217,7 +217,7 @@ rpcauth_destroy_credcache(struct rpc_auth *auth)
 		kfree(cache);
 	}
 }
-EXPORT_SYMBOL(rpcauth_destroy_credcache);
+EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
 
 /*
  * Remove stale credentials. Avoid sleeping inside the loop.
@@ -337,7 +337,7 @@ rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred,
 out:
 	return cred;
 }
-EXPORT_SYMBOL(rpcauth_lookup_credcache);
+EXPORT_SYMBOL_GPL(rpcauth_lookup_credcache);
 
 struct rpc_cred *
 rpcauth_lookupcred(struct rpc_auth *auth, int flags)
@@ -356,7 +356,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags)
 	put_group_info(acred.group_info);
 	return ret;
 }
-EXPORT_SYMBOL(rpcauth_lookupcred);
+EXPORT_SYMBOL_GPL(rpcauth_lookupcred);
 
 void
 rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
@@ -373,7 +373,7 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
 #endif
 	cred->cr_uid = acred->uid;
 }
-EXPORT_SYMBOL(rpcauth_init_cred);
+EXPORT_SYMBOL_GPL(rpcauth_init_cred);
 
 struct rpc_cred *
 rpcauth_bindcred(struct rpc_task *task)
@@ -442,7 +442,7 @@ put_rpccred(struct rpc_cred *cred)
 out_destroy:
 	cred->cr_ops->crdestroy(cred);
 }
-EXPORT_SYMBOL(put_rpccred);
+EXPORT_SYMBOL_GPL(put_rpccred);
 
 void
 rpcauth_unbindcred(struct rpc_task *task)
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5b561f9b6a172..22092b91dd852 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -345,7 +345,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	dprintk("RPC:       %s: returned error %d\n", __FUNCTION__, err);
 	return ERR_PTR(err);
 }
-EXPORT_SYMBOL(rpc_clone_client);
+EXPORT_SYMBOL_GPL(rpc_clone_client);
 
 /*
  * Properly shut down an RPC client, terminating all outstanding
@@ -364,7 +364,7 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
 
 	rpc_release_client(clnt);
 }
-EXPORT_SYMBOL(rpc_shutdown_client);
+EXPORT_SYMBOL_GPL(rpc_shutdown_client);
 
 /*
  * Free an RPC client
@@ -469,7 +469,7 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old,
 out:
 	return clnt;
 }
-EXPORT_SYMBOL(rpc_bind_new_program);
+EXPORT_SYMBOL_GPL(rpc_bind_new_program);
 
 /*
  * Default callback for async RPC calls
@@ -515,13 +515,13 @@ void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset)
 {
 	rpc_save_sigmask(oldset, clnt->cl_intr);
 }
-EXPORT_SYMBOL(rpc_clnt_sigmask);
+EXPORT_SYMBOL_GPL(rpc_clnt_sigmask);
 
 void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
 {
 	rpc_restore_sigmask(oldset);
 }
-EXPORT_SYMBOL(rpc_clnt_sigunmask);
+EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask);
 
 static
 struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
@@ -577,7 +577,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	rpc_put_task(task);
 	return status;
 }
-EXPORT_SYMBOL(rpc_call_sync);
+EXPORT_SYMBOL_GPL(rpc_call_sync);
 
 /**
  * rpc_call_async - Perform an asynchronous RPC call
@@ -599,7 +599,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 	rpc_put_task(task);
 	return 0;
 }
-EXPORT_SYMBOL(rpc_call_async);
+EXPORT_SYMBOL_GPL(rpc_call_async);
 
 /**
  * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
@@ -614,7 +614,7 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 {
 	return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
 }
-EXPORT_SYMBOL(rpc_run_task);
+EXPORT_SYMBOL_GPL(rpc_run_task);
 
 void
 rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
@@ -632,7 +632,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 	else
 		task->tk_action = rpc_exit_task;
 }
-EXPORT_SYMBOL(rpc_call_setup);
+EXPORT_SYMBOL_GPL(rpc_call_setup);
 
 /**
  * rpc_peeraddr - extract remote peer address from clnt's xprt
@@ -679,7 +679,7 @@ rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize
 	if (xprt->ops->set_buffer_size)
 		xprt->ops->set_buffer_size(xprt, sndsize, rcvsize);
 }
-EXPORT_SYMBOL(rpc_setbufsize);
+EXPORT_SYMBOL_GPL(rpc_setbufsize);
 
 /*
  * Return size of largest payload RPC client can support, in bytes
@@ -719,7 +719,7 @@ rpc_restart_call(struct rpc_task *task)
 
 	task->tk_action = call_start;
 }
-EXPORT_SYMBOL(rpc_restart_call);
+EXPORT_SYMBOL_GPL(rpc_restart_call);
 
 /*
  * 0.  Initial state
@@ -1529,7 +1529,7 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
 	};
 	return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
 }
-EXPORT_SYMBOL(rpc_call_null);
+EXPORT_SYMBOL_GPL(rpc_call_null);
 
 #ifdef RPC_DEBUG
 void rpc_show_tasks(void)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aff8ac6737c3c..d0b4c7e11e069 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -243,7 +243,7 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 {
 	__rpc_init_priority_wait_queue(queue, qname, 0);
 }
-EXPORT_SYMBOL(rpc_init_wait_queue);
+EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
 
 static int rpc_wait_bit_interruptible(void *word)
 {
@@ -303,7 +303,7 @@ int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
 	return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
 			action, TASK_INTERRUPTIBLE);
 }
-EXPORT_SYMBOL(__rpc_wait_for_completion_task);
+EXPORT_SYMBOL_GPL(__rpc_wait_for_completion_task);
 
 /*
  * Make an RPC task runnable.
@@ -373,7 +373,7 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 	__rpc_sleep_on(q, task, action, timer);
 	spin_unlock_bh(&q->lock);
 }
-EXPORT_SYMBOL(rpc_sleep_on);
+EXPORT_SYMBOL_GPL(rpc_sleep_on);
 
 /**
  * __rpc_do_wake_up_task - wake up a single rpc_task
@@ -445,7 +445,7 @@ void rpc_wake_up_task(struct rpc_task *task)
 	}
 	rcu_read_unlock_bh();
 }
-EXPORT_SYMBOL(rpc_wake_up_task);
+EXPORT_SYMBOL_GPL(rpc_wake_up_task);
 
 /*
  * Wake up the next task on a priority queue.
@@ -521,7 +521,7 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
 
 	return task;
 }
-EXPORT_SYMBOL(rpc_wake_up_next);
+EXPORT_SYMBOL_GPL(rpc_wake_up_next);
 
 /**
  * rpc_wake_up - wake up all rpc_tasks
@@ -547,7 +547,7 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
 	spin_unlock(&queue->lock);
 	rcu_read_unlock_bh();
 }
-EXPORT_SYMBOL(rpc_wake_up);
+EXPORT_SYMBOL_GPL(rpc_wake_up);
 
 /**
  * rpc_wake_up_status - wake up all rpc_tasks and set their status value.
@@ -576,7 +576,7 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
 	spin_unlock(&queue->lock);
 	rcu_read_unlock_bh();
 }
-EXPORT_SYMBOL(rpc_wake_up_status);
+EXPORT_SYMBOL_GPL(rpc_wake_up_status);
 
 static void __rpc_atrun(struct rpc_task *task)
 {
@@ -591,7 +591,7 @@ void rpc_delay(struct rpc_task *task, unsigned long delay)
 	task->tk_timeout = delay;
 	rpc_sleep_on(&delay_queue, task, NULL, __rpc_atrun);
 }
-EXPORT_SYMBOL(rpc_delay);
+EXPORT_SYMBOL_GPL(rpc_delay);
 
 /*
  * Helper to call task->tk_ops->rpc_call_prepare
@@ -620,7 +620,7 @@ void rpc_exit_task(struct rpc_task *task)
 		}
 	}
 }
-EXPORT_SYMBOL(rpc_exit_task);
+EXPORT_SYMBOL_GPL(rpc_exit_task);
 
 void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
 {
@@ -737,7 +737,7 @@ void rpc_execute(struct rpc_task *task)
 	rpc_set_running(task);
 	__rpc_execute(task);
 }
-EXPORT_SYMBOL(rpc_execute);
+EXPORT_SYMBOL_GPL(rpc_execute);
 
 static void rpc_async_schedule(struct work_struct *work)
 {
@@ -855,7 +855,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 	dprintk("RPC:       new task initialized, procpid %u\n",
 				task_pid_nr(current));
 }
-EXPORT_SYMBOL(rpc_init_task);
+EXPORT_SYMBOL_GPL(rpc_init_task);
 
 static struct rpc_task *
 rpc_alloc_task(void)
@@ -910,7 +910,7 @@ void rpc_put_task(struct rpc_task *task)
 		call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
 	rpc_release_calldata(tk_ops, calldata);
 }
-EXPORT_SYMBOL(rpc_put_task);
+EXPORT_SYMBOL_GPL(rpc_put_task);
 
 static void rpc_release_task(struct rpc_task *task)
 {
@@ -967,7 +967,7 @@ void rpc_killall_tasks(struct rpc_clnt *clnt)
 	}
 	spin_unlock(&clnt->cl_lock);
 }
-EXPORT_SYMBOL(rpc_killall_tasks);
+EXPORT_SYMBOL_GPL(rpc_killall_tasks);
 
 int rpciod_up(void)
 {
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index fd97a49a96d38..74df2d358e61b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -118,7 +118,7 @@ struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt)
 	new = kcalloc(clnt->cl_maxproc, sizeof(struct rpc_iostats), GFP_KERNEL);
 	return new;
 }
-EXPORT_SYMBOL(rpc_alloc_iostats);
+EXPORT_SYMBOL_GPL(rpc_alloc_iostats);
 
 /**
  * rpc_free_iostats - release an rpc_iostats structure
@@ -129,7 +129,7 @@ void rpc_free_iostats(struct rpc_iostats *stats)
 {
 	kfree(stats);
 }
-EXPORT_SYMBOL(rpc_free_iostats);
+EXPORT_SYMBOL_GPL(rpc_free_iostats);
 
 /**
  * rpc_count_iostats - tally up per-task stats
@@ -215,7 +215,7 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 				metrics->om_execute * MILLISECS_PER_JIFFY);
 	}
 }
-EXPORT_SYMBOL(rpc_print_iostats);
+EXPORT_SYMBOL_GPL(rpc_print_iostats);
 
 /*
  * Register/unregister RPC proc files
@@ -241,14 +241,14 @@ rpc_proc_register(struct rpc_stat *statp)
 {
 	return do_register(statp->program->name, statp, &rpc_proc_fops);
 }
-EXPORT_SYMBOL(rpc_proc_register);
+EXPORT_SYMBOL_GPL(rpc_proc_register);
 
 void
 rpc_proc_unregister(const char *name)
 {
 	remove_proc_entry(name, proc_net_rpc);
 }
-EXPORT_SYMBOL(rpc_proc_unregister);
+EXPORT_SYMBOL_GPL(rpc_proc_unregister);
 
 struct proc_dir_entry *
 svc_proc_register(struct svc_stat *statp, const struct file_operations *fops)
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index c879732ae9d04..bada7de0c2fcd 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -23,16 +23,16 @@
  * Declare the debug flags here
  */
 unsigned int	rpc_debug;
-EXPORT_SYMBOL(rpc_debug);
+EXPORT_SYMBOL_GPL(rpc_debug);
 
 unsigned int	nfs_debug;
-EXPORT_SYMBOL(nfs_debug);
+EXPORT_SYMBOL_GPL(nfs_debug);
 
 unsigned int	nfsd_debug;
-EXPORT_SYMBOL(nfsd_debug);
+EXPORT_SYMBOL_GPL(nfsd_debug);
 
 unsigned int	nlm_debug;
-EXPORT_SYMBOL(nlm_debug);
+EXPORT_SYMBOL_GPL(nlm_debug);
 
 #ifdef RPC_DEBUG
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 1fd47f918bb97..7520c6623c46a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -992,7 +992,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
 	to->to_retries   = retr;
 	to->to_exponential = 0;
 }
-EXPORT_SYMBOL(xprt_set_timeout);
+EXPORT_SYMBOL_GPL(xprt_set_timeout);
 
 /**
  * xprt_create_transport - create an RPC transport

From 84115e1cd4a3614c4e566d4cce31381dce3dbef9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:39:59 -0400
Subject: [PATCH 021/118] SUNRPC: Cleanup of rpc_task initialisation

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c              | 36 +++++++++++++++++++------
 fs/nfs/read.c                | 15 +++++++----
 fs/nfs/write.c               | 30 +++++++++++++--------
 include/linux/sunrpc/clnt.h  |  2 +-
 include/linux/sunrpc/sched.h | 14 ++++++----
 net/sunrpc/clnt.c            | 51 ++++++++++++++++++++++++++----------
 net/sunrpc/sched.c           | 27 +++++++++----------
 7 files changed, 117 insertions(+), 58 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 3c9d16b4f80c2..f9f5fc13dc7dd 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -272,6 +272,11 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
 	size_t rsize = NFS_SERVER(inode)->rsize;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(inode),
+		.callback_ops = &nfs_read_direct_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 	unsigned int pgbase;
 	int result;
 	ssize_t started = 0;
@@ -322,8 +327,8 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.eof = 0;
 		data->res.count = bytes;
 
-		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
-				&nfs_read_direct_ops, data);
+		task_setup_data.callback_data = data;
+		rpc_init_task(&data->task, &task_setup_data);
 		NFS_PROTO(inode)->read_setup(data);
 
 		data->task.tk_cookie = (unsigned long) inode;
@@ -431,6 +436,11 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct inode *inode = dreq->inode;
 	struct list_head *p;
 	struct nfs_write_data *data;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(inode),
+		.callback_ops = &nfs_write_direct_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 
 	dreq->count = 0;
 	get_dreq(dreq);
@@ -451,8 +461,8 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		 * Reuse data->task; data->args should not have changed
 		 * since the original request was sent.
 		 */
-		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
-				&nfs_write_direct_ops, data);
+		task_setup_data.callback_data = data;
+		rpc_init_task(&data->task, &task_setup_data);
 		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
 
 		data->task.tk_priority = RPC_PRIORITY_NORMAL;
@@ -504,6 +514,12 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(dreq->inode),
+		.callback_ops = &nfs_commit_direct_ops,
+		.callback_data = data,
+		.flags = RPC_TASK_ASYNC,
+	};
 
 	data->inode = dreq->inode;
 	data->cred = dreq->ctx->cred;
@@ -515,8 +531,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
 
-	rpc_init_task(&data->task, NFS_CLIENT(dreq->inode), RPC_TASK_ASYNC,
-				&nfs_commit_direct_ops, data);
+	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(data->inode)->commit_setup(data, 0);
 
 	data->task.tk_priority = RPC_PRIORITY_NORMAL;
@@ -641,6 +656,11 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	struct inode *inode = ctx->path.dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(inode),
+		.callback_ops = &nfs_write_direct_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int pgbase;
 	int result;
@@ -694,8 +714,8 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.count = bytes;
 		data->res.verf = &data->verf;
 
-		rpc_init_task(&data->task, NFS_CLIENT(inode), RPC_TASK_ASYNC,
-				&nfs_write_direct_ops, data);
+		task_setup_data.callback_data = data;
+		rpc_init_task(&data->task, &task_setup_data);
 		NFS_PROTO(inode)->write_setup(data, sync);
 
 		data->task.tk_priority = RPC_PRIORITY_NORMAL;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 4587a86adaac8..c7f0d5ebd4514 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -160,11 +160,17 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset)
 {
-	struct inode		*inode;
-	int flags;
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
+	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(inode),
+		.callback_ops = call_ops,
+		.callback_data = data,
+		.flags = RPC_TASK_ASYNC | swap_flags,
+	};
 
 	data->req	  = req;
-	data->inode	  = inode = req->wb_context->path.dentry->d_inode;
+	data->inode	  = inode;
 	data->cred	  = req->wb_context->cred;
 
 	data->args.fh     = NFS_FH(inode);
@@ -180,8 +186,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	nfs_fattr_init(&data->fattr);
 
 	/* Set up the initial task struct. */
-	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
-	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
+	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(inode)->read_setup(data);
 
 	data->task.tk_cookie = (unsigned long)inode;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 092e79c6d9623..c4376606f1060 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -773,8 +773,14 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		unsigned int count, unsigned int offset,
 		int how)
 {
-	struct inode		*inode;
-	int flags;
+	struct inode *inode = req->wb_context->path.dentry->d_inode;
+	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(inode),
+		.callback_ops = call_ops,
+		.callback_data = data,
+		.flags = flags,
+	};
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
@@ -796,8 +802,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	nfs_fattr_init(&data->fattr);
 
 	/* Set up the initial task struct.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
+	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(inode)->write_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
@@ -1144,16 +1149,20 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 		struct nfs_write_data *data,
 		int how)
 {
-	struct nfs_page		*first;
-	struct inode		*inode;
-	int flags;
+	struct nfs_page *first = nfs_list_entry(head->next);
+	struct inode *inode = first->wb_context->path.dentry->d_inode;
+	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(inode),
+		.callback_ops = &nfs_commit_ops,
+		.callback_data = data,
+		.flags = flags,
+	};
 
 	/* Set up the RPC argument and reply structs
 	 * NB: take care not to mess about with data->commit et al. */
 
 	list_splice_init(head, &data->pages);
-	first = nfs_list_entry(data->pages.next);
-	inode = first->wb_context->path.dentry->d_inode;
 
 	data->inode	  = inode;
 	data->cred	  = first->wb_context->cred;
@@ -1168,8 +1177,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	nfs_fattr_init(&data->fattr);
 
 	/* Set up the initial task struct.  */
-	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
-	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, &nfs_commit_ops, data);
+	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(inode)->commit_setup(data, how);
 
 	data->task.tk_priority = flush_task_priority(how);
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index d9d5c5ad826c7..ec9704181f947 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -126,7 +126,7 @@ int		rpcb_register(u32, u32, int, unsigned short, int *);
 int		rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
 void		rpcb_getport_async(struct rpc_task *);
 
-void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
+void		rpc_call_setup(struct rpc_task *, const struct rpc_message *, int);
 
 int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
 			       int flags, const struct rpc_call_ops *tk_ops,
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 8ea077db0099c..9efe045fc3768 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -117,6 +117,13 @@ struct rpc_call_ops {
 	void (*rpc_release)(void *);
 };
 
+struct rpc_task_setup {
+	struct rpc_clnt *rpc_client;
+	const struct rpc_message *rpc_message;
+	const struct rpc_call_ops *callback_ops;
+	void *callback_data;
+	unsigned short flags;
+};
 
 /*
  * RPC task flags
@@ -236,13 +243,10 @@ struct rpc_wait_queue {
 /*
  * Function prototypes
  */
-struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
-				const struct rpc_call_ops *ops, void *data);
+struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
 struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 				const struct rpc_call_ops *ops, void *data);
-void		rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
-				int flags, const struct rpc_call_ops *ops,
-				void *data);
+void		rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *);
 void		rpc_put_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 22092b91dd852..7c80abd9263f1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -524,25 +524,22 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
 EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask);
 
 static
-struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
-		struct rpc_message *msg,
-		int flags,
-		const struct rpc_call_ops *ops,
-		void *data)
+struct rpc_task *rpc_do_run_task(const struct rpc_task_setup *task_setup_data)
 {
 	struct rpc_task *task, *ret;
 	sigset_t oldset;
 
-	task = rpc_new_task(clnt, flags, ops, data);
+	task = rpc_new_task(task_setup_data);
 	if (task == NULL) {
-		rpc_release_calldata(ops, data);
+		rpc_release_calldata(task_setup_data->callback_ops,
+				task_setup_data->callback_data);
 		return ERR_PTR(-ENOMEM);
 	}
 
 	/* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
 	rpc_task_sigmask(task, &oldset);
-	if (msg != NULL) {
-		rpc_call_setup(task, msg, 0);
+	if (task_setup_data->rpc_message != NULL) {
+		rpc_call_setup(task, task_setup_data->rpc_message, 0);
 		if (task->tk_status != 0) {
 			ret = ERR_PTR(task->tk_status);
 			rpc_put_task(task);
@@ -566,11 +563,17 @@ struct rpc_task *rpc_do_run_task(struct rpc_clnt *clnt,
 int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 {
 	struct rpc_task	*task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.rpc_message = msg,
+		.callback_ops = &rpc_default_ops,
+		.flags = flags,
+	};
 	int status;
 
 	BUG_ON(flags & RPC_TASK_ASYNC);
 
-	task = rpc_do_run_task(clnt, msg, flags, &rpc_default_ops, NULL);
+	task = rpc_do_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = task->tk_status;
@@ -592,8 +595,15 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 	       const struct rpc_call_ops *tk_ops, void *data)
 {
 	struct rpc_task	*task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.rpc_message = msg,
+		.callback_ops = tk_ops,
+		.callback_data = data,
+		.flags = flags|RPC_TASK_ASYNC,
+	};
 
-	task = rpc_do_run_task(clnt, msg, flags|RPC_TASK_ASYNC, tk_ops, data);
+	task = rpc_do_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	rpc_put_task(task);
@@ -612,12 +622,19 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 					const struct rpc_call_ops *tk_ops,
 					void *data)
 {
-	return rpc_do_run_task(clnt, NULL, flags, tk_ops, data);
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.callback_ops = tk_ops,
+		.callback_data = data,
+		.flags = flags,
+	};
+
+	return rpc_do_run_task(&task_setup_data);
 }
 EXPORT_SYMBOL_GPL(rpc_run_task);
 
 void
-rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
+rpc_call_setup(struct rpc_task *task, const struct rpc_message *msg, int flags)
 {
 	task->tk_msg   = *msg;
 	task->tk_flags |= flags;
@@ -1527,7 +1544,13 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
 		.rpc_proc = &rpcproc_null,
 		.rpc_cred = cred,
 	};
-	return rpc_do_run_task(clnt, &msg, flags, &rpc_default_ops, NULL);
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = clnt,
+		.rpc_message = &msg,
+		.callback_ops = &rpc_default_ops,
+		.flags = flags,
+	};
+	return rpc_do_run_task(&task_setup_data);
 }
 EXPORT_SYMBOL_GPL(rpc_call_null);
 
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index d0b4c7e11e069..10216989309c9 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -815,18 +815,15 @@ EXPORT_SYMBOL_GPL(rpc_free);
 /*
  * Creation and deletion of RPC task structures
  */
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
 {
 	memset(task, 0, sizeof(*task));
 	setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
 			(unsigned long)task);
 	atomic_set(&task->tk_count, 1);
-	task->tk_client = clnt;
-	task->tk_flags  = flags;
-	task->tk_ops = tk_ops;
-	if (tk_ops->rpc_call_prepare != NULL)
-		task->tk_action = rpc_prepare_task;
-	task->tk_calldata = calldata;
+	task->tk_flags  = task_setup_data->flags;
+	task->tk_ops = task_setup_data->callback_ops;
+	task->tk_calldata = task_setup_data->callback_data;
 	INIT_LIST_HEAD(&task->tk_task);
 
 	/* Initialize retry counters */
@@ -839,15 +836,17 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = rpciod_workqueue;
 
-	if (clnt) {
-		kref_get(&clnt->cl_kref);
-		if (clnt->cl_softrtry)
+	task->tk_client = task_setup_data->rpc_client;
+	if (task->tk_client != NULL) {
+		kref_get(&task->tk_client->cl_kref);
+		if (task->tk_client->cl_softrtry)
 			task->tk_flags |= RPC_TASK_SOFT;
-		if (!clnt->cl_intr)
+		if (!task->tk_client->cl_intr)
 			task->tk_flags |= RPC_TASK_NOINTR;
 	}
 
-	BUG_ON(task->tk_ops == NULL);
+	if (task->tk_ops->rpc_call_prepare != NULL)
+		task->tk_action = rpc_prepare_task;
 
 	/* starting timestamp */
 	task->tk_start = jiffies;
@@ -873,7 +872,7 @@ static void rpc_free_task(struct rcu_head *rcu)
 /*
  * Create a new task for the specified client.
  */
-struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
+struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 {
 	struct rpc_task	*task;
 
@@ -881,7 +880,7 @@ struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc
 	if (!task)
 		goto out;
 
-	rpc_init_task(task, clnt, flags, tk_ops, calldata);
+	rpc_init_task(task, setup_data);
 
 	dprintk("RPC:       allocated task %p\n", task);
 	task->tk_flags |= RPC_TASK_DYNAMIC;

From c970aa85e71bd581726c42df843f6f129db275ac Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:39:59 -0400
Subject: [PATCH 022/118] SUNRPC: Clean up rpc_run_task

Make it use the new task initialiser structure instead of acting as a
wrapper.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c            | 49 ++++++++++++++++++++++++++++++------
 fs/nfs/unlink.c              |  9 ++++++-
 include/linux/sunrpc/sched.h |  3 +--
 net/sunrpc/clnt.c            | 36 +++++++-------------------
 net/sunrpc/rpcb_clnt.c       |  8 +++++-
 5 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a51a7537f3f6e..ff2c5f83ce87e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -779,12 +779,18 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
 	struct rpc_task *task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = server->client,
+		.callback_ops = &nfs4_open_confirm_ops,
+		.callback_data = data,
+		.flags = RPC_TASK_ASYNC,
+	};
 	int status;
 
 	kref_get(&data->kref);
 	data->rpc_done = 0;
 	data->rpc_status = 0;
-	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = nfs4_wait_for_completion_rpc_task(task);
@@ -908,13 +914,19 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	struct nfs_openargs *o_arg = &data->o_arg;
 	struct nfs_openres *o_res = &data->o_res;
 	struct rpc_task *task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = server->client,
+		.callback_ops = &nfs4_open_ops,
+		.callback_data = data,
+		.flags = RPC_TASK_ASYNC,
+	};
 	int status;
 
 	kref_get(&data->kref);
 	data->rpc_done = 0;
 	data->rpc_status = 0;
 	data->cancelled = 0;
-	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = nfs4_wait_for_completion_rpc_task(task);
@@ -1309,6 +1321,11 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	struct nfs4_closedata *calldata;
 	struct nfs4_state_owner *sp = state->owner;
 	struct rpc_task *task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = server->client,
+		.callback_ops = &nfs4_close_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 	int status = -ENOMEM;
 
 	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
@@ -1328,7 +1345,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	calldata->path.mnt = mntget(path->mnt);
 	calldata->path.dentry = dget(path->dentry);
 
-	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+	task_setup_data.callback_data = calldata;
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = 0;
@@ -3027,6 +3045,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	struct nfs4_delegreturndata *data;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct rpc_task *task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = server->client,
+		.callback_ops = &nfs4_delegreturn_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 	int status;
 
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
@@ -3043,7 +3066,8 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
 
-	task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
+	task_setup_data.callback_data = data;
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = nfs4_wait_for_completion_rpc_task(task);
@@ -3260,6 +3284,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		struct nfs_seqid *seqid)
 {
 	struct nfs4_unlockdata *data;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(lsp->ls_state->inode),
+		.callback_ops = &nfs4_locku_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 
 	/* Ensure this is an unlock - when canceling a lock, the
 	 * canceled lock is passed in, and it won't be an unlock.
@@ -3272,7 +3301,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	return rpc_run_task(NFS_CLIENT(lsp->ls_state->inode), RPC_TASK_ASYNC, &nfs4_locku_ops, data);
+	task_setup_data.callback_data = data;
+	return rpc_run_task(&task_setup_data);
 }
 
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
@@ -3438,6 +3468,11 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 {
 	struct nfs4_lockdata *data;
 	struct rpc_task *task;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = NFS_CLIENT(state->inode),
+		.callback_ops = &nfs4_lock_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 	int ret;
 
 	dprintk("%s: begin!\n", __FUNCTION__);
@@ -3449,8 +3484,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		data->arg.block = 1;
 	if (reclaim != 0)
 		data->arg.reclaim = 1;
-	task = rpc_run_task(NFS_CLIENT(state->inode), RPC_TASK_ASYNC,
-			&nfs4_lock_ops, data);
+	task_setup_data.callback_data = data;
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	ret = nfs4_wait_for_completion_rpc_task(task);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8e5428e0b86f2..6660d9a533454 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -127,6 +127,11 @@ static const struct rpc_call_ops nfs_unlink_ops = {
 
 static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
 {
+	struct rpc_task_setup task_setup_data = {
+		.callback_ops = &nfs_unlink_ops,
+		.callback_data = data,
+		.flags = RPC_TASK_ASYNC,
+	};
 	struct rpc_task *task;
 	struct dentry *alias;
 
@@ -160,7 +165,9 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 	data->args.fh = NFS_FH(dir);
 	nfs_fattr_init(&data->res.dir_attr);
 
-	task = rpc_run_task(NFS_CLIENT(dir), RPC_TASK_ASYNC, &nfs_unlink_ops, data);
+	task_setup_data.rpc_client = NFS_CLIENT(dir);
+
+	task = rpc_run_task(&task_setup_data);
 	if (!IS_ERR(task))
 		rpc_put_task(task);
 	return 1;
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 9efe045fc3768..d974421d76476 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -244,8 +244,7 @@ struct rpc_wait_queue {
  * Function prototypes
  */
 struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
-				const struct rpc_call_ops *ops, void *data);
+struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
 void		rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *);
 void		rpc_put_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7c80abd9263f1..9aad45946d323 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -523,8 +523,11 @@ void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset)
 }
 EXPORT_SYMBOL_GPL(rpc_clnt_sigunmask);
 
-static
-struct rpc_task *rpc_do_run_task(const struct rpc_task_setup *task_setup_data)
+/**
+ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+ * @task_setup_data: pointer to task initialisation data
+ */
+struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 {
 	struct rpc_task *task, *ret;
 	sigset_t oldset;
@@ -553,6 +556,7 @@ struct rpc_task *rpc_do_run_task(const struct rpc_task_setup *task_setup_data)
 	rpc_restore_sigmask(&oldset);
 	return ret;
 }
+EXPORT_SYMBOL_GPL(rpc_run_task);
 
 /**
  * rpc_call_sync - Perform a synchronous RPC call
@@ -573,7 +577,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 
 	BUG_ON(flags & RPC_TASK_ASYNC);
 
-	task = rpc_do_run_task(&task_setup_data);
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	status = task->tk_status;
@@ -603,7 +607,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 		.flags = flags|RPC_TASK_ASYNC,
 	};
 
-	task = rpc_do_run_task(&task_setup_data);
+	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 	rpc_put_task(task);
@@ -611,28 +615,6 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 }
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
-/**
- * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
- * @clnt: pointer to RPC client
- * @flags: RPC flags
- * @ops: RPC call ops
- * @data: user call data
- */
-struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
-					const struct rpc_call_ops *tk_ops,
-					void *data)
-{
-	struct rpc_task_setup task_setup_data = {
-		.rpc_client = clnt,
-		.callback_ops = tk_ops,
-		.callback_data = data,
-		.flags = flags,
-	};
-
-	return rpc_do_run_task(&task_setup_data);
-}
-EXPORT_SYMBOL_GPL(rpc_run_task);
-
 void
 rpc_call_setup(struct rpc_task *task, const struct rpc_message *msg, int flags)
 {
@@ -1550,7 +1532,7 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
 		.callback_ops = &rpc_default_ops,
 		.flags = flags,
 	};
-	return rpc_do_run_task(&task_setup_data);
+	return rpc_run_task(&task_setup_data);
 }
 EXPORT_SYMBOL_GPL(rpc_call_null);
 
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index a05493aedb680..7c362e5f6e1b3 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -310,6 +310,10 @@ void rpcb_getport_async(struct rpc_task *task)
 	struct rpc_clnt	*rpcb_clnt;
 	static struct rpcbind_args *map;
 	struct rpc_task	*child;
+	struct rpc_task_setup task_setup_data = {
+		.callback_ops = &rpcb_getport_ops,
+		.flags = RPC_TASK_ASYNC,
+	};
 	struct sockaddr addr;
 	int status;
 	struct rpcb_info *info;
@@ -395,7 +399,9 @@ void rpcb_getport_async(struct rpc_task *task)
 	       sizeof(map->r_addr));
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
-	child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map);
+	task_setup_data.rpc_client = rpcb_clnt;
+	task_setup_data.callback_data = map;
+	child = rpc_run_task(&task_setup_data);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;

From 3ff7576ddac06c3d07089e241b40826d24bbf1ac Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:40:00 -0400
Subject: [PATCH 023/118] SUNRPC: Clean up the initialisation of priority queue
 scheduling info.

We want the default scheduling priority (priority == 0) to remain
RPC_PRIORITY_NORMAL.

Also ensure that the priority wait queue scheduling is per process id
instead of sometimes being per thread, and sometimes being per inode.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c              | 10 ----------
 fs/nfs/read.c                |  2 --
 fs/nfs/write.c               | 12 +++++-------
 include/linux/sunrpc/sched.h | 17 +++++++++--------
 net/sunrpc/sched.c           | 30 +++++++++++++++---------------
 5 files changed, 29 insertions(+), 42 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f9f5fc13dc7dd..5bcc764e501aa 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -331,8 +331,6 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		rpc_init_task(&data->task, &task_setup_data);
 		NFS_PROTO(inode)->read_setup(data);
 
-		data->task.tk_cookie = (unsigned long) inode;
-
 		rpc_execute(&data->task);
 
 		dprintk("NFS: %5u initiated direct read call "
@@ -465,9 +463,6 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		rpc_init_task(&data->task, &task_setup_data);
 		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
 
-		data->task.tk_priority = RPC_PRIORITY_NORMAL;
-		data->task.tk_cookie = (unsigned long) inode;
-
 		/*
 		 * We're called via an RPC callback, so BKL is already held.
 		 */
@@ -534,8 +529,6 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(data->inode)->commit_setup(data, 0);
 
-	data->task.tk_priority = RPC_PRIORITY_NORMAL;
-	data->task.tk_cookie = (unsigned long)data->inode;
 	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
 	dreq->commit_data = NULL;
 
@@ -718,9 +711,6 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		rpc_init_task(&data->task, &task_setup_data);
 		NFS_PROTO(inode)->write_setup(data, sync);
 
-		data->task.tk_priority = RPC_PRIORITY_NORMAL;
-		data->task.tk_cookie = (unsigned long) inode;
-
 		rpc_execute(&data->task);
 
 		dprintk("NFS: %5u initiated direct write call "
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index c7f0d5ebd4514..8f1eb08ccffae 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -189,8 +189,6 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(inode)->read_setup(data);
 
-	data->task.tk_cookie = (unsigned long)inode;
-
 	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 			data->task.tk_pid,
 			inode->i_sb->s_id,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c4376606f1060..8d90e90ccd477 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -753,7 +753,7 @@ static void nfs_writepage_release(struct nfs_page *req)
 	nfs_clear_page_tag_locked(req);
 }
 
-static inline int flush_task_priority(int how)
+static int flush_task_priority(int how)
 {
 	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
 		case FLUSH_HIGHPRI:
@@ -775,11 +775,13 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	int priority = flush_task_priority(how);
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
 		.callback_ops = call_ops,
 		.callback_data = data,
 		.flags = flags,
+		.priority = priority,
 	};
 
 	/* Set up the RPC argument and reply structs
@@ -805,9 +807,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(inode)->write_setup(data, how);
 
-	data->task.tk_priority = flush_task_priority(how);
-	data->task.tk_cookie = (unsigned long)inode;
-
 	dprintk("NFS: %5u initiated write call "
 		"(req %s/%Ld, %u bytes @ offset %Lu)\n",
 		data->task.tk_pid,
@@ -1152,11 +1151,13 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	struct nfs_page *first = nfs_list_entry(head->next);
 	struct inode *inode = first->wb_context->path.dentry->d_inode;
 	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
+	int priority = flush_task_priority(how);
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
 		.callback_ops = &nfs_commit_ops,
 		.callback_data = data,
 		.flags = flags,
+		.priority = priority,
 	};
 
 	/* Set up the RPC argument and reply structs
@@ -1180,9 +1181,6 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	rpc_init_task(&data->task, &task_setup_data);
 	NFS_PROTO(inode)->commit_setup(data, how);
 
-	data->task.tk_priority = flush_task_priority(how);
-	data->task.tk_cookie = (unsigned long)inode;
-	
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 }
 
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index d974421d76476..c9444fdc23ac6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -56,8 +56,6 @@ struct rpc_task {
 	__u8			tk_garb_retry;
 	__u8			tk_cred_retry;
 
-	unsigned long		tk_cookie;	/* Cookie for batching tasks */
-
 	/*
 	 * timeout_fn   to be executed by timer bottom half
 	 * callback	to be executed after waking up
@@ -78,7 +76,6 @@ struct rpc_task {
 	struct timer_list	tk_timer;	/* kernel timer */
 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
 	unsigned short		tk_flags;	/* misc flags */
-	unsigned char		tk_priority : 2;/* Task priority */
 	unsigned long		tk_runstate;	/* Task run status */
 	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
 						 * be any workqueue
@@ -94,6 +91,9 @@ struct rpc_task {
 	unsigned long		tk_start;	/* RPC task init timestamp */
 	long			tk_rtt;		/* round-trip time (jiffies) */
 
+	pid_t			tk_owner;	/* Process id for batching tasks */
+	unsigned char		tk_priority : 2;/* Task priority */
+
 #ifdef RPC_DEBUG
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
@@ -123,6 +123,7 @@ struct rpc_task_setup {
 	const struct rpc_call_ops *callback_ops;
 	void *callback_data;
 	unsigned short flags;
+	signed char priority;
 };
 
 /*
@@ -187,10 +188,10 @@ struct rpc_task_setup {
  * Note: if you change these, you must also change
  * the task initialization definitions below.
  */
-#define RPC_PRIORITY_LOW	0
-#define RPC_PRIORITY_NORMAL	1
-#define RPC_PRIORITY_HIGH	2
-#define RPC_NR_PRIORITY		(RPC_PRIORITY_HIGH+1)
+#define RPC_PRIORITY_LOW	(-1)
+#define RPC_PRIORITY_NORMAL	(0)
+#define RPC_PRIORITY_HIGH	(1)
+#define RPC_NR_PRIORITY		(1 + RPC_PRIORITY_HIGH - RPC_PRIORITY_LOW)
 
 /*
  * RPC synchronization objects
@@ -198,7 +199,7 @@ struct rpc_task_setup {
 struct rpc_wait_queue {
 	spinlock_t		lock;
 	struct list_head	tasks[RPC_NR_PRIORITY];	/* task queue for each priority level */
-	unsigned long		cookie;			/* cookie of last task serviced */
+	pid_t			owner;			/* process id of last task serviced */
 	unsigned char		maxpriority;		/* maximum priority (0 if queue is not a priority queue) */
 	unsigned char		priority;		/* current priority */
 	unsigned char		count;			/* # task groups remaining serviced so far */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 10216989309c9..b9061bcf6fc15 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -135,7 +135,7 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
 	if (unlikely(task->tk_priority > queue->maxpriority))
 		q = &queue->tasks[queue->maxpriority];
 	list_for_each_entry(t, q, u.tk_wait.list) {
-		if (t->tk_cookie == task->tk_cookie) {
+		if (t->tk_owner == task->tk_owner) {
 			list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
 			return;
 		}
@@ -208,26 +208,26 @@ static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int
 	queue->count = 1 << (priority * 2);
 }
 
-static inline void rpc_set_waitqueue_cookie(struct rpc_wait_queue *queue, unsigned long cookie)
+static inline void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
 {
-	queue->cookie = cookie;
+	queue->owner = pid;
 	queue->nr = RPC_BATCH_COUNT;
 }
 
 static inline void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
 {
 	rpc_set_waitqueue_priority(queue, queue->maxpriority);
-	rpc_set_waitqueue_cookie(queue, 0);
+	rpc_set_waitqueue_owner(queue, 0);
 }
 
-static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, int maxprio)
+static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname, unsigned char nr_queues)
 {
 	int i;
 
 	spin_lock_init(&queue->lock);
 	for (i = 0; i < ARRAY_SIZE(queue->tasks); i++)
 		INIT_LIST_HEAD(&queue->tasks[i]);
-	queue->maxpriority = maxprio;
+	queue->maxpriority = nr_queues - 1;
 	rpc_reset_waitqueue_priority(queue);
 #ifdef RPC_DEBUG
 	queue->name = qname;
@@ -236,12 +236,12 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c
 
 void rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 {
-	__rpc_init_priority_wait_queue(queue, qname, RPC_PRIORITY_HIGH);
+	__rpc_init_priority_wait_queue(queue, qname, RPC_NR_PRIORITY);
 }
 
 void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 {
-	__rpc_init_priority_wait_queue(queue, qname, 0);
+	__rpc_init_priority_wait_queue(queue, qname, 1);
 }
 EXPORT_SYMBOL_GPL(rpc_init_wait_queue);
 
@@ -456,12 +456,12 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
 	struct rpc_task *task;
 
 	/*
-	 * Service a batch of tasks from a single cookie.
+	 * Service a batch of tasks from a single owner.
 	 */
 	q = &queue->tasks[queue->priority];
 	if (!list_empty(q)) {
 		task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
-		if (queue->cookie == task->tk_cookie) {
+		if (queue->owner == task->tk_owner) {
 			if (--queue->nr)
 				goto out;
 			list_move_tail(&task->u.tk_wait.list, q);
@@ -470,7 +470,7 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
 		 * Check if we need to switch queues.
 		 */
 		if (--queue->count)
-			goto new_cookie;
+			goto new_owner;
 	}
 
 	/*
@@ -492,8 +492,8 @@ static struct rpc_task * __rpc_wake_up_next_priority(struct rpc_wait_queue *queu
 
 new_queue:
 	rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_cookie:
-	rpc_set_waitqueue_cookie(queue, task->tk_cookie);
+new_owner:
+	rpc_set_waitqueue_owner(queue, task->tk_owner);
 out:
 	__rpc_wake_up_task(task);
 	return task;
@@ -830,8 +830,8 @@ void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setu
 	task->tk_garb_retry = 2;
 	task->tk_cred_retry = 2;
 
-	task->tk_priority = RPC_PRIORITY_NORMAL;
-	task->tk_cookie = (unsigned long)current;
+	task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
+	task->tk_owner = current->tgid;
 
 	/* Initialize workqueue for async tasks */
 	task->tk_workqueue = rpciod_workqueue;

From 5085925902cc4d93b9a4992936edd2aee70a5e15 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:19:37 -0400
Subject: [PATCH 024/118] SUNRPC: Mask signals across the call to
 rpc_call_setup() in rpc_run_task

To ensure that the RPCSEC_GSS upcall is performed with the correct sigmask.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth.c |  3 +++
 net/sunrpc/clnt.c | 13 +++++++------
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index eca941ce298b6..bcd9abdb031c4 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -385,6 +385,7 @@ rpcauth_bindcred(struct rpc_task *task)
 		.group_info = current->group_info,
 	};
 	struct rpc_cred *ret;
+	sigset_t oldset;
 	int flags = 0;
 
 	dprintk("RPC: %5u looking up %s cred\n",
@@ -392,7 +393,9 @@ rpcauth_bindcred(struct rpc_task *task)
 	get_group_info(acred.group_info);
 	if (task->tk_flags & RPC_TASK_ROOTCREDS)
 		flags |= RPCAUTH_LOOKUP_ROOTCREDS;
+	rpc_clnt_sigmask(task->tk_client, &oldset);
 	ret = auth->au_ops->lookup_cred(auth, &acred, flags);
+	rpc_clnt_sigunmask(task->tk_client, &oldset);
 	if (!IS_ERR(ret))
 		task->tk_msg.rpc_cred = ret;
 	else
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 9aad45946d323..aefe3ae218f77 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -501,12 +501,12 @@ static void rpc_save_sigmask(sigset_t *oldset, int intr)
 	sigprocmask(SIG_BLOCK, &sigmask, oldset);
 }
 
-static inline void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
+static void rpc_task_sigmask(struct rpc_task *task, sigset_t *oldset)
 {
 	rpc_save_sigmask(oldset, !RPC_TASK_UNINTERRUPTIBLE(task));
 }
 
-static inline void rpc_restore_sigmask(sigset_t *oldset)
+static void rpc_restore_sigmask(sigset_t *oldset)
 {
 	sigprocmask(SIG_SETMASK, oldset, NULL);
 }
@@ -536,11 +536,10 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	if (task == NULL) {
 		rpc_release_calldata(task_setup_data->callback_ops,
 				task_setup_data->callback_data);
-		return ERR_PTR(-ENOMEM);
+		ret = ERR_PTR(-ENOMEM);
+		goto out;
 	}
 
-	/* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
-	rpc_task_sigmask(task, &oldset);
 	if (task_setup_data->rpc_message != NULL) {
 		rpc_call_setup(task, task_setup_data->rpc_message, 0);
 		if (task->tk_status != 0) {
@@ -550,10 +549,12 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 		}
 	}
 	atomic_inc(&task->tk_count);
+	/* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
+	rpc_task_sigmask(task, &oldset);
 	rpc_execute(task);
+	rpc_restore_sigmask(&oldset);
 	ret = task;
 out:
-	rpc_restore_sigmask(&oldset);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(rpc_run_task);

From 77de2c590ec72828156d85fa13a96db87301cc68 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:40:21 -0400
Subject: [PATCH 025/118] SUNRPC: Add a helper rpc_call_start() that
 initialises task->tk_action

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 +-
 net/sunrpc/clnt.c           | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ec9704181f947..c79f2edf03237 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -127,7 +127,7 @@ int		rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
 void		rpcb_getport_async(struct rpc_task *);
 
 void		rpc_call_setup(struct rpc_task *, const struct rpc_message *, int);
-
+void		rpc_call_start(struct rpc_task *);
 int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
 			       int flags, const struct rpc_call_ops *tk_ops,
 			       void *calldata);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index aefe3ae218f77..7aeffeebf42d4 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -634,6 +634,13 @@ rpc_call_setup(struct rpc_task *task, const struct rpc_message *msg, int flags)
 }
 EXPORT_SYMBOL_GPL(rpc_call_setup);
 
+void
+rpc_call_start(struct rpc_task *task)
+{
+	task->tk_action = call_start;
+}
+EXPORT_SYMBOL_GPL(rpc_call_start);
+
 /**
  * rpc_peeraddr - extract remote peer address from clnt's xprt
  * @clnt: RPC client structure

From b3ef8b3bb93300e58a4c4806207de3de4eb76f48 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:32:34 -0400
Subject: [PATCH 026/118] SUNRPC: Allow rpc_init_task() to initialise the
 rpc_task->tk_msg

In preparation for the removal of rpc_call_setup().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c  | 11 ++++-------
 net/sunrpc/sched.c | 11 +++++++++++
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 7aeffeebf42d4..6eb79c49c9378 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -540,13 +540,10 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 		goto out;
 	}
 
-	if (task_setup_data->rpc_message != NULL) {
-		rpc_call_setup(task, task_setup_data->rpc_message, 0);
-		if (task->tk_status != 0) {
-			ret = ERR_PTR(task->tk_status);
-			rpc_put_task(task);
-			goto out;
-		}
+	if (task->tk_status != 0) {
+		ret = ERR_PTR(task->tk_status);
+		rpc_put_task(task);
+		goto out;
 	}
 	atomic_inc(&task->tk_count);
 	/* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index b9061bcf6fc15..fa53a88b2c5bd 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -848,6 +848,17 @@ void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setu
 	if (task->tk_ops->rpc_call_prepare != NULL)
 		task->tk_action = rpc_prepare_task;
 
+	if (task_setup_data->rpc_message != NULL) {
+		memcpy(&task->tk_msg, task_setup_data->rpc_message, sizeof(task->tk_msg));
+		/* Bind the user cred */
+		if (task->tk_msg.rpc_cred != NULL)
+			rpcauth_holdcred(task);
+		else
+			rpcauth_bindcred(task);
+		if (task->tk_action == NULL)
+			rpc_call_start(task);
+	}
+
 	/* starting timestamp */
 	task->tk_start = jiffies;
 

From bdc7f021f3a1fade77adf3c2d7f65690566fddfe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:40:00 -0400
Subject: [PATCH 027/118] NFS: Clean up the (commit|read|write)_setup()
 callback routines

Move the common code for setting up the nfs_write_data and nfs_read_data
structures into fs/nfs/read.c, fs/nfs/write.c and fs/nfs/direct.c.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c         | 45 ++++++++++++++++++++++++++++--------
 fs/nfs/nfs3proc.c       | 41 +++++----------------------------
 fs/nfs/nfs4proc.c       | 49 +++++++--------------------------------
 fs/nfs/proc.c           | 26 ++++-----------------
 fs/nfs/read.c           | 38 +++++++++++++++---------------
 fs/nfs/write.c          | 51 ++++++++++++++++++++++++++++-------------
 include/linux/nfs_xdr.h |  6 ++---
 7 files changed, 113 insertions(+), 143 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 5bcc764e501aa..244d1bd7002c5 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -272,8 +272,12 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
 	size_t rsize = NFS_SERVER(inode)->rsize;
+	struct rpc_message msg = {
+		.rpc_cred = ctx->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
+		.rpc_message = &msg,
 		.callback_ops = &nfs_read_direct_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
@@ -316,7 +320,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 
 		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
-		data->cred = ctx->cred;
+		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
@@ -326,10 +330,12 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.fattr = &data->fattr;
 		data->res.eof = 0;
 		data->res.count = bytes;
+		msg.rpc_argp = &data->args;
+		msg.rpc_resp = &data->res;
 
 		task_setup_data.callback_data = data;
+		NFS_PROTO(inode)->read_setup(data, &msg);
 		rpc_init_task(&data->task, &task_setup_data);
-		NFS_PROTO(inode)->read_setup(data);
 
 		rpc_execute(&data->task);
 
@@ -434,6 +440,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct inode *inode = dreq->inode;
 	struct list_head *p;
 	struct nfs_write_data *data;
+	struct rpc_message msg = {
+		.rpc_cred = dreq->ctx->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
 		.callback_ops = &nfs_write_direct_ops,
@@ -448,6 +457,9 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 
 		get_dreq(dreq);
 
+		/* Use stable writes */
+		data->args.stable = NFS_FILE_SYNC;
+
 		/*
 		 * Reset data->res.
 		 */
@@ -460,8 +472,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		 * since the original request was sent.
 		 */
 		task_setup_data.callback_data = data;
+		msg.rpc_argp = &data->args;
+		msg.rpc_resp = &data->res;
+		NFS_PROTO(inode)->write_setup(data, &msg);
 		rpc_init_task(&data->task, &task_setup_data);
-		NFS_PROTO(inode)->write_setup(data, FLUSH_STABLE);
 
 		/*
 		 * We're called via an RPC callback, so BKL is already held.
@@ -509,15 +523,21 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = dreq->ctx->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(dreq->inode),
+		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_direct_ops,
 		.callback_data = data,
 		.flags = RPC_TASK_ASYNC,
 	};
 
 	data->inode = dreq->inode;
-	data->cred = dreq->ctx->cred;
+	data->cred = msg.rpc_cred;
 
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = 0;
@@ -526,8 +546,8 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
 
+	NFS_PROTO(data->inode)->commit_setup(data, &msg);
 	rpc_init_task(&data->task, &task_setup_data);
-	NFS_PROTO(data->inode)->commit_setup(data, 0);
 
 	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
 	dreq->commit_data = NULL;
@@ -649,8 +669,12 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	struct inode *inode = ctx->path.dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
+	struct rpc_message msg = {
+		.rpc_cred = ctx->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
+		.rpc_message = &msg,
 		.callback_ops = &nfs_write_direct_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
@@ -696,20 +720,23 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 
 		data->req = (struct nfs_page *) dreq;
 		data->inode = inode;
-		data->cred = ctx->cred;
+		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
 		data->args.context = ctx;
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
 		data->args.count = bytes;
+		data->args.stable = sync;
 		data->res.fattr = &data->fattr;
 		data->res.count = bytes;
 		data->res.verf = &data->verf;
 
 		task_setup_data.callback_data = data;
+		msg.rpc_argp = &data->args;
+		msg.rpc_resp = &data->res;
+		NFS_PROTO(inode)->write_setup(data, &msg);
 		rpc_init_task(&data->task, &task_setup_data);
-		NFS_PROTO(inode)->write_setup(data, sync);
 
 		rpc_execute(&data->task);
 
@@ -782,7 +809,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 	size_t wsize = NFS_SERVER(inode)->wsize;
-	int sync = 0;
+	int sync = NFS_UNSTABLE;
 
 	dreq = nfs_direct_req_alloc();
 	if (!dreq)
@@ -790,7 +817,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	nfs_alloc_commit_data(dreq);
 
 	if (dreq->commit_data == NULL || count < wsize)
-		sync = FLUSH_STABLE;
+		sync = NFS_FILE_SYNC;
 
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 4cdc2361a669b..e68580ebaa479 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -732,16 +732,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs3_proc_read_setup(struct nfs_read_data *data)
+static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
 {
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_READ],
-		.rpc_argp	= &data->args,
-		.rpc_resp	= &data->res,
-		.rpc_cred	= data->cred,
-	};
-
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
 }
 
 static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -753,24 +746,9 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs3_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_WRITE],
-		.rpc_argp	= &data->args,
-		.rpc_resp	= &data->res,
-		.rpc_cred	= data->cred,
-	};
-
-	data->args.stable = NFS_UNSTABLE;
-	if (how & FLUSH_STABLE) {
-		data->args.stable = NFS_FILE_SYNC;
-		if (NFS_I(data->inode)->ncommit)
-			data->args.stable = NFS_DATA_SYNC;
-	}
-
-	/* Finalize the task. */
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
 }
 
 static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -781,16 +759,9 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs3_procedures[NFS3PROC_COMMIT],
-		.rpc_argp	= &data->args,
-		.rpc_resp	= &data->res,
-		.rpc_cred	= data->cred,
-	};
-
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
 }
 
 static int
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index ff2c5f83ce87e..7c0baf23abdc8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2432,18 +2432,10 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs4_proc_read_setup(struct nfs_read_data *data)
+static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
 {
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ],
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
-	};
-
 	data->timestamp   = jiffies;
-
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 }
 
 static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -2461,33 +2453,15 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs4_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
-	};
-	struct inode *inode = data->inode;
-	struct nfs_server *server = NFS_SERVER(inode);
-	int stable;
-	
-	if (how & FLUSH_STABLE) {
-		if (!NFS_I(inode)->ncommit)
-			stable = NFS_FILE_SYNC;
-		else
-			stable = NFS_DATA_SYNC;
-	} else
-		stable = NFS_UNSTABLE;
-	data->args.stable = stable;
+	struct nfs_server *server = NFS_SERVER(data->inode);
+
 	data->args.bitmask = server->attr_bitmask;
 	data->res.server = server;
-
 	data->timestamp   = jiffies;
 
-	/* Finalize the task. */
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
 }
 
 static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -2502,20 +2476,13 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
+static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
-	};	
 	struct nfs_server *server = NFS_SERVER(data->inode);
 	
 	data->args.bitmask = server->attr_bitmask;
 	data->res.server = server;
-
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
 }
 
 /*
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 4f80d88e9fee0..c9f46a24e75c7 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -565,16 +565,9 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static void nfs_proc_read_setup(struct nfs_read_data *data)
+static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
 {
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs_procedures[NFSPROC_READ],
-		.rpc_argp	= &data->args,
-		.rpc_resp	= &data->res,
-		.rpc_cred	= data->cred,
-	};
-
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
 }
 
 static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
@@ -584,24 +577,15 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
 	return 0;
 }
 
-static void nfs_proc_write_setup(struct nfs_write_data *data, int how)
+static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
-	struct rpc_message	msg = {
-		.rpc_proc	= &nfs_procedures[NFSPROC_WRITE],
-		.rpc_argp	= &data->args,
-		.rpc_resp	= &data->res,
-		.rpc_cred	= data->cred,
-	};
-
 	/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
 	data->args.stable = NFS_FILE_SYNC;
-
-	/* Finalize the task. */
-	rpc_call_setup(&data->task, &msg, 0);
+	msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
 }
 
 static void
-nfs_proc_commit_setup(struct nfs_write_data *data, int how)
+nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
 {
 	BUG();
 }
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 8f1eb08ccffae..e9dbdc8eafe6b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -153,6 +153,16 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
+static void nfs_execute_read(struct nfs_read_data *data)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
+	sigset_t oldset;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	rpc_execute(&data->task);
+	rpc_clnt_sigunmask(clnt, &oldset);
+}
+
 /*
  * Set up the NFS read request struct
  */
@@ -162,8 +172,14 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = req->wb_context->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
+		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
 		.flags = RPC_TASK_ASYNC | swap_flags,
@@ -171,7 +187,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 
 	data->req	  = req;
 	data->inode	  = inode;
-	data->cred	  = req->wb_context->cred;
+	data->cred	  = msg.rpc_cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -186,8 +202,8 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	nfs_fattr_init(&data->fattr);
 
 	/* Set up the initial task struct. */
+	NFS_PROTO(inode)->read_setup(data, &msg);
 	rpc_init_task(&data->task, &task_setup_data);
-	NFS_PROTO(inode)->read_setup(data);
 
 	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 			data->task.tk_pid,
@@ -195,6 +211,8 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 			(long long)NFS_FILEID(inode),
 			count,
 			(unsigned long long)data->args.offset);
+
+	nfs_execute_read(data);
 }
 
 static void
@@ -210,19 +228,6 @@ nfs_async_read_error(struct list_head *head)
 	}
 }
 
-/*
- * Start an async read operation
- */
-static void nfs_execute_read(struct nfs_read_data *data)
-{
-	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
-	sigset_t oldset;
-
-	rpc_clnt_sigmask(clnt, &oldset);
-	rpc_execute(&data->task);
-	rpc_clnt_sigunmask(clnt, &oldset);
-}
-
 /*
  * Generate multiple requests to fill a single page.
  *
@@ -277,7 +282,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
 				  rsize, offset);
 		offset += rsize;
 		nbytes -= rsize;
-		nfs_execute_read(data);
 	} while (nbytes != 0);
 
 	return 0;
@@ -315,8 +319,6 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
 	req = nfs_list_entry(data->pages.next);
 
 	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
-
-	nfs_execute_read(data);
 	return 0;
 out_bad:
 	nfs_async_read_error(head);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 8d90e90ccd477..9a69469274ae1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -764,6 +764,16 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
+static void nfs_execute_write(struct nfs_write_data *data)
+{
+	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
+	sigset_t oldset;
+
+	rpc_clnt_sigmask(clnt, &oldset);
+	rpc_execute(&data->task);
+	rpc_clnt_sigunmask(clnt, &oldset);
+}
+
 /*
  * Set up the argument/result storage required for the RPC call.
  */
@@ -776,8 +786,14 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = req->wb_context->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
+		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
 		.flags = flags,
@@ -789,7 +805,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 
 	data->req = req;
 	data->inode = inode = req->wb_context->path.dentry->d_inode;
-	data->cred = req->wb_context->cred;
+	data->cred = msg.rpc_cred;
 
 	data->args.fh     = NFS_FH(inode);
 	data->args.offset = req_offset(req) + offset;
@@ -797,6 +813,12 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
 	data->args.context = req->wb_context;
+	data->args.stable  = NFS_UNSTABLE;
+	if (how & FLUSH_STABLE) {
+		data->args.stable = NFS_DATA_SYNC;
+		if (!NFS_I(inode)->ncommit)
+			data->args.stable = NFS_FILE_SYNC;
+	}
 
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
@@ -804,8 +826,8 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	nfs_fattr_init(&data->fattr);
 
 	/* Set up the initial task struct.  */
+	NFS_PROTO(inode)->write_setup(data, &msg);
 	rpc_init_task(&data->task, &task_setup_data);
-	NFS_PROTO(inode)->write_setup(data, how);
 
 	dprintk("NFS: %5u initiated write call "
 		"(req %s/%Ld, %u bytes @ offset %Lu)\n",
@@ -814,16 +836,8 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		(long long)NFS_FILEID(inode),
 		count,
 		(unsigned long long)data->args.offset);
-}
 
-static void nfs_execute_write(struct nfs_write_data *data)
-{
-	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
-	sigset_t oldset;
-
-	rpc_clnt_sigmask(clnt, &oldset);
-	rpc_execute(&data->task);
-	rpc_clnt_sigunmask(clnt, &oldset);
+	nfs_execute_write(data);
 }
 
 /*
@@ -870,7 +884,6 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 				   wsize, offset, how);
 		offset += wsize;
 		nbytes -= wsize;
-		nfs_execute_write(data);
 	} while (nbytes != 0);
 
 	return 0;
@@ -918,7 +931,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 	/* Set up the argument struct */
 	nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
 
-	nfs_execute_write(data);
 	return 0;
  out_bad:
 	while (!list_empty(head)) {
@@ -1152,8 +1164,14 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	struct inode *inode = first->wb_context->path.dentry->d_inode;
 	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = first->wb_context->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
+		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_ops,
 		.callback_data = data,
 		.flags = flags,
@@ -1166,7 +1184,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	list_splice_init(head, &data->pages);
 
 	data->inode	  = inode;
-	data->cred	  = first->wb_context->cred;
+	data->cred	  = msg.rpc_cred;
 
 	data->args.fh     = NFS_FH(data->inode);
 	/* Note: we always request a commit of the entire inode */
@@ -1178,10 +1196,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	nfs_fattr_init(&data->fattr);
 
 	/* Set up the initial task struct.  */
+	NFS_PROTO(inode)->commit_setup(data, &msg);
 	rpc_init_task(&data->task, &task_setup_data);
-	NFS_PROTO(inode)->commit_setup(data, how);
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
+
+	nfs_execute_write(data);
 }
 
 /*
@@ -1201,7 +1221,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 	/* Set up the argument struct */
 	nfs_commit_rpcsetup(head, data, how);
 
-	nfs_execute_write(data);
 	return 0;
  out_bad:
 	while (!list_empty(head)) {
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index daab252f2e5cf..6b213a64b15f0 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -816,11 +816,11 @@ struct nfs_rpc_ops {
 			     struct nfs_pathconf *);
 	int	(*set_capabilities)(struct nfs_server *, struct nfs_fh *);
 	__be32 *(*decode_dirent)(__be32 *, struct nfs_entry *, int plus);
-	void	(*read_setup)   (struct nfs_read_data *);
+	void	(*read_setup)   (struct nfs_read_data *, struct rpc_message *);
 	int	(*read_done)  (struct rpc_task *, struct nfs_read_data *);
-	void	(*write_setup)  (struct nfs_write_data *, int how);
+	void	(*write_setup)  (struct nfs_write_data *, struct rpc_message *);
 	int	(*write_done)  (struct rpc_task *, struct nfs_write_data *);
-	void	(*commit_setup) (struct nfs_write_data *, int how);
+	void	(*commit_setup) (struct nfs_write_data *, struct rpc_message *);
 	int	(*commit_done) (struct rpc_task *, struct nfs_write_data *);
 	int	(*file_open)   (struct inode *, struct file *);
 	int	(*file_release) (struct inode *, struct file *);

From 5138fde01161cd7976fdc51f6a17da73adaa6baf Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Jul 2007 15:40:01 -0400
Subject: [PATCH 028/118] NFS/SUNRPC: Convert all users of rpc_call_setup()

Replace use of rpc_call_setup() with rpc_init_task(), and in cases where we
need to initialise task->tk_action, with rpc_call_start().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c      | 118 ++++++++++++++++++-----------------------
 fs/nfs/unlink.c        |  28 +++-------
 net/sunrpc/rpcb_clnt.c |  40 +++++++-------
 3 files changed, 79 insertions(+), 107 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7c0baf23abdc8..826b445b8c701 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -718,19 +718,6 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
 	return err;
 }
 
-static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs4_opendata *data = calldata;
-	struct  rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
-		.rpc_argp = &data->c_arg,
-		.rpc_resp = &data->c_res,
-		.rpc_cred = data->owner->so_cred,
-	};
-	data->timestamp = jiffies;
-	rpc_call_setup(task, &msg, 0);
-}
-
 static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_opendata *data = calldata;
@@ -767,7 +754,6 @@ static void nfs4_open_confirm_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs4_open_confirm_ops = {
-	.rpc_call_prepare = nfs4_open_confirm_prepare,
 	.rpc_call_done = nfs4_open_confirm_done,
 	.rpc_release = nfs4_open_confirm_release,
 };
@@ -779,8 +765,15 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 {
 	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
 	struct rpc_task *task;
+	struct  rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+		.rpc_argp = &data->c_arg,
+		.rpc_resp = &data->c_res,
+		.rpc_cred = data->owner->so_cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = server->client,
+		.rpc_message = &msg,
 		.callback_ops = &nfs4_open_confirm_ops,
 		.callback_data = data,
 		.flags = RPC_TASK_ASYNC,
@@ -790,6 +783,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	kref_get(&data->kref);
 	data->rpc_done = 0;
 	data->rpc_status = 0;
+	data->timestamp = jiffies;
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -807,13 +801,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_opendata *data = calldata;
 	struct nfs4_state_owner *sp = data->owner;
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
-		.rpc_argp = &data->o_arg,
-		.rpc_resp = &data->o_res,
-		.rpc_cred = sp->so_cred,
-	};
-	
+
 	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
 		return;
 	/*
@@ -838,11 +826,11 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	data->o_arg.id = sp->so_owner_id.id;
 	data->o_arg.clientid = sp->so_client->cl_clientid;
 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
-		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
+		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
 		nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
 	}
 	data->timestamp = jiffies;
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_start(task);
 	return;
 out_no_action:
 	task->tk_action = NULL;
@@ -914,8 +902,15 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	struct nfs_openargs *o_arg = &data->o_arg;
 	struct nfs_openres *o_res = &data->o_res;
 	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN],
+		.rpc_argp = o_arg,
+		.rpc_resp = o_res,
+		.rpc_cred = data->owner->so_cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = server->client,
+		.rpc_message = &msg,
 		.callback_ops = &nfs4_open_ops,
 		.callback_data = data,
 		.flags = RPC_TASK_ASYNC,
@@ -1256,12 +1251,6 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
-		.rpc_argp = &calldata->arg,
-		.rpc_resp = &calldata->res,
-		.rpc_cred = state->owner->so_cred,
-	};
 	int clear_rd, clear_wr, clear_rdwr;
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
@@ -1288,14 +1277,14 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 	}
 	nfs_fattr_init(calldata->res.fattr);
 	if (test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0) {
-		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 		calldata->arg.open_flags = FMODE_READ;
 	} else if (test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0) {
-		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
+		task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
 		calldata->arg.open_flags = FMODE_WRITE;
 	}
 	calldata->timestamp = jiffies;
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_start(task);
 }
 
 static const struct rpc_call_ops nfs4_close_ops = {
@@ -1321,8 +1310,13 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	struct nfs4_closedata *calldata;
 	struct nfs4_state_owner *sp = state->owner;
 	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
+		.rpc_cred = state->owner->so_cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = server->client,
+		.rpc_message = &msg,
 		.callback_ops = &nfs4_close_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
@@ -1345,6 +1339,8 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 	calldata->path.mnt = mntget(path->mnt);
 	calldata->path.dentry = dget(path->dentry);
 
+	msg.rpc_argp = &calldata->arg,
+	msg.rpc_resp = &calldata->res,
 	task_setup_data.callback_data = calldata;
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
@@ -2966,25 +2962,11 @@ struct nfs4_delegreturndata {
 	struct nfs4_delegreturnres res;
 	struct nfs_fh fh;
 	nfs4_stateid stateid;
-	struct rpc_cred *cred;
 	unsigned long timestamp;
 	struct nfs_fattr fattr;
 	int rpc_status;
 };
 
-static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata)
-{
-	struct nfs4_delegreturndata *data = calldata;
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
-	};
-	nfs_fattr_init(data->res.fattr);
-	rpc_call_setup(task, &msg, 0);
-}
-
 static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_delegreturndata *data = calldata;
@@ -2995,14 +2977,10 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 
 static void nfs4_delegreturn_release(void *calldata)
 {
-	struct nfs4_delegreturndata *data = calldata;
-
-	put_rpccred(data->cred);
 	kfree(calldata);
 }
 
 static const struct rpc_call_ops nfs4_delegreturn_ops = {
-	.rpc_call_prepare = nfs4_delegreturn_prepare,
 	.rpc_call_done = nfs4_delegreturn_done,
 	.rpc_release = nfs4_delegreturn_release,
 };
@@ -3012,8 +2990,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	struct nfs4_delegreturndata *data;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
+		.rpc_cred = cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = server->client,
+		.rpc_message = &msg,
 		.callback_ops = &nfs4_delegreturn_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
@@ -3029,11 +3012,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	memcpy(&data->stateid, stateid, sizeof(data->stateid));
 	data->res.fattr = &data->fattr;
 	data->res.server = server;
-	data->cred = get_rpccred(cred);
+	nfs_fattr_init(data->res.fattr);
 	data->timestamp = jiffies;
 	data->rpc_status = 0;
 
 	task_setup_data.callback_data = data;
+	msg.rpc_argp = &data->args,
+	msg.rpc_resp = &data->res,
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -3221,12 +3206,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
-	struct rpc_message msg = {
-		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
-		.rpc_argp       = &calldata->arg,
-		.rpc_resp       = &calldata->res,
-		.rpc_cred	= calldata->lsp->ls_state->owner->so_cred,
-	};
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
@@ -3236,7 +3215,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 		return;
 	}
 	calldata->timestamp = jiffies;
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_start(task);
 }
 
 static const struct rpc_call_ops nfs4_locku_ops = {
@@ -3251,8 +3230,13 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		struct nfs_seqid *seqid)
 {
 	struct nfs4_unlockdata *data;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
+		.rpc_cred = ctx->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(lsp->ls_state->inode),
+		.rpc_message = &msg,
 		.callback_ops = &nfs4_locku_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
@@ -3268,6 +3252,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		return ERR_PTR(-ENOMEM);
 	}
 
+	msg.rpc_argp = &data->arg,
+	msg.rpc_resp = &data->res,
 	task_setup_data.callback_data = data;
 	return rpc_run_task(&task_setup_data);
 }
@@ -3353,13 +3339,6 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 {
 	struct nfs4_lockdata *data = calldata;
 	struct nfs4_state *state = data->lsp->ls_state;
-	struct nfs4_state_owner *sp = state->owner;
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
-		.rpc_argp = &data->arg,
-		.rpc_resp = &data->res,
-		.rpc_cred = sp->so_cred,
-	};
 
 	dprintk("%s: begin!\n", __FUNCTION__);
 	if (nfs_wait_on_sequence(data->arg.lock_seqid, task) != 0)
@@ -3373,7 +3352,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 	} else
 		data->arg.new_lock_owner = 0;
 	data->timestamp = jiffies;
-	rpc_call_setup(task, &msg, 0);
+	rpc_call_start(task);
 	dprintk("%s: done!, ret = %d\n", __FUNCTION__, data->rpc_status);
 }
 
@@ -3435,8 +3414,13 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 {
 	struct nfs4_lockdata *data;
 	struct rpc_task *task;
+	struct rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK],
+		.rpc_cred = state->owner->so_cred,
+	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(state->inode),
+		.rpc_message = &msg,
 		.callback_ops = &nfs4_lock_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
@@ -3451,6 +3435,8 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		data->arg.block = 1;
 	if (reclaim != 0)
 		data->arg.reclaim = 1;
+	msg.rpc_argp = &data->arg,
+	msg.rpc_resp = &data->res,
 	task_setup_data.callback_data = data;
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 6660d9a533454..757415363422d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -70,24 +70,6 @@ static void nfs_dec_sillycount(struct inode *dir)
 		wake_up(&nfsi->waitqueue);
 }
 
-/**
- * nfs_async_unlink_init - Initialize the RPC info
- * task: rpc_task of the sillydelete
- */
-static void nfs_async_unlink_init(struct rpc_task *task, void *calldata)
-{
-	struct nfs_unlinkdata *data = calldata;
-	struct inode *dir = data->dir;
-	struct rpc_message msg = {
-		.rpc_argp = &data->args,
-		.rpc_resp = &data->res,
-		.rpc_cred = data->cred,
-	};
-
-	NFS_PROTO(dir)->unlink_setup(&msg, dir);
-	rpc_call_setup(task, &msg, 0);
-}
-
 /**
  * nfs_async_unlink_done - Sillydelete post-processing
  * @task: rpc_task of the sillydelete
@@ -120,14 +102,19 @@ static void nfs_async_unlink_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {
-	.rpc_call_prepare = nfs_async_unlink_init,
 	.rpc_call_done = nfs_async_unlink_done,
 	.rpc_release = nfs_async_unlink_release,
 };
 
 static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct nfs_unlinkdata *data)
 {
+	struct rpc_message msg = {
+		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
+		.rpc_cred = data->cred,
+	};
 	struct rpc_task_setup task_setup_data = {
+		.rpc_message = &msg,
 		.callback_ops = &nfs_unlink_ops,
 		.callback_data = data,
 		.flags = RPC_TASK_ASYNC,
@@ -165,8 +152,9 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 	data->args.fh = NFS_FH(dir);
 	nfs_fattr_init(&data->res.dir_attr);
 
-	task_setup_data.rpc_client = NFS_CLIENT(dir);
+	NFS_PROTO(dir)->unlink_setup(&msg, dir);
 
+	task_setup_data.rpc_client = NFS_CLIENT(dir);
 	task = rpc_run_task(&task_setup_data);
 	if (!IS_ERR(task))
 		rpc_put_task(task);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 7c362e5f6e1b3..f876e37d1972d 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -128,19 +128,6 @@ struct rpcb_info {
 static struct rpcb_info rpcb_next_version[];
 static struct rpcb_info rpcb_next_version6[];
 
-static void rpcb_getport_prepare(struct rpc_task *task, void *calldata)
-{
-	struct rpcbind_args *map = calldata;
-	struct rpc_xprt *xprt = map->r_xprt;
-	struct rpc_message msg = {
-		.rpc_proc	= rpcb_next_version[xprt->bind_index].rpc_proc,
-		.rpc_argp	= map,
-		.rpc_resp	= &map->r_port,
-	};
-
-	rpc_call_setup(task, &msg, 0);
-}
-
 static void rpcb_map_release(void *data)
 {
 	struct rpcbind_args *map = data;
@@ -150,7 +137,6 @@ static void rpcb_map_release(void *data)
 }
 
 static const struct rpc_call_ops rpcb_getport_ops = {
-	.rpc_call_prepare	= rpcb_getport_prepare,
 	.rpc_call_done		= rpcb_getport_done,
 	.rpc_release		= rpcb_map_release,
 };
@@ -295,6 +281,24 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
 }
 EXPORT_SYMBOL_GPL(rpcb_getport_sync);
 
+static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbind_args *map, int version)
+{
+	struct rpc_message msg = {
+		.rpc_proc = rpcb_next_version[version].rpc_proc,
+		.rpc_argp = map,
+		.rpc_resp = &map->r_port,
+	};
+	struct rpc_task_setup task_setup_data = {
+		.rpc_client = rpcb_clnt,
+		.rpc_message = &msg,
+		.callback_ops = &rpcb_getport_ops,
+		.callback_data = map,
+		.flags = RPC_TASK_ASYNC,
+	};
+
+	return rpc_run_task(&task_setup_data);
+}
+
 /**
  * rpcb_getport_async - obtain the port for a given RPC service on a given host
  * @task: task that is waiting for portmapper request
@@ -310,10 +314,6 @@ void rpcb_getport_async(struct rpc_task *task)
 	struct rpc_clnt	*rpcb_clnt;
 	static struct rpcbind_args *map;
 	struct rpc_task	*child;
-	struct rpc_task_setup task_setup_data = {
-		.callback_ops = &rpcb_getport_ops,
-		.flags = RPC_TASK_ASYNC,
-	};
 	struct sockaddr addr;
 	int status;
 	struct rpcb_info *info;
@@ -399,9 +399,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	       sizeof(map->r_addr));
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
-	task_setup_data.rpc_client = rpcb_clnt;
-	task_setup_data.callback_data = map;
-	child = rpc_run_task(&task_setup_data);
+	child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);
 	rpc_release_client(rpcb_clnt);
 	if (IS_ERR(child)) {
 		status = -EIO;

From b5627943ab6fabbc13a45d92683363a3d08a249f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:42:21 -0400
Subject: [PATCH 029/118] SUNRPC: Remove the now unused function
 rpc_call_setup()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  1 -
 net/sunrpc/clnt.c           | 18 ------------------
 2 files changed, 19 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index c79f2edf03237..4fc268ab6f3e6 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -126,7 +126,6 @@ int		rpcb_register(u32, u32, int, unsigned short, int *);
 int		rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
 void		rpcb_getport_async(struct rpc_task *);
 
-void		rpc_call_setup(struct rpc_task *, const struct rpc_message *, int);
 void		rpc_call_start(struct rpc_task *);
 int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
 			       int flags, const struct rpc_call_ops *tk_ops,
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6eb79c49c9378..e0a5e47a1d4b3 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -613,24 +613,6 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 }
 EXPORT_SYMBOL_GPL(rpc_call_async);
 
-void
-rpc_call_setup(struct rpc_task *task, const struct rpc_message *msg, int flags)
-{
-	task->tk_msg   = *msg;
-	task->tk_flags |= flags;
-	/* Bind the user cred */
-	if (task->tk_msg.rpc_cred != NULL)
-		rpcauth_holdcred(task);
-	else
-		rpcauth_bindcred(task);
-
-	if (task->tk_status == 0)
-		task->tk_action = call_start;
-	else
-		task->tk_action = rpc_exit_task;
-}
-EXPORT_SYMBOL_GPL(rpc_call_setup);
-
 void
 rpc_call_start(struct rpc_task *task)
 {

From e8f5d77c8029ff8f5dcd1dfc133aac0bbbffd92b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:42:53 -0400
Subject: [PATCH 030/118] SUNRPC: allow the caller of rpc_run_task to
 preallocate the struct rpc_task

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/sched.c           | 16 ++++++++++------
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index c9444fdc23ac6..60a05c71637e2 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -118,6 +118,7 @@ struct rpc_call_ops {
 };
 
 struct rpc_task_setup {
+	struct rpc_task *task;
 	struct rpc_clnt *rpc_client;
 	const struct rpc_message *rpc_message;
 	const struct rpc_call_ops *callback_ops;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index fa53a88b2c5bd..c03e7bf6e9bc6 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -885,16 +885,20 @@ static void rpc_free_task(struct rcu_head *rcu)
  */
 struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
 {
-	struct rpc_task	*task;
-
-	task = rpc_alloc_task();
-	if (!task)
-		goto out;
+	struct rpc_task	*task = setup_data->task;
+	unsigned short flags = 0;
+
+	if (task == NULL) {
+		task = rpc_alloc_task();
+		if (task == NULL)
+			goto out;
+		flags = RPC_TASK_DYNAMIC;
+	}
 
 	rpc_init_task(task, setup_data);
 
+	task->tk_flags |= flags;
 	dprintk("RPC:       allocated task %p\n", task);
-	task->tk_flags |= RPC_TASK_DYNAMIC;
 out:
 	return task;
 }

From 0773769191d943358a8392fa86abd756d004c4b6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:42:54 -0400
Subject: [PATCH 031/118] NFS/SUNRPC: Convert users of
 rpc_init_task+rpc_execute to rpc_run_task()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 28 ++++++++++++++++++++--------
 fs/nfs/read.c   | 17 +++++------------
 fs/nfs/write.c  | 24 ++++++++++--------------
 3 files changed, 35 insertions(+), 34 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 244d1bd7002c5..eadd87f7159f1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -272,6 +272,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
 	size_t rsize = NFS_SERVER(inode)->rsize;
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_cred = ctx->cred,
 	};
@@ -333,11 +334,13 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		msg.rpc_argp = &data->args;
 		msg.rpc_resp = &data->res;
 
+		task_setup_data.task = &data->task;
 		task_setup_data.callback_data = data;
 		NFS_PROTO(inode)->read_setup(data, &msg);
-		rpc_init_task(&data->task, &task_setup_data);
 
-		rpc_execute(&data->task);
+		task = rpc_run_task(&task_setup_data);
+		if (!IS_ERR(task))
+			rpc_put_task(task);
 
 		dprintk("NFS: %5u initiated direct read call "
 			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
@@ -440,6 +443,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct inode *inode = dreq->inode;
 	struct list_head *p;
 	struct nfs_write_data *data;
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_cred = dreq->ctx->cred,
 	};
@@ -471,16 +475,18 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 		 * Reuse data->task; data->args should not have changed
 		 * since the original request was sent.
 		 */
+		task_setup_data.task = &data->task;
 		task_setup_data.callback_data = data;
 		msg.rpc_argp = &data->args;
 		msg.rpc_resp = &data->res;
 		NFS_PROTO(inode)->write_setup(data, &msg);
-		rpc_init_task(&data->task, &task_setup_data);
 
 		/*
 		 * We're called via an RPC callback, so BKL is already held.
 		 */
-		rpc_execute(&data->task);
+		task = rpc_run_task(&task_setup_data);
+		if (!IS_ERR(task))
+			rpc_put_task(task);
 
 		dprintk("NFS: %5u rescheduled direct write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 				data->task.tk_pid,
@@ -523,12 +529,14 @@ static const struct rpc_call_ops nfs_commit_direct_ops = {
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 {
 	struct nfs_write_data *data = dreq->commit_data;
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = dreq->ctx->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
+		.task = &data->task,
 		.rpc_client = NFS_CLIENT(dreq->inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_direct_ops,
@@ -547,14 +555,15 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->res.verf = &data->verf;
 
 	NFS_PROTO(data->inode)->commit_setup(data, &msg);
-	rpc_init_task(&data->task, &task_setup_data);
 
 	/* Note: task.tk_ops->rpc_release will free dreq->commit_data */
 	dreq->commit_data = NULL;
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
-	rpc_execute(&data->task);
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task))
+		rpc_put_task(task);
 }
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
@@ -669,6 +678,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 	struct inode *inode = ctx->path.dentry->d_inode;
 	unsigned long user_addr = (unsigned long)iov->iov_base;
 	size_t count = iov->iov_len;
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_cred = ctx->cred,
 	};
@@ -732,13 +742,15 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->res.count = bytes;
 		data->res.verf = &data->verf;
 
+		task_setup_data.task = &data->task;
 		task_setup_data.callback_data = data;
 		msg.rpc_argp = &data->args;
 		msg.rpc_resp = &data->res;
 		NFS_PROTO(inode)->write_setup(data, &msg);
-		rpc_init_task(&data->task, &task_setup_data);
 
-		rpc_execute(&data->task);
+		task = rpc_run_task(&task_setup_data);
+		if (!IS_ERR(task))
+			rpc_put_task(task);
 
 		dprintk("NFS: %5u initiated direct write call "
 			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e9dbdc8eafe6b..efc121c494fe4 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -153,16 +153,6 @@ static void nfs_readpage_release(struct nfs_page *req)
 	nfs_release_request(req);
 }
 
-static void nfs_execute_read(struct nfs_read_data *data)
-{
-	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
-	sigset_t oldset;
-
-	rpc_clnt_sigmask(clnt, &oldset);
-	rpc_execute(&data->task);
-	rpc_clnt_sigunmask(clnt, &oldset);
-}
-
 /*
  * Set up the NFS read request struct
  */
@@ -172,12 +162,14 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 {
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = req->wb_context->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
+		.task = &data->task,
 		.rpc_client = NFS_CLIENT(inode),
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
@@ -203,7 +195,6 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 
 	/* Set up the initial task struct. */
 	NFS_PROTO(inode)->read_setup(data, &msg);
-	rpc_init_task(&data->task, &task_setup_data);
 
 	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 			data->task.tk_pid,
@@ -212,7 +203,9 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 			count,
 			(unsigned long long)data->args.offset);
 
-	nfs_execute_read(data);
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task))
+		rpc_put_task(task);
 }
 
 static void
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 9a69469274ae1..fbd64f2fa7f9d 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -764,16 +764,6 @@ static int flush_task_priority(int how)
 	return RPC_PRIORITY_NORMAL;
 }
 
-static void nfs_execute_write(struct nfs_write_data *data)
-{
-	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
-	sigset_t oldset;
-
-	rpc_clnt_sigmask(clnt, &oldset);
-	rpc_execute(&data->task);
-	rpc_clnt_sigunmask(clnt, &oldset);
-}
-
 /*
  * Set up the argument/result storage required for the RPC call.
  */
@@ -786,6 +776,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	struct inode *inode = req->wb_context->path.dentry->d_inode;
 	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
@@ -793,6 +784,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	};
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
+		.task = &data->task,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
@@ -827,7 +819,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 
 	/* Set up the initial task struct.  */
 	NFS_PROTO(inode)->write_setup(data, &msg);
-	rpc_init_task(&data->task, &task_setup_data);
 
 	dprintk("NFS: %5u initiated write call "
 		"(req %s/%Ld, %u bytes @ offset %Lu)\n",
@@ -837,7 +828,9 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		count,
 		(unsigned long long)data->args.offset);
 
-	nfs_execute_write(data);
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task))
+		rpc_put_task(task);
 }
 
 /*
@@ -1164,12 +1157,14 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	struct inode *inode = first->wb_context->path.dentry->d_inode;
 	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 	int priority = flush_task_priority(how);
+	struct rpc_task *task;
 	struct rpc_message msg = {
 		.rpc_argp = &data->args,
 		.rpc_resp = &data->res,
 		.rpc_cred = first->wb_context->cred,
 	};
 	struct rpc_task_setup task_setup_data = {
+		.task = &data->task,
 		.rpc_client = NFS_CLIENT(inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_ops,
@@ -1197,11 +1192,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 
 	/* Set up the initial task struct.  */
 	NFS_PROTO(inode)->commit_setup(data, &msg);
-	rpc_init_task(&data->task, &task_setup_data);
 
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
-	nfs_execute_write(data);
+	task = rpc_run_task(&task_setup_data);
+	if (!IS_ERR(task))
+		rpc_put_task(task);
 }
 
 /*

From 47fe064831a2a949f6c1e0086f61a105e99ea867 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 25 Oct 2007 18:42:55 -0400
Subject: [PATCH 032/118] SUNRPC: Unexport rpc_init_task() and rpc_execute()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 1 -
 net/sunrpc/sched.c           | 4 +---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 60a05c71637e2..23481a5e1f532 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -247,7 +247,6 @@ struct rpc_wait_queue {
  */
 struct rpc_task *rpc_new_task(const struct rpc_task_setup *);
 struct rpc_task *rpc_run_task(const struct rpc_task_setup *);
-void		rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *);
 void		rpc_put_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_release_calldata(const struct rpc_call_ops *, void *);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c03e7bf6e9bc6..ce6cfae91e883 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -737,7 +737,6 @@ void rpc_execute(struct rpc_task *task)
 	rpc_set_running(task);
 	__rpc_execute(task);
 }
-EXPORT_SYMBOL_GPL(rpc_execute);
 
 static void rpc_async_schedule(struct work_struct *work)
 {
@@ -815,7 +814,7 @@ EXPORT_SYMBOL_GPL(rpc_free);
 /*
  * Creation and deletion of RPC task structures
  */
-void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
+static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setup_data)
 {
 	memset(task, 0, sizeof(*task));
 	setup_timer(&task->tk_timer, (void (*)(unsigned long))rpc_run_timer,
@@ -865,7 +864,6 @@ void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *task_setu
 	dprintk("RPC:       new task initialized, procpid %u\n",
 				task_pid_nr(current));
 }
-EXPORT_SYMBOL_GPL(rpc_init_task);
 
 static struct rpc_task *
 rpc_alloc_task(void)

From a4a874990cbc1bc5df6f357c4f2d043cd1923e15 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 18 Jul 2007 13:24:19 -0400
Subject: [PATCH 033/118] SUNRPC: Cleanup to remove the last users of the
 RPC_WAITQ declaration

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/sched.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index ce6cfae91e883..40ce6f6672d6b 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -45,7 +45,7 @@ static void			 rpc_release_task(struct rpc_task *task);
 /*
  * RPC tasks sit here while waiting for conditions to improve.
  */
-static RPC_WAITQ(delay_queue, "delayq");
+static struct rpc_wait_queue delay_queue;
 
 /*
  * rpciod-related stuff
@@ -1059,6 +1059,11 @@ rpc_init_mempool(void)
 		goto err_nomem;
 	if (!rpciod_start())
 		goto err_nomem;
+	/*
+	 * The following is not strictly a mempool initialisation,
+	 * but there is no harm in doing it here
+	 */
+	rpc_init_wait_queue(&delay_queue, "delayq");
 	return 0;
 err_nomem:
 	rpc_destroy_mempool();

From c087567d3ffb2c7c61e091982e6ca45478394f1a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 18 Jul 2007 18:32:38 -0400
Subject: [PATCH 034/118] SUNRPC: Remove the obsolete RPC_WAITQ macro

Now that we've killed off all the users.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 23481a5e1f532..ce3d1b1327290 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -217,29 +217,6 @@ struct rpc_wait_queue {
  * performance of NFS operations such as read/write.
  */
 #define RPC_BATCH_COUNT			16
-
-#ifndef RPC_DEBUG
-# define RPC_WAITQ_INIT(var,qname) { \
-		.lock = __SPIN_LOCK_UNLOCKED(var.lock), \
-		.tasks = { \
-			[0] = LIST_HEAD_INIT(var.tasks[0]), \
-			[1] = LIST_HEAD_INIT(var.tasks[1]), \
-			[2] = LIST_HEAD_INIT(var.tasks[2]), \
-		}, \
-	}
-#else
-# define RPC_WAITQ_INIT(var,qname) { \
-		.lock = __SPIN_LOCK_UNLOCKED(var.lock), \
-		.tasks = { \
-			[0] = LIST_HEAD_INIT(var.tasks[0]), \
-			[1] = LIST_HEAD_INIT(var.tasks[1]), \
-			[2] = LIST_HEAD_INIT(var.tasks[2]), \
-		}, \
-		.name = qname, \
-	}
-#endif
-# define RPC_WAITQ(var,qname)      struct rpc_wait_queue var = RPC_WAITQ_INIT(var,qname)
-
 #define RPC_IS_PRIORITY(q)		((q)->maxpriority > 0)
 
 /*

From 2a428b2b8fe2c270a5889086ebe3ab914e3ea7d8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:30:43 -0400
Subject: [PATCH 035/118] SUNRPC: Prevent mixed sign comparisons in
 rpcrdma_convert_iovs()

Keep the type of the buffer position the same during iovec conversion to
reduce the likelihood of unexpected results from comparisons and length
computations.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Thomas Talpey <Thomas.Talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 1aa1580cda6d8..72c8eab30d9be 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -83,7 +83,7 @@ static const char transfertypes[][12] = {
  */
 
 static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos,
+rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
 	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs)
 {
 	int len, n = 0, p;
@@ -169,7 +169,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 	struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
 	struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt);
 	int nsegs, nchunks = 0;
-	int pos;
+	unsigned int pos;
 	struct rpcrdma_mr_seg *seg = req->rl_segments;
 	struct rpcrdma_read_chunk *cur_rchunk = NULL;
 	struct rpcrdma_write_array *warray = NULL;
@@ -213,7 +213,7 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
 					(__be32 *)&cur_rchunk->rc_target.rs_offset,
 					seg->mr_base);
 			dprintk("RPC:       %s: read chunk "
-				"elem %d@0x%llx:0x%x pos %d (%s)\n", __func__,
+				"elem %d@0x%llx:0x%x pos %u (%s)\n", __func__,
 				seg->mr_len, (unsigned long long)seg->mr_base,
 				seg->mr_rkey, pos, n < nsegs ? "more" : "last");
 			cur_rchunk++;

From d4b37ff73540ab90bee57b882a10b21e2f97939f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:30:49 -0400
Subject: [PATCH 036/118] SUNRPC: Fix an unnecessary implicit type cast in
 rpcrdma_count_chunks()

Nit: rl_nchunks is an unsigned integer, so pass it into
rpcrdma_count_chunks() via an unsigned integer argument.  This eliminates
a harmless mixed sign comparison in rpcrdma_count_chunks()

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Thomas Talpey <Thomas.Talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtrdma/rpc_rdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 72c8eab30d9be..e55427f73dfe4 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -552,7 +552,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
  * RDMA'd by server. See map at rpcrdma_create_chunks()! :-)
  */
 static int
-rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, __be32 **iptrp)
+rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __be32 **iptrp)
 {
 	unsigned int i, total_len;
 	struct rpcrdma_write_chunk *cur_wchunk;

From 5d40a8a525c8165bafed233cf0f137e8d10d7e92 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:30:54 -0400
Subject: [PATCH 037/118] SUNRPC: Check a return result

Minor: Replace an empty if statement with a debugging dprintk.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Thomas Talpey <Thomas.Talpey@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtrdma/verbs.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 44b0fb942e8db..ffbf22a1d2ca0 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -522,7 +522,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 				struct rpcrdma_create_data_internal *cdata)
 {
 	struct ib_device_attr devattr;
-	int rc;
+	int rc, err;
 
 	rc = ib_query_device(ia->ri_id->device, &devattr);
 	if (rc) {
@@ -648,8 +648,10 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
 	return 0;
 
 out2:
-	if (ib_destroy_cq(ep->rep_cq))
-		;
+	err = ib_destroy_cq(ep->rep_cq);
+	if (err)
+		dprintk("RPC:       %s: ib_destroy_cq returned %i\n",
+			__func__, err);
 out1:
 	return rc;
 }

From 322e2efe6224be5de2852a7fddfac5cf11317af3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:30:59 -0400
Subject: [PATCH 038/118] SUNRPC: temp var should match return type of
 xdr_skb_read_actor

The return type of xdr_skb_read_actor functions is size_t.  This fixes a
nit I unwittingly overlooked in commit dd456471.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/socklib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index 97ac45f034d6f..a661a3acb37e6 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -72,7 +72,7 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct
 	struct page	**ppage = xdr->pages;
 	unsigned int	len, pglen = xdr->page_len;
 	ssize_t		copied = 0;
-	int		ret;
+	size_t		ret;
 
 	len = xdr->head[0].iov_len;
 	if (base < len) {

From 9b45b74ce2234ca15131ec0725010c1da818df05 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:31:04 -0400
Subject: [PATCH 039/118] SUNRPC: Remove an unneeded implicit type cast when
 calling rpc_depopulate()

The two arguments of rpc_depopulate() that pass in inode numbers should use
the same type as inode->i_ino: unsigned long.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpc_pipe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 19b44e53e4210..7e197168a2451 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -523,8 +523,8 @@ rpc_get_inode(struct super_block *sb, int mode)
 /*
  * FIXME: This probably has races.
  */
-static void
-rpc_depopulate(struct dentry *parent, int start, int eof)
+static void rpc_depopulate(struct dentry *parent,
+			   unsigned long start, unsigned long eof)
 {
 	struct inode *dir = parent->d_inode;
 	struct list_head *pos, *next;

From 8a8c74bf94fcdec058062d331b3d9777910778ab Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:31:47 -0400
Subject: [PATCH 040/118] NFS: Ensure nfs_wcc_update_inode always converts file
 size to loff_t

The nfs_wcc_update_inode() function omits logic to convert the type of
the NFS on-the-wire value of a file's size (__u64) to the type of file
size value stored in struct inode (loff_t, which is signed).

Everywhere else in the NFS client I checked already correctly converts the
file size type.

This effects only very large files.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index db5d96dc6107d..cd0e57f3a00fe 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -814,8 +814,9 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			if (S_ISDIR(inode->i_mode))
 				nfsi->cache_validity |= NFS_INO_INVALID_DATA;
 		}
-		if (inode->i_size == fattr->pre_size && nfsi->npages == 0)
-			inode->i_size = fattr->size;
+		if (inode->i_size == nfs_size_to_loff_t(fattr->pre_size) &&
+		    nfsi->npages == 0)
+			inode->i_size = nfs_size_to_loff_t(fattr->size);
 	}
 }
 

From 6232dbbcffc617a5a47596b2ec347b24dc2dd2fd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:31:52 -0400
Subject: [PATCH 041/118] NFS: Use unsigned intermediates for manipulating
 header lengths (NFSv2 XDR)

Clean up: prevent length underflow and mixed sign comparisons when
unmarshalling NFS version 2 read, readdir, and readlink replies.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs2xdr.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 668ab96c7b596..1f7ea675e0c5e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -262,7 +262,9 @@ static int
 nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 {
 	struct kvec *iov = req->rq_rcv_buf.head;
-	int	status, count, recvd, hdrlen;
+	size_t hdrlen;
+	u32 count, recvd;
+	int status;
 
 	if ((status = ntohl(*p++)))
 		return -nfs_stat_to_errno(status);
@@ -273,7 +275,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
 		dprintk("NFS: READ reply header overflowed:"
-				"length %d > %Zu\n", hdrlen, iov->iov_len);
+				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
 		dprintk("NFS: READ header is short. iovec will be shifted.\n");
@@ -283,11 +285,11 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (count > recvd) {
 		dprintk("NFS: server cheating in read reply: "
-			"count %d > recvd %d\n", count, recvd);
+			"count %u > recvd %u\n", count, recvd);
 		count = recvd;
 	}
 
-	dprintk("RPC:      readres OK count %d\n", count);
+	dprintk("RPC:      readres OK count %u\n", count);
 	if (count < res->count)
 		res->count = count;
 
@@ -423,9 +425,10 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
 	struct page **page;
-	int hdrlen, recvd;
+	size_t hdrlen;
+	unsigned int pglen, recvd;
+	u32 len;
 	int status, nr;
-	unsigned int len, pglen;
 	__be32 *end, *entry, *kaddr;
 
 	if ((status = ntohl(*p++)))
@@ -434,7 +437,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
 		dprintk("NFS: READDIR reply header overflowed:"
-				"length %d > %Zu\n", hdrlen, iov->iov_len);
+				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
 		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
@@ -576,7 +579,8 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
-	int hdrlen, len, recvd;
+	size_t hdrlen;
+	u32 len, recvd;
 	char	*kaddr;
 	int	status;
 
@@ -584,14 +588,14 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
 		return -nfs_stat_to_errno(status);
 	/* Convert length of symlink */
 	len = ntohl(*p++);
-	if (len >= rcvbuf->page_len || len <= 0) {
+	if (len >= rcvbuf->page_len) {
 		dprintk("nfs: server returned giant symlink!\n");
 		return -ENAMETOOLONG;
 	}
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
 		dprintk("NFS: READLINK reply header overflowed:"
-				"length %d > %Zu\n", hdrlen, iov->iov_len);
+				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
 		dprintk("NFS: READLINK header is short. iovec will be shifted.\n");

From c957c526ef86e472359dadb4204dab8a503b687d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:31:57 -0400
Subject: [PATCH 042/118] NFS: Use unsigned intermediates for manipulating
 header lengths (NFSv3 XDR)

Clean up: prevent length underflow and mixed sign comparisons when
unmarshalling NFS version 3 read, readdir, and readlink replies.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3xdr.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 616d3267b7e7b..3917e2fa4e40f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -506,9 +506,9 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
 	struct page **page;
-	int hdrlen, recvd;
+	size_t hdrlen;
+	u32 len, recvd, pglen;
 	int status, nr;
-	unsigned int len, pglen;
 	__be32 *entry, *end, *kaddr;
 
 	status = ntohl(*p++);
@@ -527,7 +527,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
 		dprintk("NFS: READDIR reply header overflowed:"
-				"length %d > %Zu\n", hdrlen, iov->iov_len);
+				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
 		dprintk("NFS: READDIR header is short. iovec will be shifted.\n");
@@ -549,7 +549,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 		len = ntohl(*p++);		/* string length */
 		p += XDR_QUADLEN(len) + 2;	/* name + cookie */
 		if (len > NFS3_MAXNAMLEN) {
-			dprintk("NFS: giant filename in readdir (len %x)!\n",
+			dprintk("NFS: giant filename in readdir (len 0x%x)!\n",
 						len);
 			goto err_unmap;
 		}
@@ -570,7 +570,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 				len = ntohl(*p++);
 				if (len > NFS3_FHSIZE) {
 					dprintk("NFS: giant filehandle in "
-						"readdir (len %x)!\n", len);
+						"readdir (len 0x%x)!\n", len);
 					goto err_unmap;
 				}
 				p += XDR_QUADLEN(len);
@@ -815,7 +815,8 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
-	int hdrlen, len, recvd;
+	size_t hdrlen;
+	u32 len, recvd;
 	char	*kaddr;
 	int	status;
 
@@ -827,7 +828,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 
 	/* Convert length of symlink */
 	len = ntohl(*p++);
-	if (len >= rcvbuf->page_len || len <= 0) {
+	if (len >= rcvbuf->page_len) {
 		dprintk("nfs: server returned giant symlink!\n");
 		return -ENAMETOOLONG;
 	}
@@ -835,7 +836,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
 		dprintk("NFS: READLINK reply header overflowed:"
-				"length %d > %Zu\n", hdrlen, iov->iov_len);
+				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
 		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
 		dprintk("NFS: READLINK header is short. "
@@ -863,7 +864,9 @@ static int
 nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 {
 	struct kvec *iov = req->rq_rcv_buf.head;
-	int	status, count, ocount, recvd, hdrlen;
+	size_t hdrlen;
+	u32 count, ocount, recvd;
+	int status;
 
 	status = ntohl(*p++);
 	p = xdr_decode_post_op_attr(p, res->fattr);
@@ -871,7 +874,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	if (status != 0)
 		return -nfs_stat_to_errno(status);
 
-	/* Decode reply could and EOF flag. NFSv3 is somewhat redundant
+	/* Decode reply count and EOF flag. NFSv3 is somewhat redundant
 	 * in that it puts the count both in the res struct and in the
 	 * opaque data count. */
 	count    = ntohl(*p++);
@@ -886,7 +889,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
 		dprintk("NFS: READ reply header overflowed:"
-				"length %d > %Zu\n", hdrlen, iov->iov_len);
+				"length %Zu > %Zu\n", hdrlen, iov->iov_len);
        		return -errno_NFSERR_IO;
 	} else if (iov->iov_len != hdrlen) {
 		dprintk("NFS: READ header is short. iovec will be shifted.\n");
@@ -896,7 +899,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	recvd = req->rq_rcv_buf.len - hdrlen;
 	if (count > recvd) {
 		dprintk("NFS: server cheating in read reply: "
-			"count %d > recvd %d\n", count, recvd);
+			"count %u > recvd %u\n", count, recvd);
 		count = recvd;
 		res->eof = 0;
 	}

From bcecff77a9c743ff67fdddeabc30ef76a6877886 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:03 -0400
Subject: [PATCH 043/118] NFS: Use unsigned intermediates for manipulating
 header lengths (NFSv4 XDR)

Clean up: prevent length underflow and mixed sign comparison when
unmarshalling NFS version 4 getacl, readdir, and readlink replies.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 51dd3804866f1..2e1fe171bf73e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3476,10 +3476,11 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	struct xdr_buf	*rcvbuf = &req->rq_rcv_buf;
 	struct page	*page = *rcvbuf->pages;
 	struct kvec	*iov = rcvbuf->head;
-	unsigned int	nr, pglen = rcvbuf->page_len;
+	size_t		hdrlen;
+	u32		recvd, pglen = rcvbuf->page_len;
 	__be32		*end, *entry, *p, *kaddr;
-	uint32_t	len, attrlen, xlen;
-	int 		hdrlen, recvd, status;
+	unsigned int	nr;
+	int		status;
 
 	status = decode_op_hdr(xdr, OP_READDIR);
 	if (status)
@@ -3503,6 +3504,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	end = p + ((pglen + readdir->pgbase) >> 2);
 	entry = p;
 	for (nr = 0; *p++; nr++) {
+		u32 len, attrlen, xlen;
 		if (end - p < 3)
 			goto short_pkt;
 		dprintk("cookie = %Lu, ", *((unsigned long long *)p));
@@ -3551,7 +3553,8 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
 {
 	struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
 	struct kvec *iov = rcvbuf->head;
-	int hdrlen, len, recvd;
+	size_t hdrlen;
+	u32 len, recvd;
 	__be32 *p;
 	char *kaddr;
 	int status;
@@ -3646,7 +3649,8 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 	if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
 		return -EIO;
 	if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
-		int hdrlen, recvd;
+		size_t hdrlen;
+		u32 recvd;
 
 		/* We ignore &savep and don't do consistency checks on
 		 * the attr length.  Let userspace figure it out.... */

From 464ad6b1ade186b53a1dae863361853326b85694 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:08 -0400
Subject: [PATCH 044/118] NFS: Change sign of some loop indices in nfs4xdr.c

Nit: Eliminate some mixed sign comparisons in loop indices.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2e1fe171bf73e..eae46f008da74 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2515,14 +2515,12 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
 
 static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
 {
-	int n;
+	u32 n;
 	__be32 *p;
 	int status = 0;
 
 	READ_BUF(4);
 	READ32(n);
-	if (n < 0)
-		goto out_eio;
 	if (n == 0)
 		goto root_path;
 	dprintk("path ");
@@ -2579,13 +2577,11 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
 		goto out_eio;
 	res->nlocations = 0;
 	while (res->nlocations < n) {
-		int m;
+		u32 m;
 		struct nfs4_fs_location *loc = &res->locations[res->nlocations];
 
 		READ_BUF(4);
 		READ32(m);
-		if (m <= 0)
-			goto out_eio;
 
 		loc->nservers = 0;
 		dprintk("%s: servers ", __FUNCTION__);
@@ -2598,8 +2594,12 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
 			if (loc->nservers < NFS4_FS_LOCATION_MAXSERVERS)
 				loc->nservers++;
 			else {
-				int i;
-				dprintk("%s: using first %d of %d servers returned for location %d\n", __FUNCTION__, NFS4_FS_LOCATION_MAXSERVERS, m, res->nlocations);
+				unsigned int i;
+				dprintk("%s: using first %u of %u servers "
+					"returned for location %u\n",
+						__FUNCTION__,
+						NFS4_FS_LOCATION_MAXSERVERS,
+						m, res->nlocations);
 				for (i = loc->nservers; i < m; i++) {
 					unsigned int len;
 					char *data;

From 28c494c5c8d425e15b7b82571e4df6d6bc34594d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:13 -0400
Subject: [PATCH 045/118] NFS: Prevent nfs_getattr() hang during heavy write
 workloads

POSIX requires that ctime and mtime, as reported by the stat(2) call,
reflect the activity of the most recent write(2).  To that end, nfs_getattr()
flushes pending dirty writes to a file before doing a GETATTR to allow the
NFS server to set the file's size, ctime, and mtime properly.

However, nfs_getattr() can be starved when a constant stream of application
writes to a file prevents nfs_wb_nocommit() from completing.  This usually
results in hangs of programs doing a stat against an NFS file that is being
written.  "ls -l" is a common victim of this behavior.

To prevent starvation, hold the file's i_mutex in nfs_getattr() to
freeze applications writes temporarily so the client can more quickly obtain
clean values for a file's size, mtime, and ctime.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index cd0e57f3a00fe..cc3a09db41a91 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,9 +461,18 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
-	/* Flush out writes to the server in order to update c/mtime */
-	if (S_ISREG(inode->i_mode))
+	/*
+	 * Flush out writes to the server in order to update c/mtime.
+	 *
+	 * Hold the i_mutex to suspend application writes temporarily;
+	 * this prevents long-running writing applications from blocking
+	 * nfs_wb_nocommit.
+	 */
+	if (S_ISREG(inode->i_mode)) {
+		mutex_lock(&inode->i_mutex);
 		nfs_wb_nocommit(inode);
+		mutex_unlock(&inode->i_mutex);
+	}
 
 	/*
 	 * We may force a getattr if the user cares about atime.

From 0eb2574121ef0ffbebe5335c66c227d1b987fa25 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:19 -0400
Subject: [PATCH 046/118] NFS: Ensure that NFS version 4 mounts use NFS_PORT if
 nfsport wasn't set

Text-based mount option parsing introduced a minor regression in the
behavior of NFS version 4 mounts.  NFS version 4 is not supposed to require
a running rpcbind service on the server in order for a mount to succeed.

In other words, if the mount options don't specify a port number, the port
number is supposed to default to 2049.  For earlier versions of NFS, the
default port number was zero in order to cause the RPC client to autobind
to the server's NFS service.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fda1635dd1336..7d84d94fa827f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1643,6 +1643,8 @@ static int nfs4_validate_mount_data(void *options,
 		if (nfs_parse_mount_options((char *)options, args) == 0)
 			return -EINVAL;
 
+		if (args->nfs_server.address.sin_port == 0)
+			args->nfs_server.address.sin_port = htons(NFS_PORT);
 		if (!nfs_verify_server_address((struct sockaddr *)
 						&args->nfs_server.address))
 			return -EINVAL;

From ad879cef8554e20f9b5ca356c878712eb671228c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:24 -0400
Subject: [PATCH 047/118] NFS: Remove support for the 'nfsprog' option

Remove the mount option that allows users to specify an alternate NFS
program number.  The client hasn't support setting an alternate NFS
program number for a very long time.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h |  1 -
 fs/nfs/super.c    | 14 +-------------
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 75793794aefe4..a78a09b40d1b1 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -43,7 +43,6 @@ struct nfs_parsed_mount_data {
 	struct {
 		struct sockaddr_in	address;
 		char			*hostname;
-		unsigned int		program;
 		unsigned int		version;
 		unsigned short		port;
 		int			protocol;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 7d84d94fa827f..1a18ca390ddfe 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -84,7 +84,7 @@ enum {
 	Opt_namelen,
 	Opt_mountport,
 	Opt_mountprog, Opt_mountvers,
-	Opt_nfsprog, Opt_nfsvers,
+	Opt_nfsvers,
 
 	/* Mount options that take string arguments */
 	Opt_sec, Opt_proto, Opt_mountproto,
@@ -139,7 +139,6 @@ static match_table_t nfs_mount_option_tokens = {
 	{ Opt_mountport, "mountport=%u" },
 	{ Opt_mountprog, "mountprog=%u" },
 	{ Opt_mountvers, "mountvers=%u" },
-	{ Opt_nfsprog, "nfsprog=%u" },
 	{ Opt_nfsvers, "nfsvers=%u" },
 	{ Opt_nfsvers, "vers=%u" },
 
@@ -801,13 +800,6 @@ static int nfs_parse_mount_options(char *raw,
 				return 0;
 			mnt->mount_server.version = option;
 			break;
-		case Opt_nfsprog:
-			if (match_int(args, &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			mnt->nfs_server.program = option;
-			break;
 		case Opt_nfsvers:
 			if (match_int(args, &option))
 				return 0;
@@ -1067,9 +1059,6 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
  *
  * + breaking back: trying proto=udp after proto=tcp, v2 after v3,
  *   mountproto=tcp after mountproto=udp, and so on
- *
- * XXX: as far as I can tell, changing the NFS program number is not
- *      supported in the NFS client.
  */
 static int nfs_validate_mount_data(void *options,
 				   struct nfs_parsed_mount_data *args,
@@ -1095,7 +1084,6 @@ static int nfs_validate_mount_data(void *options,
 	args->mount_server.protocol = XPRT_TRANSPORT_UDP;
 	args->mount_server.program = NFS_MNT_PROGRAM;
 	args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-	args->nfs_server.program = NFS_PROGRAM;
 
 	switch (data->version) {
 	case 1:

From e887cbcf911b2d16742832b38411559273ce5d77 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:29 -0400
Subject: [PATCH 048/118] NFS: Remove support for the 'mountprog' option

Remove the mount option that allows users to specify an alternate mountd
program number.  The client hasn't support setting an alternate mountd
program number for a very long time.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h |  1 -
 fs/nfs/super.c    | 11 +----------
 2 files changed, 1 insertion(+), 11 deletions(-)

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a78a09b40d1b1..058d503a0ee13 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -52,7 +52,6 @@ struct nfs_parsed_mount_data {
 		struct sockaddr_in	address;
 		char			*hostname;
 		char			*export_path;
-		unsigned int		program;
 		int			protocol;
 	} nfs_server;
 };
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 1a18ca390ddfe..330c3922739f9 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -83,7 +83,7 @@ enum {
 	Opt_actimeo,
 	Opt_namelen,
 	Opt_mountport,
-	Opt_mountprog, Opt_mountvers,
+	Opt_mountvers,
 	Opt_nfsvers,
 
 	/* Mount options that take string arguments */
@@ -137,7 +137,6 @@ static match_table_t nfs_mount_option_tokens = {
 	{ Opt_userspace, "retry=%u" },
 	{ Opt_namelen, "namlen=%u" },
 	{ Opt_mountport, "mountport=%u" },
-	{ Opt_mountprog, "mountprog=%u" },
 	{ Opt_mountvers, "mountvers=%u" },
 	{ Opt_nfsvers, "nfsvers=%u" },
 	{ Opt_nfsvers, "vers=%u" },
@@ -786,13 +785,6 @@ static int nfs_parse_mount_options(char *raw,
 				return 0;
 			mnt->mount_server.port = option;
 			break;
-		case Opt_mountprog:
-			if (match_int(args, &option))
-				return 0;
-			if (option < 0)
-				return 0;
-			mnt->mount_server.program = option;
-			break;
 		case Opt_mountvers:
 			if (match_int(args, &option))
 				return 0;
@@ -1082,7 +1074,6 @@ static int nfs_validate_mount_data(void *options,
 	args->acdirmin		= 30;
 	args->acdirmax		= 60;
 	args->mount_server.protocol = XPRT_TRANSPORT_UDP;
-	args->mount_server.program = NFS_MNT_PROGRAM;
 	args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 
 	switch (data->version) {

From 6a0ed1de8ecee0cde21ea667891a03f6c84ecd66 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:40 -0400
Subject: [PATCH 049/118] NFS: Clean up: copy hostname with kstrndup during
 mount processing

Clean up: mount option parsing uses kstrndup in several places, rather than
using kzalloc.  Replace the few remaining uses of kzalloc with kstrndup,
for consistency.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 330c3922739f9..a3492d6f8f9b3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1648,21 +1648,16 @@ static int nfs4_validate_mount_data(void *options,
 		len = c - dev_name;
 		if (len > NFS4_MAXNAMLEN)
 			return -ENAMETOOLONG;
-		args->nfs_server.hostname = kzalloc(len, GFP_KERNEL);
-		if (args->nfs_server.hostname == NULL)
-			return -ENOMEM;
-		strncpy(args->nfs_server.hostname, dev_name, len - 1);
+		/* N.B. caller will free nfs_server.hostname in all cases */
+		args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
 
 		c++;			/* step over the ':' */
 		len = strlen(c);
 		if (len > NFS4_MAXPATHLEN)
 			return -ENAMETOOLONG;
-		args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL);
-		if (args->nfs_server.export_path == NULL)
-			return -ENOMEM;
-		strncpy(args->nfs_server.export_path, c, len);
+		args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
 
-		dprintk("MNTPATH: %s\n", args->nfs_server.export_path);
+		dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
 
 		if (args->client_address == NULL)
 			goto out_no_client_address;

From d45273ed6f4613e81701c3e896d9db200c288fff Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:32:45 -0400
Subject: [PATCH 050/118] NFS: Clean up address comparison in
 __nfs_find_client()

The address comparison in the __nfs_find_client() function is deceptive.
It uses a memcmp() to check a pair of u32 fields for equality.  Not only is
this inefficient, but usually memcmp() is used for comparing two *whole*
sockaddr_in's (which includes comparisons of the address family and port
number), so it's easy to mistake the comparison here for a whole sockaddr
comparison, which it isn't.

So for clarity and efficiency, we replace the memcmp() with a simple test
for equality between the two s_addr fields.  This should have no
behavioral effect.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c3740f5ab9783..8b5f9b9685ddf 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -220,8 +220,7 @@ static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int
 		if (clp->cl_nfsversion != nfsversion)
 			continue;
 
-		if (memcmp(&clp->cl_addr.sin_addr, &addr->sin_addr,
-			   sizeof(clp->cl_addr.sin_addr)) != 0)
+		if (clp->cl_addr.sin_addr.s_addr != addr->sin_addr.s_addr)
 			continue;
 
 		if (!match_port || clp->cl_addr.sin_port == addr->sin_port)

From 5cce428d953cc3843b100e078dbc3c01c6411b85 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 26 Oct 2007 13:33:01 -0400
Subject: [PATCH 051/118] NFS: Remove an unneeded check in
 decode_compound_header_arg()

Clean up:  The header tag length is unsigned, so checking that it is less
than zero is unnecessary.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback_xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 058ade7efe794..97abd829e4327 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -139,7 +139,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
 	if (unlikely(status != 0))
 		return status;
 	/* We do not like overly long tags! */
-	if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
+	if (hdr->taglen > CB_OP_TAGLEN_MAXSZ - 12) {
 		printk("NFSv4 CALLBACK %s: client sent tag of length %u\n",
 				__FUNCTION__, hdr->taglen);
 		return htonl(NFS4ERR_RESOURCE);

From bfc69a456642a51c89dfd8e5184468857cb44f32 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 15 Oct 2007 18:18:29 -0400
Subject: [PATCH 052/118] NFS: define a function to update
 nfsi->cache_change_attribute

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c           | 15 +++++++++++++++
 fs/nfs/inode.c         |  6 ++++--
 fs/nfs/nfs4proc.c      |  2 +-
 include/linux/nfs_fs.h |  1 +
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 32c666c612a19..72d141a0dbd80 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -638,6 +638,21 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync)
 	return 0;
 }
 
+/**
+ * nfs_force_lookup_revalidate - Mark the directory as having changed
+ * @dir - pointer to directory inode
+ *
+ * This forces the revalidation code in nfs_lookup_revalidate() to do a
+ * full lookup on all child dentries of 'dir' whenever a change occurs
+ * on the server that might have invalidated our dcache.
+ *
+ * The caller should be holding dir->i_lock
+ */
+void nfs_force_lookup_revalidate(struct inode *dir)
+{
+	NFS_I(dir)->cache_change_attribute = jiffies;
+}
+
 /*
  * A check for whether or not the parent directory has changed.
  * In the case it has, we assume that the dentries are untrustworthy
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index cc3a09db41a91..5747d49bdd76e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1029,7 +1029,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 			dprintk("NFS: mtime change on server for file %s/%ld\n",
 					inode->i_sb->s_id, inode->i_ino);
 			invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
-			nfsi->cache_change_attribute = now;
+			if (S_ISDIR(inode->i_mode))
+				nfs_force_lookup_revalidate(inode);
 		}
 		/* If ctime has changed we should definitely clear access+acl caches */
 		if (!timespec_equal(&inode->i_ctime, &fattr->ctime))
@@ -1038,7 +1039,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		dprintk("NFS: change_attr change on server for file %s/%ld\n",
 				inode->i_sb->s_id, inode->i_ino);
 		invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
-		nfsi->cache_change_attribute = now;
+		if (S_ISDIR(inode->i_mode))
+			nfs_force_lookup_revalidate(inode);
 	}
 
 	/* Check if our cached file size is stale */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 826b445b8c701..26192a703129e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -210,7 +210,7 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 	spin_lock(&dir->i_lock);
 	nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA;
 	if (!cinfo->atomic || cinfo->before != nfsi->change_attr)
-		nfsi->cache_change_attribute = jiffies;
+		nfs_force_lookup_revalidate(dir);
 	nfsi->change_attr = cinfo->after;
 	spin_unlock(&dir->i_lock);
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2d15d4aac0940..7095aaa087d81 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -366,6 +366,7 @@ extern const struct inode_operations nfs3_dir_inode_operations;
 extern const struct file_operations nfs_dir_operations;
 extern struct dentry_operations nfs_dentry_operations;
 
+extern void nfs_force_lookup_revalidate(struct inode *dir);
 extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr);
 extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags);
 extern void nfs_access_zap_cache(struct inode *inode);

From 3a498026eef9603c14037e73a4a94cfdb2fa44eb Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 14 Dec 2007 14:56:04 -0500
Subject: [PATCH 053/118] NFS: Clean up the nfs_client initialisation

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 51 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 8b5f9b9685ddf..d7f6d50442b7c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -93,22 +93,26 @@ struct rpc_program		nfsacl_program = {
 };
 #endif  /* CONFIG_NFS_V3_ACL */
 
+struct nfs_client_initdata {
+	const char *hostname;
+	const struct sockaddr_in *addr;
+	int version;
+};
+
 /*
  * Allocate a shared client record
  *
  * Since these are allocated/deallocated very rarely, we don't
  * bother putting them in a slab cache...
  */
-static struct nfs_client *nfs_alloc_client(const char *hostname,
-					   const struct sockaddr_in *addr,
-					   int nfsversion)
+static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 {
 	struct nfs_client *clp;
 
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
 
-	if (nfsversion == 4) {
+	if (cl_init->version == 4) {
 		if (nfs_callback_up() < 0)
 			goto error_2;
 		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
@@ -117,11 +121,11 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 	atomic_set(&clp->cl_count, 1);
 	clp->cl_cons_state = NFS_CS_INITING;
 
-	clp->cl_nfsversion = nfsversion;
-	memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr));
+	clp->cl_nfsversion = cl_init->version;
+	memcpy(&clp->cl_addr, cl_init->addr, sizeof(clp->cl_addr));
 
-	if (hostname) {
-		clp->cl_hostname = kstrdup(hostname, GFP_KERNEL);
+	if (cl_init->hostname) {
+		clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
 		if (!clp->cl_hostname)
 			goto error_3;
 	}
@@ -256,22 +260,20 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
  * Look up a client by IP address and protocol version
  * - creates a new record if one doesn't yet exist
  */
-static struct nfs_client *nfs_get_client(const char *hostname,
-					 const struct sockaddr_in *addr,
-					 int nfsversion)
+static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
 {
 	struct nfs_client *clp, *new = NULL;
 	int error;
 
 	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
-		hostname ?: "", NIPQUAD(addr->sin_addr),
-		addr->sin_port, nfsversion);
+		cl_init->hostname ?: "", NIPQUAD(cl_init->addr->sin_addr),
+		cl_init->addr->sin_port, cl_init->version);
 
 	/* see if the client already exists */
 	do {
 		spin_lock(&nfs_client_lock);
 
-		clp = __nfs_find_client(addr, nfsversion, 1);
+		clp = __nfs_find_client(cl_init->addr, cl_init->version, 1);
 		if (clp)
 			goto found_client;
 		if (new)
@@ -279,7 +281,7 @@ static struct nfs_client *nfs_get_client(const char *hostname,
 
 		spin_unlock(&nfs_client_lock);
 
-		new = nfs_alloc_client(hostname, addr, nfsversion);
+		new = nfs_alloc_client(cl_init);
 	} while (new);
 
 	return ERR_PTR(-ENOMEM);
@@ -540,19 +542,23 @@ static int nfs_init_client(struct nfs_client *clp,
 static int nfs_init_server(struct nfs_server *server,
 			   const struct nfs_parsed_mount_data *data)
 {
+	struct nfs_client_initdata cl_init = {
+		.hostname = data->nfs_server.hostname,
+		.addr = &data->nfs_server.address,
+		.version = 2,
+	};
 	struct nfs_client *clp;
-	int error, nfsvers = 2;
+	int error;
 
 	dprintk("--> nfs_init_server()\n");
 
 #ifdef CONFIG_NFS_V3
 	if (data->flags & NFS_MOUNT_VER3)
-		nfsvers = 3;
+		cl_init.version = 3;
 #endif
 
 	/* Allocate or find a client reference we can use */
-	clp = nfs_get_client(data->nfs_server.hostname,
-				&data->nfs_server.address, nfsvers);
+	clp = nfs_get_client(&cl_init);
 	if (IS_ERR(clp)) {
 		dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
 		return PTR_ERR(clp);
@@ -889,13 +895,18 @@ static int nfs4_set_client(struct nfs_server *server,
 		rpc_authflavor_t authflavour,
 		int proto, int timeo, int retrans)
 {
+	struct nfs_client_initdata cl_init = {
+		.hostname = hostname,
+		.addr = addr,
+		.version = 4,
+	};
 	struct nfs_client *clp;
 	int error;
 
 	dprintk("--> nfs4_set_client()\n");
 
 	/* Allocate or find a client reference we can use */
-	clp = nfs_get_client(hostname, addr, 4);
+	clp = nfs_get_client(&cl_init);
 	if (IS_ERR(clp)) {
 		error = PTR_ERR(clp);
 		goto error;

From c81468a1a766921f11ae44e8a99816ac8dc7b015 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 14 Dec 2007 14:56:05 -0500
Subject: [PATCH 054/118] NFS: Clean up the nfs_find_client function.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 52 ++++++++++++++++++++++++++++---------------------
 1 file changed, 30 insertions(+), 22 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d7f6d50442b7c..ff778ecee0bdc 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -208,52 +208,60 @@ void nfs_put_client(struct nfs_client *clp)
 }
 
 /*
- * Find a client by address
- * - caller must hold nfs_client_lock
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
  */
-static struct nfs_client *__nfs_find_client(const struct sockaddr_in *addr, int nfsversion, int match_port)
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
 {
 	struct nfs_client *clp;
 
+	spin_lock(&nfs_client_lock);
 	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
 		/* Don't match clients that failed to initialise properly */
-		if (clp->cl_cons_state < 0)
+		if (clp->cl_cons_state != NFS_CS_READY)
 			continue;
 
 		/* Different NFS versions cannot share the same nfs_client */
 		if (clp->cl_nfsversion != nfsversion)
 			continue;
 
+		/* Match only the IP address, not the port number */
 		if (clp->cl_addr.sin_addr.s_addr != addr->sin_addr.s_addr)
 			continue;
 
-		if (!match_port || clp->cl_addr.sin_port == addr->sin_port)
-			goto found;
+		atomic_inc(&clp->cl_count);
+		spin_unlock(&nfs_client_lock);
+		return clp;
 	}
-
+	spin_unlock(&nfs_client_lock);
 	return NULL;
-
-found:
-	atomic_inc(&clp->cl_count);
-	return clp;
 }
 
 /*
- * Find a client by IP address and protocol version
- * - returns NULL if no such client
+ * Find an nfs_client on the list that matches the initialisation data
+ * that is supplied.
  */
-struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *data)
 {
 	struct nfs_client *clp;
 
-	spin_lock(&nfs_client_lock);
-	clp = __nfs_find_client(addr, nfsversion, 0);
-	spin_unlock(&nfs_client_lock);
-	if (clp != NULL && clp->cl_cons_state != NFS_CS_READY) {
-		nfs_put_client(clp);
-		clp = NULL;
+	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		/* Don't match clients that failed to initialise properly */
+		if (clp->cl_cons_state < 0)
+			continue;
+
+		/* Different NFS versions cannot share the same nfs_client */
+		if (clp->cl_nfsversion != data->version)
+			continue;
+
+		/* Match the full socket address */
+		if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
+			continue;
+
+		atomic_inc(&clp->cl_count);
+		return clp;
 	}
-	return clp;
+	return NULL;
 }
 
 /*
@@ -273,7 +281,7 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in
 	do {
 		spin_lock(&nfs_client_lock);
 
-		clp = __nfs_find_client(cl_init->addr, cl_init->version, 1);
+		clp = nfs_match_client(cl_init);
 		if (clp)
 			goto found_client;
 		if (new)

From 40c553193df41920de659f0446e5d214c862e827 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 14 Dec 2007 14:56:07 -0500
Subject: [PATCH 055/118] NFS: Remove the redundant nfs_client->cl_nfsversion

We can get the same information from the rpc_ops structure instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 41 +++++++++++++++++----------------------
 fs/nfs/namespace.c        |  2 +-
 fs/nfs/super.c            |  2 +-
 include/linux/nfs_fs_sb.h |  1 -
 4 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ff778ecee0bdc..3b21731ae571b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -96,7 +96,7 @@ struct rpc_program		nfsacl_program = {
 struct nfs_client_initdata {
 	const char *hostname;
 	const struct sockaddr_in *addr;
-	int version;
+	const struct nfs_rpc_ops *rpc_ops;
 };
 
 /*
@@ -112,7 +112,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
 
-	if (cl_init->version == 4) {
+	clp->rpc_ops = cl_init->rpc_ops;
+
+	if (cl_init->rpc_ops->version == 4) {
 		if (nfs_callback_up() < 0)
 			goto error_2;
 		__set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
@@ -121,7 +123,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	atomic_set(&clp->cl_count, 1);
 	clp->cl_cons_state = NFS_CS_INITING;
 
-	clp->cl_nfsversion = cl_init->version;
 	memcpy(&clp->cl_addr, cl_init->addr, sizeof(clp->cl_addr));
 
 	if (cl_init->hostname) {
@@ -170,7 +171,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
  */
 static void nfs_free_client(struct nfs_client *clp)
 {
-	dprintk("--> nfs_free_client(%d)\n", clp->cl_nfsversion);
+	dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
 
 	nfs4_shutdown_client(clp);
 
@@ -222,7 +223,7 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
 			continue;
 
 		/* Different NFS versions cannot share the same nfs_client */
-		if (clp->cl_nfsversion != nfsversion)
+		if (clp->rpc_ops->version != nfsversion)
 			continue;
 
 		/* Match only the IP address, not the port number */
@@ -251,7 +252,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 			continue;
 
 		/* Different NFS versions cannot share the same nfs_client */
-		if (clp->cl_nfsversion != data->version)
+		if (clp->rpc_ops != data->rpc_ops)
 			continue;
 
 		/* Match the full socket address */
@@ -273,9 +274,9 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in
 	struct nfs_client *clp, *new = NULL;
 	int error;
 
-	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%d)\n",
+	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%u)\n",
 		cl_init->hostname ?: "", NIPQUAD(cl_init->addr->sin_addr),
-		cl_init->addr->sin_port, cl_init->version);
+		cl_init->addr->sin_port, cl_init->rpc_ops->version);
 
 	/* see if the client already exists */
 	do {
@@ -430,7 +431,7 @@ static int nfs_start_lockd(struct nfs_server *server)
 {
 	int error = 0;
 
-	if (server->nfs_client->cl_nfsversion > 3)
+	if (server->nfs_client->rpc_ops->version > 3)
 		goto out;
 	if (server->flags & NFS_MOUNT_NONLM)
 		goto out;
@@ -450,7 +451,7 @@ static int nfs_start_lockd(struct nfs_server *server)
 #ifdef CONFIG_NFS_V3_ACL
 static void nfs_init_server_aclclient(struct nfs_server *server)
 {
-	if (server->nfs_client->cl_nfsversion != 3)
+	if (server->nfs_client->rpc_ops->version != 3)
 		goto out_noacl;
 	if (server->flags & NFS_MOUNT_NOACL)
 		goto out_noacl;
@@ -521,12 +522,6 @@ static int nfs_init_client(struct nfs_client *clp,
 		return 0;
 	}
 
-	/* Check NFS protocol revision and initialize RPC op vector */
-	clp->rpc_ops = &nfs_v2_clientops;
-#ifdef CONFIG_NFS_V3
-	if (clp->cl_nfsversion == 3)
-		clp->rpc_ops = &nfs_v3_clientops;
-#endif
 	/*
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
@@ -553,7 +548,7 @@ static int nfs_init_server(struct nfs_server *server,
 	struct nfs_client_initdata cl_init = {
 		.hostname = data->nfs_server.hostname,
 		.addr = &data->nfs_server.address,
-		.version = 2,
+		.rpc_ops = &nfs_v2_clientops,
 	};
 	struct nfs_client *clp;
 	int error;
@@ -562,7 +557,7 @@ static int nfs_init_server(struct nfs_server *server,
 
 #ifdef CONFIG_NFS_V3
 	if (data->flags & NFS_MOUNT_VER3)
-		cl_init.version = 3;
+		cl_init.rpc_ops = &nfs_v3_clientops;
 #endif
 
 	/* Allocate or find a client reference we can use */
@@ -906,7 +901,7 @@ static int nfs4_set_client(struct nfs_server *server,
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
 		.addr = addr,
-		.version = 4,
+		.rpc_ops = &nfs_v4_clientops,
 	};
 	struct nfs_client *clp;
 	int error;
@@ -1284,8 +1279,8 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
 	/* display one transport per line on subsequent lines */
 	clp = list_entry(v, struct nfs_client, cl_share_link);
 
-	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %3d %s\n",
-		   clp->cl_nfsversion,
+	seq_printf(m, "v%u %02x%02x%02x%02x %4hx %3d %s\n",
+		   clp->rpc_ops->version,
 		   NIPQUAD(clp->cl_addr.sin_addr),
 		   ntohs(clp->cl_addr.sin_port),
 		   atomic_read(&clp->cl_count),
@@ -1363,8 +1358,8 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 		 (unsigned long long) server->fsid.major,
 		 (unsigned long long) server->fsid.minor);
 
-	seq_printf(m, "v%d %02x%02x%02x%02x %4hx %-7s %-17s\n",
-		   clp->cl_nfsversion,
+	seq_printf(m, "v%u %02x%02x%02x%02x %4hx %-7s %-17s\n",
+		   clp->rpc_ops->version,
 		   NIPQUAD(clp->cl_addr.sin_addr),
 		   ntohs(clp->cl_addr.sin_port),
 		   dev,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index acfc56f9edc04..be4ce1c3a3d8d 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -188,7 +188,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
 {
 #ifdef CONFIG_NFS_V4
 	struct vfsmount *mnt = NULL;
-	switch (server->nfs_client->cl_nfsversion) {
+	switch (server->nfs_client->rpc_ops->version) {
 		case 2:
 		case 3:
 			mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a3492d6f8f9b3..5608e6a4c1e11 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -529,7 +529,7 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	seq_printf(m, ",namelen=%d", nfss->namelen);
 
 #ifdef CONFIG_NFS_V4
-	if (nfss->nfs_client->cl_nfsversion == 4) {
+	if (nfss->nfs_client->rpc_ops->version == 4) {
 		seq_printf(m, "\n\tnfsv4:\t");
 		seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
 		seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 9f949b5876849..97b3257566671 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -17,7 +17,6 @@ struct nfs_client {
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
-	int			cl_nfsversion;	/* NFS protocol version */
 	unsigned long		cl_res_state;	/* NFS resources state */
 #define NFS_CS_CALLBACK		1		/* - callback started */
 #define NFS_CS_IDMAP		2		/* - idmap started */

From 510deb0d7035d4fd465627deb3a119ca854f9e00 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:56:24 -0500
Subject: [PATCH 056/118] SUNRPC: rpc_create() default hostname should support
 AF_INET6 addresses

If the ULP doesn't pass a hostname string to rpc_create(), it manufactures
one based on the passed-in address.  Be smart enough to handle an AF_INET6
address properly in this case.

Move the default servername logic before the xprt_create_transport() call
to simplify error handling in rpc_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 32 +++++++++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 5 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e0a5e47a1d4b3..3af35ab0e99d5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -30,6 +30,7 @@
 #include <linux/smp_lock.h>
 #include <linux/utsname.h>
 #include <linux/workqueue.h>
+#include <linux/in6.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
@@ -247,7 +248,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		.addrlen = args->addrsize,
 		.timeout = args->timeout
 	};
-	char servername[20];
+	char servername[48];
 
 	xprt = xprt_create_transport(&xprtargs);
 	if (IS_ERR(xprt))
@@ -258,13 +259,34 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	 * up a string representation of the passed-in address.
 	 */
 	if (args->servername == NULL) {
-		struct sockaddr_in *addr =
-					(struct sockaddr_in *) args->address;
-		snprintf(servername, sizeof(servername), NIPQUAD_FMT,
-			NIPQUAD(addr->sin_addr.s_addr));
+		servername[0] = '\0';
+		switch (args->address->sa_family) {
+		case AF_INET: {
+			struct sockaddr_in *sin =
+					(struct sockaddr_in *)args->address;
+			snprintf(servername, sizeof(servername), NIPQUAD_FMT,
+				 NIPQUAD(sin->sin_addr.s_addr));
+			break;
+		}
+		case AF_INET6: {
+			struct sockaddr_in6 *sin =
+					(struct sockaddr_in6 *)args->address;
+			snprintf(servername, sizeof(servername), NIP6_FMT,
+				 NIP6(sin->sin6_addr));
+			break;
+		}
+		default:
+			/* caller wants default server name, but
+			 * address family isn't recognized. */
+			return ERR_PTR(-EINVAL);
+		}
 		args->servername = servername;
 	}
 
+	xprt = xprt_create_transport(&xprtargs);
+	if (IS_ERR(xprt))
+		return (struct rpc_clnt *)xprt;
+
 	/*
 	 * By default, kernel RPC client connects from a reserved port.
 	 * CAP_NET_BIND_SERVICE will not be set for unprivileged requesters,

From 9f6ad26d2abfd9a2ec4a34b934ca75c4239ab8cf Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:56:31 -0500
Subject: [PATCH 057/118] SUNRPC: Fix socket address handling in rpcb_clnt

Make sure rpcb_clnt passes the correct address length to rpc_create().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index f876e37d1972d..c1310f7962248 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -148,12 +148,13 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
 }
 
 static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
-					int proto, int version, int privileged)
+				    size_t salen, int proto, int version,
+				    int privileged)
 {
 	struct rpc_create_args args = {
 		.protocol	= proto,
 		.address	= srvaddr,
-		.addrsize	= sizeof(struct sockaddr_in),
+		.addrsize	= salen,
 		.servername	= hostname,
 		.program	= &rpcb_program,
 		.version	= version,
@@ -216,7 +217,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
 			prog, vers, prot, port);
 
 	rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin,
-					XPRT_TRANSPORT_UDP, 2, 1);
+				sizeof(sin), XPRT_TRANSPORT_UDP, 2, 1);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -265,7 +266,8 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
 		__FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
 	sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
-	rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin, prot, 2, 0);
+	rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin,
+				sizeof(sin), prot, 2, 0);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 
@@ -314,7 +316,9 @@ void rpcb_getport_async(struct rpc_task *task)
 	struct rpc_clnt	*rpcb_clnt;
 	static struct rpcbind_args *map;
 	struct rpc_task	*child;
-	struct sockaddr addr;
+	struct sockaddr_storage addr;
+	struct sockaddr *sap = (struct sockaddr *)&addr;
+	size_t salen;
 	int status;
 	struct rpcb_info *info;
 
@@ -344,10 +348,10 @@ void rpcb_getport_async(struct rpc_task *task)
 		goto bailout_nofree;
 	}
 
-	rpc_peeraddr(clnt, (void *)&addr, sizeof(addr));
+	salen = rpc_peeraddr(clnt, sap, sizeof(addr));
 
 	/* Don't ever use rpcbind v2 for AF_INET6 requests */
-	switch (addr.sa_family) {
+	switch (sap->sa_family) {
 	case AF_INET:
 		info = rpcb_next_version;
 		break;
@@ -372,7 +376,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	dprintk("RPC: %5u %s: trying rpcbind version %u\n",
 		task->tk_pid, __FUNCTION__, bind_version);
 
-	rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot,
+	rpcb_clnt = rpcb_create(clnt->cl_server, sap, salen, xprt->prot,
 				bind_version, 0);
 	if (IS_ERR(rpcb_clnt)) {
 		status = PTR_ERR(rpcb_clnt);

From 0a48f5d70fa9e87269d076fe27f3563f4375c479 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:56:38 -0500
Subject: [PATCH 058/118] SUNRPC: RPC version numbers are u32

Clean up: use correct type for RPC version numbers in rpcbind client.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c1310f7962248..9696b5127060d 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -148,7 +148,7 @@ static void rpcb_wake_rpcbind_waiters(struct rpc_xprt *xprt, int status)
 }
 
 static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr,
-				    size_t salen, int proto, int version,
+				    size_t salen, int proto, u32 version,
 				    int privileged)
 {
 	struct rpc_create_args args = {
@@ -311,7 +311,7 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi
 void rpcb_getport_async(struct rpc_task *task)
 {
 	struct rpc_clnt *clnt = task->tk_client;
-	int bind_version;
+	u32 bind_version;
 	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_clnt	*rpcb_clnt;
 	static struct rpcbind_args *map;

From 0fb2b7e945f55a8317e5f58db7c068aab5b825a1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:56:46 -0500
Subject: [PATCH 059/118] SUNRPC: Move universal address definitions to global
 header

Universal addresses are defined in RFC 1833 and clarified in RFC 3530.  We
need to use them in several places in the NFS and RPC clients, so move the
relevant definition and block comment to an appropriate global include
file.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/msg_prot.h | 39 ++++++++++++++++++++++++++++
 net/sunrpc/rpcb_clnt.c          | 45 +++------------------------------
 2 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h
index c4beb5775111c..70df4f1d88474 100644
--- a/include/linux/sunrpc/msg_prot.h
+++ b/include/linux/sunrpc/msg_prot.h
@@ -152,5 +152,44 @@ typedef __be32	rpc_fraghdr;
  */
 #define RPCBIND_MAXNETIDLEN	(4u)
 
+/*
+ * Universal addresses are introduced in RFC 1833 and further spelled
+ * out in RFC 3530.  RPCBIND_MAXUADDRLEN defines a maximum byte length
+ * of a universal address for use in allocating buffers and character
+ * arrays.
+ *
+ * Quoting RFC 3530, section 2.2:
+ *
+ * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
+ * US-ASCII string:
+ *
+ *	h1.h2.h3.h4.p1.p2
+ *
+ * The prefix, "h1.h2.h3.h4", is the standard textual form for
+ * representing an IPv4 address, which is always four octets long.
+ * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
+ * the first through fourth octets each converted to ASCII-decimal.
+ * Assuming big-endian ordering, p1 and p2 are, respectively, the first
+ * and second octets each converted to ASCII-decimal.  For example, if a
+ * host, in big-endian order, has an address of 0x0A010307 and there is
+ * a service listening on, in big endian order, port 0x020F (decimal
+ * 527), then the complete universal address is "10.1.3.7.2.15".
+ *
+ * ...
+ *
+ * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
+ * US-ASCII string:
+ *
+ *	x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
+ *
+ * The suffix "p1.p2" is the service port, and is computed the same way
+ * as with universal addresses for TCP and UDP over IPv4.  The prefix,
+ * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
+ * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
+ * Additionally, the two alternative forms specified in Section 2.2 of
+ * [RFC2373] are also acceptable.
+ */
+#define RPCBIND_MAXUADDRLEN	(56u)
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_MSGPROT_H_ */
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 9696b5127060d..f494e58910ec0 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -54,45 +54,6 @@ enum {
 #define RPCB_HIGHPROC_3		RPCBPROC_TADDR2UADDR
 #define RPCB_HIGHPROC_4		RPCBPROC_GETSTAT
 
-/*
- * r_addr
- *
- * Quoting RFC 3530, section 2.2:
- *
- * For TCP over IPv4 and for UDP over IPv4, the format of r_addr is the
- * US-ASCII string:
- *
- *	h1.h2.h3.h4.p1.p2
- *
- * The prefix, "h1.h2.h3.h4", is the standard textual form for
- * representing an IPv4 address, which is always four octets long.
- * Assuming big-endian ordering, h1, h2, h3, and h4, are respectively,
- * the first through fourth octets each converted to ASCII-decimal.
- * Assuming big-endian ordering, p1 and p2 are, respectively, the first
- * and second octets each converted to ASCII-decimal.  For example, if a
- * host, in big-endian order, has an address of 0x0A010307 and there is
- * a service listening on, in big endian order, port 0x020F (decimal
- * 527), then the complete universal address is "10.1.3.7.2.15".
- *
- * ...
- *
- * For TCP over IPv6 and for UDP over IPv6, the format of r_addr is the
- * US-ASCII string:
- *
- *	x1:x2:x3:x4:x5:x6:x7:x8.p1.p2
- *
- * The suffix "p1.p2" is the service port, and is computed the same way
- * as with universal addresses for TCP and UDP over IPv4.  The prefix,
- * "x1:x2:x3:x4:x5:x6:x7:x8", is the standard textual form for
- * representing an IPv6 address as defined in Section 2.2 of [RFC2373].
- * Additionally, the two alternative forms specified in Section 2.2 of
- * [RFC2373] are also acceptable.
- *
- * XXX: Currently this implementation does not explicitly convert the
- *      stored address to US-ASCII on non-ASCII systems.
- */
-#define RPCB_MAXADDRLEN		(128u)
-
 /*
  * r_owner
  *
@@ -113,7 +74,7 @@ struct rpcbind_args {
 	u32			r_prot;
 	unsigned short		r_port;
 	char *			r_netid;
-	char			r_addr[RPCB_MAXADDRLEN];
+	char			r_addr[RPCBIND_MAXUADDRLEN];
 	char *			r_owner;
 };
 
@@ -526,7 +487,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
 	 * Simple sanity check.  The smallest possible universal
 	 * address is an IPv4 address string containing 11 bytes.
 	 */
-	if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN)
+	if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN)
 		goto out_err;
 
 	/*
@@ -577,7 +538,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p,
 #define RPCB_boolean_sz		(1u)
 
 #define RPCB_netid_sz		(1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN))
-#define RPCB_addr_sz		(1+XDR_QUADLEN(RPCB_MAXADDRLEN))
+#define RPCB_addr_sz		(1+XDR_QUADLEN(RPCBIND_MAXUADDRLEN))
 #define RPCB_ownerstring_sz	(1+XDR_QUADLEN(RPCB_MAXOWNERLEN))
 
 #define RPCB_mappingargs_sz	RPCB_program_sz+RPCB_version_sz+	\

From cc38bac3a0093b3b7928efc6ff8e9faf9e75f41d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:56:54 -0500
Subject: [PATCH 060/118] NFS: Ensure NFSv4 SETCLIENTID send buffer is large
 enough

Ensure that the RPC buffer size specified for NFSv4 SETCLIENTID procedures
matches what we are encoding into the buffer.  See the definition of
struct nfs4_setclientid {} and the encode_setclientid() function.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c        | 10 ++++++----
 include/linux/nfs_xdr.h | 13 +++++++------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index eae46f008da74..db1ed9c46ede9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -116,10 +116,12 @@ static int nfs4_stat_to_errno(int);
 #define decode_renew_maxsz	(op_decode_hdr_maxsz)
 #define encode_setclientid_maxsz \
 				(op_encode_hdr_maxsz + \
-				4 /*server->ip_addr*/ + \
-				1 /*Netid*/ + \
-				6 /*uaddr*/ + \
-				6 + (NFS4_VERIFIER_SIZE >> 2))
+				XDR_QUADLEN(NFS4_VERIFIER_SIZE) + \
+				XDR_QUADLEN(NFS4_SETCLIENTID_NAMELEN) + \
+				1 /* sc_prog */ + \
+				XDR_QUADLEN(RPCBIND_MAXNETIDLEN) + \
+				XDR_QUADLEN(RPCBIND_MAXUADDRLEN) + \
+				1) /* sc_cb_ident */
 #define decode_setclientid_maxsz \
 				(op_decode_hdr_maxsz + \
 				2 + \
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6b213a64b15f0..d8e395d0c7c95 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -666,16 +666,17 @@ struct nfs4_rename_res {
 	struct nfs_fattr *		new_fattr;
 };
 
+#define NFS4_SETCLIENTID_NAMELEN	(48)
 struct nfs4_setclientid {
-	const nfs4_verifier *		sc_verifier;      /* request */
+	const nfs4_verifier *		sc_verifier;
 	unsigned int			sc_name_len;
-	char				sc_name[48];	  /* request */
-	u32				sc_prog;          /* request */
+	char				sc_name[NFS4_SETCLIENTID_NAMELEN];
+	u32				sc_prog;
 	unsigned int			sc_netid_len;
-	char				sc_netid[4];	  /* request */
+	char				sc_netid[RPCBIND_MAXNETIDLEN];
 	unsigned int			sc_uaddr_len;
-	char				sc_uaddr[24];     /* request */
-	u32				sc_cb_ident;      /* request */
+	char				sc_uaddr[RPCBIND_MAXUADDRLEN];
+	u32				sc_cb_ident;
 };
 
 struct nfs4_statfs_arg {

From 4392f2592297876967191238a341667a6d4fc456 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:01 -0500
Subject: [PATCH 061/118] NFS: Increase size of cl_ipaddr field to hold IPv6
 addresses

The nfs_client's cl_ipaddr field needs to be larger to hold strings that
represent IPv6 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_fs_sb.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 97b3257566671..0b3dcba3d97d2 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -64,7 +64,7 @@ struct nfs_client {
 	/* Our own IP address, as a null-terminated string.
 	 * This is used to generate the clientid, and the callback address.
 	 */
-	char			cl_ipaddr[16];
+	char			cl_ipaddr[48];
 	unsigned char		cl_id_uniquifier;
 #endif
 };

From d4d3c507493afd3c9d19fbe9762f44e790909dbe Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:09 -0500
Subject: [PATCH 062/118] NFS: Enable NFS client to generate CLIENTID strings
 with IPv6 addresses

We recently added methods to RPC transports that provide string versions of
the remote peer address information.  Convert the NFSv4 SETCLIENTID
procedure to use those methods instead of building the client ID out of
whole cloth.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 26192a703129e..5e8c4cf7959e3 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2891,14 +2891,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 
 	for(;;) {
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
-				sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u",
-				clp->cl_ipaddr, NIPQUAD(clp->cl_addr.sin_addr),
+				sizeof(setclientid.sc_name), "%s/%s %s %u",
+				clp->cl_ipaddr,
+				rpc_peeraddr2str(clp->cl_rpcclient,
+							RPC_DISPLAY_ADDR),
 				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
-				sizeof(setclientid.sc_netid), "tcp");
+				sizeof(setclientid.sc_netid),
+				rpc_peeraddr2str(clp->cl_rpcclient,
+							RPC_DISPLAY_NETID));
 		setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
-				sizeof(setclientid.sc_uaddr), "%s.%d.%d",
+				sizeof(setclientid.sc_uaddr), "%s.%u.%u",
 				clp->cl_ipaddr, port >> 8, port & 255);
 
 		status = rpc_call_sync(clp->cl_rpcclient, &msg, 0);

From 5d8515caeb99940f5ed56d22a03aba20bbe7fdcb Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:16 -0500
Subject: [PATCH 063/118] NFS: eliminate NIPQUAD(clp->cl_addr.sin_addr)

To ensure the NFS client displays IPv6 addresses properly, replace
address family-specific NIPQUAD() invocations with a call to the RPC
client to get a formatted string representing the remote peer's
address.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c     | 12 ++++++------
 fs/nfs/delegation.c | 10 ++++++----
 fs/nfs/nfs4state.c  |  9 +++++----
 fs/nfs/super.c      |  5 +++--
 4 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 3b21731ae571b..701cd193a014c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1279,10 +1279,10 @@ static int nfs_server_list_show(struct seq_file *m, void *v)
 	/* display one transport per line on subsequent lines */
 	clp = list_entry(v, struct nfs_client, cl_share_link);
 
-	seq_printf(m, "v%u %02x%02x%02x%02x %4hx %3d %s\n",
+	seq_printf(m, "v%u %s %s %3d %s\n",
 		   clp->rpc_ops->version,
-		   NIPQUAD(clp->cl_addr.sin_addr),
-		   ntohs(clp->cl_addr.sin_port),
+		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
+		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
 		   atomic_read(&clp->cl_count),
 		   clp->cl_hostname);
 
@@ -1358,10 +1358,10 @@ static int nfs_volume_list_show(struct seq_file *m, void *v)
 		 (unsigned long long) server->fsid.major,
 		 (unsigned long long) server->fsid.minor);
 
-	seq_printf(m, "v%u %02x%02x%02x%02x %4hx %-7s %-17s\n",
+	seq_printf(m, "v%u %s %s %-7s %-17s\n",
 		   clp->rpc_ops->version,
-		   NIPQUAD(clp->cl_addr.sin_addr),
-		   ntohs(clp->cl_addr.sin_port),
+		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_ADDR),
+		   rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_HEX_PORT),
 		   dev,
 		   fsid);
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 11833f4caeaa9..b03dcd8403f1d 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -156,8 +156,9 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
 					sizeof(delegation->stateid)) != 0 ||
 				delegation->type != nfsi->delegation->type) {
-			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
-					__FUNCTION__, NIPQUAD(clp->cl_addr.sin_addr));
+			printk(KERN_WARNING "%s: server %s handed out "
+					"a duplicate delegation!\n",
+					__FUNCTION__, clp->cl_hostname);
 			status = -EIO;
 		}
 	}
@@ -314,8 +315,9 @@ void nfs_expire_all_delegations(struct nfs_client *clp)
 	__module_get(THIS_MODULE);
 	atomic_inc(&clp->cl_count);
 	task = kthread_run(nfs_do_expire_all_delegations, clp,
-			"%u.%u.%u.%u-delegreturn",
-			NIPQUAD(clp->cl_addr.sin_addr));
+				"%s-delegreturn",
+				rpc_peeraddr2str(clp->cl_rpcclient,
+							RPC_DISPLAY_ADDR));
 	if (!IS_ERR(task))
 		return;
 	nfs_put_client(clp);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index bf94c6e0a503e..f9c7432471dcf 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -758,8 +758,9 @@ static void nfs4_recover_state(struct nfs_client *clp)
 
 	__module_get(THIS_MODULE);
 	atomic_inc(&clp->cl_count);
-	task = kthread_run(reclaimer, clp, "%u.%u.%u.%u-reclaim",
-			NIPQUAD(clp->cl_addr.sin_addr));
+	task = kthread_run(reclaimer, clp, "%s-reclaim",
+				rpc_peeraddr2str(clp->cl_rpcclient,
+							RPC_DISPLAY_ADDR));
 	if (!IS_ERR(task))
 		return;
 	nfs4_clear_recover_bit(clp);
@@ -970,8 +971,8 @@ static int reclaimer(void *ptr)
 	module_put_and_exit(0);
 	return 0;
 out_error:
-	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u with error %d\n",
-				NIPQUAD(clp->cl_addr.sin_addr), -status);
+	printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %s"
+			" with error %d\n", clp->cl_hostname, -status);
 	set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
 	goto out;
 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 5608e6a4c1e11..75f3cbf922a30 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -491,8 +491,9 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 
 	nfs_show_mount_options(m, nfss, 0);
 
-	seq_printf(m, ",addr="NIPQUAD_FMT,
-		NIPQUAD(nfss->nfs_client->cl_addr.sin_addr));
+	seq_printf(m, ",addr=%s",
+			rpc_peeraddr2str(nfss->nfs_client->cl_rpcclient,
+							RPC_DISPLAY_ADDR));
 
 	return 0;
 }

From 1d98fe6717c5786394268da430a4354f6205da54 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:23 -0500
Subject: [PATCH 064/118] NFS: Move dprintks from callback.c to callback_proc.c

Clean up: The client side peer address is available in callback_proc.c,
so move a dprintk out of fs/nfs/callback.c and into
fs/nfs/callback_proc.c.

This is more consistent with other debugging messages, and the proc
routines have more information about each request to display.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c      |  4 ----
 fs/nfs/callback_proc.c | 12 +++++++++++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a796be5051bfa..bbf67f148ff90 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -73,8 +73,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 	complete(&nfs_callback_info.started);
 
 	for(;;) {
-		char buf[RPC_MAX_ADDRBUFLEN];
-
 		if (signalled()) {
 			if (nfs_callback_info.users == 0)
 				break;
@@ -92,8 +90,6 @@ static void nfs_callback_svc(struct svc_rqst *rqstp)
 					__FUNCTION__, -err);
 			break;
 		}
-		dprintk("%s: request from %s\n", __FUNCTION__,
-				svc_print_addr(rqstp, buf, sizeof(buf)));
 		svc_process(rqstp);
 	}
 
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 72e55d83756d9..e89a9007c91cf 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -12,7 +12,9 @@
 #include "delegation.h"
 #include "internal.h"
 
+#ifdef NFS_DEBUG
 #define NFSDBG_FACILITY NFSDBG_CALLBACK
+#endif
  
 __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res)
 {
@@ -20,12 +22,16 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 	struct nfs_delegation *delegation;
 	struct nfs_inode *nfsi;
 	struct inode *inode;
-	
+
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
 	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
+
+	dprintk("NFS: GETATTR callback request from %s\n",
+		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
 	inode = nfs_delegation_find_inode(clp, &args->fh);
 	if (inode == NULL)
 		goto out_putclient;
@@ -65,6 +71,10 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
+
+	dprintk("NFS: RECALL callback request from %s\n",
+		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
+
 	inode = nfs_delegation_find_inode(clp, &args->fh);
 	if (inode == NULL)
 		goto out_putclient;

From 3f43c6667acb4e02962b2829a4d4ebb6b6e6f70e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:31 -0500
Subject: [PATCH 065/118] NFS: Address a couple of nits in
 nfs_follow_referral()

Clean up: fix an outdated block comment, and address a comparison
between a signed and unsigned integer.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4namespace.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index dd5fef20c7027..bd1b1617905d6 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -114,10 +114,7 @@ static inline int valid_ipaddr4(const char *buf)
  * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
  * @mnt_parent - mountpoint of parent directory
  * @dentry - parent directory
- * @fspath - fs path returned in fs_locations
- * @mntpath - mount path to new server
- * @hostname - hostname of new server
- * @addr - host addr of new server
+ * @locations - array of NFSv4 server location information
  *
  */
 static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
@@ -131,7 +128,8 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 		.authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor,
 	};
 	char *page = NULL, *page2 = NULL;
-	int loc, s, error;
+	unsigned int s;
+	int loc, error;
 
 	if (locations == NULL || locations->nlocations <= 0)
 		goto out;

From fd00a8ff8e37815c9df49f5cf09786e441e1396b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:38 -0500
Subject: [PATCH 066/118] NFS: Add support for AF_INET6 addresses in
 nfs_compare_super()

Refactor nfs_compare_super() and add AF_INET6 support.

Replace the generic memcmp() to document explicitly what parts of the
addresses must match in this check, and make the comparison independent
of the lengths of both addresses.

A side benefit is both tests are more computationally efficient than a
memcmp().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 43 ++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 75f3cbf922a30..c3d8fcf385238 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -45,6 +45,8 @@
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
 #include <linux/magic.h>
 #include <linux/parser.h>
@@ -1326,15 +1328,50 @@ static int nfs_set_super(struct super_block *s, void *data)
 	return ret;
 }
 
+static int nfs_compare_super_address(struct nfs_server *server1,
+				     struct nfs_server *server2)
+{
+	struct sockaddr *sap1, *sap2;
+
+	sap1 = (struct sockaddr *)&server1->nfs_client->cl_addr;
+	sap2 = (struct sockaddr *)&server2->nfs_client->cl_addr;
+
+	if (sap1->sa_family != sap2->sa_family)
+		return 0;
+
+	switch (sap1->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *sin1 = (struct sockaddr_in *)sap1;
+		struct sockaddr_in *sin2 = (struct sockaddr_in *)sap2;
+		if (sin1->sin_addr.s_addr != sin2->sin_addr.s_addr)
+			return 0;
+		if (sin1->sin_port != sin2->sin_port)
+			return 0;
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sin1 = (struct sockaddr_in6 *)sap1;
+		struct sockaddr_in6 *sin2 = (struct sockaddr_in6 *)sap2;
+		if (!ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr))
+			return 0;
+		if (sin1->sin6_port != sin2->sin6_port)
+			return 0;
+		break;
+	}
+	default:
+		return 0;
+	}
+
+	return 1;
+}
+
 static int nfs_compare_super(struct super_block *sb, void *data)
 {
 	struct nfs_sb_mountdata *sb_mntdata = data;
 	struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
 	int mntflags = sb_mntdata->mntflags;
 
-	if (memcmp(&old->nfs_client->cl_addr,
-				&server->nfs_client->cl_addr,
-				sizeof(old->nfs_client->cl_addr)) != 0)
+	if (!nfs_compare_super_address(old, server))
 		return 0;
 	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
 	if (old->flags & NFS_MOUNT_UNSHARED)

From cdcd7f9abc8c95524376835fbe8e11c5f7bf588e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:45 -0500
Subject: [PATCH 067/118] NFS: Verify IPv6 addresses properly

Add support to nfs_verify_server_address for recognizing AF_INET6
addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c3d8fcf385238..038b20b38b220 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -600,16 +600,21 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 }
 
 /*
- * Sanity-check a server address provided by the mount command
+ * Sanity-check a server address provided by the mount command.
+ *
+ * Address family must be initialized, and address must not be
+ * the ANY address for that family.
  */
 static int nfs_verify_server_address(struct sockaddr *addr)
 {
 	switch (addr->sa_family) {
 	case AF_INET: {
-		struct sockaddr_in *sa = (struct sockaddr_in *) addr;
-		if (sa->sin_addr.s_addr != INADDR_ANY)
-			return 1;
-		break;
+		struct sockaddr_in *sa = (struct sockaddr_in *)addr;
+		return sa->sin_addr.s_addr != INADDR_ANY;
+	}
+	case AF_INET6: {
+		struct in6_addr *sa = &((struct sockaddr_in6 *)addr)->sin6_addr;
+		return !ipv6_addr_any(sa);
 	}
 	}
 

From 04dcd6e3aceedff9fcc96ce3014688d5b642d627 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:57:53 -0500
Subject: [PATCH 068/118] NFS: Make setting a port number agostic

We'll need to set the port number of an AF_INET or AF_INET6 address in
several places in fs/nfs/super.c, so introduce a helper that can manage
this for us.  We put this helper to immediate use.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 038b20b38b220..ef1aad774e6c5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -599,6 +599,25 @@ static void nfs_umount_begin(struct vfsmount *vfsmnt, int flags)
 		rpc_killall_tasks(rpc);
 }
 
+/*
+ * Set the port number in an address.  Be agnostic about the address family.
+ */
+static void nfs_set_port(struct sockaddr *sap, unsigned short port)
+{
+	switch (sap->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+		ap->sin_port = htons(port);
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+		ap->sin6_port = htons(port);
+		break;
+	}
+	}
+}
+
 /*
  * Sanity-check a server address provided by the mount command.
  *
@@ -629,6 +648,7 @@ static int nfs_parse_mount_options(char *raw,
 				   struct nfs_parsed_mount_data *mnt)
 {
 	char *p, *string;
+	unsigned short port = 0;
 
 	if (!raw) {
 		dfprintk(MOUNT, "NFS: mount options string was NULL.\n");
@@ -731,7 +751,7 @@ static int nfs_parse_mount_options(char *raw,
 				return 0;
 			if (option < 0 || option > 65535)
 				return 0;
-			mnt->nfs_server.address.sin_port = htons(option);
+			port = option;
 			break;
 		case Opt_rsize:
 			if (match_int(args, &mnt->rsize))
@@ -973,6 +993,8 @@ static int nfs_parse_mount_options(char *raw,
 		}
 	}
 
+	nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port);
+
 	return 1;
 
 out_nomem:
@@ -1023,7 +1045,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 	/*
 	 * autobind will be used if mount_server.port == 0
 	 */
-	sin.sin_port = htons(args->mount_server.port);
+	nfs_set_port((struct sockaddr *)&sin, args->mount_server.port);
 
 	/*
 	 * Now ask the mount server to map our export path

From 0d0f0c192df0282600c6d11c8cc252e7e7a80afc Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:00 -0500
Subject: [PATCH 069/118] NFS: Set default port for NFSv4, with support for
 AF_INET6

Create a helper function to set the default NFS port for NFSv4 mount
points.  The helper supports both AF_INET and AF_INET6 family addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ef1aad774e6c5..a88697ff19efc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1595,6 +1595,28 @@ static void nfs4_fill_super(struct super_block *sb)
 	nfs_initialise_sb(sb);
 }
 
+/*
+ * If the user didn't specify a port, set the port number to
+ * the NFS version 4 default port.
+ */
+static void nfs4_default_port(struct sockaddr *sap)
+{
+	switch (sap->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+		if (ap->sin_port == 0)
+			ap->sin_port = htons(NFS_PORT);
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+		if (ap->sin6_port == 0)
+			ap->sin6_port = htons(NFS_PORT);
+		break;
+	}
+	}
+}
+
 /*
  * Validate NFSv4 mount options
  */
@@ -1628,12 +1650,13 @@ static int nfs4_validate_mount_data(void *options,
 				   data->host_addr,
 				   sizeof(args->nfs_server.address)))
 			return -EFAULT;
-		if (args->nfs_server.address.sin_port == 0)
-			args->nfs_server.address.sin_port = htons(NFS_PORT);
 		if (!nfs_verify_server_address((struct sockaddr *)
 						&args->nfs_server.address))
 			goto out_no_address;
 
+		nfs4_default_port((struct sockaddr *)
+				  &args->nfs_server.address);
+
 		switch (data->auth_flavourlen) {
 		case 0:
 			args->auth_flavors[0] = RPC_AUTH_UNIX;
@@ -1687,12 +1710,13 @@ static int nfs4_validate_mount_data(void *options,
 		if (nfs_parse_mount_options((char *)options, args) == 0)
 			return -EINVAL;
 
-		if (args->nfs_server.address.sin_port == 0)
-			args->nfs_server.address.sin_port = htons(NFS_PORT);
 		if (!nfs_verify_server_address((struct sockaddr *)
 						&args->nfs_server.address))
 			return -EINVAL;
 
+		nfs4_default_port((struct sockaddr *)
+				  &args->nfs_server.address);
+
 		switch (args->auth_flavor_len) {
 		case 0:
 			args->auth_flavors[0] = RPC_AUTH_UNIX;

From 3b0d3f93d01bb013c3dcf9555d2d111c91ac6a1e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 3 Jan 2008 13:28:58 -0500
Subject: [PATCH 070/118] NFS: Add support for AF_INET6 addresses in
 __nfs_find_client()

Introduce AF_INET6-specific address checking to __nfs_find_client().

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 701cd193a014c..876162cddf1ee 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -34,6 +34,8 @@
 #include <linux/nfs_idmap.h>
 #include <linux/vfs.h>
 #include <linux/inet.h>
+#include <linux/in6.h>
+#include <net/ipv6.h>
 #include <linux/nfs_xdr.h>
 
 #include <asm/system.h>
@@ -208,16 +210,44 @@ void nfs_put_client(struct nfs_client *clp)
 	}
 }
 
+static int nfs_sockaddr_match_ipaddr4(const struct sockaddr_in *sa1,
+				 const struct sockaddr_in *sa2)
+{
+	return sa1->sin_addr.s_addr == sa2->sin_addr.s_addr;
+}
+
+static int nfs_sockaddr_match_ipaddr6(const struct sockaddr_in6 *sa1,
+				 const struct sockaddr_in6 *sa2)
+{
+	return ipv6_addr_equal(&sa1->sin6_addr, &sa2->sin6_addr);
+}
+
+static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+				 const struct sockaddr *sa2)
+{
+	switch (sa1->sa_family) {
+	case AF_INET:
+		return nfs_sockaddr_match_ipaddr4((const struct sockaddr_in *)sa1,
+				(const struct sockaddr_in *)sa2);
+	case AF_INET6:
+		return nfs_sockaddr_match_ipaddr6((const struct sockaddr_in6 *)sa1,
+				(const struct sockaddr_in6 *)sa2);
+	}
+	BUG();
+}
+
 /*
  * Find a client by IP address and protocol version
  * - returns NULL if no such client
  */
-struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+struct nfs_client *_nfs_find_client(const struct sockaddr *addr, int nfsversion)
 {
 	struct nfs_client *clp;
 
 	spin_lock(&nfs_client_lock);
 	list_for_each_entry(clp, &nfs_client_list, cl_share_link) {
+		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
 		/* Don't match clients that failed to initialise properly */
 		if (clp->cl_cons_state != NFS_CS_READY)
 			continue;
@@ -226,8 +256,10 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
 		if (clp->rpc_ops->version != nfsversion)
 			continue;
 
+		if (addr->sa_family != clap->sa_family)
+			continue;
 		/* Match only the IP address, not the port number */
-		if (clp->cl_addr.sin_addr.s_addr != addr->sin_addr.s_addr)
+		if (!nfs_sockaddr_match_ipaddr(addr, clap))
 			continue;
 
 		atomic_inc(&clp->cl_count);
@@ -238,6 +270,11 @@ struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversio
 	return NULL;
 }
 
+struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
+{
+	return _nfs_find_client((const struct sockaddr *)addr, nfsversion);
+}
+
 /*
  * Find an nfs_client on the list that matches the initialisation data
  * that is supplied.

From 6e4cffd7b2cf86022dcf9cceeb63f16ff852caa1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:15 -0500
Subject: [PATCH 071/118] NFS: Expand server address storage in nfs_client
 struct

Prepare for managing larger addresses in the NFS client by widening the
nfs_client struct's cl_addr field.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>

(Modified to work with the new parameters for nfs_alloc_client)
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 8 ++++++--
 include/linux/nfs_fs_sb.h | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 876162cddf1ee..44fe7fd7bfbf6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -98,6 +98,7 @@ struct rpc_program		nfsacl_program = {
 struct nfs_client_initdata {
 	const char *hostname;
 	const struct sockaddr_in *addr;
+	size_t addrlen;
 	const struct nfs_rpc_ops *rpc_ops;
 };
 
@@ -125,7 +126,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	atomic_set(&clp->cl_count, 1);
 	clp->cl_cons_state = NFS_CS_INITING;
 
-	memcpy(&clp->cl_addr, cl_init->addr, sizeof(clp->cl_addr));
+	memcpy(&clp->cl_addr, cl_init->addr, cl_init->addrlen);
+	clp->cl_addrlen = cl_init->addrlen;
 
 	if (cl_init->hostname) {
 		clp->cl_hostname = kstrdup(cl_init->hostname, GFP_KERNEL);
@@ -425,7 +427,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 	struct rpc_create_args args = {
 		.protocol	= proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
-		.addrsize	= sizeof(clp->cl_addr),
+		.addrsize	= clp->cl_addrlen,
 		.timeout	= &timeparms,
 		.servername	= clp->cl_hostname,
 		.program	= &nfs_program,
@@ -585,6 +587,7 @@ static int nfs_init_server(struct nfs_server *server,
 	struct nfs_client_initdata cl_init = {
 		.hostname = data->nfs_server.hostname,
 		.addr = &data->nfs_server.address,
+		.addrlen = sizeof(data->nfs_server.address),
 		.rpc_ops = &nfs_v2_clientops,
 	};
 	struct nfs_client *clp;
@@ -938,6 +941,7 @@ static int nfs4_set_client(struct nfs_server *server,
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
 		.addr = addr,
+		.addrlen = sizeof(*addr),
 		.rpc_ops = &nfs_v4_clientops,
 	};
 	struct nfs_client *clp;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 0b3dcba3d97d2..912cfe84ea860 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -21,7 +21,8 @@ struct nfs_client {
 #define NFS_CS_CALLBACK		1		/* - callback started */
 #define NFS_CS_IDMAP		2		/* - idmap started */
 #define NFS_CS_RENEWD		3		/* - renewd started */
-	struct sockaddr_in	cl_addr;	/* server identifier */
+	struct sockaddr_storage	cl_addr;	/* server identifier */
+	size_t			cl_addrlen;
 	char *			cl_hostname;	/* hostname of server */
 	struct list_head	cl_share_link;	/* link in global client list */
 	struct list_head	cl_superblocks;	/* List of nfs_server structs */

From 671beed7e28d9d27eef256862f6c1783a1da147e Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:22 -0500
Subject: [PATCH 072/118] NFS: Change cb_getattrargs to pass "struct sockaddr
 *" instead of sockaddr_in

Change the addr field in the cb_getattrargs struct to a "struct sockaddr *"
to support non-IPv4 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.h      | 2 +-
 fs/nfs/callback_proc.c | 2 +-
 fs/nfs/callback_xdr.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index c2bb14e053e1c..ec0ffd9641c6c 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -38,7 +38,7 @@ struct cb_compound_hdr_res {
 };
 
 struct cb_getattrargs {
-	struct sockaddr_in *addr;
+	struct sockaddr *addr;
 	struct nfs_fh fh;
 	uint32_t bitmap[2];
 };
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e89a9007c91cf..32f0df0a95726 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -25,7 +25,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
+	clp = nfs_find_client((struct sockaddr_in *)args->addr, 4);
 	if (clp == NULL)
 		goto out;
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 97abd829e4327..3eda1bc00ecc0 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -176,7 +176,7 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
 	status = decode_fh(xdr, &args->fh);
 	if (unlikely(status != 0))
 		goto out;
-	args->addr = svc_addr_in(rqstp);
+	args->addr = svc_addr(rqstp);
 	status = decode_bitmap(xdr, args->bitmap);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status));

From c1d35866566bc2b270a82445271fcce1e391c4b9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:29 -0500
Subject: [PATCH 073/118] NFS: Change cb_recallargs to pass "struct sockaddr *"
 instead of sockaddr_in

Change the addr field in the cb_recallargs struct to a "struct sockaddr *"
to support non-IPv4 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.h      | 2 +-
 fs/nfs/callback_proc.c | 2 +-
 fs/nfs/callback_xdr.c  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ec0ffd9641c6c..bb25d2135ff1e 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -53,7 +53,7 @@ struct cb_getattrres {
 };
 
 struct cb_recallargs {
-	struct sockaddr_in *addr;
+	struct sockaddr *addr;
 	struct nfs_fh fh;
 	nfs4_stateid stateid;
 	uint32_t truncate;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 32f0df0a95726..fa9586dcc3dde 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -68,7 +68,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	__be32 res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client(args->addr, 4);
+	clp = nfs_find_client((struct sockaddr_in *)args->addr, 4);
 	if (clp == NULL)
 		goto out;
 
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 3eda1bc00ecc0..c63eb720b68be 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -188,7 +188,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
 	__be32 *p;
 	__be32 status;
 
-	args->addr = svc_addr_in(rqstp);
+	args->addr = svc_addr(rqstp);
 	status = decode_stateid(xdr, &args->stateid);
 	if (unlikely(status != 0))
 		goto out;

From ff052645c939b2fd8d467105adf98fa621cc244b Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:44 -0500
Subject: [PATCH 074/118] NFS: Change nfs_find_client() to take "struct
 sockaddr *"

Adjust arguments and callers of nfs_find_client() to pass a
"struct sockaddr *" instead of "struct sockaddr_in *" to support non-IPv4
addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>

Trond: Also fix up protocol version number argument in nfs_find_client() to
use the correct u32 type.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback.c      | 3 +--
 fs/nfs/callback_proc.c | 4 ++--
 fs/nfs/client.c        | 7 +------
 fs/nfs/internal.h      | 2 +-
 4 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index bbf67f148ff90..9b6bbf1b97879 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -164,12 +164,11 @@ void nfs_callback_down(void)
 
 static int nfs_callback_authenticate(struct svc_rqst *rqstp)
 {
-	struct sockaddr_in *addr = svc_addr_in(rqstp);
 	struct nfs_client *clp;
 	char buf[RPC_MAX_ADDRBUFLEN];
 
 	/* Don't talk to strangers */
-	clp = nfs_find_client(addr, 4);
+	clp = nfs_find_client(svc_addr(rqstp), 4);
 	if (clp == NULL)
 		return SVC_DROP;
 
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index fa9586dcc3dde..e89a9007c91cf 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -25,7 +25,7 @@ __be32 nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *
 
 	res->bitmap[0] = res->bitmap[1] = 0;
 	res->status = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client((struct sockaddr_in *)args->addr, 4);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 
@@ -68,7 +68,7 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	__be32 res;
 	
 	res = htonl(NFS4ERR_BADHANDLE);
-	clp = nfs_find_client((struct sockaddr_in *)args->addr, 4);
+	clp = nfs_find_client(args->addr, 4);
 	if (clp == NULL)
 		goto out;
 
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 44fe7fd7bfbf6..73bf4ecad0300 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -242,7 +242,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
  * Find a client by IP address and protocol version
  * - returns NULL if no such client
  */
-struct nfs_client *_nfs_find_client(const struct sockaddr *addr, int nfsversion)
+struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
 {
 	struct nfs_client *clp;
 
@@ -272,11 +272,6 @@ struct nfs_client *_nfs_find_client(const struct sockaddr *addr, int nfsversion)
 	return NULL;
 }
 
-struct nfs_client *nfs_find_client(const struct sockaddr_in *addr, int nfsversion)
-{
-	return _nfs_find_client((const struct sockaddr *)addr, nfsversion);
-}
-
 /*
  * Find an nfs_client on the list that matches the initialisation data
  * that is supplied.
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 058d503a0ee13..c8458b1680182 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -60,7 +60,7 @@ struct nfs_parsed_mount_data {
 extern struct rpc_program nfs_program;
 
 extern void nfs_put_client(struct nfs_client *);
-extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int);
+extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
 extern struct nfs_server *nfs_create_server(
 					const struct nfs_parsed_mount_data *,
 					struct nfs_fh *);

From d7422c472bbaa419876b91e8823c6219c4a144cb Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:51 -0500
Subject: [PATCH 075/118] NFS: Change nfs_get_client() to take sockaddr *

Adjust arguments and callers of nfs_get_client() to pass a
"struct sockaddr *" instead of "struct sockaddr_in *" to support
non-IPv4 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 73bf4ecad0300..e43072bdbb0ce 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -97,7 +97,7 @@ struct rpc_program		nfsacl_program = {
 
 struct nfs_client_initdata {
 	const char *hostname;
-	const struct sockaddr_in *addr;
+	const struct sockaddr *addr;
 	size_t addrlen;
 	const struct nfs_rpc_ops *rpc_ops;
 };
@@ -308,9 +308,8 @@ static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_in
 	struct nfs_client *clp, *new = NULL;
 	int error;
 
-	dprintk("--> nfs_get_client(%s,"NIPQUAD_FMT":%d,%u)\n",
-		cl_init->hostname ?: "", NIPQUAD(cl_init->addr->sin_addr),
-		cl_init->addr->sin_port, cl_init->rpc_ops->version);
+	dprintk("--> nfs_get_client(%s,v%u)\n",
+		cl_init->hostname ?: "", cl_init->rpc_ops->version);
 
 	/* see if the client already exists */
 	do {
@@ -581,7 +580,7 @@ static int nfs_init_server(struct nfs_server *server,
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = data->nfs_server.hostname,
-		.addr = &data->nfs_server.address,
+		.addr = (const struct sockaddr *)&data->nfs_server.address,
 		.addrlen = sizeof(data->nfs_server.address),
 		.rpc_ops = &nfs_v2_clientops,
 	};
@@ -935,7 +934,7 @@ static int nfs4_set_client(struct nfs_server *server,
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
-		.addr = addr,
+		.addr = (const struct sockaddr *)addr,
 		.addrlen = sizeof(*addr),
 		.rpc_ops = &nfs_v4_clientops,
 	};

From dcecae0ff44dceea7adb6bef5c8eb660fe87a93c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:58:59 -0500
Subject: [PATCH 076/118] NFS: Change nfs4_set_client() to accept struct
 sockaddr *

Adjust the arguments and callers of nfs4_set_client() to pass a "struct
sockaddr *" instead of a "struct sockaddr_in *" to support non-IPv4
addresses in the NFS client.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index e43072bdbb0ce..11380601fc784 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -927,15 +927,17 @@ static int nfs4_init_client(struct nfs_client *clp,
  * Set up an NFS4 client
  */
 static int nfs4_set_client(struct nfs_server *server,
-		const char *hostname, const struct sockaddr_in *addr,
+		const char *hostname,
+		const struct sockaddr *addr,
+		const size_t addrlen,
 		const char *ip_addr,
 		rpc_authflavor_t authflavour,
 		int proto, int timeo, int retrans)
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
-		.addr = (const struct sockaddr *)addr,
-		.addrlen = sizeof(*addr),
+		.addr = addr,
+		.addrlen = addrlen,
 		.rpc_ops = &nfs_v4_clientops,
 	};
 	struct nfs_client *clp;
@@ -1015,7 +1017,8 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 	/* Get a client record */
 	error = nfs4_set_client(server,
 			data->nfs_server.hostname,
-			&data->nfs_server.address,
+			(struct sockaddr *)&data->nfs_server.address,
+			sizeof(data->nfs_server.address),
 			data->client_address,
 			data->auth_flavors[0],
 			data->nfs_server.protocol,
@@ -1090,12 +1093,14 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 
 	/* Get a client representation.
 	 * Note: NFSv4 always uses TCP, */
-	error = nfs4_set_client(server, data->hostname, data->addr,
-			parent_client->cl_ipaddr,
-			data->authflavor,
-			parent_server->client->cl_xprt->prot,
-			parent_client->retrans_timeo,
-			parent_client->retrans_count);
+	error = nfs4_set_client(server, data->hostname,
+				(struct sockaddr *)data->addr,
+				sizeof(*data->addr),
+				parent_client->cl_ipaddr,
+				data->authflavor,
+				parent_server->client->cl_xprt->prot,
+				parent_client->retrans_timeo,
+				parent_client->retrans_count);
 	if (error < 0)
 		goto error;
 

From 6677d09513e35ac2f38d3a8c8a26fbd7bbcef192 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:59:06 -0500
Subject: [PATCH 077/118] NFS: Adjust nfs_clone_mount structure to store
 "struct sockaddr *"

Change the addr field in the nfs_clone_mount structure to store a "struct
sockaddr *" to support non-IPv4 addresses in the NFS client.

Note this is mostly a cosmetic change, and does not actually allow
referrals using IPv6 addresses.  The existing referral code assumes that
the server returns a string that represents an IPv4 address.  This code
needs to support hostnames and IPv6 addresses as well as IPv4 addresses,
thus it will need to be reorganized completely (to handle DNS resolution
in user space).

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c        |  4 ++--
 fs/nfs/internal.h      |  3 ++-
 fs/nfs/nfs4namespace.c | 12 +++++++-----
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 11380601fc784..ba114faf195fd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -1094,8 +1094,8 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	/* Get a client representation.
 	 * Note: NFSv4 always uses TCP, */
 	error = nfs4_set_client(server, data->hostname,
-				(struct sockaddr *)data->addr,
-				sizeof(*data->addr),
+				data->addr,
+				data->addrlen,
 				parent_client->cl_ipaddr,
 				data->authflavor,
 				parent_server->client->cl_xprt->prot,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index c8458b1680182..75dd4e252cae5 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -21,7 +21,8 @@ struct nfs_clone_mount {
 	struct nfs_fattr *fattr;
 	char *hostname;
 	char *mnt_path;
-	struct sockaddr_in *addr;
+	struct sockaddr *addr;
+	size_t addrlen;
 	rpc_authflavor_t authflavor;
 };
 
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index bd1b1617905d6..5f9ba41ed5bfa 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -172,7 +172,10 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 
 		s = 0;
 		while (s < location->nservers) {
-			struct sockaddr_in addr = {};
+			struct sockaddr_in addr = {
+				.sin_family	= AF_INET,
+				.sin_port	= htons(NFS_PORT),
+			};
 
 			if (location->servers[s].len <= 0 ||
 			    valid_ipaddr4(location->servers[s].data) < 0) {
@@ -181,10 +184,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
 			}
 
 			mountdata.hostname = location->servers[s].data;
-			addr.sin_addr.s_addr = in_aton(mountdata.hostname);
-			addr.sin_family = AF_INET;
-			addr.sin_port = htons(NFS_PORT);
-			mountdata.addr = &addr;
+			addr.sin_addr.s_addr = in_aton(mountdata.hostname),
+			mountdata.addr = (struct sockaddr *)&addr;
+			mountdata.addrlen = sizeof(addr);
 
 			snprintf(page, PAGE_SIZE, "%s:%s",
 					mountdata.hostname,

From 338320345b40eb7c63592f40d25cbd58ccf99548 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:59:13 -0500
Subject: [PATCH 078/118] NFS: Remove the NIPQUAD from nfs_try_mount

In the name of address family compatibility, we can't have the NIP_FMT and
NIPQUAD macros in nfs_try_mount().  Instead, we can make use of an unused
mount option to display the mount server's hostname.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index a88697ff19efc..f120be43d5439 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -89,7 +89,7 @@ enum {
 	Opt_nfsvers,
 
 	/* Mount options that take string arguments */
-	Opt_sec, Opt_proto, Opt_mountproto,
+	Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost,
 	Opt_addr, Opt_mountaddr, Opt_clientaddr,
 
 	/* Mount options that are ignored */
@@ -148,7 +148,7 @@ static match_table_t nfs_mount_option_tokens = {
 	{ Opt_mountproto, "mountproto=%s" },
 	{ Opt_addr, "addr=%s" },
 	{ Opt_clientaddr, "clientaddr=%s" },
-	{ Opt_userspace, "mounthost=%s" },
+	{ Opt_mounthost, "mounthost=%s" },
 	{ Opt_mountaddr, "mountaddr=%s" },
 
 	{ Opt_err, NULL }
@@ -974,6 +974,12 @@ static int nfs_parse_mount_options(char *raw,
 				goto out_nomem;
 			mnt->client_address = string;
 			break;
+		case Opt_mounthost:
+			string = match_strdup(args);
+			if (string == NULL)
+				goto out_nomem;
+			mnt->mount_server.hostname = string;
+			break;
 		case Opt_mountaddr:
 			string = match_strdup(args);
 			if (string == NULL)
@@ -1027,6 +1033,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 {
 	struct sockaddr_in sin;
 	int status;
+	char *hostname;
 
 	if (args->mount_server.version == 0) {
 		if (args->flags & NFS_MOUNT_VER3)
@@ -1035,6 +1042,11 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 			args->mount_server.version = NFS_MNT_VERSION;
 	}
 
+	if (args->mount_server.hostname)
+		hostname = args->mount_server.hostname;
+	else
+		hostname = args->nfs_server.hostname;
+
 	/*
 	 * Construct the mount server's address.
 	 */
@@ -1053,7 +1065,7 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 	 */
 	status = nfs_mount((struct sockaddr *) &sin,
 			   sizeof(sin),
-			   args->nfs_server.hostname,
+			   hostname,
 			   args->nfs_server.export_path,
 			   args->mount_server.version,
 			   args->mount_server.protocol,
@@ -1061,8 +1073,8 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 	if (status == 0)
 		return 0;
 
-	dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT
-			", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status);
+	dfprintk(MOUNT, "NFS: unable to mount server %s, error %d",
+			hostname, status);
 	return status;
 }
 
@@ -1468,6 +1480,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 
 out:
 	kfree(data.nfs_server.hostname);
+	kfree(data.mount_server.hostname);
 	return error;
 
 out_err_nosb:

From 9412b92772c1d80ea8284583b6aad0260e13515f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:59:21 -0500
Subject: [PATCH 079/118] NFS: Refactor mount option address parsing into
 separate function

Refactor the logic to parse incoming text-based IP addresses.  Use the
in4_pton() function instead of the older in_aton(), following the lead
of the in-kernel CIFS client.

Later we'll add IPv6 address parsing using the matching in6_pton()
function.  For now we can't allow IPv6 address parsing: we must expand
the size of the address storage fields in the nfs_parsed_mount_options
struct before we can parse and store IPv6 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index f120be43d5439..041fe9e9b74d8 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -640,6 +640,26 @@ static int nfs_verify_server_address(struct sockaddr *addr)
 	return 0;
 }
 
+/*
+ * Parse string addresses passed in via a mount option,
+ * and construct a sockaddr based on the result.
+ *
+ * If address parsing fails, set the sockaddr's address
+ * family to AF_UNSPEC to force nfs_verify_server_address()
+ * to punt the mount.
+ */
+static void nfs_parse_server_address(char *value,
+				     struct sockaddr *sap)
+{
+	struct sockaddr_in *ap = (void *)sap;
+
+	ap->sin_family = AF_INET;
+	if (in4_pton(value, -1, (u8 *)&ap->sin_addr.s_addr, '\0', NULL))
+		return;
+
+	sap->sa_family = AF_UNSPEC;
+}
+
 /*
  * Error-check and convert a string of mount options from user space into
  * a data structure
@@ -963,9 +983,8 @@ static int nfs_parse_mount_options(char *raw,
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
-			mnt->nfs_server.address.sin_family = AF_INET;
-			mnt->nfs_server.address.sin_addr.s_addr =
-							in_aton(string);
+			nfs_parse_server_address(string, (struct sockaddr *)
+						 &mnt->nfs_server.address);
 			kfree(string);
 			break;
 		case Opt_clientaddr:
@@ -984,9 +1003,8 @@ static int nfs_parse_mount_options(char *raw,
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
-			mnt->mount_server.address.sin_family = AF_INET;
-			mnt->mount_server.address.sin_addr.s_addr =
-							in_aton(string);
+			nfs_parse_server_address(string, (struct sockaddr *)
+						 &mnt->mount_server.address);
 			kfree(string);
 			break;
 

From 4c5680177012a2b5c0f3fdf58f4375dd84a1da67 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:59:28 -0500
Subject: [PATCH 080/118] NFS: Support non-IPv4 addresses in
 nfs_parsed_mount_data

Replace the nfs_server and mount_server address fields in the
nfs_parsed_mount_data structure with a "struct sockaddr_storage"
instead of a "struct sockaddr_in".

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Aurelien Charbon <aurelien.charbon@ext.bull.net>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c   |  4 ++--
 fs/nfs/internal.h |  6 ++++--
 fs/nfs/super.c    | 54 +++++++++++++++++++++++++++++------------------
 3 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index ba114faf195fd..906613362a568 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -581,7 +581,7 @@ static int nfs_init_server(struct nfs_server *server,
 	struct nfs_client_initdata cl_init = {
 		.hostname = data->nfs_server.hostname,
 		.addr = (const struct sockaddr *)&data->nfs_server.address,
-		.addrlen = sizeof(data->nfs_server.address),
+		.addrlen = data->nfs_server.addrlen,
 		.rpc_ops = &nfs_v2_clientops,
 	};
 	struct nfs_client *clp;
@@ -1018,7 +1018,7 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 	error = nfs4_set_client(server,
 			data->nfs_server.hostname,
 			(struct sockaddr *)&data->nfs_server.address,
-			sizeof(data->nfs_server.address),
+			data->nfs_server.addrlen,
 			data->client_address,
 			data->auth_flavors[0],
 			data->nfs_server.protocol,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 75dd4e252cae5..a806211990865 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -42,7 +42,8 @@ struct nfs_parsed_mount_data {
 	char			*client_address;
 
 	struct {
-		struct sockaddr_in	address;
+		struct sockaddr_storage	address;
+		size_t			addrlen;
 		char			*hostname;
 		unsigned int		version;
 		unsigned short		port;
@@ -50,7 +51,8 @@ struct nfs_parsed_mount_data {
 	} mount_server;
 
 	struct {
-		struct sockaddr_in	address;
+		struct sockaddr_storage	address;
+		size_t			addrlen;
 		char			*hostname;
 		char			*export_path;
 		int			protocol;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 041fe9e9b74d8..7efc6a34b56be 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -649,15 +649,18 @@ static int nfs_verify_server_address(struct sockaddr *addr)
  * to punt the mount.
  */
 static void nfs_parse_server_address(char *value,
-				     struct sockaddr *sap)
+				     struct sockaddr *sap,
+				     size_t *len)
 {
 	struct sockaddr_in *ap = (void *)sap;
 
 	ap->sin_family = AF_INET;
+	*len = sizeof(*ap);
 	if (in4_pton(value, -1, (u8 *)&ap->sin_addr.s_addr, '\0', NULL))
 		return;
 
 	sap->sa_family = AF_UNSPEC;
+	*len = 0;
 }
 
 /*
@@ -984,7 +987,8 @@ static int nfs_parse_mount_options(char *raw,
 			if (string == NULL)
 				goto out_nomem;
 			nfs_parse_server_address(string, (struct sockaddr *)
-						 &mnt->nfs_server.address);
+						 &mnt->nfs_server.address,
+						 &mnt->nfs_server.addrlen);
 			kfree(string);
 			break;
 		case Opt_clientaddr:
@@ -1004,7 +1008,8 @@ static int nfs_parse_mount_options(char *raw,
 			if (string == NULL)
 				goto out_nomem;
 			nfs_parse_server_address(string, (struct sockaddr *)
-						 &mnt->mount_server.address);
+						 &mnt->mount_server.address,
+						 &mnt->mount_server.addrlen);
 			kfree(string);
 			break;
 
@@ -1049,9 +1054,9 @@ static int nfs_parse_mount_options(char *raw,
 static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 			 struct nfs_fh *root_fh)
 {
-	struct sockaddr_in sin;
-	int status;
+	struct sockaddr *sap = (struct sockaddr *)&args->mount_server.address;
 	char *hostname;
+	int status;
 
 	if (args->mount_server.version == 0) {
 		if (args->flags & NFS_MOUNT_VER3)
@@ -1068,21 +1073,23 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
 	/*
 	 * Construct the mount server's address.
 	 */
-	if (args->mount_server.address.sin_addr.s_addr != INADDR_ANY)
-		sin = args->mount_server.address;
-	else
-		sin = args->nfs_server.address;
+	if (args->mount_server.address.ss_family == AF_UNSPEC) {
+		memcpy(sap, &args->nfs_server.address,
+		       args->nfs_server.addrlen);
+		args->mount_server.addrlen = args->nfs_server.addrlen;
+	}
+
 	/*
 	 * autobind will be used if mount_server.port == 0
 	 */
-	nfs_set_port((struct sockaddr *)&sin, args->mount_server.port);
+	nfs_set_port(sap, args->mount_server.port);
 
 	/*
 	 * Now ask the mount server to map our export path
 	 * to a file handle.
 	 */
-	status = nfs_mount((struct sockaddr *) &sin,
-			   sizeof(sin),
+	status = nfs_mount(sap,
+			   args->mount_server.addrlen,
 			   hostname,
 			   args->nfs_server.export_path,
 			   args->mount_server.version,
@@ -1165,9 +1172,6 @@ static int nfs_validate_mount_data(void *options,
 			memset(mntfh->data + mntfh->size, 0,
 			       sizeof(mntfh->data) - mntfh->size);
 
-		if (!nfs_verify_server_address((struct sockaddr *) &data->addr))
-			goto out_no_address;
-
 		/*
 		 * Translate to nfs_parsed_mount_data, which nfs_fill_super
 		 * can deal with.
@@ -1182,7 +1186,14 @@ static int nfs_validate_mount_data(void *options,
 		args->acregmax		= data->acregmax;
 		args->acdirmin		= data->acdirmin;
 		args->acdirmax		= data->acdirmax;
-		args->nfs_server.address = data->addr;
+
+		memcpy(&args->nfs_server.address, &data->addr,
+		       sizeof(data->addr));
+		args->nfs_server.addrlen = sizeof(data->addr);
+		if (!nfs_verify_server_address((struct sockaddr *)
+						&args->nfs_server.address))
+			goto out_no_address;
+
 		if (!(data->flags & NFS_MOUNT_TCP))
 			args->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 		/* N.B. caller will free nfs_server.hostname in all cases */
@@ -1655,6 +1666,7 @@ static int nfs4_validate_mount_data(void *options,
 				    struct nfs_parsed_mount_data *args,
 				    const char *dev_name)
 {
+	struct sockaddr_in *ap;
 	struct nfs4_mount_data *data = (struct nfs4_mount_data *)options;
 	char *c;
 
@@ -1675,11 +1687,13 @@ static int nfs4_validate_mount_data(void *options,
 
 	switch (data->version) {
 	case 1:
-		if (data->host_addrlen != sizeof(args->nfs_server.address))
+		ap = (struct sockaddr_in *)&args->nfs_server.address;
+		if (data->host_addrlen > sizeof(args->nfs_server.address))
+			goto out_no_address;
+		if (data->host_addrlen == 0)
 			goto out_no_address;
-		if (copy_from_user(&args->nfs_server.address,
-				   data->host_addr,
-				   sizeof(args->nfs_server.address)))
+		args->nfs_server.addrlen = data->host_addrlen;
+		if (copy_from_user(ap, data->host_addr, data->host_addrlen))
 			return -EFAULT;
 		if (!nfs_verify_server_address((struct sockaddr *)
 						&args->nfs_server.address))

From 3c7c7e4812e40e50a9ce9d687432ab5515cb3f2f Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 10 Dec 2007 14:59:35 -0500
Subject: [PATCH 081/118] NFS: Pull covers off IPv6 address parsing

Now that the needed IPv6 infrastructure is in place, allow the NFS client's
IP address parser to generate AF_INET6 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 7efc6a34b56be..3cbe32f3e88bc 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -652,12 +652,23 @@ static void nfs_parse_server_address(char *value,
 				     struct sockaddr *sap,
 				     size_t *len)
 {
-	struct sockaddr_in *ap = (void *)sap;
+	if (strchr(value, ':')) {
+		struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
+		u8 *addr = (u8 *)&ap->sin6_addr.in6_u;
 
-	ap->sin_family = AF_INET;
-	*len = sizeof(*ap);
-	if (in4_pton(value, -1, (u8 *)&ap->sin_addr.s_addr, '\0', NULL))
-		return;
+		ap->sin6_family = AF_INET6;
+		*len = sizeof(*ap);
+		if (in6_pton(value, -1, addr, '\0', NULL))
+			return;
+	} else {
+		struct sockaddr_in *ap = (struct sockaddr_in *)sap;
+		u8 *addr = (u8 *)&ap->sin_addr.s_addr;
+
+		ap->sin_family = AF_INET;
+		*len = sizeof(*ap);
+		if (in4_pton(value, -1, addr, '\0', NULL))
+			return;
+	}
 
 	sap->sa_family = AF_UNSPEC;
 	*len = 0;

From 69dd716c5ffd89f5ba14ffb871d633ecea74d13a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 14 Dec 2007 14:56:07 -0500
Subject: [PATCH 082/118] NFSv4: Add socket proto argument to setclientid

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 4 +++-
 include/linux/nfs_xdr.h | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 5e8c4cf7959e3..b3d4e8e5696a4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2891,10 +2891,12 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 
 	for(;;) {
 		setclientid.sc_name_len = scnprintf(setclientid.sc_name,
-				sizeof(setclientid.sc_name), "%s/%s %s %u",
+				sizeof(setclientid.sc_name), "%s/%s %s %s %u",
 				clp->cl_ipaddr,
 				rpc_peeraddr2str(clp->cl_rpcclient,
 							RPC_DISPLAY_ADDR),
+				rpc_peeraddr2str(clp->cl_rpcclient,
+							RPC_DISPLAY_PROTO),
 				cred->cr_ops->cr_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d8e395d0c7c95..d342ae5c76abe 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -666,7 +666,7 @@ struct nfs4_rename_res {
 	struct nfs_fattr *		new_fattr;
 };
 
-#define NFS4_SETCLIENTID_NAMELEN	(48)
+#define NFS4_SETCLIENTID_NAMELEN	(56)
 struct nfs4_setclientid {
 	const nfs4_verifier *		sc_verifier;
 	unsigned int			sc_name_len;

From 698b6d088e8a5d907596c689d5ae9109611c5b59 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Dec 2007 16:03:53 -0500
Subject: [PATCH 083/118] SUNRPC: cleanup for rpc_new_client()

There is no reason why we shouldn't just pass the rpc_create_args.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 3af35ab0e99d5..6a6b96e8dcb27 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -122,8 +122,9 @@ rpc_setup_pipedir(struct rpc_clnt *clnt, char *dir_name)
 	}
 }
 
-static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *program, u32 vers, rpc_authflavor_t flavor)
+static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, struct rpc_xprt *xprt)
 {
+	struct rpc_program	*program = args->program;
 	struct rpc_version	*version;
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_auth		*auth;
@@ -132,13 +133,13 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 
 	/* sanity check the name before trying to print it */
 	err = -EINVAL;
-	len = strlen(servname);
+	len = strlen(args->servername);
 	if (len > RPC_MAXNETNAMELEN)
 		goto out_no_rpciod;
 	len++;
 
 	dprintk("RPC:       creating %s client for %s (xprt %p)\n",
-			program->name, servname, xprt);
+			program->name, args->servername, xprt);
 
 	err = rpciod_up();
 	if (err)
@@ -146,7 +147,11 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 	err = -EINVAL;
 	if (!xprt)
 		goto out_no_xprt;
-	if (vers >= program->nrvers || !(version = program->version[vers]))
+
+	if (args->version >= program->nrvers)
+		goto out_err;
+	version = program->version[args->version];
+	if (version == NULL)
 		goto out_err;
 
 	err = -ENOMEM;
@@ -158,12 +163,12 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 	clnt->cl_server = clnt->cl_inline_name;
 	if (len > sizeof(clnt->cl_inline_name)) {
 		char *buf = kmalloc(len, GFP_KERNEL);
-		if (buf != 0)
+		if (buf != NULL)
 			clnt->cl_server = buf;
 		else
 			len = sizeof(clnt->cl_inline_name);
 	}
-	strlcpy(clnt->cl_server, servname, len);
+	strlcpy(clnt->cl_server, args->servername, len);
 
 	clnt->cl_xprt     = xprt;
 	clnt->cl_procinfo = version->procs;
@@ -192,10 +197,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
 	if (err < 0)
 		goto out_no_path;
 
-	auth = rpcauth_create(flavor, clnt);
+	auth = rpcauth_create(args->authflavor, clnt);
 	if (IS_ERR(auth)) {
 		printk(KERN_INFO "RPC: Couldn't create auth handle (flavor %u)\n",
-				flavor);
+				args->authflavor);
 		err = PTR_ERR(auth);
 		goto out_no_auth;
 	}
@@ -297,8 +302,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 	if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT)
 		xprt->resvport = 0;
 
-	clnt = rpc_new_client(xprt, args->servername, args->program,
-				args->version, args->authflavor);
+	clnt = rpc_new_client(args, xprt);
 	if (IS_ERR(clnt))
 		return clnt;
 

From 2881ae74e68ecfe3b32a90936e5d93a9ba598c3a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Dec 2007 16:03:54 -0500
Subject: [PATCH 084/118] SUNRPC: Clean up the transport timeout initialisation

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h     |  5 -----
 net/sunrpc/xprt.c               | 17 -----------------
 net/sunrpc/xprtrdma/transport.c |  7 ++++++-
 net/sunrpc/xprtsock.c           | 29 +++++++++++++++++++++--------
 4 files changed, 27 insertions(+), 31 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 2554cd2b016fb..a00d4a4ba6ee8 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -202,11 +202,6 @@ struct xprt_class {
 	char			name[32];
 };
 
-/*
- * Transport operations used by ULPs
- */
-void			xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr);
-
 /*
  * Generic internal transport functions
  */
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 7520c6623c46a..2bc99569c5866 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -977,23 +977,6 @@ void xprt_release(struct rpc_task *task)
 	spin_unlock(&xprt->reserve_lock);
 }
 
-/**
- * xprt_set_timeout - set constant RPC timeout
- * @to: RPC timeout parameters to set up
- * @retr: number of retries
- * @incr: amount of increase after each retry
- *
- */
-void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long incr)
-{
-	to->to_initval   =
-	to->to_increment = incr;
-	to->to_maxval    = to->to_initval + (incr * retr);
-	to->to_retries   = retr;
-	to->to_exponential = 0;
-}
-EXPORT_SYMBOL_GPL(xprt_set_timeout);
-
 /**
  * xprt_create_transport - create an RPC transport
  * @args: rpc transport creation arguments
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 73033d824eed4..39f10016c86b8 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -289,6 +289,11 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
 	module_put(THIS_MODULE);
 }
 
+static const struct rpc_timeout xprt_rdma_default_timeout = {
+	.to_initval = 60 * HZ,
+	.to_maxval = 60 * HZ,
+};
+
 /**
  * xprt_setup_rdma - Set up transport to use RDMA
  *
@@ -327,7 +332,7 @@ xprt_setup_rdma(struct xprt_create *args)
 	}
 
 	/* 60 second timeout, no retries */
-	xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ);
+	memcpy(&xprt->timeout, &xprt_rdma_default_timeout, sizeof(xprt->timeout));
 	xprt->bind_timeout = (60U * HZ);
 	xprt->connect_timeout = (60U * HZ);
 	xprt->reestablish_timeout = (5U * HZ);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a4cfdc5b2648f..d7b07ac5b710c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1895,6 +1895,13 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
 	return xprt;
 }
 
+static const struct rpc_timeout xs_udp_default_timeout = {
+	.to_initval = 5 * HZ,
+	.to_maxval = 30 * HZ,
+	.to_increment = 5 * HZ,
+	.to_retries = 5,
+};
+
 /**
  * xs_setup_udp - Set up transport to use a UDP socket
  * @args: rpc transport creation arguments
@@ -1905,6 +1912,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
+	const struct rpc_timeout *timeo = &xs_udp_default_timeout;
 
 	xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -1923,10 +1931,9 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 	xprt->ops = &xs_udp_ops;
 
-	if (args->timeout)
-		xprt->timeout = *args->timeout;
-	else
-		xprt_set_timeout(&xprt->timeout, 5, 5 * HZ);
+	if (args->timeout != NULL)
+		timeo = args->timeout;
+	memcpy(&xprt->timeout, timeo, sizeof(xprt->timeout));
 
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -1961,6 +1968,12 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	return ERR_PTR(-EINVAL);
 }
 
+static const struct rpc_timeout xs_tcp_default_timeout = {
+	.to_initval = 60 * HZ,
+	.to_maxval = 60 * HZ,
+	.to_retries = 2,
+};
+
 /**
  * xs_setup_tcp - Set up transport to use a TCP socket
  * @args: rpc transport creation arguments
@@ -1971,6 +1984,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
+	const struct rpc_timeout *timeo = &xs_tcp_default_timeout;
 
 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -1988,10 +2002,9 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 
 	xprt->ops = &xs_tcp_ops;
 
-	if (args->timeout)
-		xprt->timeout = *args->timeout;
-	else
-		xprt_set_timeout(&xprt->timeout, 2, 60 * HZ);
+	if (args->timeout != NULL)
+		timeo = args->timeout;
+	memcpy(&xprt->timeout, timeo, sizeof(xprt->timeout));
 
 	switch (addr->sa_family) {
 	case AF_INET:

From ba7392bb37cb12781890f45d7ddee1618e33a036 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Dec 2007 16:03:55 -0500
Subject: [PATCH 085/118] SUNRPC: Add support for per-client timeout values

In order to be able to support setting the timeo and retrans parameters on
a per-mountpoint basis, we move the rpc_timeout structure into the
rpc_clnt.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h     |  4 +++-
 include/linux/sunrpc/xprt.h     |  3 +--
 net/sunrpc/clnt.c               | 12 +++++++++---
 net/sunrpc/xprt.c               | 11 ++++++-----
 net/sunrpc/xprtrdma/transport.c |  2 +-
 net/sunrpc/xprtsock.c           | 11 ++---------
 6 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 4fc268ab6f3e6..51a338189a239 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -46,6 +46,7 @@ struct rpc_clnt {
 				cl_autobind : 1;/* use getport() */
 
 	struct rpc_rtt *	cl_rtt;		/* RTO estimator data */
+	const struct rpc_timeout *cl_timeout;	/* Timeout strategy */
 
 	int			cl_nodelen;	/* nodename length */
 	char 			cl_nodename[UNX_MAXNODENAME];
@@ -54,6 +55,7 @@ struct rpc_clnt {
 	struct dentry *		cl_dentry;	/* inode */
 	struct rpc_clnt *	cl_parent;	/* Points to parent of clones */
 	struct rpc_rtt		cl_rtt_default;
+	struct rpc_timeout	cl_timeout_default;
 	struct rpc_program *	cl_program;
 	char			cl_inline_name[32];
 };
@@ -99,7 +101,7 @@ struct rpc_create_args {
 	struct sockaddr		*address;
 	size_t			addrsize;
 	struct sockaddr		*saddress;
-	struct rpc_timeout	*timeout;
+	const struct rpc_timeout *timeout;
 	char			*servername;
 	struct rpc_program	*program;
 	u32			version;
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a00d4a4ba6ee8..c3364d88d83ed 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -120,7 +120,7 @@ struct rpc_xprt {
 	struct kref		kref;		/* Reference count */
 	struct rpc_xprt_ops *	ops;		/* transport methods */
 
-	struct rpc_timeout	timeout;	/* timeout parms */
+	const struct rpc_timeout *timeout;	/* timeout parms */
 	struct sockaddr_storage	addr;		/* server address */
 	size_t			addrlen;	/* size of server address */
 	int			prot;		/* IP protocol */
@@ -191,7 +191,6 @@ struct xprt_create {
 	struct sockaddr *	srcaddr;	/* optional local address */
 	struct sockaddr *	dstaddr;	/* remote peer address */
 	size_t			addrlen;
-	struct rpc_timeout *	timeout;	/* optional timeout parameters */
 };
 
 struct xprt_class {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6a6b96e8dcb27..a3c00da9ce215 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -188,8 +188,15 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru
 	if (!xprt_bound(clnt->cl_xprt))
 		clnt->cl_autobind = 1;
 
+	clnt->cl_timeout = xprt->timeout;
+	if (args->timeout != NULL) {
+		memcpy(&clnt->cl_timeout_default, args->timeout,
+				sizeof(clnt->cl_timeout_default));
+		clnt->cl_timeout = &clnt->cl_timeout_default;
+	}
+
 	clnt->cl_rtt = &clnt->cl_rtt_default;
-	rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval);
+	rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
 
 	kref_init(&clnt->cl_kref);
 
@@ -251,7 +258,6 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 		.srcaddr = args->saddress,
 		.dstaddr = args->address,
 		.addrlen = args->addrsize,
-		.timeout = args->timeout
 	};
 	char servername[48];
 
@@ -348,7 +354,7 @@ rpc_clone_client(struct rpc_clnt *clnt)
 	new->cl_autobind = 0;
 	INIT_LIST_HEAD(&new->cl_tasks);
 	spin_lock_init(&new->cl_lock);
-	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
+	rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
 	new->cl_metrics = rpc_alloc_iostats(clnt);
 	if (new->cl_metrics == NULL)
 		goto out_no_stats;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2bc99569c5866..cfcade906a56b 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -501,9 +501,10 @@ EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def);
 void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
 {
 	int timer = task->tk_msg.rpc_proc->p_timer;
-	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
+	struct rpc_clnt *clnt = task->tk_client;
+	struct rpc_rtt *rtt = clnt->cl_rtt;
 	struct rpc_rqst *req = task->tk_rqstp;
-	unsigned long max_timeout = req->rq_xprt->timeout.to_maxval;
+	unsigned long max_timeout = clnt->cl_timeout->to_maxval;
 
 	task->tk_timeout = rpc_calc_rto(rtt, timer);
 	task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
@@ -514,7 +515,7 @@ EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt);
 
 static void xprt_reset_majortimeo(struct rpc_rqst *req)
 {
-	struct rpc_timeout *to = &req->rq_xprt->timeout;
+	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
 
 	req->rq_majortimeo = req->rq_timeout;
 	if (to->to_exponential)
@@ -534,7 +535,7 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req)
 int xprt_adjust_timeout(struct rpc_rqst *req)
 {
 	struct rpc_xprt *xprt = req->rq_xprt;
-	struct rpc_timeout *to = &xprt->timeout;
+	const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
 	int status = 0;
 
 	if (time_before(jiffies, req->rq_majortimeo)) {
@@ -928,7 +929,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 {
 	struct rpc_rqst	*req = task->tk_rqstp;
 
-	req->rq_timeout = xprt->timeout.to_initval;
+	req->rq_timeout = task->tk_client->cl_timeout->to_initval;
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
 	req->rq_buffer  = NULL;
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 39f10016c86b8..d1389afc83422 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -332,7 +332,7 @@ xprt_setup_rdma(struct xprt_create *args)
 	}
 
 	/* 60 second timeout, no retries */
-	memcpy(&xprt->timeout, &xprt_rdma_default_timeout, sizeof(xprt->timeout));
+	xprt->timeout = &xprt_rdma_default_timeout;
 	xprt->bind_timeout = (60U * HZ);
 	xprt->connect_timeout = (60U * HZ);
 	xprt->reestablish_timeout = (5U * HZ);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d7b07ac5b710c..6ba329d339a28 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1912,7 +1912,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
-	const struct rpc_timeout *timeo = &xs_udp_default_timeout;
 
 	xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -1931,9 +1930,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 	xprt->ops = &xs_udp_ops;
 
-	if (args->timeout != NULL)
-		timeo = args->timeout;
-	memcpy(&xprt->timeout, timeo, sizeof(xprt->timeout));
+	xprt->timeout = &xs_udp_default_timeout;
 
 	switch (addr->sa_family) {
 	case AF_INET:
@@ -1984,7 +1981,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	struct sockaddr *addr = args->dstaddr;
 	struct rpc_xprt *xprt;
 	struct sock_xprt *transport;
-	const struct rpc_timeout *timeo = &xs_tcp_default_timeout;
 
 	xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
 	if (IS_ERR(xprt))
@@ -2001,10 +1997,7 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	xprt->idle_timeout = XS_IDLE_DISC_TO;
 
 	xprt->ops = &xs_tcp_ops;
-
-	if (args->timeout != NULL)
-		timeo = args->timeout;
-	memcpy(&xprt->timeout, timeo, sizeof(xprt->timeout));
+	xprt->timeout = &xs_tcp_default_timeout;
 
 	switch (addr->sa_family) {
 	case AF_INET:

From 7a3e3e18e40848b6f01d44407ce86b91b8535fbd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Dec 2007 16:03:57 -0500
Subject: [PATCH 086/118] NFS: Ensure that we respect NFS_MAX_TCP_TIMEOUT

It isn't sufficient just to limit timeout->to_initval, we also need to
limit to_maxval.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 906613362a568..59a6dccab548b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -387,12 +387,16 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 	switch (proto) {
 	case XPRT_TRANSPORT_TCP:
 	case XPRT_TRANSPORT_RDMA:
-		if (!to->to_initval)
+		if (to->to_initval == 0)
 			to->to_initval = 60 * HZ;
 		if (to->to_initval > NFS_MAX_TCP_TIMEOUT)
 			to->to_initval = NFS_MAX_TCP_TIMEOUT;
 		to->to_increment = to->to_initval;
 		to->to_maxval = to->to_initval + (to->to_increment * to->to_retries);
+		if (to->to_maxval > NFS_MAX_TCP_TIMEOUT)
+			to->to_maxval = NFS_MAX_TCP_TIMEOUT;
+		if (to->to_maxval < to->to_initval)
+			to->to_maxval = to->to_initval;
 		to->to_exponential = 0;
 		break;
 	case XPRT_TRANSPORT_UDP:

From 331702337f2b2e7cef40366ee207a25604df4671 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 20 Dec 2007 16:03:59 -0500
Subject: [PATCH 087/118] NFS: Support per-mountpoint timeout parameters.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 82 ++++++++++++++++++++++-----------------
 fs/nfs/super.c            |  4 +-
 include/linux/nfs_fs_sb.h |  2 -
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 59a6dccab548b..03d9bed7849a4 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -415,18 +415,16 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
  * Create an RPC client handle
  */
 static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
-						unsigned int timeo,
-						unsigned int retrans,
-						rpc_authflavor_t flavor,
-						int flags)
+				 const struct rpc_timeout *timeparms,
+				 rpc_authflavor_t flavor,
+				 int flags)
 {
-	struct rpc_timeout	timeparms;
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_create_args args = {
 		.protocol	= proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
-		.timeout	= &timeparms,
+		.timeout	= timeparms,
 		.servername	= clp->cl_hostname,
 		.program	= &nfs_program,
 		.version	= clp->rpc_ops->version,
@@ -437,10 +435,6 @@ static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
 	if (!IS_ERR(clp->cl_rpcclient))
 		return 0;
 
-	nfs_init_timeout_values(&timeparms, proto, timeo, retrans);
-	clp->retrans_timeo = timeparms.to_initval;
-	clp->retrans_count = timeparms.to_retries;
-
 	clnt = rpc_create(&args);
 	if (IS_ERR(clnt)) {
 		dprintk("%s: cannot create RPC client. Error = %ld\n",
@@ -515,7 +509,9 @@ static inline void nfs_init_server_aclclient(struct nfs_server *server)
 /*
  * Create a general RPC client
  */
-static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t pseudoflavour)
+static int nfs_init_server_rpcclient(struct nfs_server *server,
+		const struct rpc_timeout *timeo,
+		rpc_authflavor_t pseudoflavour)
 {
 	struct nfs_client *clp = server->nfs_client;
 
@@ -525,6 +521,11 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t
 		return PTR_ERR(server->client);
 	}
 
+	memcpy(&server->client->cl_timeout_default,
+			timeo,
+			sizeof(server->client->cl_timeout_default));
+	server->client->cl_timeout = &server->client->cl_timeout_default;
+
 	if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
 		struct rpc_auth *auth;
 
@@ -549,6 +550,7 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t
  * Initialise an NFS2 or NFS3 client
  */
 static int nfs_init_client(struct nfs_client *clp,
+			   const struct rpc_timeout *timeparms,
 			   const struct nfs_parsed_mount_data *data)
 {
 	int error;
@@ -564,7 +566,7 @@ static int nfs_init_client(struct nfs_client *clp,
 	 * - RFC 2623, sec 2.3.2
 	 */
 	error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
-				data->timeo, data->retrans, RPC_AUTH_UNIX, 0);
+				timeparms, RPC_AUTH_UNIX, 0);
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -588,6 +590,7 @@ static int nfs_init_server(struct nfs_server *server,
 		.addrlen = data->nfs_server.addrlen,
 		.rpc_ops = &nfs_v2_clientops,
 	};
+	struct rpc_timeout timeparms;
 	struct nfs_client *clp;
 	int error;
 
@@ -605,7 +608,9 @@ static int nfs_init_server(struct nfs_server *server,
 		return PTR_ERR(clp);
 	}
 
-	error = nfs_init_client(clp, data);
+	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+			data->timeo, data->retrans);
+	error = nfs_init_client(clp, &timeparms, data);
 	if (error < 0)
 		goto error;
 
@@ -629,7 +634,7 @@ static int nfs_init_server(struct nfs_server *server,
 	if (error < 0)
 		goto error;
 
-	error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
+	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
 	if (error < 0)
 		goto error;
 
@@ -889,7 +894,8 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
  * Initialise an NFS4 client record
  */
 static int nfs4_init_client(struct nfs_client *clp,
-		int proto, int timeo, int retrans,
+		int proto,
+		const struct rpc_timeout *timeparms,
 		const char *ip_addr,
 		rpc_authflavor_t authflavour)
 {
@@ -904,7 +910,7 @@ static int nfs4_init_client(struct nfs_client *clp,
 	/* Check NFS protocol revision and initialize RPC op vector */
 	clp->rpc_ops = &nfs_v4_clientops;
 
-	error = nfs_create_rpc_client(clp, proto, timeo, retrans, authflavour,
+	error = nfs_create_rpc_client(clp, proto, timeparms, authflavour,
 					RPC_CLNT_CREATE_DISCRTRY);
 	if (error < 0)
 		goto error;
@@ -936,7 +942,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		const size_t addrlen,
 		const char *ip_addr,
 		rpc_authflavor_t authflavour,
-		int proto, int timeo, int retrans)
+		int proto, const struct rpc_timeout *timeparms)
 {
 	struct nfs_client_initdata cl_init = {
 		.hostname = hostname,
@@ -955,7 +961,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		error = PTR_ERR(clp);
 		goto error;
 	}
-	error = nfs4_init_client(clp, proto, timeo, retrans, ip_addr, authflavour);
+	error = nfs4_init_client(clp, proto, timeparms, ip_addr, authflavour);
 	if (error < 0)
 		goto error_put;
 
@@ -976,10 +982,26 @@ static int nfs4_set_client(struct nfs_server *server,
 static int nfs4_init_server(struct nfs_server *server,
 		const struct nfs_parsed_mount_data *data)
 {
+	struct rpc_timeout timeparms;
 	int error;
 
 	dprintk("--> nfs4_init_server()\n");
 
+	nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+			data->timeo, data->retrans);
+
+	/* Get a client record */
+	error = nfs4_set_client(server,
+			data->nfs_server.hostname,
+			(const struct sockaddr *)&data->nfs_server.address,
+			data->nfs_server.addrlen,
+			data->client_address,
+			data->auth_flavors[0],
+			data->nfs_server.protocol,
+			&timeparms);
+	if (error < 0)
+		goto error;
+
 	/* Initialise the client representation from the mount data */
 	server->flags = data->flags & NFS_MOUNT_FLAGMASK;
 	server->caps |= NFS_CAP_ATOMIC_OPEN;
@@ -994,8 +1016,9 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
 
-	error = nfs_init_server_rpcclient(server, data->auth_flavors[0]);
+	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
 
+error:
 	/* Done */
 	dprintk("<-- nfs4_init_server() = %d\n", error);
 	return error;
@@ -1018,18 +1041,6 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
-	/* Get a client record */
-	error = nfs4_set_client(server,
-			data->nfs_server.hostname,
-			(struct sockaddr *)&data->nfs_server.address,
-			data->nfs_server.addrlen,
-			data->client_address,
-			data->auth_flavors[0],
-			data->nfs_server.protocol,
-			data->timeo, data->retrans);
-	if (error < 0)
-		goto error;
-
 	/* set up the general RPC client */
 	error = nfs4_init_server(server, data);
 	if (error < 0)
@@ -1103,8 +1114,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 				parent_client->cl_ipaddr,
 				data->authflavor,
 				parent_server->client->cl_xprt->prot,
-				parent_client->retrans_timeo,
-				parent_client->retrans_count);
+				parent_server->client->cl_timeout);
 	if (error < 0)
 		goto error;
 
@@ -1112,7 +1122,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
 	nfs_server_copy_userdata(server, parent_server);
 	server->caps |= NFS_CAP_ATOMIC_OPEN;
 
-	error = nfs_init_server_rpcclient(server, data->authflavor);
+	error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
 	if (error < 0)
 		goto error;
 
@@ -1181,7 +1191,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
 
 	server->fsid = fattr->fsid;
 
-	error = nfs_init_server_rpcclient(server, source->client->cl_auth->au_flavor);
+	error = nfs_init_server_rpcclient(server,
+			source->client->cl_timeout,
+			source->client->cl_auth->au_flavor);
 	if (error < 0)
 		goto out_free_server;
 	if (!IS_ERR(source->client_acl))
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3cbe32f3e88bc..0d1bc61d0b689 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -479,8 +479,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	}
 	seq_printf(m, ",proto=%s",
 		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
-	seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ);
-	seq_printf(m, ",retrans=%u", clp->retrans_count);
+	seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
+	seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
 }
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 912cfe84ea860..d15c9487b8f18 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -29,8 +29,6 @@ struct nfs_client {
 
 	struct rpc_clnt *	cl_rpcclient;
 	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
-	unsigned long		retrans_timeo;	/* retransmit timeout */
-	unsigned int		retrans_count;	/* number of retransmit tries */
 
 #ifdef CONFIG_NFS_V4
 	u64			cl_clientid;	/* constant */

From 59dca3b28cb915745019d4f4c27d97b6b6ab12c6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 3 Jan 2008 16:29:06 -0500
Subject: [PATCH 088/118] NFS: Fix the 'proto=' mount option

Currently, if you have a server mounted using networking protocol, you
cannot specify a different value using the 'proto=' option on another
mountpoint.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 20 +++++++++++++-------
 include/linux/nfs_fs_sb.h |  1 +
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 03d9bed7849a4..18fcb05a07075 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -100,6 +100,7 @@ struct nfs_client_initdata {
 	const struct sockaddr *addr;
 	size_t addrlen;
 	const struct nfs_rpc_ops *rpc_ops;
+	int proto;
 };
 
 /*
@@ -138,6 +139,8 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	INIT_LIST_HEAD(&clp->cl_superblocks);
 	clp->cl_rpcclient = ERR_PTR(-EINVAL);
 
+	clp->cl_proto = cl_init->proto;
+
 #ifdef CONFIG_NFS_V4
 	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
@@ -289,6 +292,9 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
 		if (clp->rpc_ops != data->rpc_ops)
 			continue;
 
+		if (clp->cl_proto != data->proto)
+			continue;
+
 		/* Match the full socket address */
 		if (memcmp(&clp->cl_addr, data->addr, sizeof(clp->cl_addr)) != 0)
 			continue;
@@ -414,14 +420,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
 /*
  * Create an RPC client handle
  */
-static int nfs_create_rpc_client(struct nfs_client *clp, int proto,
+static int nfs_create_rpc_client(struct nfs_client *clp,
 				 const struct rpc_timeout *timeparms,
 				 rpc_authflavor_t flavor,
 				 int flags)
 {
 	struct rpc_clnt		*clnt = NULL;
 	struct rpc_create_args args = {
-		.protocol	= proto,
+		.protocol	= clp->cl_proto,
 		.address	= (struct sockaddr *)&clp->cl_addr,
 		.addrsize	= clp->cl_addrlen,
 		.timeout	= timeparms,
@@ -565,8 +571,7 @@ static int nfs_init_client(struct nfs_client *clp,
 	 * Create a client RPC handle for doing FSSTAT with UNIX auth only
 	 * - RFC 2623, sec 2.3.2
 	 */
-	error = nfs_create_rpc_client(clp, data->nfs_server.protocol,
-				timeparms, RPC_AUTH_UNIX, 0);
+	error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 0);
 	if (error < 0)
 		goto error;
 	nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -589,6 +594,7 @@ static int nfs_init_server(struct nfs_server *server,
 		.addr = (const struct sockaddr *)&data->nfs_server.address,
 		.addrlen = data->nfs_server.addrlen,
 		.rpc_ops = &nfs_v2_clientops,
+		.proto = data->nfs_server.protocol,
 	};
 	struct rpc_timeout timeparms;
 	struct nfs_client *clp;
@@ -894,7 +900,6 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
  * Initialise an NFS4 client record
  */
 static int nfs4_init_client(struct nfs_client *clp,
-		int proto,
 		const struct rpc_timeout *timeparms,
 		const char *ip_addr,
 		rpc_authflavor_t authflavour)
@@ -910,7 +915,7 @@ static int nfs4_init_client(struct nfs_client *clp,
 	/* Check NFS protocol revision and initialize RPC op vector */
 	clp->rpc_ops = &nfs_v4_clientops;
 
-	error = nfs_create_rpc_client(clp, proto, timeparms, authflavour,
+	error = nfs_create_rpc_client(clp, timeparms, authflavour,
 					RPC_CLNT_CREATE_DISCRTRY);
 	if (error < 0)
 		goto error;
@@ -949,6 +954,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		.addr = addr,
 		.addrlen = addrlen,
 		.rpc_ops = &nfs_v4_clientops,
+		.proto = proto,
 	};
 	struct nfs_client *clp;
 	int error;
@@ -961,7 +967,7 @@ static int nfs4_set_client(struct nfs_server *server,
 		error = PTR_ERR(clp);
 		goto error;
 	}
-	error = nfs4_init_client(clp, proto, timeparms, ip_addr, authflavour);
+	error = nfs4_init_client(clp, timeparms, ip_addr, authflavour);
 	if (error < 0)
 		goto error_put;
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index d15c9487b8f18..b5ba5f79485d2 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -29,6 +29,7 @@ struct nfs_client {
 
 	struct rpc_clnt *	cl_rpcclient;
 	const struct nfs_rpc_ops *rpc_ops;	/* NFS protocol vector */
+	int			cl_proto;	/* Network transport protocol */
 
 #ifdef CONFIG_NFS_V4
 	u64			cl_clientid;	/* constant */

From 7df089952fca41cb336733e1167c0a25e5a025d8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:54:27 -0500
Subject: [PATCH 089/118] SUNRPC: Fix use of copy_to_user() in
 gss_pipe_upcall()

The gss_pipe_upcall() function expects the copy_to_user() function to
return a negative error value if the call fails, but copy_to_user()
returns an unsigned long number of bytes that couldn't be copied.

Can rpc_pipefs actually retry a partially completed upcall read?  If
not, then gss_pipe_upcall() should punt any partial read, just like the
upcall logic in net/sunrpc/cache.c.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/auth_gss/auth_gss.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 1f2d85e869c0a..6dac387922888 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -472,16 +472,15 @@ gss_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
 		char __user *dst, size_t buflen)
 {
 	char *data = (char *)msg->data + msg->copied;
-	ssize_t mlen = msg->len;
-	ssize_t left;
+	size_t mlen = min(msg->len, buflen);
+	unsigned long left;
 
-	if (mlen > buflen)
-		mlen = buflen;
 	left = copy_to_user(dst, data, mlen);
-	if (left < 0) {
-		msg->errno = left;
-		return left;
+	if (left == mlen) {
+		msg->errno = -EFAULT;
+		return -EFAULT;
 	}
+
 	mlen -= left;
 	msg->copied += mlen;
 	msg->errno = 0;

From 369af0f1166f7a637751110395496cee156b4297 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:54:35 -0500
Subject: [PATCH 090/118] NFS: Clean up fs/nfs/idmap.c

Clean up white space damage and use standard kernel coding conventions for
return statements.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 87 +++++++++++++++++++++++++-------------------------
 1 file changed, 44 insertions(+), 43 deletions(-)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index d11eb055265cb..c56fc7d5a46e2 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -72,39 +72,39 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
 		 &nfs_idmap_cache_timeout, 0644);
 
 struct idmap_hashent {
-	unsigned long ih_expires;
-	__u32 ih_id;
-	int ih_namelen;
-	char ih_name[IDMAP_NAMESZ];
+	unsigned long		ih_expires;
+	__u32			ih_id;
+	int			ih_namelen;
+	char			ih_name[IDMAP_NAMESZ];
 };
 
 struct idmap_hashtable {
-	__u8 h_type;
-	struct idmap_hashent h_entries[IDMAP_HASH_SZ];
+	__u8			h_type;
+	struct idmap_hashent	h_entries[IDMAP_HASH_SZ];
 };
 
 struct idmap {
-	struct dentry        *idmap_dentry;
-	wait_queue_head_t     idmap_wq;
-	struct idmap_msg      idmap_im;
-	struct mutex          idmap_lock;    /* Serializes upcalls */
-	struct mutex          idmap_im_lock; /* Protects the hashtable */
-	struct idmap_hashtable idmap_user_hash;
-	struct idmap_hashtable idmap_group_hash;
+	struct dentry		*idmap_dentry;
+	wait_queue_head_t	idmap_wq;
+	struct idmap_msg	idmap_im;
+	struct mutex		idmap_lock;	/* Serializes upcalls */
+	struct mutex		idmap_im_lock;	/* Protects the hashtable */
+	struct idmap_hashtable	idmap_user_hash;
+	struct idmap_hashtable	idmap_group_hash;
 };
 
-static ssize_t   idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
-		     char __user *, size_t);
-static ssize_t   idmap_pipe_downcall(struct file *, const char __user *,
-		     size_t);
-static void      idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
+static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *,
+				 char __user *, size_t);
+static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
+				   size_t);
+static void idmap_pipe_destroy_msg(struct rpc_pipe_msg *);
 
 static unsigned int fnvhash32(const void *, size_t);
 
 static struct rpc_pipe_ops idmap_upcall_ops = {
-        .upcall         = idmap_pipe_upcall,
-        .downcall       = idmap_pipe_downcall,
-        .destroy_msg    = idmap_pipe_destroy_msg,
+	.upcall		= idmap_pipe_upcall,
+	.downcall	= idmap_pipe_downcall,
+	.destroy_msg	= idmap_pipe_destroy_msg,
 };
 
 int
@@ -115,19 +115,20 @@ nfs_idmap_new(struct nfs_client *clp)
 
 	BUG_ON(clp->cl_idmap != NULL);
 
-        if ((idmap = kzalloc(sizeof(*idmap), GFP_KERNEL)) == NULL)
-                return -ENOMEM;
+	idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
+	if (idmap == NULL)
+		return -ENOMEM;
 
-        idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
-	    idmap, &idmap_upcall_ops, 0);
-        if (IS_ERR(idmap->idmap_dentry)) {
+	idmap->idmap_dentry = rpc_mkpipe(clp->cl_rpcclient->cl_dentry, "idmap",
+					 idmap, &idmap_upcall_ops, 0);
+	if (IS_ERR(idmap->idmap_dentry)) {
 		error = PTR_ERR(idmap->idmap_dentry);
 		kfree(idmap);
 		return error;
 	}
 
-        mutex_init(&idmap->idmap_lock);
-        mutex_init(&idmap->idmap_im_lock);
+	mutex_init(&idmap->idmap_lock);
+	mutex_init(&idmap->idmap_im_lock);
 	init_waitqueue_head(&idmap->idmap_wq);
 	idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER;
 	idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP;
@@ -285,7 +286,7 @@ nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h,
 	memset(im, 0, sizeof(*im));
 	mutex_unlock(&idmap->idmap_im_lock);
 	mutex_unlock(&idmap->idmap_lock);
-	return (ret);
+	return ret;
 }
 
 /*
@@ -354,16 +355,16 @@ nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h,
 /* RPC pipefs upcall/downcall routines */
 static ssize_t
 idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
-    char __user *dst, size_t buflen)
+		  char __user *dst, size_t buflen)
 {
-        char *data = (char *)msg->data + msg->copied;
-        ssize_t mlen = msg->len - msg->copied;
-        ssize_t left;
+	char *data = (char *)msg->data + msg->copied;
+	ssize_t mlen = msg->len - msg->copied;
+	ssize_t left;
 
-        if (mlen > buflen)
-                mlen = buflen;
+	if (mlen > buflen)
+		mlen = buflen;
 
-        left = copy_to_user(dst, data, mlen);
+	left = copy_to_user(dst, data, mlen);
 	if (left < 0) {
 		msg->errno = left;
 		return left;
@@ -371,13 +372,13 @@ idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
 	mlen -= left;
 	msg->copied += mlen;
 	msg->errno = 0;
-        return mlen;
+	return mlen;
 }
 
 static ssize_t
 idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
-        struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
+	struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
 	struct idmap *idmap = (struct idmap *)rpci->private;
 	struct idmap_msg im_in, *im = &idmap->idmap_im;
 	struct idmap_hashtable *h;
@@ -385,11 +386,11 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	int namelen_in;
 	int ret;
 
-        if (mlen != sizeof(im_in))
-                return (-ENOSPC);
+	if (mlen != sizeof(im_in))
+		return -ENOSPC;
 
-        if (copy_from_user(&im_in, src, mlen) != 0)
-		return (-EFAULT);
+	if (copy_from_user(&im_in, src, mlen) != 0)
+		return -EFAULT;
 
 	mutex_lock(&idmap->idmap_im_lock);
 
@@ -487,7 +488,7 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
 		hash ^= (unsigned int)*p;
 	}
 
-	return (hash);
+	return hash;
 }
 
 int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid)

From a661b77fc12a172edea4b709e37f8cd58a6bd500 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:54:42 -0500
Subject: [PATCH 091/118] NFS: Fix use of copy_to_user() in idmap_pipe_upcall

The idmap_pipe_upcall() function expects the copy_to_user() function to
return a negative error value if the call fails, but copy_to_user()
returns an unsigned long number of bytes that couldn't be copied.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index c56fc7d5a46e2..d93e071b900c8 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -358,17 +358,15 @@ idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg,
 		  char __user *dst, size_t buflen)
 {
 	char *data = (char *)msg->data + msg->copied;
-	ssize_t mlen = msg->len - msg->copied;
-	ssize_t left;
-
-	if (mlen > buflen)
-		mlen = buflen;
+	size_t mlen = min(msg->len, buflen);
+	unsigned long left;
 
 	left = copy_to_user(dst, data, mlen);
-	if (left < 0) {
-		msg->errno = left;
-		return left;
+	if (left == mlen) {
+		msg->errno = -EFAULT;
+		return -EFAULT;
 	}
+
 	mlen -= left;
 	msg->copied += mlen;
 	msg->errno = 0;

From d24aae41b4d4141d4f3cffdbf4c31d85637ba691 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:54:49 -0500
Subject: [PATCH 092/118] NFS: Use size_t for storing name lengths

Clean up: always use the same type when handling buffer lengths.  As a
bonus, this prevents a mixed sign comparison in idmap_lookup_name.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/idmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index d93e071b900c8..8ae5dba2d4e59 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -74,7 +74,7 @@ module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
 struct idmap_hashent {
 	unsigned long		ih_expires;
 	__u32			ih_id;
-	int			ih_namelen;
+	size_t			ih_namelen;
 	char			ih_name[IDMAP_NAMESZ];
 };
 
@@ -193,7 +193,7 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
  * pretty trivial.
  */
 static inline struct idmap_hashent *
-idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len)
+idmap_alloc_name(struct idmap_hashtable *h, char *name, size_t len)
 {
 	return idmap_name_hash(h, name, len);
 }
@@ -381,7 +381,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	struct idmap_msg im_in, *im = &idmap->idmap_im;
 	struct idmap_hashtable *h;
 	struct idmap_hashent *he = NULL;
-	int namelen_in;
+	size_t namelen_in;
 	int ret;
 
 	if (mlen != sizeof(im_in))

From bf4285e75c3272ad9bfdeb886d247962bb2985f8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:54:57 -0500
Subject: [PATCH 093/118] NFS: Fix minor mixed sign comparison in NFS client's
 write logic

Clean up: PAGE_CACHE_SIZE is unsigned, and nfs_pageio_init() takes a size_t.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index fbd64f2fa7f9d..5ac5b27b639a8 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -939,7 +939,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 				  struct inode *inode, int ioflags)
 {
-	int wsize = NFS_SERVER(inode)->wsize;
+	size_t wsize = NFS_SERVER(inode)->wsize;
 
 	if (wsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);

From 3d509e5454a0a5ac88bf3191ab65d85952c1de31 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:55:04 -0500
Subject: [PATCH 094/118] NFS: nfs_write_end clean up

Clean up: commit 4899f9c8 added nfs_write_end(), which introduces a
conditional expression that returns an unsigned integer in one arm and
a signed integer in the other.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 4560fc2ddb4af..ef57a5ae59046 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -349,7 +349,9 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
 	unlock_page(page);
 	page_cache_release(page);
 
-	return status < 0 ? status : copied;
+	if (status < 0)
+		return status;
+	return copied;
 }
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)

From cab6fc1b77c3ec4471d7d54ff6db9ad2dd59c2f5 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Thu, 20 Dec 2007 14:55:11 -0500
Subject: [PATCH 095/118] lockd: Eliminate harmless mixed sign comparison in
 nlmdbg_cookie2a()

The cookie->len field is unsigned, so the loop index variable in
nlmdbg_cookie2a() should also be unsigned.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/xdr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 633653bff9440..3e459e18cc31b 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -612,8 +612,7 @@ const char *nlmdbg_cookie2a(const struct nlm_cookie *cookie)
 	 * called with BKL held.
 	 */
 	static char buf[2*NLM_MAXCOOKIELEN+1];
-	int i;
-	int len = sizeof(buf);
+	unsigned int i, len = sizeof(buf);
 	char *p = buf;
 
 	len--;	/* allow for trailing \0 */

From b454ae906085cf7774fb4756746680c9b03b6f84 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 7 Jan 2008 18:34:48 -0500
Subject: [PATCH 096/118] SUNRPC: fewer conditionals in the format_ip_address
 routines

Clean up: have the set up routines explicitly pass the strings to be used
for the transport name and NETID.  This removes a number of conditionals
and dependencies on rpc_xprt.prot, which is overloaded.

Tighten up type checking on the address_strings array while we're at it.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h |  2 +-
 include/linux/sunrpc/xprt.h |  2 +-
 net/sunrpc/clnt.c           |  3 +-
 net/sunrpc/rpcb_clnt.c      |  2 +-
 net/sunrpc/xprtsock.c       | 56 +++++++++++++++----------------------
 5 files changed, 27 insertions(+), 38 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 51a338189a239..5a13da2573b05 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -143,7 +143,7 @@ void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
 void		rpc_force_rebind(struct rpc_clnt *);
 size_t		rpc_peeraddr(struct rpc_clnt *, struct sockaddr *, size_t);
-char *		rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
+const char	*rpc_peeraddr2str(struct rpc_clnt *, enum rpc_display_format_t);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_CLNT_H */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index c3364d88d83ed..b3ff9a815e6fe 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -183,7 +183,7 @@ struct rpc_xprt {
 					bklog_u;	/* backlog queue utilization */
 	} stat;
 
-	char *			address_strings[RPC_DISPLAY_MAX];
+	const char		*address_strings[RPC_DISPLAY_MAX];
 };
 
 struct xprt_create {
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index a3c00da9ce215..e775ca793249a 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -679,7 +679,8 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr);
  * @format: address format
  *
  */
-char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
+const char *rpc_peeraddr2str(struct rpc_clnt *clnt,
+			     enum rpc_display_format_t format)
 {
 	struct rpc_xprt *xprt = clnt->cl_xprt;
 
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index f494e58910ec0..b60fa92321a56 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -358,7 +358,7 @@ void rpcb_getport_async(struct rpc_task *task)
 	map->r_prot = xprt->prot;
 	map->r_port = 0;
 	map->r_xprt = xprt_get(xprt);
-	map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+	map->r_netid = (char *)rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
 	memcpy(map->r_addr,
 	       rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
 	       sizeof(map->r_addr));
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 6ba329d339a28..b9b94f49c6202 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -280,7 +280,9 @@ static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt)
 	return (struct sockaddr_in6 *) &xprt->addr;
 }
 
-static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
+static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt,
+					  const char *protocol,
+					  const char *netid)
 {
 	struct sockaddr_in *addr = xs_addr_in(xprt);
 	char *buf;
@@ -299,21 +301,14 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
 	}
 	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
 
-	buf = kzalloc(8, GFP_KERNEL);
-	if (buf) {
-		if (xprt->prot == IPPROTO_UDP)
-			snprintf(buf, 8, "udp");
-		else
-			snprintf(buf, 8, "tcp");
-	}
-	xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
+	xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
 
 	buf = kzalloc(48, GFP_KERNEL);
 	if (buf) {
 		snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s",
 			NIPQUAD(addr->sin_addr.s_addr),
 			ntohs(addr->sin_port),
-			xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+			protocol);
 	}
 	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
 
@@ -340,12 +335,12 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt)
 	}
 	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
 
-	xprt->address_strings[RPC_DISPLAY_NETID] =
-		kstrdup(xprt->prot == IPPROTO_UDP ?
-			RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL);
+	xprt->address_strings[RPC_DISPLAY_NETID] = netid;
 }
 
-static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
+static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
+					  const char *protocol,
+					  const char *netid)
 {
 	struct sockaddr_in6 *addr = xs_addr_in6(xprt);
 	char *buf;
@@ -364,21 +359,14 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
 	}
 	xprt->address_strings[RPC_DISPLAY_PORT] = buf;
 
-	buf = kzalloc(8, GFP_KERNEL);
-	if (buf) {
-		if (xprt->prot == IPPROTO_UDP)
-			snprintf(buf, 8, "udp");
-		else
-			snprintf(buf, 8, "tcp");
-	}
-	xprt->address_strings[RPC_DISPLAY_PROTO] = buf;
+	xprt->address_strings[RPC_DISPLAY_PROTO] = protocol;
 
 	buf = kzalloc(64, GFP_KERNEL);
 	if (buf) {
 		snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s",
 				NIP6(addr->sin6_addr),
 				ntohs(addr->sin6_port),
-				xprt->prot == IPPROTO_UDP ? "udp" : "tcp");
+				protocol);
 	}
 	xprt->address_strings[RPC_DISPLAY_ALL] = buf;
 
@@ -405,17 +393,17 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt)
 	}
 	xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf;
 
-	xprt->address_strings[RPC_DISPLAY_NETID] =
-		kstrdup(xprt->prot == IPPROTO_UDP ?
-			RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL);
+	xprt->address_strings[RPC_DISPLAY_NETID] = netid;
 }
 
 static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 {
-	int i;
-
-	for (i = 0; i < RPC_DISPLAY_MAX; i++)
-		kfree(xprt->address_strings[i]);
+	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
+	kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
+	kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
+	kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
 }
 
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)
@@ -1939,7 +1927,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
 					xs_udp_connect_worker4);
-		xs_format_ipv4_peer_addresses(xprt);
+		xs_format_ipv4_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP);
 		break;
 	case AF_INET6:
 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
@@ -1947,7 +1935,7 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 
 		INIT_DELAYED_WORK(&transport->connect_worker,
 					xs_udp_connect_worker6);
-		xs_format_ipv6_peer_addresses(xprt);
+		xs_format_ipv6_peer_addresses(xprt, "udp", RPCBIND_NETID_UDP6);
 		break;
 	default:
 		kfree(xprt);
@@ -2005,14 +1993,14 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 			xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4);
-		xs_format_ipv4_peer_addresses(xprt);
+		xs_format_ipv4_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP);
 		break;
 	case AF_INET6:
 		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
 			xprt_set_bound(xprt);
 
 		INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6);
-		xs_format_ipv6_peer_addresses(xprt);
+		xs_format_ipv6_peer_addresses(xprt, "tcp", RPCBIND_NETID_TCP6);
 		break;
 	default:
 		kfree(xprt);

From 86d61d8638ddf9cdf87df26c7fa69b2804425fbe Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 7 Jan 2008 21:16:56 -0500
Subject: [PATCH 097/118] SUNRPC: Fix up constant string declarations in struct
 rpcbind_args

...and eliminate an unnecessary cast.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index b60fa92321a56..a6cd35883fa70 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -73,9 +73,9 @@ struct rpcbind_args {
 	u32			r_vers;
 	u32			r_prot;
 	unsigned short		r_port;
-	char *			r_netid;
-	char			r_addr[RPCBIND_MAXUADDRLEN];
-	char *			r_owner;
+	const char *		r_netid;
+	const char *		r_addr;
+	const char *		r_owner;
 };
 
 static struct rpc_procinfo rpcb_procedures2[];
@@ -358,10 +358,8 @@ void rpcb_getport_async(struct rpc_task *task)
 	map->r_prot = xprt->prot;
 	map->r_port = 0;
 	map->r_xprt = xprt_get(xprt);
-	map->r_netid = (char *)rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
-	memcpy(map->r_addr,
-	       rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR),
-	       sizeof(map->r_addr));
+	map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID);
+	map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR);
 	map->r_owner = RPCB_OWNER_STRING;	/* ignored for GETADDR */
 
 	child = rpcb_call_async(rpcb_clnt, map, xprt->bind_index);

From 52c4044d00fe703eb3fb18e0d8dfd1c196eb28be Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 11 Jan 2008 17:09:44 -0500
Subject: [PATCH 098/118] NLM: Introduce external nlm_host set-up and tear-down
 functions

We would like to remove the per-lock-operation nlm_lookup_host() call from
nlmclnt_proc().

The new architecture pins an nlm_host structure to each NFS client
superblock that has the "lock" mount option set.  The NFS client passes
in the pinned nlm_host structure during each call to nlmclnt_proc().  NFS
client unmount processing "puts" the nlm_host so it can be garbage-
collected later.

This patch introduces externally callable NLM functions that handle
mount-time nlm_host set up and tear-down.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntlock.c        | 48 ++++++++++++++++++++++++++++++++++++++
 include/linux/lockd/bind.h |  7 ++++++
 2 files changed, 55 insertions(+)

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index d070b18e539da..9a8f4f45c19eb 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -41,6 +41,54 @@ struct nlm_wait {
 
 static LIST_HEAD(nlm_blocked);
 
+/**
+ * nlmclnt_init - Set up per-NFS mount point lockd data structures
+ * @server_name: server's hostname
+ * @server_address: server's network address
+ * @server_addrlen: length of server's address
+ * @protocol: transport protocol lockd should use
+ * @nfs_version: NFS protocol version for this mount point
+ *
+ * Returns pointer to an appropriate nlm_host struct,
+ * or an ERR_PTR value.
+ */
+struct nlm_host *nlmclnt_init(const char *server_name,
+			      const struct sockaddr *server_address,
+			      size_t server_addrlen,
+			      unsigned short protocol, u32 nfs_version)
+{
+	struct nlm_host *host;
+	u32 nlm_version = (nfs_version == 2) ? 1 : 4;
+	int status;
+
+	status = lockd_up(protocol);
+	if (status < 0)
+		return ERR_PTR(status);
+
+	host = nlmclnt_lookup_host((struct sockaddr_in *)server_address,
+				   protocol, nlm_version,
+				   server_name, strlen(server_name));
+	if (host == NULL) {
+		lockd_down();
+		return ERR_PTR(-ENOLCK);
+	}
+
+	return host;
+}
+EXPORT_SYMBOL_GPL(nlmclnt_init);
+
+/**
+ * nlmclnt_done - Release resources allocated by nlmclnt_init()
+ * @host: nlm_host structure reserved by nlmclnt_init()
+ *
+ */
+void nlmclnt_done(struct nlm_host *host)
+{
+	nlm_release_host(host);
+	lockd_down();
+}
+EXPORT_SYMBOL_GPL(nlmclnt_done);
+
 /*
  * Queue up a lock for blocking so that the GRANTED request can see it
  */
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 6f1637c61e10e..ad5402f5456b6 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -35,6 +35,13 @@ extern struct nlmsvc_binding *	nlmsvc_ops;
 /*
  * Functions exported by the lockd module
  */
+extern struct nlm_host *nlmclnt_init(const char *server_name,
+					const struct sockaddr *server_address,
+					size_t server_addrlen,
+					unsigned short protocol,
+					u32 nfs_version);
+extern void	nlmclnt_done(struct nlm_host *host);
+
 extern int	nlmclnt_proc(struct inode *, int, struct file_lock *);
 extern int	lockd_up(int proto);
 extern void	lockd_down(void);

From 9289e7f91add1c09c3ec8571a2080f7507730b8d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 11 Jan 2008 17:09:52 -0500
Subject: [PATCH 099/118] NFS: Invoke nlmclnt_init during NFS mount processing

Cache an appropriate nlm_host structure in the NFS client's mount point
metadata for later use.

Note that there is no need to set NFS_MOUNT_NONLM in the error case -- if
nfs_start_lockd() returns a non-zero value, its callers ensure that the
mount request fails outright.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 32 +++++++++++++++++++-------------
 include/linux/nfs_fs_sb.h |  2 ++
 2 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 18fcb05a07075..0b3ce86f6fc95 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -458,7 +458,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
 static void nfs_destroy_server(struct nfs_server *server)
 {
 	if (!(server->flags & NFS_MOUNT_NONLM))
-		lockd_down();	/* release rpc.lockd */
+		nlmclnt_done(server->nlm_host);
 }
 
 /*
@@ -466,20 +466,26 @@ static void nfs_destroy_server(struct nfs_server *server)
  */
 static int nfs_start_lockd(struct nfs_server *server)
 {
-	int error = 0;
+	struct nlm_host *host;
+	struct nfs_client *clp = server->nfs_client;
+	u32 nfs_version = clp->rpc_ops->version;
+	unsigned short protocol = server->flags & NFS_MOUNT_TCP ?
+						IPPROTO_TCP : IPPROTO_UDP;
 
-	if (server->nfs_client->rpc_ops->version > 3)
-		goto out;
+	if (nfs_version > 3)
+		return 0;
 	if (server->flags & NFS_MOUNT_NONLM)
-		goto out;
-	error = lockd_up((server->flags & NFS_MOUNT_TCP) ?
-			IPPROTO_TCP : IPPROTO_UDP);
-	if (error < 0)
-		server->flags |= NFS_MOUNT_NONLM;
-	else
-		server->destroy = nfs_destroy_server;
-out:
-	return error;
+		return 0;
+
+	host = nlmclnt_init(clp->cl_hostname,
+			    (struct sockaddr *)&clp->cl_addr,
+			    clp->cl_addrlen, protocol, nfs_version);
+	if (IS_ERR(host))
+		return PTR_ERR(host);
+
+	server->nlm_host = host;
+	server->destroy = nfs_destroy_server;
+	return 0;
 }
 
 /*
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index b5ba5f79485d2..3423c6761bf7b 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -8,6 +8,7 @@
 #include <asm/atomic.h>
 
 struct nfs_iostats;
+struct nlm_host;
 
 /*
  * The nfs_client identifies our client state to the server.
@@ -80,6 +81,7 @@ struct nfs_server {
 	struct list_head	master_link;	/* link in master servers list */
 	struct rpc_clnt *	client;		/* RPC client handle */
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
+	struct nlm_host		*nlm_host;	/* NLM client handle */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
 	atomic_long_t		writeback;	/* number of writeback pages */

From 1093a60ef34bb12010fe7ea4b780bee1c57cfbbe Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 11 Jan 2008 17:09:59 -0500
Subject: [PATCH 100/118] NLM/NFS: Use cached nlm_host when calling
 nlmclnt_proc()

Now that each NFS mount point caches its own nlm_host structure, it can be
passed to nlmclnt_proc() for each lock request.  By pinning an nlm_host for
each mount point, we trade the overhead of looking up or creating a fresh
nlm_host struct during every NLM procedure call for a little extra memory.

We also restrict the nlmclnt_proc symbol to limit the use of this call to
in-tree modules.

Note that nlm_lookup_host() (just removed from the client's per-request
NLM processing) could also trigger an nlm_host garbage collection.  Now
client-side nlm_host garbage collection occurs only during NFS mount
processing.  Since the NFS client now holds a reference on these nlm_host
structures, they wouldn't have been affected by garbage collection
anyway.

Given that nlm_lookup_host() reorders the global nlm_host chain after
every successful lookup, and that a garbage collection could be triggered
during the call, we've removed a significant amount of per-NLM-request
CPU processing overhead.

Sidebar: there are only a few remaining references to the internals of
NFS inodes in the client-side NLM code.  The only references I found are
related to extracting or comparing the inode's file handle via NFS_FH().
One is in nlmclnt_grant(); the other is in nlmclnt_setlockargs().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c        | 33 ++++++++++-----------------------
 fs/nfs/nfs3proc.c          |  4 +++-
 fs/nfs/proc.c              |  4 +++-
 include/linux/lockd/bind.h |  3 ++-
 4 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a10343bed1607..b1a4dba443bcf 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -145,34 +145,21 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 	BUG_ON(req->a_args.lock.fl.fl_ops != NULL);
 }
 
-/*
- * This is the main entry point for the NLM client.
+/**
+ * nlmclnt_proc - Perform a single client-side lock request
+ * @host: address of a valid nlm_host context representing the NLM server
+ * @cmd: fcntl-style file lock operation to perform
+ * @fl: address of arguments for the lock operation
+ *
  */
-int
-nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
+int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 {
-	struct rpc_clnt		*client = NFS_CLIENT(inode);
-	struct sockaddr_in	addr;
-	struct nfs_server	*nfssrv = NFS_SERVER(inode);
-	struct nlm_host		*host;
 	struct nlm_rqst		*call;
 	sigset_t		oldset;
 	unsigned long		flags;
-	int			status, vers;
-
-	vers = (NFS_PROTO(inode)->version == 3) ? 4 : 1;
-	if (NFS_PROTO(inode)->version > 3) {
-		printk(KERN_NOTICE "NFSv4 file locking not implemented!\n");
-		return -ENOLCK;
-	}
-
-	rpc_peeraddr(client, (struct sockaddr *) &addr, sizeof(addr));
-	host = nlmclnt_lookup_host(&addr, client->cl_xprt->prot, vers,
-				   nfssrv->nfs_client->cl_hostname,
-				   strlen(nfssrv->nfs_client->cl_hostname));
-	if (host == NULL)
-		return -ENOLCK;
+	int			status;
 
+	nlm_get_host(host);
 	call = nlm_alloc_call(host);
 	if (call == NULL)
 		return -ENOMEM;
@@ -219,7 +206,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 	dprintk("lockd: clnt proc returns %d\n", status);
 	return status;
 }
-EXPORT_SYMBOL(nlmclnt_proc);
+EXPORT_SYMBOL_GPL(nlmclnt_proc);
 
 /*
  * Allocate an NLM RPC call struct
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index e68580ebaa479..b353c1a05bfda 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -767,7 +767,9 @@ static void nfs3_proc_commit_setup(struct nfs_write_data *data, struct rpc_messa
 static int
 nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
+	struct inode *inode = filp->f_path.dentry->d_inode;
+
+	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
 }
 
 const struct nfs_rpc_ops nfs_v3_clientops = {
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index c9f46a24e75c7..5ccf7faee19cd 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -593,7 +593,9 @@ nfs_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
 static int
 nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
 {
-	return nlmclnt_proc(filp->f_path.dentry->d_inode, cmd, fl);
+	struct inode *inode = filp->f_path.dentry->d_inode;
+
+	return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
 }
 
 
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index ad5402f5456b6..73368075af032 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -42,7 +42,8 @@ extern struct nlm_host *nlmclnt_init(const char *server_name,
 					u32 nfs_version);
 extern void	nlmclnt_done(struct nlm_host *host);
 
-extern int	nlmclnt_proc(struct inode *, int, struct file_lock *);
+extern int	nlmclnt_proc(struct nlm_host *host, int cmd,
+					struct file_lock *fl);
 extern int	lockd_up(int proto);
 extern void	lockd_down(void);
 

From 883bb163f84e0a54b29846c61621f52db3f27393 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Tue, 15 Jan 2008 16:04:20 -0500
Subject: [PATCH 101/118] NLM: Introduce an arguments structure for
 nlmclnt_init()

Clean up: pass 5 arguments to nlmclnt_init() in a structure similar to the
new nfs_client_initdata structure.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/lockd/clntlock.c        | 22 ++++++++--------------
 fs/nfs/client.c            | 17 ++++++++++-------
 include/linux/lockd/bind.h | 19 ++++++++++++++-----
 3 files changed, 32 insertions(+), 26 deletions(-)

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 9a8f4f45c19eb..0b45fd3a4bfd6 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -43,31 +43,25 @@ static LIST_HEAD(nlm_blocked);
 
 /**
  * nlmclnt_init - Set up per-NFS mount point lockd data structures
- * @server_name: server's hostname
- * @server_address: server's network address
- * @server_addrlen: length of server's address
- * @protocol: transport protocol lockd should use
- * @nfs_version: NFS protocol version for this mount point
+ * @nlm_init: pointer to arguments structure
  *
  * Returns pointer to an appropriate nlm_host struct,
  * or an ERR_PTR value.
  */
-struct nlm_host *nlmclnt_init(const char *server_name,
-			      const struct sockaddr *server_address,
-			      size_t server_addrlen,
-			      unsigned short protocol, u32 nfs_version)
+struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 {
 	struct nlm_host *host;
-	u32 nlm_version = (nfs_version == 2) ? 1 : 4;
+	u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4;
 	int status;
 
-	status = lockd_up(protocol);
+	status = lockd_up(nlm_init->protocol);
 	if (status < 0)
 		return ERR_PTR(status);
 
-	host = nlmclnt_lookup_host((struct sockaddr_in *)server_address,
-				   protocol, nlm_version,
-				   server_name, strlen(server_name));
+	host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address,
+				   nlm_init->protocol, nlm_version,
+				   nlm_init->hostname,
+				   strlen(nlm_init->hostname));
 	if (host == NULL) {
 		lockd_down();
 		return ERR_PTR(-ENOLCK);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0b3ce86f6fc95..7a15832369e90 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -468,18 +468,21 @@ static int nfs_start_lockd(struct nfs_server *server)
 {
 	struct nlm_host *host;
 	struct nfs_client *clp = server->nfs_client;
-	u32 nfs_version = clp->rpc_ops->version;
-	unsigned short protocol = server->flags & NFS_MOUNT_TCP ?
-						IPPROTO_TCP : IPPROTO_UDP;
+	struct nlmclnt_initdata nlm_init = {
+		.hostname	= clp->cl_hostname,
+		.address	= (struct sockaddr *)&clp->cl_addr,
+		.addrlen	= clp->cl_addrlen,
+		.protocol	= server->flags & NFS_MOUNT_TCP ?
+						IPPROTO_TCP : IPPROTO_UDP,
+		.nfs_version	= clp->rpc_ops->version,
+	};
 
-	if (nfs_version > 3)
+	if (nlm_init.nfs_version > 3)
 		return 0;
 	if (server->flags & NFS_MOUNT_NONLM)
 		return 0;
 
-	host = nlmclnt_init(clp->cl_hostname,
-			    (struct sockaddr *)&clp->cl_addr,
-			    clp->cl_addrlen, protocol, nfs_version);
+	host = nlmclnt_init(&nlm_init);
 	if (IS_ERR(host))
 		return PTR_ERR(host);
 
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 73368075af032..3d25bcd139d16 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -32,14 +32,23 @@ struct nlmsvc_binding {
 
 extern struct nlmsvc_binding *	nlmsvc_ops;
 
+/*
+ * Similar to nfs_client_initdata, but without the NFS-specific
+ * rpc_ops field.
+ */
+struct nlmclnt_initdata {
+	const char		*hostname;
+	const struct sockaddr	*address;
+	size_t			addrlen;
+	unsigned short		protocol;
+	u32			nfs_version;
+};
+
 /*
  * Functions exported by the lockd module
  */
-extern struct nlm_host *nlmclnt_init(const char *server_name,
-					const struct sockaddr *server_address,
-					size_t server_addrlen,
-					unsigned short protocol,
-					u32 nfs_version);
+
+extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
 extern void	nlmclnt_done(struct nlm_host *host);
 
 extern int	nlmclnt_proc(struct nlm_host *host, int cmd,

From 65fdf7d264213a9a8de44f9a20e002a26c267a76 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 11 Jan 2008 17:41:29 -0500
Subject: [PATCH 102/118] NLM: Fix a bogus 'return' in nlmclnt_rpc_release

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b1a4dba443bcf..b6b74a60e1ebb 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -244,7 +244,7 @@ void nlm_release_call(struct nlm_rqst *call)
 
 static void nlmclnt_rpc_release(void *data)
 {
-	return nlm_release_call(data);
+	nlm_release_call(data);
 }
 
 static int nlm_wait_on_grace(wait_queue_head_t *queue)

From c0e07cb68db353c0ffbb0f82401cf6d79c253aed Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jan 2008 12:32:05 -0500
Subject: [PATCH 103/118] NFS: NFS version number is unsigned

RPC protocol version numbers are unsigned.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_xdr.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d342ae5c76abe..f301d0b8babc9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -774,7 +774,7 @@ struct nfs_access_entry;
  * RPC procedure vector for NFSv2/NFSv3 demuxing
  */
 struct nfs_rpc_ops {
-	int	version;		/* Protocol version */
+	u32	version;		/* Protocol version */
 	struct dentry_operations *dentry_ops;
 	const struct inode_operations *dir_inode_ops;
 	const struct inode_operations *file_inode_ops;

From 33e01dc7f578813cda074ceaeaf68b0f3ffcc393 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jan 2008 12:32:20 -0500
Subject: [PATCH 104/118] SUNRPC: Clean up functions that free address_strings
 array

Clean up: document the rule (kfree) and the exceptions
(RPC_DISPLAY_PROTO and RPC_DISPLAY_NETID) when freeing the objects in
a transport's address_strings array.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/xprtrdma/transport.c | 16 ++++++++++------
 net/sunrpc/xprtsock.c           | 16 ++++++++++------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index d1389afc83422..02c522c17de59 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -212,12 +212,16 @@ xprt_rdma_format_addresses(struct rpc_xprt *xprt)
 static void
 xprt_rdma_free_addresses(struct rpc_xprt *xprt)
 {
-	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
-	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
-	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
-	kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
-	kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
-	kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
+	unsigned int i;
+
+	for (i = 0; i < RPC_DISPLAY_MAX; i++)
+		switch (i) {
+		case RPC_DISPLAY_PROTO:
+		case RPC_DISPLAY_NETID:
+			continue;
+		default:
+			kfree(xprt->address_strings[i]);
+		}
 }
 
 static void
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b9b94f49c6202..30e7ac243a90b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,12 +398,16 @@ static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt,
 
 static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 {
-	kfree(xprt->address_strings[RPC_DISPLAY_ADDR]);
-	kfree(xprt->address_strings[RPC_DISPLAY_PORT]);
-	kfree(xprt->address_strings[RPC_DISPLAY_ALL]);
-	kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]);
-	kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]);
-	kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]);
+	unsigned int i;
+
+	for (i = 0; i < RPC_DISPLAY_MAX; i++)
+		switch (i) {
+		case RPC_DISPLAY_PROTO:
+		case RPC_DISPLAY_NETID:
+			continue;
+		default:
+			kfree(xprt->address_strings[i]);
+		}
 }
 
 #define XS_SENDMSG_FLAGS	(MSG_DONTWAIT | MSG_NOSIGNAL)

From b91e101fca70319f9ca839311bceff5f44dfc1ed Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jan 2008 15:11:46 -0500
Subject: [PATCH 105/118] SUNRPC: rpcb_getport_sync() should use built-in
 hostname generator

rpc_create() can already fill in the hostname with a string representation
of the server's IP address, so remove redundant logic in in
rpcb_getport_sync() that does that.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index a6cd35883fa70..8962ac6b4c119 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -220,14 +220,12 @@ int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
 		.rpc_resp	= &map.r_port,
 	};
 	struct rpc_clnt	*rpcb_clnt;
-	char hostname[40];
 	int status;
 
 	dprintk("RPC:       %s(" NIPQUAD_FMT ", %u, %u, %d)\n",
 		__FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
-	sprintf(hostname, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr));
-	rpcb_clnt = rpcb_create(hostname, (struct sockaddr *)sin,
+	rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
 				sizeof(sin), prot, 2, 0);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);

From f1ec08cb9492cab579f85f9d937c79788b1dfde3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jan 2008 15:11:53 -0500
Subject: [PATCH 106/118] SUNRPC: Use appropriate argument types in rpcb client

Clean up: Follow recommendations of Chapter 5 of Documentation/CodingStyle
and use "u32" instead of "__u32" for types in definitions that are not
shared with user space.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/clnt.h | 2 +-
 net/sunrpc/rpcb_clnt.c      | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 5a13da2573b05..3e9addc741c1a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -125,7 +125,7 @@ void		rpc_shutdown_client(struct rpc_clnt *);
 void		rpc_release_client(struct rpc_clnt *);
 
 int		rpcb_register(u32, u32, int, unsigned short, int *);
-int		rpcb_getport_sync(struct sockaddr_in *, __u32, __u32, int);
+int		rpcb_getport_sync(struct sockaddr_in *, u32, u32, int);
 void		rpcb_getport_async(struct rpc_task *);
 
 void		rpc_call_start(struct rpc_task *);
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 8962ac6b4c119..77af989ecc9c0 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -205,8 +205,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
  *
  * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
  */
-int rpcb_getport_sync(struct sockaddr_in *sin, __u32 prog,
-		      __u32 vers, int prot)
+int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 {
 	struct rpcbind_args map = {
 		.r_prog		= prog,

From 67d60213627ec4391cc402bf95753eefde8522af Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jan 2008 15:12:01 -0500
Subject: [PATCH 107/118] SUNRPC: Clean up block comment preceding
 rpcb_getport_sync()

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 77af989ecc9c0..8cbdc0a2de463 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -200,10 +200,13 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay)
  * @vers: RPC version number to bind
  * @prot: transport protocol to use to make this request
  *
+ * Return value is the requested advertised port number,
+ * or a negative errno value.
+ *
  * Called from outside the RPC client in a synchronous task context.
  * Uses default timeout parameters specified by underlying transport.
  *
- * XXX: Needs to support IPv6, and rpcbind versions 3 and 4
+ * XXX: Needs to support IPv6
  */
 int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 {

From afc881124b8aff83c7a28269ef9d9cfce543256c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 14 Jan 2008 15:12:08 -0500
Subject: [PATCH 108/118] SUNRPC: rpcb_getport_sync() passes incorrect address
 size to rpc_create()

The variable "sin" is a pointer, so sizeof(sin) is the size of a pointer,
not the size of thing that sin points to.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/rpcb_clnt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 8cbdc0a2de463..fa5b8f202d5b3 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -228,7 +228,7 @@ int rpcb_getport_sync(struct sockaddr_in *sin, u32 prog, u32 vers, int prot)
 		__FUNCTION__, NIPQUAD(sin->sin_addr.s_addr), prog, vers, prot);
 
 	rpcb_clnt = rpcb_create(NULL, (struct sockaddr *)sin,
-				sizeof(sin), prot, 2, 0);
+				sizeof(*sin), prot, 2, 0);
 	if (IS_ERR(rpcb_clnt))
 		return PTR_ERR(rpcb_clnt);
 

From 34f5b4662bf4b54f22b32ce76ce70eccd7ebc68a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 15 Jan 2008 14:17:11 -0500
Subject: [PATCH 109/118] SUNRPC: Don't bother changing the sigmask for
 asynchronous RPC calls

The caller will never sleep in rpc_execute, so don't bother setting the
sigmask.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 net/sunrpc/clnt.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e775ca793249a..924916ceaa435 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -579,9 +579,12 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
 	}
 	atomic_inc(&task->tk_count);
 	/* Mask signals on synchronous RPC calls and RPCSEC_GSS upcalls */
-	rpc_task_sigmask(task, &oldset);
-	rpc_execute(task);
-	rpc_restore_sigmask(&oldset);
+	if (!RPC_IS_ASYNC(task)) {
+		rpc_task_sigmask(task, &oldset);
+		rpc_execute(task);
+		rpc_restore_sigmask(&oldset);
+	} else
+		rpc_execute(task);
 	ret = task;
 out:
 	return ret;

From f3c391e89c92651105364c6645244118ec9b3952 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 15 Jan 2008 14:17:12 -0500
Subject: [PATCH 110/118] NFS: Optimise away the sigmask code in aio/dio reads
 and writes

There are no interruptible waits for asynchronous RPC tasks, so we don't
need to wrap calls to rpc_run_task() with an
rpc_clnt_sigmask/rpc_clnt_unsigmask pair.

Instead we can wrap the wait_for_completion_interruptible() in
nfs_direct_wait(). This means that we completely optimise away sigmask
setting for the case of non-blocking aio/dio.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index eadd87f7159f1..f8e165c7d5a63 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -188,12 +188,17 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
 static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
 {
 	ssize_t result = -EIOCBQUEUED;
+	struct rpc_clnt *clnt;
+	sigset_t oldset;
 
 	/* Async requests don't wait here */
 	if (dreq->iocb)
 		goto out;
 
+	clnt = NFS_CLIENT(dreq->inode);
+	rpc_clnt_sigmask(clnt, &oldset);
 	result = wait_for_completion_interruptible(&dreq->completion);
+	rpc_clnt_sigunmask(clnt, &oldset);
 
 	if (!result)
 		result = dreq->error;
@@ -403,9 +408,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 			       unsigned long nr_segs, loff_t pos)
 {
 	ssize_t result = 0;
-	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
-	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 
 	dreq = nfs_direct_req_alloc();
@@ -417,11 +420,9 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	rpc_clnt_sigmask(clnt, &oldset);
 	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
 	if (!result)
 		result = nfs_direct_wait(dreq);
-	rpc_clnt_sigunmask(clnt, &oldset);
 	nfs_direct_req_release(dreq);
 
 	return result;
@@ -816,9 +817,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 				size_t count)
 {
 	ssize_t result = 0;
-	sigset_t oldset;
 	struct inode *inode = iocb->ki_filp->f_mapping->host;
-	struct rpc_clnt *clnt = NFS_CLIENT(inode);
 	struct nfs_direct_req *dreq;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	int sync = NFS_UNSTABLE;
@@ -836,11 +835,9 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	rpc_clnt_sigmask(clnt, &oldset);
 	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, sync);
 	if (!result)
 		result = nfs_direct_wait(dreq);
-	rpc_clnt_sigunmask(clnt, &oldset);
 	nfs_direct_req_release(dreq);
 
 	return result;

From 3d1c550874bcaf0d9b7fb66f601caed109074f4b Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Tue, 15 Jan 2008 16:43:19 -0500
Subject: [PATCH 111/118] nfs4: allow nfsv4 acls on non-regular-files

The rfc doesn't give any reason it shouldn't be possible to set an
attribute on a non-regular file.  And if the server supports it, then it
shouldn't be up to us to prevent it.

Thanks to Erez for the report and Trond for further analysis.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
Tested-by: Erez Zadok <ezk@cs.sunysb.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b3d4e8e5696a4..89efbcd6fd536 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3617,10 +3617,6 @@ int nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf,
 	if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0)
 		return -EOPNOTSUPP;
 
-	if (!S_ISREG(inode->i_mode) &&
-	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
-		return -EPERM;
-
 	return nfs4_proc_set_acl(inode, buf, buflen);
 }
 

From fc6014771bde8a215a9a4ea24b45f76afeb3c922 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Wed, 16 Jan 2008 16:38:10 -0500
Subject: [PATCH 112/118] NFS: Address memory leaks in the NFS client mount
 option parser

David Howells noticed that repeating the same mount option twice during an
NFS mount request can result in orphaned memory in certain cases.

Only the client_address and mount_server.hostname strings are initialized
in the mount parsing loop, so those appear to be the only two pointers that
might be written over by repeating a mount option.  The strings in the
nfs_server section of the nfs_parsed_mount_data structure are set only once
after the options are parsed, thus these are not susceptible to being
overwritten.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 0d1bc61d0b689..22c49c02897d3 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1006,12 +1006,14 @@ static int nfs_parse_mount_options(char *raw,
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
+			kfree(mnt->client_address);
 			mnt->client_address = string;
 			break;
 		case Opt_mounthost:
 			string = match_strdup(args);
 			if (string == NULL)
 				goto out_nomem;
+			kfree(mnt->mount_server.hostname);
 			mnt->mount_server.hostname = string;
 			break;
 		case Opt_mountaddr:

From 3a10c30acc4821ca000b52ed0edafd0d3bf26a52 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 23 Jan 2008 08:58:59 +0200
Subject: [PATCH 113/118] nfs: obliterate NFS_FLAGS macro

use NFS_I(inode)->flags instead

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c           | 8 ++++----
 fs/nfs/inode.c         | 6 +++---
 fs/nfs/read.c          | 2 +-
 include/linux/nfs_fs.h | 5 ++---
 4 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 72d141a0dbd80..c578d942f0004 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -192,7 +192,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page)
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			desc->plus = 0;
 			goto again;
 		}
@@ -577,7 +577,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
-			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			nfs_zap_caches(inode);
 			desc->plus = 0;
 			desc->entry->eof = 0;
@@ -1760,7 +1760,7 @@ static void __nfs_access_zap_cache(struct inode *inode)
 void nfs_access_zap_cache(struct inode *inode)
 {
 	/* Remove from global LRU init */
-	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
 		spin_lock(&nfs_access_lru_lock);
 		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
 		spin_unlock(&nfs_access_lru_lock);
@@ -1874,7 +1874,7 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s
 	smp_mb__after_atomic_inc();
 
 	/* Add inode to global LRU list */
-	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_FLAGS(inode))) {
+	if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
 		spin_lock(&nfs_access_lru_lock);
 		list_add_tail(&NFS_I(inode)->access_cache_inode_lru, &nfs_access_lru_list);
 		spin_unlock(&nfs_access_lru_lock);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5747d49bdd76e..9d7b08c438656 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -192,7 +192,7 @@ void nfs_invalidate_atime(struct inode *inode)
  */
 static void nfs_invalidate_inode(struct inode *inode)
 {
-	set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
+	set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
 	nfs_zap_caches_locked(inode);
 }
 
@@ -291,7 +291,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 			inode->i_fop = &nfs_dir_operations;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)
 			    && fattr->size <= NFS_LIMIT_READDIRPLUS)
-				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			/* Deal with crossing mountpoints */
 			if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) {
 				if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
@@ -668,7 +668,7 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
 		if (status == -ESTALE) {
 			nfs_zap_caches(inode);
 			if (!S_ISDIR(inode->i_mode))
-				set_bit(NFS_INO_STALE, &NFS_FLAGS(inode));
+				set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
 		}
 		goto out;
 	}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index efc121c494fe4..8fd6dfbe1bc3b 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -336,7 +336,7 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
 
 	if (task->tk_status == -ESTALE) {
-		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
 		nfs_mark_for_revalidate(data->inode);
 	}
 	return 0;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 7095aaa087d81..9e7c24a2aca99 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -214,8 +214,7 @@ static inline struct nfs_inode *NFS_I(struct inode *inode)
 	(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
 			       : NFS_SERVER(inode)->acregmax)
 
-#define NFS_FLAGS(inode)		(NFS_I(inode)->flags)
-#define NFS_STALE(inode)		(test_bit(NFS_INO_STALE, &NFS_FLAGS(inode)))
+#define NFS_STALE(inode)		(test_bit(NFS_INO_STALE, &NFS_I(inode)->flags))
 
 #define NFS_FILEID(inode)		(NFS_I(inode)->fileid)
 
@@ -237,7 +236,7 @@ static inline int nfs_server_capable(struct inode *inode, int cap)
 
 static inline int NFS_USE_READDIRPLUS(struct inode *inode)
 {
-	return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode));
+	return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 }
 
 static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)

From 99fadcd76465842c014c88b8c9c19b457e9debc0 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Wed, 23 Jan 2008 08:59:08 +0200
Subject: [PATCH 114/118] nfs: convert NFS_*(inode) helpers to static inline

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         |  2 +-
 include/linux/nfs_fs.h | 76 ++++++++++++++++++++++++++++++++----------
 2 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 9d7b08c438656..5d381cfbfe7e4 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -229,7 +229,7 @@ nfs_init_locked(struct inode *inode, void *opaque)
 	struct nfs_find_desc	*desc = (struct nfs_find_desc *)opaque;
 	struct nfs_fattr	*fattr = desc->fattr;
 
-	NFS_FILEID(inode) = fattr->fileid;
+	set_nfs_fileid(inode, fattr->fileid);
 	nfs_copy_fh(NFS_FH(inode), desc->fh);
 	return 0;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 9e7c24a2aca99..099ddb4481c07 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -196,27 +196,67 @@ struct nfs_inode {
 #define NFS_INO_STALE		(2)		/* possible stale inode */
 #define NFS_INO_ACL_LRU_SET	(3)		/* Inode is on the LRU list */
 
-static inline struct nfs_inode *NFS_I(struct inode *inode)
+static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
 	return container_of(inode, struct nfs_inode, vfs_inode);
 }
-#define NFS_SB(s)		((struct nfs_server *)(s->s_fs_info))
-
-#define NFS_FH(inode)			(&NFS_I(inode)->fh)
-#define NFS_SERVER(inode)		(NFS_SB(inode->i_sb))
-#define NFS_CLIENT(inode)		(NFS_SERVER(inode)->client)
-#define NFS_PROTO(inode)		(NFS_SERVER(inode)->nfs_client->rpc_ops)
-#define NFS_COOKIEVERF(inode)		(NFS_I(inode)->cookieverf)
-#define NFS_MINATTRTIMEO(inode) \
-	(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
-			       : NFS_SERVER(inode)->acregmin)
-#define NFS_MAXATTRTIMEO(inode) \
-	(S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmax \
-			       : NFS_SERVER(inode)->acregmax)
-
-#define NFS_STALE(inode)		(test_bit(NFS_INO_STALE, &NFS_I(inode)->flags))
-
-#define NFS_FILEID(inode)		(NFS_I(inode)->fileid)
+
+static inline struct nfs_server *NFS_SB(const struct super_block *s)
+{
+	return (struct nfs_server *)(s->s_fs_info);
+}
+
+static inline struct nfs_fh *NFS_FH(const struct inode *inode)
+{
+	return &NFS_I(inode)->fh;
+}
+
+static inline struct nfs_server *NFS_SERVER(const struct inode *inode)
+{
+	return NFS_SB(inode->i_sb);
+}
+
+static inline struct rpc_clnt *NFS_CLIENT(const struct inode *inode)
+{
+	return NFS_SERVER(inode)->client;
+}
+
+static inline const struct nfs_rpc_ops *NFS_PROTO(const struct inode *inode)
+{
+	return NFS_SERVER(inode)->nfs_client->rpc_ops;
+}
+
+static inline __be32 *NFS_COOKIEVERF(const struct inode *inode)
+{
+	return NFS_I(inode)->cookieverf;
+}
+
+static inline unsigned NFS_MINATTRTIMEO(const struct inode *inode)
+{
+	struct nfs_server *nfss = NFS_SERVER(inode);
+	return S_ISDIR(inode->i_mode) ? nfss->acdirmin : nfss->acregmin;
+}
+
+static inline unsigned NFS_MAXATTRTIMEO(const struct inode *inode)
+{
+	struct nfs_server *nfss = NFS_SERVER(inode);
+	return S_ISDIR(inode->i_mode) ? nfss->acdirmax : nfss->acregmax;
+}
+
+static inline int NFS_STALE(const struct inode *inode)
+{
+	return test_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
+}
+
+static inline __u64 NFS_FILEID(const struct inode *inode)
+{
+	return NFS_I(inode)->fileid;
+}
+
+static inline void set_nfs_fileid(struct inode *inode, __u64 fileid)
+{
+	NFS_I(inode)->fileid = fileid;
+}
 
 static inline void nfs_mark_for_revalidate(struct inode *inode)
 {

From e6f810759505bc86c009854b82cc495ffd8eb020 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 24 Jan 2008 18:14:34 -0500
Subject: [PATCH 115/118] NFS: Add an asynchronous delegreturn operation for
 use in nfs_clear_inode

Otherwise, there is a potential deadlock if the last dput() from an NFSv4
close() or other asynchronous operation leads to nfs_clear_inode calling
the synchronous delegreturn.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c | 29 +++++++++++++++++++++++++----
 fs/nfs/delegation.h |  3 ++-
 fs/nfs/dir.c        |  1 -
 fs/nfs/inode.c      |  2 +-
 fs/nfs/nfs4proc.c   | 22 +++++++++++++---------
 5 files changed, 41 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index b03dcd8403f1d..2dead8d1dd554 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -174,11 +174,11 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	return status;
 }
 
-static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
 {
 	int res = 0;
 
-	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
 	nfs_free_delegation(delegation);
 	return res;
 }
@@ -208,7 +208,7 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
-	return nfs_do_return_delegation(inode, delegation);
+	return nfs_do_return_delegation(inode, delegation, 1);
 }
 
 static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
@@ -228,6 +228,27 @@ static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfs
 	return NULL;
 }
 
+/*
+ * This function returns the delegation without reclaiming opens
+ * or protecting against delegation reclaims.
+ * It is therefore really only safe to be called from
+ * nfs4_clear_inode()
+ */
+void nfs_inode_return_delegation_noreclaim(struct inode *inode)
+{
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+
+	if (rcu_dereference(nfsi->delegation) != NULL) {
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		spin_unlock(&clp->cl_lock);
+		if (delegation != NULL)
+			nfs_do_return_delegation(inode, delegation, 0);
+	}
+}
+
 int nfs_inode_return_delegation(struct inode *inode)
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
@@ -388,7 +409,7 @@ static int recall_thread(void *data)
 	nfs_msync_inode(inode);
 
 	if (delegation != NULL)
-		nfs_do_return_delegation(inode, delegation);
+		nfs_do_return_delegation(inode, delegation, 1);
 	iput(inode);
 	module_put_and_exit(0);
 }
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 5874ce7fdbaee..f1c5e2a5d88e3 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -29,6 +29,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 int nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
+void nfs_inode_return_delegation_noreclaim(struct inode *inode);
 
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
 void nfs_return_all_delegations(struct super_block *sb);
@@ -39,7 +40,7 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp);
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
-int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync);
 int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c578d942f0004..5ca762de88bf7 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -864,7 +864,6 @@ static int nfs_dentry_delete(struct dentry *dentry)
  */
 static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
-	nfs_inode_return_delegation(inode);
 	if (S_ISDIR(inode->i_mode))
 		/* drop any readdir cache as it could easily be old */
 		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5d381cfbfe7e4..3f332e54e760a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1145,7 +1145,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 void nfs4_clear_inode(struct inode *inode)
 {
 	/* If we are holding a delegation, return it! */
-	nfs_inode_return_delegation(inode);
+	nfs_inode_return_delegation_noreclaim(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 89efbcd6fd536..5c189bd57eb2b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2991,7 +2991,7 @@ static const struct rpc_call_ops nfs4_delegreturn_ops = {
 	.rpc_release = nfs4_delegreturn_release,
 };
 
-static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
 {
 	struct nfs4_delegreturndata *data;
 	struct nfs_server *server = NFS_SERVER(inode);
@@ -3006,7 +3006,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 		.callback_ops = &nfs4_delegreturn_ops,
 		.flags = RPC_TASK_ASYNC,
 	};
-	int status;
+	int status = 0;
 
 	data = kmalloc(sizeof(*data), GFP_KERNEL);
 	if (data == NULL)
@@ -3028,23 +3028,27 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
+	if (!issync)
+		goto out;
 	status = nfs4_wait_for_completion_rpc_task(task);
-	if (status == 0) {
-		status = data->rpc_status;
-		if (status == 0)
-			nfs_refresh_inode(inode, &data->fattr);
-	}
+	if (status != 0)
+		goto out;
+	status = data->rpc_status;
+	if (status != 0)
+		goto out;
+	nfs_refresh_inode(inode, &data->fattr);
+out:
 	rpc_put_task(task);
 	return status;
 }
 
-int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
+int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid, int issync)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs4_proc_delegreturn(inode, cred, stateid);
+		err = _nfs4_proc_delegreturn(inode, cred, stateid, issync);
 		switch (err) {
 			case -NFS4ERR_STALE_STATEID:
 			case -NFS4ERR_EXPIRED:

From 6f23e3872cff238589f9bf39c71db2ea880c9a26 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 25 Jan 2008 16:38:17 -0500
Subject: [PATCH 116/118] NFS: Fix a potential race between umount and
 nfs_access_cache_shrinker()

Thanks to Yawei Niu for spotting the race.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 5ca762de88bf7..476cb0f837fd1 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1694,13 +1694,19 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
 restart:
 	spin_lock(&nfs_access_lru_lock);
 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
+		struct rw_semaphore *s_umount;
 		struct inode *inode;
 
 		if (nr_to_scan-- == 0)
 			break;
+		s_umount = &nfsi->vfs_inode.i_sb->s_umount;
+		if (!down_read_trylock(s_umount))
+			continue;
 		inode = igrab(&nfsi->vfs_inode);
-		if (inode == NULL)
+		if (inode == NULL) {
+			up_read(s_umount);
 			continue;
+		}
 		spin_lock(&inode->i_lock);
 		if (list_empty(&nfsi->access_cache_entry_lru))
 			goto remove_lru_entry;
@@ -1719,6 +1725,7 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
 		spin_unlock(&inode->i_lock);
 		spin_unlock(&nfs_access_lru_lock);
 		iput(inode);
+		up_read(s_umount);
 		goto restart;
 	}
 	spin_unlock(&nfs_access_lru_lock);

From 57bfa89171e50cddf51a4f62c90e47c6259857b4 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 25 Jan 2008 16:38:18 -0500
Subject: [PATCH 117/118] NFSv4: Deal more correctly with duplicate delegations

If a (broken?) server hands out two different delegations for the same
file, then we should return one of them.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c | 89 ++++++++++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 38 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 2dead8d1dd554..b9eadd18ba707 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -125,6 +125,32 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st
 	put_rpccred(oldcred);
 }
 
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
+{
+	int res = 0;
+
+	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
+	nfs_free_delegation(delegation);
+	return res;
+}
+
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+{
+	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+	if (delegation == NULL)
+		goto nomatch;
+	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+				sizeof(delegation->stateid.data)) != 0)
+		goto nomatch;
+	list_del_rcu(&delegation->super_list);
+	nfsi->delegation_state = 0;
+	rcu_assign_pointer(nfsi->delegation, NULL);
+	return delegation;
+nomatch:
+	return NULL;
+}
+
 /*
  * Set up a delegation on an inode
  */
@@ -133,6 +159,7 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
+	struct nfs_delegation *freeme = NULL;
 	int status = 0;
 
 	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
@@ -147,42 +174,45 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	delegation->inode = inode;
 
 	spin_lock(&clp->cl_lock);
-	if (rcu_dereference(nfsi->delegation) == NULL) {
-		list_add_rcu(&delegation->super_list, &clp->cl_delegations);
-		nfsi->delegation_state = delegation->type;
-		rcu_assign_pointer(nfsi->delegation, delegation);
-		delegation = NULL;
-	} else {
+	if (rcu_dereference(nfsi->delegation) != NULL) {
 		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
-					sizeof(delegation->stateid)) != 0 ||
-				delegation->type != nfsi->delegation->type) {
-			printk(KERN_WARNING "%s: server %s handed out "
-					"a duplicate delegation!\n",
-					__FUNCTION__, clp->cl_hostname);
-			status = -EIO;
+					sizeof(delegation->stateid)) == 0 &&
+				delegation->type == nfsi->delegation->type) {
+			goto out;
+		}
+		/*
+		 * Deal with broken servers that hand out two
+		 * delegations for the same file.
+		 */
+		dfprintk(FILE, "%s: server %s handed out "
+				"a duplicate delegation!\n",
+				__FUNCTION__, clp->cl_hostname);
+		if (delegation->type <= nfsi->delegation->type) {
+			freeme = delegation;
+			delegation = NULL;
+			goto out;
 		}
+		freeme = nfs_detach_delegation_locked(nfsi, NULL);
 	}
+	list_add_rcu(&delegation->super_list, &clp->cl_delegations);
+	nfsi->delegation_state = delegation->type;
+	rcu_assign_pointer(nfsi->delegation, delegation);
+	delegation = NULL;
 
 	/* Ensure we revalidate the attributes and page cache! */
 	spin_lock(&inode->i_lock);
 	nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
 	spin_unlock(&inode->i_lock);
 
+out:
 	spin_unlock(&clp->cl_lock);
 	if (delegation != NULL)
 		nfs_free_delegation(delegation);
+	if (freeme != NULL)
+		nfs_do_return_delegation(inode, freeme, 0);
 	return status;
 }
 
-static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation, int issync)
-{
-	int res = 0;
-
-	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
-	nfs_free_delegation(delegation);
-	return res;
-}
-
 /* Sync all data to disk upon delegation return */
 static void nfs_msync_inode(struct inode *inode)
 {
@@ -211,23 +241,6 @@ static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegat
 	return nfs_do_return_delegation(inode, delegation, 1);
 }
 
-static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
-{
-	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
-
-	if (delegation == NULL)
-		goto nomatch;
-	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
-				sizeof(delegation->stateid.data)) != 0)
-		goto nomatch;
-	list_del_rcu(&delegation->super_list);
-	nfsi->delegation_state = 0;
-	rcu_assign_pointer(nfsi->delegation, NULL);
-	return delegation;
-nomatch:
-	return NULL;
-}
-
 /*
  * This function returns the delegation without reclaiming opens
  * or protecting against delegation reclaims.

From 3fbd67ad61f6d5a09ea717b56c50bc5c3d8042a8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 26 Jan 2008 01:06:40 -0500
Subject: [PATCH 118/118] NFSv4: Iterate through all nfs_clients when the
 server recalls a delegation

The same delegation may have been handed out to more than one nfs_client.
Ensure that if a recall occurs, we return all instances.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/callback_proc.c | 39 ++++++++++++++++++++++-----------------
 fs/nfs/client.c        | 35 +++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h      |  1 +
 3 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index e89a9007c91cf..15f7785048d3e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -75,23 +75,28 @@ __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy)
 	dprintk("NFS: RECALL callback request from %s\n",
 		rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR));
 
-	inode = nfs_delegation_find_inode(clp, &args->fh);
-	if (inode == NULL)
-		goto out_putclient;
-	/* Set up a helper thread to actually return the delegation */
-	switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
-		case 0:
-			res = 0;
-			break;
-		case -ENOENT:
-			res = htonl(NFS4ERR_BAD_STATEID);
-			break;
-		default:
-			res = htonl(NFS4ERR_RESOURCE);
-	}
-	iput(inode);
-out_putclient:
-	nfs_put_client(clp);
+	do {
+		struct nfs_client *prev = clp;
+
+		inode = nfs_delegation_find_inode(clp, &args->fh);
+		if (inode != NULL) {
+			/* Set up a helper thread to actually return the delegation */
+			switch(nfs_async_inode_return_delegation(inode, &args->stateid)) {
+				case 0:
+					res = 0;
+					break;
+				case -ENOENT:
+					if (res != 0)
+						res = htonl(NFS4ERR_BAD_STATEID);
+					break;
+				default:
+					res = htonl(NFS4ERR_RESOURCE);
+			}
+			iput(inode);
+		}
+		clp = nfs_find_client_next(prev);
+		nfs_put_client(prev);
+	} while (clp != NULL);
 out:
 	dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res));
 	return res;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 7a15832369e90..685c43f810c10 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -275,6 +275,41 @@ struct nfs_client *nfs_find_client(const struct sockaddr *addr, u32 nfsversion)
 	return NULL;
 }
 
+/*
+ * Find a client by IP address and protocol version
+ * - returns NULL if no such client
+ */
+struct nfs_client *nfs_find_client_next(struct nfs_client *clp)
+{
+	struct sockaddr *sap = (struct sockaddr *)&clp->cl_addr;
+	u32 nfsvers = clp->rpc_ops->version;
+
+	spin_lock(&nfs_client_lock);
+	list_for_each_entry_continue(clp, &nfs_client_list, cl_share_link) {
+		struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
+		/* Don't match clients that failed to initialise properly */
+		if (clp->cl_cons_state != NFS_CS_READY)
+			continue;
+
+		/* Different NFS versions cannot share the same nfs_client */
+		if (clp->rpc_ops->version != nfsvers)
+			continue;
+
+		if (sap->sa_family != clap->sa_family)
+			continue;
+		/* Match only the IP address, not the port number */
+		if (!nfs_sockaddr_match_ipaddr(sap, clap))
+			continue;
+
+		atomic_inc(&clp->cl_count);
+		spin_unlock(&nfs_client_lock);
+		return clp;
+	}
+	spin_unlock(&nfs_client_lock);
+	return NULL;
+}
+
 /*
  * Find an nfs_client on the list that matches the initialisation data
  * that is supplied.
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index a806211990865..0f5619611b8dc 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -64,6 +64,7 @@ extern struct rpc_program nfs_program;
 
 extern void nfs_put_client(struct nfs_client *);
 extern struct nfs_client *nfs_find_client(const struct sockaddr *, u32);
+extern struct nfs_client *nfs_find_client_next(struct nfs_client *);
 extern struct nfs_server *nfs_create_server(
 					const struct nfs_parsed_mount_data *,
 					struct nfs_fh *);