Skip to content

Commit

Permalink
NFS: Fix a race in nfs_sync_inode()
Browse files Browse the repository at this point in the history
Kudos to Neil Brown for spotting the problem:

"in nfs_sync_inode, there is effectively the sequence:

   nfs_wait_on_requests
   nfs_flush_inode
   nfs_commit_inode

 This seems a bit racy to me: if the only requests are on the
 ->commit list, and nfs_commit_inode is called separately after
 nfs_wait_on_requests completes, and before nfs_commit_inode starts
 (say: by nfs_write_inode), then none of these functions will return
 >0, yet there will be some pending requests that aren't waited for."

The solution is to search for requests to wait upon, search for dirty
requests, and search for uncommitted requests, all while holding the
nfsi->req_lock.

The patch also cleans up nfs_sync_inode(), getting rid of the redundant
FLUSH_WAIT flag. It turns out that we were always setting it.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
  • Loading branch information
Trond Myklebust authored and Trond Myklebust committed Mar 20, 2006
1 parent 7d46a49 commit c42de9d
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 30 deletions.
4 changes: 2 additions & 2 deletions fs/nfs/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr)
static int
nfs_write_inode(struct inode *inode, int sync)
{
int flags = sync ? FLUSH_WAIT : 0;
int flags = sync ? FLUSH_SYNC : 0;
int ret;

ret = nfs_commit_inode(inode, flags);
Expand Down Expand Up @@ -1051,7 +1051,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
int err;

/* Flush out writes to the server in order to update c/mtime */
nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);

/*
* We may force a getattr if the user cares about atime.
Expand Down
72 changes: 50 additions & 22 deletions fs/nfs/write.c
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,7 @@ nfs_mark_request_commit(struct nfs_page *req)
*
* Interruptible by signals only if mounted with intr flag.
*/
static int
nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
static int nfs_wait_on_requests_locked(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_page *req;
Expand All @@ -553,7 +552,6 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
else
idx_end = idx_start + npages - 1;

spin_lock(&nfsi->req_lock);
next = idx_start;
while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_WRITEBACK)) {
if (req->wb_index > idx_end)
Expand All @@ -566,15 +564,25 @@ nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int
spin_unlock(&nfsi->req_lock);
error = nfs_wait_on_request(req);
nfs_release_request(req);
spin_lock(&nfsi->req_lock);
if (error < 0)
return error;
spin_lock(&nfsi->req_lock);
res++;
}
spin_unlock(&nfsi->req_lock);
return res;
}

/*
 * nfs_wait_on_requests - self-locking wrapper around
 * nfs_wait_on_requests_locked()
 * @inode: inode whose requests are waited upon
 * @idx_start: passed through unchanged to the locked helper
 * @npages: passed through unchanged to the locked helper
 *
 * Takes nfsi->req_lock, calls nfs_wait_on_requests_locked() for the given
 * range, drops the lock again and returns the helper's result unmodified.
 * For callers that do not already hold nfsi->req_lock.
 */
static int nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
{
struct nfs_inode *nfsi = NFS_I(inode);
int ret;

spin_lock(&nfsi->req_lock);
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
spin_unlock(&nfsi->req_lock);
return ret;
}

/*
* nfs_scan_dirty - Scan an inode for dirty requests
* @inode: NFS inode to scan
Expand Down Expand Up @@ -626,6 +634,11 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_st
}
return res;
}
#else
/*
 * Fallback nfs_scan_commit() for the #else side of the surrounding
 * preprocessor conditional (the condition itself is not visible in this
 * hunk -- presumably builds without NFSv3/NFSv4 commit support; confirm
 * against the matching #if).
 *
 * Ignores all arguments, leaves @dst untouched and always returns 0.
 */
static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
{
return 0;
}
#endif

static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
Expand Down Expand Up @@ -1421,6 +1434,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
.rpc_call_done = nfs_commit_done,
.rpc_release = nfs_commit_release,
};
#else
/*
 * Fallback nfs_commit_list() for the #else side of the surrounding
 * preprocessor conditional (the condition itself is not visible in this
 * hunk -- presumably builds without NFSv3/NFSv4 commit support; confirm
 * against the matching #if).
 *
 * Ignores all arguments and always returns 0.
 */
static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
return 0;
}
#endif

static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
Expand Down Expand Up @@ -1460,28 +1478,38 @@ int nfs_commit_inode(struct inode *inode, int how)
}
#endif

int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how)
int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
unsigned int npages, int how)
{
struct nfs_inode *nfsi = NFS_I(inode);
LIST_HEAD(head);
int nocommit = how & FLUSH_NOCOMMIT;
int wait = how & FLUSH_WAIT;
int error;

how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
int pages, ret;

how &= ~FLUSH_NOCOMMIT;
spin_lock(&nfsi->req_lock);
do {
if (wait) {
error = nfs_wait_on_requests(inode, idx_start, npages);
if (error != 0)
continue;
}
error = nfs_flush_inode(inode, idx_start, npages, how);
if (error != 0)
ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
if (ret != 0)
continue;
if (!nocommit)
error = nfs_commit_inode(inode, how);
} while (error > 0);
return error;
pages = nfs_scan_dirty(inode, &head, idx_start, npages);
if (pages != 0) {
spin_unlock(&nfsi->req_lock);
ret = nfs_flush_list(inode, &head, pages, how);
spin_lock(&nfsi->req_lock);
continue;
}
if (nocommit)
break;
pages = nfs_scan_commit(inode, &head, 0, 0);
if (pages == 0)
break;
spin_unlock(&nfsi->req_lock);
ret = nfs_commit_list(inode, &head, how);
spin_lock(&nfsi->req_lock);
} while (ret >= 0);
spin_unlock(&nfsi->req_lock);
return ret;
}

int nfs_init_writepagecache(void)
Expand Down
10 changes: 4 additions & 6 deletions include/linux/nfs_fs.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@
* When flushing a cluster of dirty pages, there can be different
* strategies:
*/
#define FLUSH_AGING 0 /* only flush old buffers */
#define FLUSH_SYNC 1 /* file being synced, or contention */
#define FLUSH_WAIT 2 /* wait for completion */
#define FLUSH_STABLE 4 /* commit to stable storage */
#define FLUSH_LOWPRI 8 /* low priority background flush */
#define FLUSH_HIGHPRI 16 /* high priority memory reclaim flush */
Expand Down Expand Up @@ -419,7 +417,7 @@ void nfs_commit_free(struct nfs_write_data *p);
* Try to write back everything synchronously (but check the
* return value!)
*/
extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
extern int nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
extern int nfs_commit_inode(struct inode *, int);
extern void nfs_commit_release(void *wdata);
Expand All @@ -440,7 +438,7 @@ nfs_have_writebacks(struct inode *inode)
static inline int
nfs_wb_all(struct inode *inode)
{
int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT);
int error = nfs_sync_inode_wait(inode, 0, 0, 0);
return (error < 0) ? error : 0;
}

Expand All @@ -449,8 +447,8 @@ nfs_wb_all(struct inode *inode)
*/
static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
{
int error = nfs_sync_inode(inode, page->index, 1,
how | FLUSH_WAIT | FLUSH_STABLE);
int error = nfs_sync_inode_wait(inode, page->index, 1,
how | FLUSH_STABLE);
return (error < 0) ? error : 0;
}

Expand Down

0 comments on commit c42de9d

Please sign in to comment.