Skip to content

Commit

Permalink
fs: sync_sb_inodes fix
Browse files Browse the repository at this point in the history
Fix data integrity semantics required by sys_sync, by iterating over all
inodes and waiting for any writeback pages after the initial writeout.
Comments explain the exact problem.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Nick Piggin authored and Linus Torvalds committed Jan 6, 2009
1 parent 4f5a99d commit 38f2197
Showing 1 changed file with 53 additions and 7 deletions.
60 changes: 53 additions & 7 deletions fs/fs-writeback.c
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,7 @@ void generic_sync_sb_inodes(struct super_block *sb,
struct writeback_control *wbc)
{
const unsigned long start = jiffies; /* livelock avoidance */
int sync = wbc->sync_mode == WB_SYNC_ALL;

spin_lock(&inode_lock);
if (!wbc->for_kupdate || list_empty(&sb->s_io))
Expand Down Expand Up @@ -516,7 +517,49 @@ void generic_sync_sb_inodes(struct super_block *sb,
if (!list_empty(&sb->s_more_io))
wbc->more_io = 1;
}
spin_unlock(&inode_lock);

if (sync) {
struct inode *inode, *old_inode = NULL;

/*
* Data integrity sync. Must wait for all pages under writeback,
* because there may have been pages dirtied before our sync
* call, but which had writeout started before we write them out.
* In that case, the inode may not be on the dirty list, but
* we still have to wait for that writeout.
*/
list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
struct address_space *mapping;

if (inode->i_state & (I_FREEING|I_WILL_FREE))
continue;
mapping = inode->i_mapping;
if (mapping->nrpages == 0)
continue;
__iget(inode);
spin_unlock(&inode_lock);
/*
* We hold a reference to 'inode' so it couldn't have
* been removed from s_inodes list while we dropped the
* inode_lock. We cannot iput the inode now as we may
* be holding the last reference, and we cannot iput it
* under inode_lock. So we keep the reference and iput
* it later.
*/
iput(old_inode);
old_inode = inode;

filemap_fdatawait(mapping);

cond_resched();

spin_lock(&inode_lock);
}
spin_unlock(&inode_lock);
iput(old_inode);
} else
spin_unlock(&inode_lock);

return; /* Leave any unwritten inodes on s_io */
}
EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
Expand Down Expand Up @@ -596,13 +639,16 @@ void sync_inodes_sb(struct super_block *sb, int wait)
.range_start = 0,
.range_end = LLONG_MAX,
};
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);

wbc.nr_to_write = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused) +
nr_dirty + nr_unstable;
wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
if (!wait) {
unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);

wbc.nr_to_write = nr_dirty + nr_unstable +
(inodes_stat.nr_inodes - inodes_stat.nr_unused);
} else
wbc.nr_to_write = LONG_MAX; /* doesn't actually matter */

sync_sb_inodes(sb, &wbc);
}

Expand Down

0 comments on commit 38f2197

Please sign in to comment.