Skip to content

Commit

Permalink
tmpfs: undo fallocation on failure
Browse files Browse the repository at this point in the history
In the previous episode, we left the already-fallocated pages attached to
the file when shmem_fallocate() fails part way through.

Now try to do better, by extending the earlier optimization of !Uptodate
pages (which until now existed only while under page lock) so that
!Uptodate pages may also persist outside of page lock, where they represent
fallocated pages.  And don't waste time clearing them at the time of
fallocate(): leave that until later, if necessary.

Adapt shmem_truncate_range() to shmem_undo_range(), so that a failing
fallocate can recognize and remove precisely those !Uptodate allocations
which it added (and were not independently allocated by racing tasks).

But unless we start playing with swapfile.c and memcontrol.c too, once one
of our fallocated pages reaches shmem_writepage(), we do then have to
instantiate it as an ordinarily allocated page, before swapping out.  This
is unsatisfactory, but improved in the next episode.

Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Cong Wang <amwang@redhat.com>
Cc: Kay Sievers <kay@vrfy.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Hugh Dickins authored and Linus Torvalds committed May 29, 2012
1 parent e2d12e2 commit 1635f6a
Showing 1 changed file with 72 additions and 33 deletions.
105 changes: 72 additions & 33 deletions mm/shmem.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ enum sgp_type {
SGP_READ, /* don't exceed i_size, don't allocate page */
SGP_CACHE, /* don't exceed i_size, may allocate page */
SGP_DIRTY, /* like SGP_CACHE, but set new page dirty */
SGP_WRITE, /* may exceed i_size, may allocate page */
SGP_WRITE, /* may exceed i_size, may allocate !Uptodate page */
SGP_FALLOC, /* like SGP_WRITE, but make existing page Uptodate */
};

#ifdef CONFIG_TMPFS
Expand Down Expand Up @@ -427,8 +428,10 @@ void shmem_unlock_mapping(struct address_space *mapping)

/*
* Remove range of pages and swap entries from radix tree, and free them.
* If !unfalloc, truncate or punch hole; if unfalloc, undo failed fallocate.
*/
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
bool unfalloc)
{
struct address_space *mapping = inode->i_mapping;
struct shmem_inode_info *info = SHMEM_I(inode);
Expand Down Expand Up @@ -462,16 +465,20 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
break;

if (radix_tree_exceptional_entry(page)) {
if (unfalloc)
continue;
nr_swaps_freed += !shmem_free_swap(mapping,
index, page);
continue;
}

if (!trylock_page(page))
continue;
if (page->mapping == mapping) {
VM_BUG_ON(PageWriteback(page));
truncate_inode_page(mapping, page);
if (!unfalloc || !PageUptodate(page)) {
if (page->mapping == mapping) {
VM_BUG_ON(PageWriteback(page));
truncate_inode_page(mapping, page);
}
}
unlock_page(page);
}
Expand Down Expand Up @@ -517,12 +524,12 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
min(end - index, (pgoff_t)PAGEVEC_SIZE),
pvec.pages, indices);
if (!pvec.nr) {
if (index == start)
if (index == start || unfalloc)
break;
index = start;
continue;
}
if (index == start && indices[0] >= end) {
if ((index == start || unfalloc) && indices[0] >= end) {
shmem_deswap_pagevec(&pvec);
pagevec_release(&pvec);
break;
Expand All @@ -536,15 +543,19 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
break;

if (radix_tree_exceptional_entry(page)) {
if (unfalloc)
continue;
nr_swaps_freed += !shmem_free_swap(mapping,
index, page);
continue;
}

lock_page(page);
if (page->mapping == mapping) {
VM_BUG_ON(PageWriteback(page));
truncate_inode_page(mapping, page);
if (!unfalloc || !PageUptodate(page)) {
if (page->mapping == mapping) {
VM_BUG_ON(PageWriteback(page));
truncate_inode_page(mapping, page);
}
}
unlock_page(page);
}
Expand All @@ -558,7 +569,11 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
info->swapped -= nr_swaps_freed;
shmem_recalc_inode(inode);
spin_unlock(&info->lock);
}

/*
 * shmem_truncate_range - truncate or punch a hole in a shmem/tmpfs file.
 * @inode:  inode whose pages and swap entries are to be removed
 * @lstart: start of the range (loff_t), passed unchanged to shmem_undo_range()
 * @lend:   end of the range (loff_t), passed unchanged to shmem_undo_range()
 *
 * Thin exported wrapper: does the removal through shmem_undo_range() in its
 * truncate/punch-hole mode, then updates the inode's timestamps.
 */
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
/* unfalloc == false: truncate or punch hole, not undo of a failed fallocate */
shmem_undo_range(inode, lstart, lend, false);
/* Stamp both the change time and the modification time with the current time */
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
Expand Down Expand Up @@ -771,6 +786,18 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
WARN_ON_ONCE(1); /* Still happens? Tell us about it! */
goto redirty;
}

/*
* This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC
* value into swapfile.c, the only way we can correctly account for a
* fallocated page arriving here is now to initialize it and write it.
*/
if (!PageUptodate(page)) {
clear_highpage(page);
flush_dcache_page(page);
SetPageUptodate(page);
}

swap = get_swap_page();
if (!swap.val)
goto redirty;
Expand Down Expand Up @@ -994,6 +1021,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
swp_entry_t swap;
int error;
int once = 0;
int alloced = 0;

if (index > (MAX_LFS_FILESIZE >> PAGE_CACHE_SHIFT))
return -EFBIG;
Expand All @@ -1005,19 +1033,21 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
page = NULL;
}

if (sgp != SGP_WRITE &&
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
goto failed;
}

/* fallocated page? */
if (page && !PageUptodate(page)) {
if (sgp != SGP_READ)
goto clear;
unlock_page(page);
page_cache_release(page);
page = NULL;
}
if (page || (sgp == SGP_READ && !swap.val)) {
/*
* Once we can get the page lock, it must be uptodate:
* if there were an error in reading back from swap,
* the page would not be inserted into the filecache.
*/
BUG_ON(page && !PageUptodate(page));
*pagep = page;
return 0;
}
Expand Down Expand Up @@ -1114,9 +1144,18 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
inode->i_blocks += BLOCKS_PER_PAGE;
shmem_recalc_inode(inode);
spin_unlock(&info->lock);
alloced = true;

/*
* Let SGP_WRITE caller clear ends if write does not fill page
* Let SGP_FALLOC use the SGP_WRITE optimization on a new page.
*/
if (sgp == SGP_FALLOC)
sgp = SGP_WRITE;
clear:
/*
* Let SGP_WRITE caller clear ends if write does not fill page;
* but SGP_FALLOC on a page fallocated earlier must initialize
* it now, lest undo on failure cancel our earlier guarantee.
*/
if (sgp != SGP_WRITE) {
clear_highpage(page);
Expand All @@ -1128,10 +1167,13 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
}

/* Perhaps the file has been truncated since we checked */
if (sgp != SGP_WRITE &&
if (sgp != SGP_WRITE && sgp != SGP_FALLOC &&
((loff_t)index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
error = -EINVAL;
goto trunc;
if (alloced)
goto trunc;
else
goto failed;
}
*pagep = page;
return 0;
Expand All @@ -1140,13 +1182,15 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
* Error recovery.
*/
trunc:
info = SHMEM_I(inode);
ClearPageDirty(page);
delete_from_page_cache(page);
spin_lock(&info->lock);
info->alloced--;
inode->i_blocks -= BLOCKS_PER_PAGE;
spin_unlock(&info->lock);
decused:
sbinfo = SHMEM_SB(inode->i_sb);
if (sbinfo->max_blocks)
percpu_counter_add(&sbinfo->used_blocks, -1);
unacct:
Expand Down Expand Up @@ -1645,25 +1689,20 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (signal_pending(current))
error = -EINTR;
else
error = shmem_getpage(inode, index, &page, SGP_WRITE,
error = shmem_getpage(inode, index, &page, SGP_FALLOC,
NULL);
if (error) {
/*
* We really ought to free what we allocated so far,
* but it would be wrong to free pages allocated
* earlier, or already now in use: i_mutex does not
* exclude all cases. We do not know what to free.
*/
/* Remove the !PageUptodate pages we added */
shmem_undo_range(inode,
(loff_t)start << PAGE_CACHE_SHIFT,
(loff_t)index << PAGE_CACHE_SHIFT, true);
goto ctime;
}

if (!PageUptodate(page)) {
clear_highpage(page);
flush_dcache_page(page);
SetPageUptodate(page);
}
/*
* set_page_dirty so that memory pressure will swap rather
* If !PageUptodate, leave it that way so that freeable pages
* can be recognized if we need to rollback on error later.
* But set_page_dirty so that memory pressure will swap rather
* than free the pages we are allocating (and SGP_CACHE pages
* might still be clean: we now need to mark those dirty too).
*/
Expand Down

0 comments on commit 1635f6a

Please sign in to comment.