readahead: clean up and simplify the code for filemap page fault readahead

This shouldn't really change behavior all that much, but the single, rather
complex function with read-ahead inside a loop, etc., is broken up into more
manageable pieces.

The behavior is also less subtle, with the read-ahead being done up-front
rather than inside the loop, which avoids the now-unnecessary extra state
variables (i.e. "did_readaround" is gone).

Fengguang: the code split in fact fixed a bug reported by Pavel Levshin:
the PGMAJFAULT accounting used to be bypassed when MADV_RANDOM is set, in
which case the original code would jump straight to the no_cached_page path
and skip the major-fault bookkeeping entirely.
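
As a condensed sketch (a simplification of the diff below, with the locking
and truncation checks elided), the new fault path makes that fix visible:
the major-fault accounting now runs on every page-cache miss, and the
MADV_RANDOM case is simply do_sync_mmap_readahead() returning early:

	page = find_get_page(mapping, offset);
	if (likely(page)) {
		/* Cache hit: possibly extend readahead, then take the lock */
		do_async_mmap_readahead(vma, ra, file, page, offset);
		lock_page(page);
	} else {
		/*
		 * Cache miss: readahead happens up-front (a no-op under
		 * MADV_RANDOM), then the major fault is counted unconditionally.
		 */
		do_sync_mmap_readahead(vma, ra, file, offset);
		count_vm_event(PGMAJFAULT);
		ret = VM_FAULT_MAJOR;
		page = find_lock_page(mapping, offset);
		if (!page)
			goto no_cached_page;
	}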

Cc: Pavel Levshin <lpk@581.spb.su>
Cc: <wli@movementarian.org>
Cc: Nick Piggin <npiggin@suse.de>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Linus Torvalds committed Jun 17, 2009
1 parent 51daa88 commit ef00e08
Showing 1 changed file with 89 additions and 67 deletions.
156 changes: 89 additions & 67 deletions mm/filemap.c
@@ -1456,6 +1456,68 @@ static int page_cache_read(struct file *file, pgoff_t offset)
 
 #define MMAP_LOTSAMISS (100)
 
+/*
+ * Synchronous readahead happens when we don't even find
+ * a page in the page cache at all.
+ */
+static void do_sync_mmap_readahead(struct vm_area_struct *vma,
+				   struct file_ra_state *ra,
+				   struct file *file,
+				   pgoff_t offset)
+{
+	unsigned long ra_pages;
+	struct address_space *mapping = file->f_mapping;
+
+	/* If we don't want any read-ahead, don't bother */
+	if (VM_RandomReadHint(vma))
+		return;
+
+	if (VM_SequentialReadHint(vma)) {
+		page_cache_sync_readahead(mapping, ra, file, offset, 1);
+		return;
+	}
+
+	if (ra->mmap_miss < INT_MAX)
+		ra->mmap_miss++;
+
+	/*
+	 * Do we miss much more than hit in this file? If so,
+	 * stop bothering with read-ahead. It will only hurt.
+	 */
+	if (ra->mmap_miss > MMAP_LOTSAMISS)
+		return;
+
+	ra_pages = max_sane_readahead(ra->ra_pages);
+	if (ra_pages) {
+		pgoff_t start = 0;
+
+		if (offset > ra_pages / 2)
+			start = offset - ra_pages / 2;
+		do_page_cache_readahead(mapping, file, start, ra_pages);
+	}
+}
+
+/*
+ * Asynchronous readahead happens when we find the page and PG_readahead,
+ * so we want to possibly extend the readahead further..
+ */
+static void do_async_mmap_readahead(struct vm_area_struct *vma,
+				    struct file_ra_state *ra,
+				    struct file *file,
+				    struct page *page,
+				    pgoff_t offset)
+{
+	struct address_space *mapping = file->f_mapping;
+
+	/* If we don't want any read-ahead, don't bother */
+	if (VM_RandomReadHint(vma))
+		return;
+	if (ra->mmap_miss > 0)
+		ra->mmap_miss--;
+	if (PageReadahead(page))
+		page_cache_async_readahead(mapping, ra, file, page, offset, 1);
+}
+
 /**
  * filemap_fault - read in file data for page fault handling
  * @vma:	vma in which the fault was taken
@@ -1475,97 +1537,63 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct address_space *mapping = file->f_mapping;
 	struct file_ra_state *ra = &file->f_ra;
 	struct inode *inode = mapping->host;
+	pgoff_t offset = vmf->pgoff;
 	struct page *page;
 	pgoff_t size;
-	int did_readaround = 0;
 	int ret = 0;
 
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (vmf->pgoff >= size)
+	if (offset >= size)
 		return VM_FAULT_SIGBUS;
 
-	/* If we don't want any read-ahead, don't bother */
-	if (VM_RandomReadHint(vma))
-		goto no_cached_page;
-
 	/*
 	 * Do we have something in the page cache already?
 	 */
-retry_find:
-	page = find_lock_page(mapping, vmf->pgoff);
-	/*
-	 * For sequential accesses, we use the generic readahead logic.
-	 */
-	if (VM_SequentialReadHint(vma)) {
-		if (!page) {
-			page_cache_sync_readahead(mapping, ra, file,
-							   vmf->pgoff, 1);
-			page = find_lock_page(mapping, vmf->pgoff);
-			if (!page)
-				goto no_cached_page;
-		}
-		if (PageReadahead(page)) {
-			page_cache_async_readahead(mapping, ra, file, page,
-							   vmf->pgoff, 1);
-		}
-	}
-
-	if (!page) {
-		unsigned long ra_pages;
-
-		ra->mmap_miss++;
-
+	page = find_get_page(mapping, offset);
+	if (likely(page)) {
 		/*
-		 * Do we miss much more than hit in this file? If so,
-		 * stop bothering with read-ahead. It will only hurt.
+		 * We found the page, so try async readahead before
+		 * waiting for the lock.
 		 */
-		if (ra->mmap_miss > MMAP_LOTSAMISS)
-			goto no_cached_page;
+		do_async_mmap_readahead(vma, ra, file, page, offset);
+		lock_page(page);
 
-		/*
-		 * To keep the pgmajfault counter straight, we need to
-		 * check did_readaround, as this is an inner loop.
-		 */
-		if (!did_readaround) {
-			ret = VM_FAULT_MAJOR;
-			count_vm_event(PGMAJFAULT);
-		}
-		did_readaround = 1;
-		ra_pages = max_sane_readahead(file->f_ra.ra_pages);
-		if (ra_pages) {
-			pgoff_t start = 0;
-
-			if (vmf->pgoff > ra_pages / 2)
-				start = vmf->pgoff - ra_pages / 2;
-			do_page_cache_readahead(mapping, file, start, ra_pages);
+		/* Did it get truncated? */
+		if (unlikely(page->mapping != mapping)) {
+			unlock_page(page);
+			put_page(page);
+			goto no_cached_page;
 		}
-		page = find_lock_page(mapping, vmf->pgoff);
+	} else {
+		/* No page in the page cache at all */
+		do_sync_mmap_readahead(vma, ra, file, offset);
+		count_vm_event(PGMAJFAULT);
+		ret = VM_FAULT_MAJOR;
+retry_find:
+		page = find_lock_page(mapping, offset);
 		if (!page)
 			goto no_cached_page;
 	}
 
-	if (!did_readaround)
-		ra->mmap_miss--;
-
 	/*
 	 * We have a locked page in the page cache, now we need to check
 	 * that it's up-to-date. If not, it is going to be due to an error.
 	 */
 	if (unlikely(!PageUptodate(page)))
 		goto page_not_uptodate;
 
-	/* Must recheck i_size under page lock */
+	/*
+	 * Found the page and have a reference on it.
+	 * We must recheck i_size under page lock.
+	 */
 	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (unlikely(vmf->pgoff >= size)) {
+	if (unlikely(offset >= size)) {
 		unlock_page(page);
 		page_cache_release(page);
 		return VM_FAULT_SIGBUS;
 	}
 
-	/*
-	 * Found the page and have a reference on it.
-	 */
-	ra->prev_pos = (loff_t)page->index << PAGE_CACHE_SHIFT;
+	ra->prev_pos = (loff_t)offset << PAGE_CACHE_SHIFT;
 	vmf->page = page;
 	return ret | VM_FAULT_LOCKED;
 
@@ -1574,7 +1602,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * We're only likely to ever get here if MADV_RANDOM is in
 	 * effect.
 	 */
-	error = page_cache_read(file, vmf->pgoff);
+	error = page_cache_read(file, offset);
 
 	/*
 	 * The page we want has now been added to the page cache.
@@ -1594,12 +1622,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 
 page_not_uptodate:
-	/* IO error path */
-	if (!did_readaround) {
-		ret = VM_FAULT_MAJOR;
-		count_vm_event(PGMAJFAULT);
-	}
-
 	/*
 	 * Umm, take care of errors if the page isn't up-to-date.
 	 * Try to re-read it _once_. We do this synchronously,
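
As a usage note on the window placement in do_sync_mmap_readahead() above:
the readaround window is centered on the faulting page and clamped at the
start of the file. A small standalone userspace sketch (illustration only;
the helper name readaround_start() is invented here, not kernel code):

	#include <stdio.h>

	/*
	 * Same arithmetic as do_sync_mmap_readahead() above: center a
	 * window of ra_pages pages on the faulting page, never starting
	 * before page 0.
	 */
	static unsigned long readaround_start(unsigned long offset,
					      unsigned long ra_pages)
	{
		unsigned long start = 0;

		if (offset > ra_pages / 2)
			start = offset - ra_pages / 2;
		return start;
	}

	int main(void)
	{
		unsigned long s;

		s = readaround_start(100, 32);	/* mid-file fault */
		printf("fault@100 -> pages %lu..%lu\n", s, s + 31);	/* 84..115 */

		s = readaround_start(5, 32);	/* near file start: clamped */
		printf("fault@5   -> pages %lu..%lu\n", s, s + 31);	/* 0..31 */
		return 0;
	}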
