Skip to content

Commit

Permalink
mm: allow page fault handlers to perform the COW
Browse files Browse the repository at this point in the history
Currently COW of an XIP file is done by first bringing in a read-only
mapping, then retrying the fault and copying the page.  It is much more
efficient to tell the fault handler that a COW is being attempted (by
passing in the pre-allocated page in the vm_fault structure), and allow
the handler to perform the COW operation itself.

The handler cannot insert the page itself if there is already a read-only
mapping at that address, so allow the handler to return VM_FAULT_LOCKED
and set the fault_page to be NULL.  This indicates to the MM code that the
i_mmap_lock is held instead of the page lock.

Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andreas Dilger <andreas.dilger@intel.com>
Cc: Boaz Harrosh <boaz@plexistor.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
  • Loading branch information
Matthew Wilcox authored and Linus Torvalds committed Feb 17, 2015
1 parent 283307c commit 2e4cdab
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 9 deletions.
1 change: 1 addition & 0 deletions include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ struct vm_fault {
pgoff_t pgoff; /* Logical page offset based on vma */
void __user *virtual_address; /* Faulting virtual address */

struct page *cow_page; /* Handler may choose to COW */
struct page *page; /* ->fault handlers should return a
* page here, unless VM_FAULT_NOPAGE
* is set (which is also implied by
Expand Down
41 changes: 32 additions & 9 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -1965,6 +1965,7 @@ static int do_page_mkwrite(struct vm_area_struct *vma, struct page *page,
vmf.pgoff = page->index;
vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;
vmf.page = page;
vmf.cow_page = NULL;

ret = vma->vm_ops->page_mkwrite(vma, &vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)))
Expand Down Expand Up @@ -2639,7 +2640,8 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
* See filemap_fault() and __lock_page_retry().
*/
static int __do_fault(struct vm_area_struct *vma, unsigned long address,
pgoff_t pgoff, unsigned int flags, struct page **page)
pgoff_t pgoff, unsigned int flags,
struct page *cow_page, struct page **page)
{
struct vm_fault vmf;
int ret;
Expand All @@ -2648,10 +2650,13 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
vmf.pgoff = pgoff;
vmf.flags = flags;
vmf.page = NULL;
vmf.cow_page = cow_page;

ret = vma->vm_ops->fault(vma, &vmf);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;
if (!vmf.page)
goto out;

if (unlikely(PageHWPoison(vmf.page))) {
if (ret & VM_FAULT_LOCKED)
Expand All @@ -2665,6 +2670,7 @@ static int __do_fault(struct vm_area_struct *vma, unsigned long address,
else
VM_BUG_ON_PAGE(!PageLocked(vmf.page), vmf.page);

out:
*page = vmf.page;
return ret;
}
Expand Down Expand Up @@ -2835,7 +2841,7 @@ static int do_read_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pte_unmap_unlock(pte, ptl);
}

ret = __do_fault(vma, address, pgoff, flags, &fault_page);
ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;

Expand Down Expand Up @@ -2875,26 +2881,43 @@ static int do_cow_fault(struct mm_struct *mm, struct vm_area_struct *vma,
return VM_FAULT_OOM;
}

ret = __do_fault(vma, address, pgoff, flags, &fault_page);
ret = __do_fault(vma, address, pgoff, flags, new_page, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
goto uncharge_out;

copy_user_highpage(new_page, fault_page, address, vma);
if (fault_page)
copy_user_highpage(new_page, fault_page, address, vma);
__SetPageUptodate(new_page);

pte = pte_offset_map_lock(mm, pmd, address, &ptl);
if (unlikely(!pte_same(*pte, orig_pte))) {
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
page_cache_release(fault_page);
if (fault_page) {
unlock_page(fault_page);
page_cache_release(fault_page);
} else {
/*
* The fault handler has no page to lock, so it holds
* i_mmap_lock for read to protect against truncate.
*/
i_mmap_unlock_read(vma->vm_file->f_mapping);
}
goto uncharge_out;
}
do_set_pte(vma, address, new_page, pte, true, true);
mem_cgroup_commit_charge(new_page, memcg, false);
lru_cache_add_active_or_unevictable(new_page, vma);
pte_unmap_unlock(pte, ptl);
unlock_page(fault_page);
page_cache_release(fault_page);
if (fault_page) {
unlock_page(fault_page);
page_cache_release(fault_page);
} else {
/*
* The fault handler has no page to lock, so it holds
* i_mmap_lock for read to protect against truncate.
*/
i_mmap_unlock_read(vma->vm_file->f_mapping);
}
return ret;
uncharge_out:
mem_cgroup_cancel_charge(new_page, memcg);
Expand All @@ -2913,7 +2936,7 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma,
int dirtied = 0;
int ret, tmp;

ret = __do_fault(vma, address, pgoff, flags, &fault_page);
ret = __do_fault(vma, address, pgoff, flags, NULL, &fault_page);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)))
return ret;

Expand Down

0 comments on commit 2e4cdab

Please sign in to comment.