Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 106929
b: refs/heads/master
c: cddb8a5
h: refs/heads/master
i:
  106927: aee26dd
v: v3
  • Loading branch information
Andrea Arcangeli authored and Linus Torvalds committed Jul 28, 2008
1 parent 6c1877b commit 2042ab6
Show file tree
Hide file tree
Showing 16 changed files with 624 additions and 14 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 7906d00cd1f687268f0a3599442d113767795ae6
refs/heads/master: cddb8a5c14aa89810b40495d94d3d2a0faee6619
1 change: 1 addition & 0 deletions trunk/arch/x86/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
select ANON_INODES
---help---
Support hosting fully virtualized guest machines using hardware
Expand Down
4 changes: 4 additions & 0 deletions trunk/include/linux/mm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/rbtree.h>
#include <linux/rwsem.h>
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <asm/page.h>
#include <asm/mmu.h>

Expand Down Expand Up @@ -253,6 +254,9 @@ struct mm_struct {
struct file *exe_file;
unsigned long num_exe_file_vmas;
#endif
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
};

#endif /* _LINUX_MM_TYPES_H */
279 changes: 279 additions & 0 deletions trunk/include/linux/mmu_notifier.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,279 @@
#ifndef _LINUX_MMU_NOTIFIER_H
#define _LINUX_MMU_NOTIFIER_H

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/mm_types.h>

struct mmu_notifier;
struct mmu_notifier_ops;

#ifdef CONFIG_MMU_NOTIFIER

/*
* The mmu notifier_mm structure is allocated and installed in
* mm->mmu_notifier_mm inside the mm_take_all_locks() protected
* critical section and it's released only when mm_count reaches zero
* in mmdrop().
*/
struct mmu_notifier_mm {
/* all mmu notifiers registerd in this mm are queued in this list */
struct hlist_head list;
/* to serialize the list modifications and hlist_unhashed */
spinlock_t lock;
};

struct mmu_notifier_ops {
/*
* Called either by mmu_notifier_unregister or when the mm is
* being destroyed by exit_mmap, always before all pages are
* freed. This can run concurrently with other mmu notifier
* methods (the ones invoked outside the mm context) and it
* should tear down all secondary mmu mappings and freeze the
* secondary mmu. If this method isn't implemented you've to
* be sure that nothing could possibly write to the pages
* through the secondary mmu by the time the last thread with
* tsk->mm == mm exits.
*
* As side note: the pages freed after ->release returns could
* be immediately reallocated by the gart at an alias physical
* address with a different cache model, so if ->release isn't
* implemented because all _software_ driven memory accesses
* through the secondary mmu are terminated by the time the
* last thread of this mm quits, you've also to be sure that
* speculative _hardware_ operations can't allocate dirty
* cachelines in the cpu that could not be snooped and made
* coherent with the other read and write operations happening
* through the gart alias address, so leading to memory
* corruption.
*/
void (*release)(struct mmu_notifier *mn,
struct mm_struct *mm);

/*
* clear_flush_young is called after the VM is
* test-and-clearing the young/accessed bitflag in the
* pte. This way the VM will provide proper aging to the
* accesses to the page through the secondary MMUs and not
* only to the ones through the Linux pte.
*/
int (*clear_flush_young)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address);

/*
* Before this is invoked any secondary MMU is still ok to
* read/write to the page previously pointed to by the Linux
* pte because the page hasn't been freed yet and it won't be
* freed until this returns. If required set_page_dirty has to
* be called internally to this method.
*/
void (*invalidate_page)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long address);

/*
* invalidate_range_start() and invalidate_range_end() must be
* paired and are called only when the mmap_sem and/or the
* locks protecting the reverse maps are held. The subsystem
* must guarantee that no additional references are taken to
* the pages in the range established between the call to
* invalidate_range_start() and the matching call to
* invalidate_range_end().
*
* Invalidation of multiple concurrent ranges may be
* optionally permitted by the driver. Either way the
* establishment of sptes is forbidden in the range passed to
* invalidate_range_begin/end for the whole duration of the
* invalidate_range_begin/end critical section.
*
* invalidate_range_start() is called when all pages in the
* range are still mapped and have at least a refcount of one.
*
* invalidate_range_end() is called when all pages in the
* range have been unmapped and the pages have been freed by
* the VM.
*
* The VM will remove the page table entries and potentially
* the page between invalidate_range_start() and
* invalidate_range_end(). If the page must not be freed
* because of pending I/O or other circumstances then the
* invalidate_range_start() callback (or the initial mapping
* by the driver) must make sure that the refcount is kept
* elevated.
*
* If the driver increases the refcount when the pages are
* initially mapped into an address space then either
* invalidate_range_start() or invalidate_range_end() may
* decrease the refcount. If the refcount is decreased on
* invalidate_range_start() then the VM can free pages as page
* table entries are removed. If the refcount is only
* droppped on invalidate_range_end() then the driver itself
* will drop the last refcount but it must take care to flush
* any secondary tlb before doing the final free on the
* page. Pages will no longer be referenced by the linux
* address space but may still be referenced by sptes until
* the last refcount is dropped.
*/
void (*invalidate_range_start)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end);
void (*invalidate_range_end)(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start, unsigned long end);
};

/*
* The notifier chains are protected by mmap_sem and/or the reverse map
* semaphores. Notifier chains are only changed when all reverse maps and
* the mmap_sem locks are taken.
*
* Therefore notifier chains can only be traversed when either
*
* 1. mmap_sem is held.
* 2. One of the reverse map locks is held (i_mmap_lock or anon_vma->lock).
* 3. No other concurrent thread can access the list (release)
*/
struct mmu_notifier {
struct hlist_node hlist;
const struct mmu_notifier_ops *ops;
};

static inline int mm_has_notifiers(struct mm_struct *mm)
{
return unlikely(mm->mmu_notifier_mm);
}

extern int mmu_notifier_register(struct mmu_notifier *mn,
struct mm_struct *mm);
extern int __mmu_notifier_register(struct mmu_notifier *mn,
struct mm_struct *mm);
extern void mmu_notifier_unregister(struct mmu_notifier *mn,
struct mm_struct *mm);
extern void __mmu_notifier_mm_destroy(struct mm_struct *mm);
extern void __mmu_notifier_release(struct mm_struct *mm);
extern int __mmu_notifier_clear_flush_young(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_invalidate_page(struct mm_struct *mm,
unsigned long address);
extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end);
extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end);

static inline void mmu_notifier_release(struct mm_struct *mm)
{
if (mm_has_notifiers(mm))
__mmu_notifier_release(mm);
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
unsigned long address)
{
if (mm_has_notifiers(mm))
return __mmu_notifier_clear_flush_young(mm, address);
return 0;
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
unsigned long address)
{
if (mm_has_notifiers(mm))
__mmu_notifier_invalidate_page(mm, address);
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
__mmu_notifier_invalidate_range_start(mm, start, end);
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
if (mm_has_notifiers(mm))
__mmu_notifier_invalidate_range_end(mm, start, end);
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
mm->mmu_notifier_mm = NULL;
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
if (mm_has_notifiers(mm))
__mmu_notifier_mm_destroy(mm);
}

/*
* These two macros will sometime replace ptep_clear_flush.
* ptep_clear_flush is impleemnted as macro itself, so this also is
* implemented as a macro until ptep_clear_flush will converted to an
* inline function, to diminish the risk of compilation failure. The
* invalidate_page method over time can be moved outside the PT lock
* and these two macros can be later removed.
*/
#define ptep_clear_flush_notify(__vma, __address, __ptep) \
({ \
pte_t __pte; \
struct vm_area_struct *___vma = __vma; \
unsigned long ___address = __address; \
__pte = ptep_clear_flush(___vma, ___address, __ptep); \
mmu_notifier_invalidate_page(___vma->vm_mm, ___address); \
__pte; \
})

#define ptep_clear_flush_young_notify(__vma, __address, __ptep) \
({ \
int __young; \
struct vm_area_struct *___vma = __vma; \
unsigned long ___address = __address; \
__young = ptep_clear_flush_young(___vma, ___address, __ptep); \
__young |= mmu_notifier_clear_flush_young(___vma->vm_mm, \
___address); \
__young; \
})

#else /* CONFIG_MMU_NOTIFIER */

static inline void mmu_notifier_release(struct mm_struct *mm)
{
}

static inline int mmu_notifier_clear_flush_young(struct mm_struct *mm,
unsigned long address)
{
return 0;
}

static inline void mmu_notifier_invalidate_page(struct mm_struct *mm,
unsigned long address)
{
}

static inline void mmu_notifier_invalidate_range_start(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm,
unsigned long start, unsigned long end)
{
}

static inline void mmu_notifier_mm_init(struct mm_struct *mm)
{
}

static inline void mmu_notifier_mm_destroy(struct mm_struct *mm)
{
}

#define ptep_clear_flush_young_notify ptep_clear_flush_young
#define ptep_clear_flush_notify ptep_clear_flush

#endif /* CONFIG_MMU_NOTIFIER */

#endif /* _LINUX_MMU_NOTIFIER_H */
3 changes: 3 additions & 0 deletions trunk/kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <linux/key.h>
#include <linux/binfmts.h>
#include <linux/mman.h>
#include <linux/mmu_notifier.h>
#include <linux/fs.h>
#include <linux/nsproxy.h>
#include <linux/capability.h>
Expand Down Expand Up @@ -414,6 +415,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)

if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
mmu_notifier_mm_init(mm);
return mm;
}

Expand Down Expand Up @@ -446,6 +448,7 @@ void __mmdrop(struct mm_struct *mm)
BUG_ON(mm == &init_mm);
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
Expand Down
3 changes: 3 additions & 0 deletions trunk/mm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -208,3 +208,6 @@ config NR_QUICK
config VIRT_TO_BUS
def_bool y
depends on !ARCH_NO_VIRT_TO_BUS

config MMU_NOTIFIER
bool
1 change: 1 addition & 0 deletions trunk/mm/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ obj-$(CONFIG_SHMEM) += shmem.o
obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o
obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
obj-$(CONFIG_SLOB) += slob.o
obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
Expand Down
3 changes: 2 additions & 1 deletion trunk/mm/filemap_xip.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
Expand Down Expand Up @@ -188,7 +189,7 @@ __xip_unmap (struct address_space * mapping,
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte);
pteval = ptep_clear_flush_notify(vma, address, pte);
page_remove_rmap(page, vma);
dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
Expand Down
3 changes: 3 additions & 0 deletions trunk/mm/fremap.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <linux/rmap.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/mmu_notifier.h>

#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
Expand Down Expand Up @@ -214,7 +215,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
spin_unlock(&mapping->i_mmap_lock);
}

mmu_notifier_invalidate_range_start(mm, start, start + size);
err = populate_range(mm, vma, start, size, pgoff);
mmu_notifier_invalidate_range_end(mm, start, start + size);
if (!err && !(flags & MAP_NONBLOCK)) {
if (unlikely(has_write_lock)) {
downgrade_write(&mm->mmap_sem);
Expand Down
Loading

0 comments on commit 2042ab6

Please sign in to comment.