Skip to content

Commit

Permalink
---
Browse files Browse the repository at this point in the history
yaml
---
r: 344793
b: refs/heads/master
c: b24f53a
h: refs/heads/master
i:
  344791: 9e71691
v: v3
  • Loading branch information
Lee Schermerhorn authored and Mel Gorman committed Dec 11, 2012
1 parent 8a7be32 commit b4b99af
Show file tree
Hide file tree
Showing 4 changed files with 186 additions and 19 deletions.
2 changes: 1 addition & 1 deletion [refs]
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
---
refs/heads/master: 4daae3b4b9e49b7e0935499a352f1c59d90287d2
refs/heads/master: b24f53a0bea38b266d219ee651b22dba727c44ae
5 changes: 5 additions & 0 deletions trunk/include/linux/mm.h
Original file line number Diff line number Diff line change
Expand Up @@ -1551,6 +1551,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags)
}
#endif

#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
void change_prot_numa(struct vm_area_struct *vma,
unsigned long start, unsigned long end);
#endif

struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
Expand Down
13 changes: 10 additions & 3 deletions trunk/include/uapi/linux/mempolicy.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,16 @@ enum mpol_rebind_step {

/* Flags for mbind */
#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */
#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform
to policy */
#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */
#define MPOL_MF_LAZY (1<<3) /* Modifies '_MOVE: lazy migrate on fault */
#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */

#define MPOL_MF_VALID (MPOL_MF_STRICT | \
MPOL_MF_MOVE | \
MPOL_MF_MOVE_ALL | \
MPOL_MF_LAZY)

/*
* Internal flags that share the struct mempolicy flags word with
Expand Down
185 changes: 170 additions & 15 deletions trunk/mm/mempolicy.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@
#include <linux/syscalls.h>
#include <linux/ctype.h>
#include <linux/mm_inline.h>
#include <linux/mmu_notifier.h>

#include <asm/tlbflush.h>
#include <asm/uaccess.h>
Expand Down Expand Up @@ -565,6 +566,145 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
return 0;
}

#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE
/*
 * Search the single PMD covering @address for non-shared page mappings
 * (mapcount == 1) and set pmd/pte_numa on them, so the very next access
 * fires a NUMA hinting page fault.
 *
 * Returns the number of pages newly marked *_numa (0 when nothing was
 * changed).  TLB flushing is deferred to the caller, which flushes only
 * when a non-zero total is reported.
 */
static int
change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte, *_pte;
	struct page *page;
	unsigned long _address, end;
	spinlock_t *ptl;
	int ret = 0;

	VM_BUG_ON(address & ~PAGE_MASK);

	pgd = pgd_offset(mm, address);
	if (!pgd_present(*pgd))
		goto out;

	pud = pud_offset(pgd, address);
	if (!pud_present(*pud))
		goto out;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		goto out;

	if (pmd_trans_huge_lock(pmd, vma) == 1) {
		/* Huge pmd path: mark the whole huge page in one go. */
		VM_BUG_ON(address & ~HPAGE_PMD_MASK);

		/* Already marked NUMA: nothing changed, no flush needed. */
		if (pmd_numa(*pmd)) {
			spin_unlock(&mm->page_table_lock);
			goto out;
		}

		page = pmd_page(*pmd);

		/* only check non-shared pages */
		if (page_mapcount(page) != 1) {
			spin_unlock(&mm->page_table_lock);
			goto out;
		}

		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
		/*
		 * Count the huge page exactly once.  The previous code
		 * pre-loaded ret with HPAGE_PMD_NR and added HPAGE_PMD_NR
		 * again here, double-counting and reporting progress (and
		 * thus forcing a TLB flush in the caller) even on the
		 * early-exit paths above.  It also re-tested pmd_numa()
		 * a second time under the same uninterrupted lock hold,
		 * which could never observe a different result.
		 */
		ret = HPAGE_PMD_NR;
		/* defer TLB flush to lower the overhead */
		spin_unlock(&mm->page_table_lock);
		goto out;
	}

	if (pmd_trans_unstable(pmd))
		goto out;
	VM_BUG_ON(!pmd_present(*pmd));

	/* Regular ptes: walk up to the end of the vma or of this PMD. */
	end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK);
	pte = pte_offset_map_lock(mm, pmd, address, &ptl);
	for (_address = address, _pte = pte; _address < end;
	     _pte++, _address += PAGE_SIZE) {
		pte_t pteval = *_pte;
		if (!pte_present(pteval))
			continue;
		if (pte_numa(pteval))
			continue;
		page = vm_normal_page(vma, _address, pteval);
		if (unlikely(!page))
			continue;
		/* only check non-shared pages */
		if (page_mapcount(page) != 1)
			continue;

		set_pte_at(mm, _address, _pte, pte_mknuma(pteval));

		/* defer TLB flush to lower the overhead */
		ret++;
	}
	pte_unmap_unlock(pte, ptl);

	/*
	 * If any pte was marked, also mark the pmd so the fault handler
	 * can short-circuit on pmd_numa().
	 */
	if (ret && !pmd_numa(*pmd)) {
		spin_lock(&mm->page_table_lock);
		set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd));
		spin_unlock(&mm->page_table_lock);
		/* defer TLB flush to lower the overhead */
	}

out:
	return ret;
}

/*
 * Mark all suitable (non-shared, present) pages in [address, end) of
 * @vma pte/pmd_numa, so the next touch triggers a NUMA hinting fault,
 * then flush the TLB for the modified range if anything changed.
 *
 * Assumes mmap_sem is held by the caller.
 */
void
change_prot_numa(struct vm_area_struct *vma,
			unsigned long address, unsigned long end)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long start = address;	/* saved for the final flush */
	int progress = 0;

	while (address < end) {
		VM_BUG_ON(address < vma->vm_start ||
			  address + PAGE_SIZE > vma->vm_end);

		progress += change_prot_numa_range(mm, vma, address);
		/* advance to the next PMD boundary */
		address = (address + PMD_SIZE) & PMD_MASK;
	}

	/*
	 * Flush the TLB for the mm to start the NUMA hinting
	 * page faults after we finish scanning this vma part,
	 * if there were any PTE updates.
	 *
	 * Bug fix: the loop above advanced @address to (or past) @end,
	 * so the previous code flushed and notified over the empty or
	 * inverted range [address, end) — i.e. the updated range was
	 * never actually flushed.  Use the saved @start instead.
	 */
	if (progress) {
		mmu_notifier_invalidate_range_start(vma->vm_mm, start, end);
		flush_tlb_range(vma, start, end);
		mmu_notifier_invalidate_range_end(vma->vm_mm, start, end);
	}
}
#else
/*
 * !CONFIG_ARCH_USES_NUMA_PROT_NONE stub: the architecture cannot
 * generate NUMA hinting faults, so there is nothing to mark.
 *
 * Made 'void' to match both the CONFIG-enabled definition and the
 * prototype declared in <linux/mm.h> (the previous 'unsigned long'
 * return type disagreed with both); the only caller discards the
 * result, and the function is file-local (static).
 */
static void change_prot_numa(struct vm_area_struct *vma,
			unsigned long addr, unsigned long end)
{
}
#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */

/*
* Check if all pages in a range are on a set of nodes.
* If pagelist != NULL then isolate pages from the LRU and
Expand All @@ -583,29 +723,40 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
return ERR_PTR(-EFAULT);
prev = NULL;
for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) {
unsigned long endvma = vma->vm_end;

if (endvma > end)
endvma = end;
if (vma->vm_start > start)
start = vma->vm_start;

if (!(flags & MPOL_MF_DISCONTIG_OK)) {
if (!vma->vm_next && vma->vm_end < end)
return ERR_PTR(-EFAULT);
if (prev && prev->vm_end < vma->vm_start)
return ERR_PTR(-EFAULT);
}
if (!is_vm_hugetlb_page(vma) &&
((flags & MPOL_MF_STRICT) ||

if (is_vm_hugetlb_page(vma))
goto next;

if (flags & MPOL_MF_LAZY) {
change_prot_numa(vma, start, endvma);
goto next;
}

if ((flags & MPOL_MF_STRICT) ||
((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
vma_migratable(vma)))) {
unsigned long endvma = vma->vm_end;
vma_migratable(vma))) {

if (endvma > end)
endvma = end;
if (vma->vm_start > start)
start = vma->vm_start;
err = check_pgd_range(vma, start, endvma, nodes,
flags, private);
if (err) {
first = ERR_PTR(err);
break;
}
}
next:
prev = vma;
}
return first;
Expand Down Expand Up @@ -1138,8 +1289,7 @@ static long do_mbind(unsigned long start, unsigned long len,
int err;
LIST_HEAD(pagelist);

if (flags & ~(unsigned long)(MPOL_MF_STRICT |
MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
if (flags & ~(unsigned long)MPOL_MF_VALID)
return -EINVAL;
if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE))
return -EPERM;
Expand All @@ -1162,6 +1312,9 @@ static long do_mbind(unsigned long start, unsigned long len,
if (IS_ERR(new))
return PTR_ERR(new);

if (flags & MPOL_MF_LAZY)
new->flags |= MPOL_F_MOF;

/*
* If we are using the default policy then operation
* on discontinuous address spaces is okay after all
Expand Down Expand Up @@ -1198,13 +1351,15 @@ static long do_mbind(unsigned long start, unsigned long len,
vma = check_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);

err = PTR_ERR(vma);
if (!IS_ERR(vma)) {
int nr_failed = 0;

err = PTR_ERR(vma); /* maybe ... */
if (!IS_ERR(vma) && mode != MPOL_NOOP)
err = mbind_range(mm, start, end, new);

if (!err) {
int nr_failed = 0;

if (!list_empty(&pagelist)) {
WARN_ON_ONCE(flags & MPOL_MF_LAZY);
nr_failed = migrate_pages(&pagelist, new_vma_page,
(unsigned long)vma,
false, MIGRATE_SYNC,
Expand All @@ -1213,7 +1368,7 @@ static long do_mbind(unsigned long start, unsigned long len,
putback_lru_pages(&pagelist);
}

if (!err && nr_failed && (flags & MPOL_MF_STRICT))
if (nr_failed && (flags & MPOL_MF_STRICT))
err = -EIO;
} else
putback_lru_pages(&pagelist);
Expand Down

0 comments on commit b4b99af

Please sign in to comment.