Commit f6d83ad
---
r: 343308
b: refs/heads/master
c: 97ae174
h: refs/heads/master
v: v3
Kirill A. Shutemov authored and Linus Torvalds committed Dec 13, 2012
1 parent f2993d2 commit f6d83ad
Showing 2 changed files with 89 additions and 26 deletions.
2 changes: 1 addition & 1 deletion [refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 78ca0e679203bbf74f8febd9725a1c8dd083d073
+refs/heads/master: 97ae17497e996ff09bf97b6db3b33f7fd4029092
113 changes: 88 additions & 25 deletions trunk/mm/huge_memory.c
@@ -12,12 +12,14 @@
 #include <linux/mmu_notifier.h>
 #include <linux/rmap.h>
 #include <linux/swap.h>
+#include <linux/shrinker.h>
 #include <linux/mm_inline.h>
 #include <linux/kthread.h>
 #include <linux/khugepaged.h>
 #include <linux/freezer.h>
 #include <linux/mman.h>
+#include <linux/pagemap.h>
 
 #include <asm/tlb.h>
 #include <asm/pgalloc.h>
 #include "internal.h"
@@ -47,7 +49,6 @@ static unsigned int khugepaged_scan_sleep_millisecs __read_mostly = 10000;
 /* during fragmentation poll the hugepage allocator once every minute */
 static unsigned int khugepaged_alloc_sleep_millisecs __read_mostly = 60000;
 static struct task_struct *khugepaged_thread __read_mostly;
-static unsigned long huge_zero_pfn __read_mostly;
 static DEFINE_MUTEX(khugepaged_mutex);
 static DEFINE_SPINLOCK(khugepaged_mm_lock);
 static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
@@ -160,31 +161,74 @@ static int start_khugepaged(void)
 	return err;
 }
 
-static int init_huge_zero_pfn(void)
+static atomic_t huge_zero_refcount;
+static unsigned long huge_zero_pfn __read_mostly;
+
+static inline bool is_huge_zero_pfn(unsigned long pfn)
 {
-	struct page *hpage;
-	unsigned long pfn;
+	unsigned long zero_pfn = ACCESS_ONCE(huge_zero_pfn);
+	return zero_pfn && pfn == zero_pfn;
+}
 
-	hpage = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
+static inline bool is_huge_zero_pmd(pmd_t pmd)
+{
+	return is_huge_zero_pfn(pmd_pfn(pmd));
+}
+
+static unsigned long get_huge_zero_page(void)
+{
+	struct page *zero_page;
+retry:
+	if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
+		return ACCESS_ONCE(huge_zero_pfn);
+
+	zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
 			HPAGE_PMD_ORDER);
-	if (!hpage)
-		return -ENOMEM;
-	pfn = page_to_pfn(hpage);
-	if (cmpxchg(&huge_zero_pfn, 0, pfn))
-		__free_page(hpage);
-	return 0;
+	if (!zero_page)
+		return 0;
+	preempt_disable();
+	if (cmpxchg(&huge_zero_pfn, 0, page_to_pfn(zero_page))) {
+		preempt_enable();
+		__free_page(zero_page);
+		goto retry;
+	}
+
+	/* We take additional reference here. It will be put back by shrinker */
+	atomic_set(&huge_zero_refcount, 2);
+	preempt_enable();
+	return ACCESS_ONCE(huge_zero_pfn);
 }
 
-static inline bool is_huge_zero_pfn(unsigned long pfn)
+static void put_huge_zero_page(void)
 {
-	return huge_zero_pfn && pfn == huge_zero_pfn;
+	/*
+	 * Counter should never go to zero here. Only shrinker can put
+	 * last reference.
+	 */
+	BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
 }
 
-static inline bool is_huge_zero_pmd(pmd_t pmd)
+static int shrink_huge_zero_page(struct shrinker *shrink,
+		struct shrink_control *sc)
 {
-	return is_huge_zero_pfn(pmd_pfn(pmd));
+	if (!sc->nr_to_scan)
+		/* we can free zero page only if last reference remains */
+		return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
+
+	if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
+		unsigned long zero_pfn = xchg(&huge_zero_pfn, 0);
+		BUG_ON(zero_pfn == 0);
+		__free_page(__pfn_to_page(zero_pfn));
+	}
+
+	return 0;
 }
 
+static struct shrinker huge_zero_page_shrinker = {
+	.shrink = shrink_huge_zero_page,
+	.seeks = DEFAULT_SEEKS,
+};
+
 #ifdef CONFIG_SYSFS
 
 static ssize_t double_flag_show(struct kobject *kobj,
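The get/put pair above is the heart of the patch: the first mapper allocates the huge zero page lazily, concurrent allocators race through cmpxchg() and the loser frees its page, and the counter is primed to 2 so the shrinker owns the final reference. Below is a minimal user-space sketch of the same pattern, assuming C11 atomics and hypothetical names (get_zero_buf(), put_zero_buf(), reclaim_zero_buf()) in place of the kernel primitives; it omits the preempt_disable() that the kernel uses to narrow the window between publishing the pfn and setting the count, so racing callers may simply spin through retry a little longer.

#include <stdatomic.h>
#include <stdlib.h>

static atomic_uint zero_refcount;	/* 0 means "not allocated" */
static _Atomic(void *) zero_buf;

/* Take a reference, allocating the shared zeroed buffer on first use. */
static void *get_zero_buf(size_t size)
{
retry:
	/* Fast path: bump the count only if it is already nonzero. */
	for (unsigned int old = atomic_load(&zero_refcount); old != 0; )
		if (atomic_compare_exchange_weak(&zero_refcount, &old, old + 1))
			return atomic_load(&zero_buf);

	/* Slow path: allocate, then try to publish our buffer. */
	void *buf = calloc(1, size);
	if (!buf)
		return NULL;
	void *expected = NULL;
	if (!atomic_compare_exchange_strong(&zero_buf, &expected, buf)) {
		free(buf);	/* lost the race; retry the fast path */
		goto retry;
	}
	/* One reference for the caller, one kept back for the reclaimer. */
	atomic_store(&zero_refcount, 2);
	return buf;
}

/* Drop a user reference; never frees, mirroring put_huge_zero_page(). */
static void put_zero_buf(void)
{
	atomic_fetch_sub(&zero_refcount, 1);
}

/* Reclaimer: free only if its own reference is the last one left. */
static void reclaim_zero_buf(void)
{
	unsigned int one = 1;
	if (atomic_compare_exchange_strong(&zero_refcount, &one, 0))
		free(atomic_exchange(&zero_buf, NULL));
}

Priming the count to 2 rather than 1 is the key design choice: user puts can then never reach zero, so only the reclaimer (the shrinker, in the kernel) decides when the page actually dies.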
@@ -576,6 +620,8 @@ static int __init hugepage_init(void)
 		goto out;
 	}
 
+	register_shrinker(&huge_zero_page_shrinker);
+
 	/*
 	 * By default disable transparent hugepages on smaller systems,
 	 * where the extra memory used could hurt more than TLB overhead
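register_shrinker() wires the zero page into memory reclaim through the shrinker interface of this era, a single ->shrink() callback (the count_objects/scan_objects split arrived later, in 3.12). As a sketch of that contract, here is a hypothetical skeleton module; the demo_* names are invented, and huge_memory.c itself never unregisters because THP support cannot be built as a module.

#include <linux/module.h>
#include <linux/shrinker.h>

static int demo_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	if (!sc->nr_to_scan)
		/* nr_to_scan == 0: a query; report the freeable-object count */
		return 0;	/* this demo caches nothing */
	/* nonzero: free up to sc->nr_to_scan objects, then report again */
	return 0;
}

static struct shrinker demo_shrinker = {
	.shrink = demo_shrink,
	.seeks = DEFAULT_SEEKS,
};

static int __init demo_init(void)
{
	register_shrinker(&demo_shrinker);
	return 0;
}

static void __exit demo_exit(void)
{
	unregister_shrinker(&demo_shrinker);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");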
@@ -705,10 +751,11 @@ static inline struct page *alloc_hugepage(int defrag)
 #endif
 
 static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
-		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd)
+		struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
+		unsigned long zero_pfn)
 {
 	pmd_t entry;
-	entry = pfn_pmd(huge_zero_pfn, vma->vm_page_prot);
+	entry = pfn_pmd(zero_pfn, vma->vm_page_prot);
 	entry = pmd_wrprotect(entry);
 	entry = pmd_mkhuge(entry);
 	set_pmd_at(mm, haddr, pmd, entry);
@@ -731,15 +778,19 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			return VM_FAULT_OOM;
 		if (!(flags & FAULT_FLAG_WRITE)) {
 			pgtable_t pgtable;
-			if (unlikely(!huge_zero_pfn && init_huge_zero_pfn())) {
-				count_vm_event(THP_FAULT_FALLBACK);
-				goto out;
-			}
+			unsigned long zero_pfn;
 			pgtable = pte_alloc_one(mm, haddr);
 			if (unlikely(!pgtable))
 				return VM_FAULT_OOM;
+			zero_pfn = get_huge_zero_page();
+			if (unlikely(!zero_pfn)) {
+				pte_free(mm, pgtable);
+				count_vm_event(THP_FAULT_FALLBACK);
+				goto out;
+			}
 			spin_lock(&mm->page_table_lock);
-			set_huge_zero_page(pgtable, mm, vma, haddr, pmd);
+			set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+					zero_pfn);
 			spin_unlock(&mm->page_table_lock);
 			return 0;
 		}
@@ -813,7 +864,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * a page table.
 	 */
 	if (is_huge_zero_pmd(pmd)) {
-		set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd);
+		unsigned long zero_pfn;
+		/*
+		 * get_huge_zero_page() will never allocate a new page here,
+		 * since we already have a zero page to copy. It just takes a
+		 * reference.
+		 */
+		zero_pfn = get_huge_zero_page();
+		set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
+				zero_pfn);
 		ret = 0;
 		goto out_unlock;
 	}
@@ -923,6 +982,7 @@ static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
 	spin_unlock(&mm->page_table_lock);
+	put_huge_zero_page();
 	inc_mm_counter(mm, MM_ANONPAGES);
 
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
@@ -1123,9 +1183,10 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	page_add_new_anon_rmap(new_page, vma, haddr);
 	set_pmd_at(mm, haddr, pmd, entry);
 	update_mmu_cache_pmd(vma, address, pmd);
-	if (is_huge_zero_pmd(orig_pmd))
+	if (is_huge_zero_pmd(orig_pmd)) {
 		add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
-	else {
+		put_huge_zero_page();
+	} else {
 		VM_BUG_ON(!PageHead(page));
 		page_remove_rmap(page);
 		put_page(page);
@@ -1202,6 +1263,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		if (is_huge_zero_pmd(orig_pmd)) {
 			tlb->mm->nr_ptes--;
 			spin_unlock(&tlb->mm->page_table_lock);
+			put_huge_zero_page();
 		} else {
 			page = pmd_page(orig_pmd);
 			page_remove_rmap(page);
@@ -2511,6 +2573,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
 	}
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
+	put_huge_zero_page();
 }
 
 void __split_huge_page_pmd(struct vm_area_struct *vma, unsigned long address,
