[PATCH] Four level pagetables for ppc64
Implement 4-level pagetables for ppc64

This patch implements full four-level page tables for ppc64, thereby
extending the usable user address range to 44 bits (16T).

The patch uses a full page for the tables at the bottom and top level,
and a quarter page for the intermediate levels.  It uses full 64-bit
pointers at every level, thus also increasing the addressable range of
physical memory.  This patch also tweaks the VSID allocation to allow
matching range for user addresses (this halves the number of available
contexts) and adds some #if and BUILD_BUG sanity checks.

Signed-off-by: David Gibson <dwg@au1.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
David Gibson authored and Paul Mackerras committed Aug 29, 2005
1 parent decd300 commit e28f7fa
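
Editor's note: the 16T figure above follows from simple arithmetic. Below is a minimal sketch, assuming the ppc64 4K base page, 8-byte page-table pointers and 256MB (1 << 28) segments; the SKETCH_* names are illustrative only and do not appear in the patch.

/* A full page of 64-bit pointers holds 4096/8 = 512 entries (9 bits); a
 * quarter page holds 128 entries (7 bits).  With a full page at the top
 * (pgd) and bottom (pte) levels and quarter pages in between (pud, pmd): */
#define SKETCH_PAGE_SHIFT	12					/* 4K base pages */
#define SKETCH_PGTABLE_BITS	(9 + 7 + 7 + 9 + SKETCH_PAGE_SHIFT)	/* = 44 */
#define SKETCH_PGTABLE_RANGE	(1UL << SKETCH_PGTABLE_BITS)		/* = 16T */

/* The matching user-address check: with 256MB segments the ESID is
 * ea >> 28, and only 44 - 28 = 16 ESID bits may be non-zero -- the same
 * test the slb_low.S hunk below performs with "srdi. r9,r3,USER_ESID_BITS". */
static inline int sketch_ea_out_of_range(unsigned long ea)
{
	return ((ea >> 28) >> 16) != 0;
}

Because pgd and pte tables are both a full page and pud and pmd tables are both a quarter page, the init.c hunk below can share one slab cache between each pair ("pgd_pte_cache" and "pud_pmd_cache").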
Showing 12 changed files with 294 additions and 278 deletions.
2 changes: 1 addition & 1 deletion arch/ppc64/mm/hash_utils.c
@@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
int local = 0;
cpumask_t tmp;

-if ((ea & ~REGION_MASK) > EADDR_MASK)
+if ((ea & ~REGION_MASK) >= PGTABLE_RANGE)
return 1;

switch (REGION_ID(ea)) {
187 changes: 59 additions & 128 deletions arch/ppc64/mm/hugetlbpage.c
@@ -27,124 +27,91 @@

#include <linux/sysctl.h>

#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3)
#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT)
#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1))

#define HUGEPTE_INDEX_SIZE 9
#define HUGEPGD_INDEX_SIZE 10

#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE)
#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE)

static inline int hugepgd_index(unsigned long addr)
{
return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT;
}

static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr)
/* Modelled after find_linux_pte() */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
int index;
pgd_t *pg;
pud_t *pu;
pmd_t *pm;
pte_t *pt;

if (! mm->context.huge_pgdir)
return NULL;
BUG_ON(! in_hugepage_area(mm->context, addr));

addr &= HPAGE_MASK;

pg = pgd_offset(mm, addr);
if (!pgd_none(*pg)) {
pu = pud_offset(pg, addr);
if (!pud_none(*pu)) {
pm = pmd_offset(pu, addr);
pt = (pte_t *)pm;
BUG_ON(!pmd_none(*pm)
&& !(pte_present(*pt) && pte_huge(*pt)));
return pt;
}
}

index = hugepgd_index(addr);
BUG_ON(index >= PTRS_PER_HUGEPGD);
return (pud_t *)(mm->context.huge_pgdir + index);
return NULL;
}

static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr)
pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
{
int index;

if (pud_none(*dir))
return NULL;

index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE;
return (pte_t *)pud_page(*dir) + index;
}
pgd_t *pg;
pud_t *pu;
pmd_t *pm;
pte_t *pt;

static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr)
{
BUG_ON(! in_hugepage_area(mm->context, addr));

if (! mm->context.huge_pgdir) {
pgd_t *new;
spin_unlock(&mm->page_table_lock);
/* Don't use pgd_alloc(), because we want __GFP_REPEAT */
new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
spin_lock(&mm->page_table_lock);

/*
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (mm->context.huge_pgdir)
pgd_free(new);
else
mm->context.huge_pgdir = new;
}
return hugepgd_offset(mm, addr);
}
addr &= HPAGE_MASK;

static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr)
{
if (! pud_present(*dir)) {
pte_t *new;
pg = pgd_offset(mm, addr);
pu = pud_alloc(mm, pg, addr);

spin_unlock(&mm->page_table_lock);
new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT);
BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE));
spin_lock(&mm->page_table_lock);
/*
* Because we dropped the lock, we should re-check the
* entry, as somebody else could have populated it..
*/
if (pud_present(*dir)) {
if (new)
kmem_cache_free(zero_cache, new);
} else {
struct page *ptepage;

if (! new)
return NULL;
ptepage = virt_to_page(new);
ptepage->mapping = (void *) mm;
ptepage->index = addr & HUGEPGDIR_MASK;
pud_populate(mm, dir, new);
if (pu) {
pm = pmd_alloc(mm, pu, addr);
if (pm) {
pt = (pte_t *)pm;
BUG_ON(!pmd_none(*pm)
&& !(pte_present(*pt) && pte_huge(*pt)));
return pt;
}
}

return hugepte_offset(dir, addr);
return NULL;
}

pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
pud_t *pud;
#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)

BUG_ON(! in_hugepage_area(mm->context, addr));
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte)
{
int i;

pud = hugepgd_offset(mm, addr);
if (! pud)
return NULL;
if (pte_present(*ptep)) {
pte_clear(mm, addr, ptep);
flush_tlb_pending();
}

return hugepte_offset(pud, addr);
for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
ptep++;
}
}

pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
pud_t *pud;
unsigned long old = pte_update(ptep, ~0UL);
int i;

BUG_ON(! in_hugepage_area(mm->context, addr));
if (old & _PAGE_HASHPTE)
hpte_update(mm, addr, old, 0);

pud = hugepgd_alloc(mm, addr);
if (! pud)
return NULL;
for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
ptep[i] = __pte(0);

return hugepte_alloc(mm, pud, addr);
return __pte(old);
}

/*
@@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
}

-void hugetlb_mm_free_pgd(struct mm_struct *mm)
-{
-int i;
-pgd_t *pgdir;
-
-spin_lock(&mm->page_table_lock);
-
-pgdir = mm->context.huge_pgdir;
-if (! pgdir)
-goto out;
-
-mm->context.huge_pgdir = NULL;
-
-/* cleanup any hugepte pages leftover */
-for (i = 0; i < PTRS_PER_HUGEPGD; i++) {
-pud_t *pud = (pud_t *)(pgdir + i);
-
-if (! pud_none(*pud)) {
-pte_t *pte = (pte_t *)pud_page(*pud);
-struct page *ptepage = virt_to_page(pte);
-
-ptepage->mapping = NULL;
-
-BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE));
-kmem_cache_free(zero_cache, pte);
-}
-pud_clear(pud);
-}
-
-BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE));
-kmem_cache_free(zero_cache, pgdir);
-
-out:
-spin_unlock(&mm->page_table_lock);
-}

int hash_huge_page(struct mm_struct *mm, unsigned long access,
unsigned long ea, unsigned long vsid, int local)
{
2 changes: 1 addition & 1 deletion arch/ppc64/mm/imalloc.c
@@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr)
break;
if ((unsigned long)tmp->addr >= ioremap_bot)
addr = tmp->size + (unsigned long) tmp->addr;
-if (addr > IMALLOC_END-size)
+if (addr >= IMALLOC_END-size)
return 1;
}
*im_addr = addr;
62 changes: 41 additions & 21 deletions arch/ppc64/mm/init.c
@@ -66,6 +66,14 @@
#include <asm/vdso.h>
#include <asm/imalloc.h>

+#if PGTABLE_RANGE > USER_VSID_RANGE
+#warning Limited user VSID range means pagetable space is wasted
+#endif
+
+#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE)
+#warning TASK_SIZE is smaller than it needs to be.
+#endif

int mem_init_done;
unsigned long ioremap_bot = IMALLOC_BASE;
static unsigned long phbs_io_bot = PHBS_IO_BASE;
@@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size,
* Before that, we map using addresses going
* up from ioremap_bot. imalloc will use
* the addresses from ioremap_bot through
-* IMALLOC_END (0xE000001fffffffff)
+* IMALLOC_END
*
*/
pa = addr & PAGE_MASK;
@@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
int index;
int err;

-#ifdef CONFIG_HUGETLB_PAGE
-/* We leave htlb_segs as it was, but for a fork, we need to
- * clear the huge_pgdir. */
-mm->context.huge_pgdir = NULL;
-#endif

again:
if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL))
return -ENOMEM;
@@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm)
spin_unlock(&mmu_context_lock);

mm->context.id = NO_CONTEXT;

-hugetlb_mm_free_pgd(mm);
}

/*
@@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size)
return virt_addr;
}

kmem_cache_t *zero_cache;

static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags)
static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
{
memset(pte, 0, PAGE_SIZE);
memset(addr, 0, kmem_cache_size(cache));
}

static const int pgtable_cache_size[2] = {
PTE_TABLE_SIZE, PMD_TABLE_SIZE
};
static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = {
"pgd_pte_cache", "pud_pmd_cache",
};

kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)];

void pgtable_cache_init(void)
{
zero_cache = kmem_cache_create("zero",
PAGE_SIZE,
0,
SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (!zero_cache)
panic("pgtable_cache_init(): could not create zero_cache!\n");
int i;

BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]);
BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]);
BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]);
BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]);

for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) {
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];

pgtable_cache[i] = kmem_cache_create(name,
size, size,
SLAB_HWCACHE_ALIGN
| SLAB_MUST_HWCACHE_ALIGN,
zero_ctor,
NULL);
if (! pgtable_cache[i])
panic("pgtable_cache_init(): could not create %s!\n",
name);
}
}

pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr,
2 changes: 1 addition & 1 deletion arch/ppc64/mm/slb_low.S
@@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE)
0: /* user address: proto-VSID = context<<15 | ESID */
li r11,SLB_VSID_USER

-srdi. r9,r3,13
+srdi. r9,r3,USER_ESID_BITS
bne- 8f /* invalid ea bits set */

#ifdef CONFIG_HUGETLB_PAGE
