Skip to content

Commit

Permalink
powerpc/mm: Move THP headers around
Browse files Browse the repository at this point in the history
We support THP only with book3s_64 and a 64K page size. Move the
THP details to book3s/64/hash-64k.h to make that explicit.

Acked-by: Scott Wood <scottwood@freescale.com>
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Aneesh Kumar K.V authored and Michael Ellerman committed Dec 14, 2015
1 parent 26a344a commit e34aa03
Show file tree
Hide file tree
Showing 6 changed files with 201 additions and 423 deletions.
126 changes: 126 additions & 0 deletions arch/powerpc/include/asm/book3s/64/hash-64k.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,132 @@ static inline int hugepd_ok(hugepd_t hpd)

#endif /* CONFIG_HUGETLB_PAGE */

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
unsigned long addr,
pmd_t *pmdp,
unsigned long clr,
unsigned long set);
/*
 * Return the pointer stashed PTRS_PER_PMD entries past @pmdp. That pointer
 * is the address of the pgtable holding the per-HPTE slot bytes.
 */
static inline char *get_hpte_slot_array(pmd_t *pmdp)
{
	char **stash;

	/*
	 * Order the load below with the pmd_trans_huge() test that the
	 * caller has already done.
	 */
	smp_rmb();

	/* The pgtable address is kept in the second half of the PMD. */
	stash = (char **)(pmdp + PTRS_PER_PMD);
	return *stash;
}
/*
* The linux hugepage PMD now includes the pmd entries followed by the address
* of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits:
* [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte for
* each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
* with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
*
* The last three bits are intentionally left as zero. These memory locations
* are also used as normal page PTE pointers. So if we have any pointers
* left around while we collapse a hugepage, we need to make sure the
* _PAGE_PRESENT bit of each of them is zero when we look at them.
*/
/* Return 1 when bit 3 (the valid bit) of slot byte @index is set, else 0. */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	unsigned char slot = hpte_slot_array[index];

	return (slot & (0x1 << 3)) != 0;
}

/* Return the upper four bits of slot byte @index, shifted down to bits 0-3. */
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	unsigned char slot = hpte_slot_array[index];

	return slot >> 4;
}

/*
 * Overwrite slot byte @index with @hidx in the upper four bits and the
 * valid bit (bit 3) set; the low three bits are written as zero.
 */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	unsigned int valid_bit = 0x1 << 3;

	hpte_slot_array[index] = (hidx << 4) | valid_bit;
}

/*
*
* For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
* page. The hugetlbfs page table walking and mangling paths are totally
* separated from the core VM paths and they're differentiated by
* VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
*
* pmd_trans_huge() is defined as false at build time if
* CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
* time in such case.
*
* For ppc64 we need to differentiate explicit hugepages from THP, because
* for THP we also track the subpage details at the pmd level. We don't do
* that for explicit huge pages.
*
*/
static inline int pmd_trans_huge(pmd_t pmd)
{
	/* Bottom two bits == 00: not a leaf pte for a huge page. */
	if (!(pmd_val(pmd) & 0x3))
		return 0;

	/* A huge page leaf is a THP only when _PAGE_THP_HUGE is set too. */
	return (pmd_val(pmd) & _PAGE_THP_HUGE) != 0;
}

/*
 * Return the _PAGE_SPLITTING bit of @pmd when it is a THP entry,
 * and 0 for anything else.
 */
static inline int pmd_trans_splitting(pmd_t pmd)
{
	if (!pmd_trans_huge(pmd))
		return 0;

	return pmd_val(pmd) & _PAGE_SPLITTING;
}

static inline int pmd_large(pmd_t pmd)
{
	/* A leaf pte for a huge page has a non-zero value in its bottom two bits. */
	return !!(pmd_val(pmd) & 0x3);
}

/* Return a copy of @pmd with the _PAGE_PRESENT bit cleared. */
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
	unsigned long val = pmd_val(pmd);

	val &= ~_PAGE_PRESENT;
	return __pmd(val);
}

/* Return a copy of @pmd with the _PAGE_SPLITTING bit set. */
static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
	unsigned long val = pmd_val(pmd);

	val |= _PAGE_SPLITTING;
	return __pmd(val);
}

#define __HAVE_ARCH_PMD_SAME
/*
 * Return 1 when @pmd_a and @pmd_b are equal in every bit outside
 * _PAGE_HPTEFLAGS, else 0.
 */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	unsigned long diff = pmd_val(pmd_a) ^ pmd_val(pmd_b);

	return !(diff & ~_PAGE_HPTEFLAGS);
}

/*
 * Clear _PAGE_ACCESSED in *@pmdp via pmd_hugepage_update() and return 1 if
 * the value it returned had _PAGE_ACCESSED set, else 0.
 */
static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
					      unsigned long addr, pmd_t *pmdp)
{
	unsigned long prev;

	/*
	 * Neither _PAGE_ACCESSED nor _PAGE_HASHPTE is set: report
	 * "not young" without calling the update helper.
	 */
	if (!(pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)))
		return 0;

	prev = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
	return !!(prev & _PAGE_ACCESSED);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
/* Clear _PAGE_RW in *@pmdp; a PMD without _PAGE_RW is left untouched. */
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
				      pmd_t *pmdp)
{
	if (pmd_val(*pmdp) & _PAGE_RW)
		pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
}

#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* __ASSEMBLY__ */

#endif /* _ASM_POWERPC_BOOK3S_64_HASH_64K_H */
223 changes: 49 additions & 174 deletions arch/powerpc/include/asm/book3s/64/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,55 @@
#define _ASM_POWERPC_BOOK3S_64_HASH_H
#ifdef __KERNEL__

/*
* Common bits between 4K and 64K pages in a linux-style PTE.
* These match the bits in the (hardware-defined) PowerPC PTE as closely
* as possible. Additional bits may be defined in pgtable-hash64-*.h
*
* Note: We only support user read/write permissions. Supervisor always
* has full read/write access to pages above PAGE_OFFSET (pages below that
* always use the user access permissions).
*
* We could create separate kernel read-only if we used the 3 PP bits
* combinations that newer processors provide but we currently don't.
*/
#define _PAGE_PRESENT 0x00001 /* software: pte contains a translation */
#define _PAGE_USER 0x00002 /* matches one of the PP bits */
#define _PAGE_BIT_SWAP_TYPE 2
#define _PAGE_EXEC 0x00004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x00008
/* We can derive Memory coherence from _PAGE_NO_CACHE */
#define _PAGE_COHERENT 0x0
#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */
#define _PAGE_DIRTY 0x00080 /* C: page changed */
#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
#define _PAGE_RW 0x00200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */
#define _PAGE_F_GIX_SHIFT 12
#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */
#define _PAGE_SPECIAL 0x10000 /* software: special page */

/*
* THP pages can't be special, so reuse the _PAGE_SPECIAL bit as the
* splitting marker.
*/
#define _PAGE_SPLITTING _PAGE_SPECIAL

/*
* We need to differentiate between an explicit huge page and a THP huge
* page, since a THP huge page also needs to track real subpage details.
*/
#define _PAGE_THP_HUGE _PAGE_4K_PFN

/*
* set of bits not changed in pmd_modify.
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
_PAGE_THP_HUGE)

#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
#else
Expand Down Expand Up @@ -57,36 +106,6 @@
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
/*
* Common bits between 4K and 64K pages in a linux-style PTE.
* These match the bits in the (hardware-defined) PowerPC PTE as closely
* as possible. Additional bits may be defined in pgtable-hash64-*.h
*
* Note: We only support user read/write permissions. Supervisor always
* has full read/write access to pages above PAGE_OFFSET (pages below that
* always use the user access permissions).
*
* We could create separate kernel read-only if we used the 3 PP bits
* combinations that newer processors provide but we currently don't.
*/
#define _PAGE_PRESENT 0x00001 /* software: pte contains a translation */
#define _PAGE_USER 0x00002 /* matches one of the PP bits */
#define _PAGE_BIT_SWAP_TYPE 2
#define _PAGE_EXEC 0x00004 /* No execute on POWER4 and newer (we invert) */
#define _PAGE_GUARDED 0x00008
/* We can derive Memory coherence from _PAGE_NO_CACHE */
#define _PAGE_COHERENT 0x0
#define _PAGE_NO_CACHE 0x00020 /* I: cache inhibit */
#define _PAGE_WRITETHRU 0x00040 /* W: cache write-through */
#define _PAGE_DIRTY 0x00080 /* C: page changed */
#define _PAGE_ACCESSED 0x00100 /* R: page referenced */
#define _PAGE_RW 0x00200 /* software: user write access allowed */
#define _PAGE_HASHPTE 0x00400 /* software: pte has an associated HPTE */
#define _PAGE_BUSY 0x00800 /* software: PTE & hash are busy */
#define _PAGE_F_GIX 0x07000 /* full page: hidx bits */
#define _PAGE_F_GIX_SHIFT 12
#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */
#define _PAGE_SPECIAL 0x10000 /* software: special page */

/* No separate kernel read-only */
#define _PAGE_KERNEL_RW (_PAGE_RW | _PAGE_DIRTY) /* user access blocked by key */
Expand All @@ -105,24 +124,6 @@

/* Hash table based platforms need atomic updates of the linux PTE */
#define PTE_ATOMIC_UPDATES 1

/*
* THP pages can't be special, so reuse the _PAGE_SPECIAL bit as the
* splitting marker.
*/
#define _PAGE_SPLITTING _PAGE_SPECIAL

/*
* We need to differentiate between an explicit huge page and a THP huge
* page, since a THP huge page also needs to track real subpage details.
*/
#define _PAGE_THP_HUGE _PAGE_4K_PFN

/*
* set of bits not changed in pmd_modify.
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
_PAGE_THP_HUGE)
#define _PTE_NONE_MASK _PAGE_HPTEFLAGS
/*
* The mask covered by the RPN must be a ULL on 32-bit platforms with
Expand Down Expand Up @@ -231,11 +232,6 @@

extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge);
extern unsigned long pmd_hugepage_update(struct mm_struct *mm,
unsigned long addr,
pmd_t *pmdp,
unsigned long clr,
unsigned long set);
extern unsigned long htab_convert_pte_flags(unsigned long pteflags);
/* Atomic PTE updates */
static inline unsigned long pte_update(struct mm_struct *mm,
Expand Down Expand Up @@ -361,127 +357,6 @@ static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
#define __HAVE_ARCH_PTE_SAME
#define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0)

/*
 * Return the pointer stashed PTRS_PER_PMD entries past @pmdp. That pointer
 * is the address of the pgtable holding the per-HPTE slot bytes.
 */
static inline char *get_hpte_slot_array(pmd_t *pmdp)
{
	char **stash;

	/*
	 * Order the load below with the pmd_trans_huge() test that the
	 * caller has already done.
	 */
	smp_rmb();

	/* The pgtable address is kept in the second half of the PMD. */
	stash = (char **)(pmdp + PTRS_PER_PMD);
	return *stash;
}
/*
* The linux hugepage PMD now includes the pmd entries followed by the address
* of the stashed pgtable_t. The stashed pgtable_t contains the hpte bits:
* [ 1 bit secondary | 3 bit hidx | 1 bit valid | 000]. We use one byte for
* each HPTE entry. With 16MB hugepage and 64K HPTE we need 256 entries and
* with 4K HPTE we need 4096 entries. Both will fit in a 4K pgtable_t.
*
* The last three bits are intentionally left as zero. These memory locations
* are also used as normal page PTE pointers. So if we have any pointers
* left around while we collapse a hugepage, we need to make sure the
* _PAGE_PRESENT bit of each of them is zero when we look at them.
*/
/* Return 1 when bit 3 (the valid bit) of slot byte @index is set, else 0. */
static inline unsigned int hpte_valid(unsigned char *hpte_slot_array, int index)
{
	unsigned char slot = hpte_slot_array[index];

	return (slot & (0x1 << 3)) != 0;
}

/* Return the upper four bits of slot byte @index, shifted down to bits 0-3. */
static inline unsigned int hpte_hash_index(unsigned char *hpte_slot_array,
					   int index)
{
	unsigned char slot = hpte_slot_array[index];

	return slot >> 4;
}

/*
 * Overwrite slot byte @index with @hidx in the upper four bits and the
 * valid bit (bit 3) set; the low three bits are written as zero.
 */
static inline void mark_hpte_slot_valid(unsigned char *hpte_slot_array,
					unsigned int index, unsigned int hidx)
{
	unsigned int valid_bit = 0x1 << 3;

	hpte_slot_array[index] = (hidx << 4) | valid_bit;
}

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
*
* For core kernel code by design pmd_trans_huge is never run on any hugetlbfs
* page. The hugetlbfs page table walking and mangling paths are totally
* separated from the core VM paths and they're differentiated by
* VM_HUGETLB being set on vm_flags well before any pmd_trans_huge could run.
*
* pmd_trans_huge() is defined as false at build time if
* CONFIG_TRANSPARENT_HUGEPAGE=n to optimize away code blocks at build
* time in such case.
*
* For ppc64 we need to differentiate explicit hugepages from THP, because
* for THP we also track the subpage details at the pmd level. We don't do
* that for explicit huge pages.
*
*/
static inline int pmd_trans_huge(pmd_t pmd)
{
	/* Bottom two bits == 00: not a leaf pte for a huge page. */
	if (!(pmd_val(pmd) & 0x3))
		return 0;

	/* A huge page leaf is a THP only when _PAGE_THP_HUGE is set too. */
	return (pmd_val(pmd) & _PAGE_THP_HUGE) != 0;
}

/*
 * Return the _PAGE_SPLITTING bit of @pmd when it is a THP entry,
 * and 0 for anything else.
 */
static inline int pmd_trans_splitting(pmd_t pmd)
{
	if (!pmd_trans_huge(pmd))
		return 0;

	return pmd_val(pmd) & _PAGE_SPLITTING;
}

#endif
static inline int pmd_large(pmd_t pmd)
{
	/* A leaf pte for a huge page has a non-zero value in its bottom two bits. */
	return !!(pmd_val(pmd) & 0x3);
}

/* Return a copy of @pmd with the _PAGE_PRESENT bit cleared. */
static inline pmd_t pmd_mknotpresent(pmd_t pmd)
{
	unsigned long val = pmd_val(pmd);

	val &= ~_PAGE_PRESENT;
	return __pmd(val);
}

/* Return a copy of @pmd with the _PAGE_SPLITTING bit set. */
static inline pmd_t pmd_mksplitting(pmd_t pmd)
{
	unsigned long val = pmd_val(pmd);

	val |= _PAGE_SPLITTING;
	return __pmd(val);
}

#define __HAVE_ARCH_PMD_SAME
/*
 * Return 1 when @pmd_a and @pmd_b are equal in every bit outside
 * _PAGE_HPTEFLAGS, else 0.
 */
static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b)
{
	unsigned long diff = pmd_val(pmd_a) ^ pmd_val(pmd_b);

	return !(diff & ~_PAGE_HPTEFLAGS);
}

/*
 * Clear _PAGE_ACCESSED in *@pmdp via pmd_hugepage_update() and return 1 if
 * the value it returned had _PAGE_ACCESSED set, else 0.
 */
static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
					      unsigned long addr, pmd_t *pmdp)
{
	unsigned long prev;

	/*
	 * Neither _PAGE_ACCESSED nor _PAGE_HASHPTE is set: report
	 * "not young" without calling the update helper.
	 */
	if (!(pmd_val(*pmdp) & (_PAGE_ACCESSED | _PAGE_HASHPTE)))
		return 0;

	prev = pmd_hugepage_update(mm, addr, pmdp, _PAGE_ACCESSED, 0);
	return !!(prev & _PAGE_ACCESSED);
}

#define __HAVE_ARCH_PMDP_SET_WRPROTECT
/* Clear _PAGE_RW in *@pmdp; a PMD without _PAGE_RW is left untouched. */
static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
				      pmd_t *pmdp)
{
	if (pmd_val(*pmdp) & _PAGE_RW)
		pmd_hugepage_update(mm, addr, pmdp, _PAGE_RW, 0);
}

/* Generic accessors to PTE bits */
/* Return 1 if _PAGE_RW is set in @pte, else 0. */
static inline int pte_write(pte_t pte) { return (pte_val(pte) & _PAGE_RW) != 0; }
/* Return 1 if _PAGE_DIRTY is set in @pte, else 0. */
static inline int pte_dirty(pte_t pte) { return (pte_val(pte) & _PAGE_DIRTY) != 0; }
Expand Down
Loading

0 comments on commit e34aa03

Please sign in to comment.