Skip to content

Commit

Permalink
powerpc/mm: Add page soft dirty tracking
Browse files Browse the repository at this point in the history
User space checkpoint and restart tool (CRIU) needs the page's change
to be soft tracked. This allows to do a pre checkpoint and then dump
only touched pages.

This is done by using a newly assigned PTE bit (_PAGE_SOFT_DIRTY) when
the page is backed in memory, and a new _PAGE_SWP_SOFT_DIRTY bit when
the page is swapped out.

To introduce a new PTE _PAGE_SOFT_DIRTY bit value common to hash 4k
and hash 64k pte, the bits already defined in hash-*4k.h should be
shifted left by one.

The _PAGE_SWP_SOFT_DIRTY bit is dynamically put after the swap type in
the swap pte. A check is added to ensure that the bit is not
overwritten by _PAGE_HPTEFLAGS.

Signed-off-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
CC: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
  • Loading branch information
Laurent Dufour authored and Michael Ellerman committed Dec 17, 2015
1 parent 2613265 commit 7207f43
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 8 deletions.
2 changes: 2 additions & 0 deletions arch/powerpc/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,7 @@ choice

config PPC_4K_PAGES
bool "4k page size"
select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S

config PPC_16K_PAGES
bool "16k page size"
Expand All @@ -567,6 +568,7 @@ config PPC_16K_PAGES
config PPC_64K_PAGES
bool "64k page size"
depends on !PPC_FSL_BOOK3E && (44x || PPC_STD_MMU_64 || PPC_BOOK3E_64)
select HAVE_ARCH_SOFT_DIRTY if CHECKPOINT_RESTORE && PPC_BOOK3S

config PPC_256K_PAGES
bool "256k page size"
Expand Down
2 changes: 1 addition & 1 deletion arch/powerpc/include/asm/book3s/64/hash-4k.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
_PAGE_F_SECOND | _PAGE_F_GIX)

/* shift to put page number into pte */
#define PTE_RPN_SHIFT (17)
#define PTE_RPN_SHIFT (18)

#define _PAGE_4K_PFN 0
#ifndef __ASSEMBLY__
Expand Down
4 changes: 2 additions & 2 deletions arch/powerpc/include/asm/book3s/64/hash-64k.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))

#define _PAGE_COMBO 0x00020000 /* this is a combo 4k page */
#define _PAGE_4K_PFN 0x00040000 /* PFN is for a single 4k page */
#define _PAGE_COMBO 0x00040000 /* this is a combo 4k page */
#define _PAGE_4K_PFN 0x00080000 /* PFN is for a single 4k page */
/*
* Used to track subpage group valid if _PAGE_COMBO is set
* This overloads _PAGE_F_GIX and _PAGE_F_SECOND
Expand Down
30 changes: 25 additions & 5 deletions arch/powerpc/include/asm/book3s/64/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#define _PAGE_F_GIX_SHIFT 12
#define _PAGE_F_SECOND 0x08000 /* Whether to use secondary hash or not */
#define _PAGE_SPECIAL 0x10000 /* software: special page */
#define _PAGE_SOFT_DIRTY 0x20000 /* software: software dirty tracking */

/*
* THP pages can't be special. So use the _PAGE_SPECIAL
Expand All @@ -50,7 +51,7 @@
*/
#define _HPAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | \
_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPLITTING | \
_PAGE_THP_HUGE | _PAGE_PTE)
_PAGE_THP_HUGE | _PAGE_PTE | _PAGE_SOFT_DIRTY)

#ifdef CONFIG_PPC_64K_PAGES
#include <asm/book3s/64/hash-64k.h>
Expand Down Expand Up @@ -136,14 +137,16 @@
* pgprot changes
*/
#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE)
_PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE | \
_PAGE_SOFT_DIRTY)
/*
* Mask of bits returned by pte_pgprot()
*/
#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
_PAGE_WRITETHRU | _PAGE_4K_PFN | \
_PAGE_USER | _PAGE_ACCESSED | \
_PAGE_RW | _PAGE_DIRTY | _PAGE_EXEC)
_PAGE_RW | _PAGE_DIRTY | _PAGE_EXEC | \
_PAGE_SOFT_DIRTY)
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
Expand Down Expand Up @@ -339,7 +342,8 @@ static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
{
unsigned long bits = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC |
_PAGE_SOFT_DIRTY);

unsigned long old, tmp;

Expand All @@ -366,6 +370,22 @@ static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIA
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline bool pte_soft_dirty(pte_t pte)
{
return !!(pte_val(pte) & _PAGE_SOFT_DIRTY);
}
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

#ifdef CONFIG_NUMA_BALANCING
/*
* These work without NUMA balancing but the kernel does not care. See the
Expand Down Expand Up @@ -424,7 +444,7 @@ static inline pte_t pte_mkwrite(pte_t pte)

static inline pte_t pte_mkdirty(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_DIRTY);
return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
}

static inline pte_t pte_mkyoung(pte_t pte)
Expand Down
26 changes: 26 additions & 0 deletions arch/powerpc/include/asm/book3s/64/pgtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
* We filter HPTEFLAGS on set_pte. \
*/ \
BUILD_BUG_ON(_PAGE_HPTEFLAGS & (0x1f << _PAGE_BIT_SWAP_TYPE)); \
BUILD_BUG_ON(_PAGE_HPTEFLAGS & _PAGE_SWP_SOFT_DIRTY); \
} while (0)
/*
* on pte we don't need handle RADIX_TREE_EXCEPTIONAL_SHIFT;
Expand All @@ -161,6 +162,24 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x) __pte((x).val)

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
}
static inline bool pte_swp_soft_dirty(pte_t pte)
{
return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY);
}
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
}
#else
#define _PAGE_SWP_SOFT_DIRTY 0
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

void pgtable_cache_add(unsigned shift, void (*ctor)(void *));
void pgtable_cache_init(void);

Expand Down Expand Up @@ -201,6 +220,13 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
#define pmd_mkdirty(pmd) pte_pmd(pte_mkdirty(pmd_pte(pmd)))
#define pmd_mkyoung(pmd) pte_pmd(pte_mkyoung(pmd_pte(pmd)))
#define pmd_mkwrite(pmd) pte_pmd(pte_mkwrite(pmd_pte(pmd)))

#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */

#ifdef CONFIG_NUMA_BALANCING
static inline int pmd_protnone(pmd_t pmd)
{
Expand Down

0 comments on commit 7207f43

Please sign in to comment.