x86/mm/dump_pagetables: Speed up page tables dump for CONFIG_KASAN=y

KASAN fills kernel page tables with repeated values to map several
TBs of virtual memory to the single kasan_zero_page:
  kasan_zero_p4d ->
      kasan_zero_pud ->
          kasan_zero_pmd ->
              kasan_zero_pte ->
                  kasan_zero_page
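
For illustration only (this sketch is not part of the patch; the real setup lives in the
KASAN init code, e.g. arch/x86/mm/kasan_init_64.c), the shared mapping is built by
pointing every slot of a zero table at the same lower-level zero table, roughly like:

  /* Simplified sketch: every entry refers to the same next-level zero table,
   * so TBs of shadow collapse onto a single 4K page. */
  int i;

  for (i = 0; i < PTRS_PER_PTE; i++)
          set_pte(&kasan_zero_pte[i],
                  pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL_RO));
  for (i = 0; i < PTRS_PER_PMD; i++)
          set_pmd(&kasan_zero_pmd[i],
                  __pmd(__pa(kasan_zero_pte) | _KERNPG_TABLE));
  for (i = 0; i < PTRS_PER_PUD; i++)
          set_pud(&kasan_zero_pud[i],
                  __pud(__pa(kasan_zero_pmd) | _KERNPG_TABLE));

so dumping any address in the shadow range always lands on the same physical tables.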

Walking the whole KASAN shadow range takes a lot of time, especially
with 5-level page tables. Since we already know that all kasan page tables
eventually point to the kasan_zero_page, we can call note_page()
right away and avoid walking the lower levels of the page tables.
This does not change the output of the kernel_page_tables file,
but it lets us avoid spending time in the page table walkers:
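
(For scale: with 4-level paging the KASAN shadow occupies roughly 16 TB of virtual
address space, which is on the order of 16 TB / 4 KB ≈ 4 billion PTE-level entries if
every level is visited; with 5-level paging the shadow grows to roughly 8 PB, about
500 times more, which is why a full walk goes from tens of seconds to minutes.)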

Before:

  $ time cat /sys/kernel/debug/kernel_page_tables > /dev/null

  real    0m55.855s
  user    0m0.000s
  sys     0m55.840s

After:

  $ time cat /sys/kernel/debug/kernel_page_tables > /dev/null

  real    0m0.054s
  user    0m0.000s
  sys     0m0.054s

Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20170724152558.24689-1-aryabinin@virtuozzo.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Andrey Ryabinin authored and Ingo Molnar committed Jul 25, 2017
Commit 04b6702 (parent 10af623)
1 changed file: arch/x86/mm/dump_pagetables.c (41 additions, 23 deletions)

--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -13,12 +13,12 @@
  */

 #include <linux/debugfs.h>
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>

-#include <asm/kasan.h>
 #include <asm/pgtable.h>

 /*
@@ -302,23 +302,53 @@ static void walk_pte_level(struct seq_file *m, struct pg_state *st, pmd_t addr,
 		start++;
 	}
 }
+#ifdef CONFIG_KASAN
+
+/*
+ * This is an optimization for KASAN=y case. Since all kasan page tables
+ * eventually point to the kasan_zero_page we could call note_page()
+ * right away without walking through lower level page tables. This saves
+ * us dozens of seconds (minutes for 5-level config) while checking for
+ * W+X mapping or reading kernel_page_tables debugfs file.
+ */
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	if (__pa(pt) == __pa(kasan_zero_pmd) ||
+#ifdef CONFIG_X86_5LEVEL
+	    __pa(pt) == __pa(kasan_zero_p4d) ||
+#endif
+	    __pa(pt) == __pa(kasan_zero_pud)) {
+		pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
+		note_page(m, st, __pgprot(prot), 5);
+		return true;
+	}
+	return false;
+}
+#else
+static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
+				void *pt)
+{
+	return false;
+}
+#endif

 #if PTRS_PER_PMD > 1

 static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, unsigned long P)
 {
 	int i;
-	pmd_t *start;
+	pmd_t *start, *pmd_start;
 	pgprotval_t prot;

-	start = (pmd_t *)pud_page_vaddr(addr);
+	pmd_start = start = (pmd_t *)pud_page_vaddr(addr);
 	for (i = 0; i < PTRS_PER_PMD; i++) {
 		st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
 		if (!pmd_none(*start)) {
 			if (pmd_large(*start) || !pmd_present(*start)) {
 				prot = pmd_flags(*start);
 				note_page(m, st, __pgprot(prot), 4);
-			} else {
+			} else if (!kasan_page_table(m, st, pmd_start)) {
 				walk_pte_level(m, st, *start,
 					       P + i * PMD_LEVEL_MULT);
 			}
@@ -336,34 +366,22 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr,

 #if PTRS_PER_PUD > 1

-/*
- * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y
- * KASAN fills page tables with the same values. Since there is no
- * point in checking page table more than once we just skip repeated
- * entries. This saves us dozens of seconds during boot.
- */
-static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx)
-{
-	return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud));
-}
-
 static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr, unsigned long P)
 {
 	int i;
-	pud_t *start;
+	pud_t *start, *pud_start;
 	pgprotval_t prot;
 	pud_t *prev_pud = NULL;

-	start = (pud_t *)p4d_page_vaddr(addr);
+	pud_start = start = (pud_t *)p4d_page_vaddr(addr);

 	for (i = 0; i < PTRS_PER_PUD; i++) {
 		st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
-		if (!pud_none(*start) &&
-		    !pud_already_checked(prev_pud, start, st->check_wx)) {
+		if (!pud_none(*start)) {
 			if (pud_large(*start) || !pud_present(*start)) {
 				prot = pud_flags(*start);
 				note_page(m, st, __pgprot(prot), 3);
-			} else {
+			} else if (!kasan_page_table(m, st, pud_start)) {
 				walk_pmd_level(m, st, *start,
 					       P + i * PUD_LEVEL_MULT);
 			}
@@ -386,18 +404,18 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, p4d_t addr,
 static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P)
 {
 	int i;
-	p4d_t *start;
+	p4d_t *start, *p4d_start;
 	pgprotval_t prot;

-	start = (p4d_t *)pgd_page_vaddr(addr);
+	p4d_start = start = (p4d_t *)pgd_page_vaddr(addr);

 	for (i = 0; i < PTRS_PER_P4D; i++) {
 		st->current_address = normalize_addr(P + i * P4D_LEVEL_MULT);
 		if (!p4d_none(*start)) {
 			if (p4d_large(*start) || !p4d_present(*start)) {
 				prot = p4d_flags(*start);
 				note_page(m, st, __pgprot(prot), 2);
-			} else {
+			} else if (!kasan_page_table(m, st, p4d_start)) {
 				walk_pud_level(m, st, *start,
 					       P + i * P4D_LEVEL_MULT);
 			}
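
For reference, a typical way to exercise this path on a kernel of this era (assuming it
was built with CONFIG_KASAN=y and CONFIG_X86_PTDUMP=y, which provides the debugfs file):

  $ mount -t debugfs none /sys/kernel/debug   # only if debugfs is not already mounted
  $ time cat /sys/kernel/debug/kernel_page_tables > /dev/null

The same kasan_page_table() short-circuit also speeds up the CONFIG_DEBUG_WX=y boot-time
check, which walks the kernel page tables looking for W+X mappings.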
