Skip to content

Commit

Permalink
x86/mm/tlb: Leave lazy TLB mode at page table free time
Browse files Browse the repository at this point in the history
Andy discovered that speculative memory accesses while in lazy
TLB mode can crash a system, when a CPU tries to dereference a
speculative access using memory contents that used to be valid
page table memory, but have since been reused for something else
and point into la-la land.

This problem can be prevented in two ways. The first is to
always send a TLB shootdown IPI to CPUs in lazy TLB mode, while
the second one is to only send the TLB shootdown at page table
freeing time.

The second should result in fewer IPIs, since operations like
mprotect and madvise are very common with some workloads, but
do not involve page table freeing. Also, on munmap, batching
of page table freeing covers much larger ranges of virtual
memory than the batching of unmapped user pages.

Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Acked-by: Dave Hansen <dave.hansen@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: efault@gmx.de
Cc: kernel-team@fb.com
Cc: luto@kernel.org
Link: http://lkml.kernel.org/r/20180716190337.26133-3-riel@surriel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
  • Loading branch information
Rik van Riel authored and Ingo Molnar committed Jul 17, 2018
1 parent c1a2f7f commit 2ff6ddf
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 8 deletions.
5 changes: 5 additions & 0 deletions arch/x86/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -554,4 +554,9 @@ extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch);
native_flush_tlb_others(mask, info)
#endif

extern void tlb_flush_remove_tables(struct mm_struct *mm);
extern void tlb_flush_remove_tables_local(void *arg);

#define HAVE_TLB_FLUSH_REMOVE_TABLES

#endif /* _ASM_X86_TLBFLUSH_H */
27 changes: 27 additions & 0 deletions arch/x86/mm/tlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,33 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
put_cpu();
}

/*
 * Per-CPU half of tlb_flush_remove_tables(). @arg is the mm_struct whose
 * page tables are about to be freed.
 *
 * Nothing happens unless this CPU currently has that mm loaded *and* is in
 * lazy TLB mode; in that case the CPU is switched over to init_mm.
 */
void tlb_flush_remove_tables_local(void *arg)
{
	struct mm_struct *mm = arg;

	/* A different mm is loaded on this CPU: not our concern. */
	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
		return;

	/* The mm is loaded but the CPU is not lazy: nothing to do here. */
	if (!this_cpu_read(cpu_tlbstate.is_lazy))
		return;

	/*
	 * Lazy mode. At the very least the paging-structure cache has to be
	 * flushed, so that garbage is not speculatively read into the TLB.
	 * Switching to init_mm is barely slower than a minimal flush, so
	 * just switch to init_mm.
	 */
	switch_mm_irqs_off(NULL, &init_mm, NULL);
}

/*
 * Ask the CPUs in mm_cpumask(@mm) to run tlb_flush_remove_tables_local()
 * for @mm.
 *
 * The cross-call is issued only when the cpumask_any_but() lookup comes back
 * below nr_cpu_ids -- presumably meaning the mask contains at least one CPU
 * other than the current one (confirm against the cpumask API).
 */
void tlb_flush_remove_tables(struct mm_struct *mm)
{
	int this_cpu = get_cpu();

	/*
	 * XXX: this really only needs to be called for CPUs in lazy TLB mode.
	 */
	if (cpumask_any_but(mm_cpumask(mm), this_cpu) < nr_cpu_ids) {
		smp_call_function_many(mm_cpumask(mm),
				       tlb_flush_remove_tables_local, mm, 1);
	}

	put_cpu();
}

static void do_flush_tlb_all(void *info)
{
Expand Down
10 changes: 10 additions & 0 deletions include/asm-generic/tlb.h
Original file line number Diff line number Diff line change
Expand Up @@ -303,4 +303,14 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,

#define tlb_migrate_finish(mm) do {} while (0)

/*
* Used to flush the TLB when page tables are removed, when lazy
* TLB mode may cause a CPU to retain intermediate translations
* pointing to about-to-be-freed page table memory.
*/
#ifndef HAVE_TLB_FLUSH_REMOVE_TABLES
#define tlb_flush_remove_tables(mm) do {} while (0)
#define tlb_flush_remove_tables_local(mm) do {} while (0)
#endif

#endif /* _ASM_GENERIC__TLB_H */
22 changes: 14 additions & 8 deletions mm/memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_

#ifdef CONFIG_HAVE_RCU_TABLE_FREE

/*
* See the comment near struct mmu_table_batch.
*/

/*
* Cross-CPU callback handed to smp_call_function() by
* tlb_remove_table_one(); @arg is the mm passed along with that call.
*/
static void tlb_remove_table_smp_sync(void *arg)
{
/*
* __maybe_unused: when the architecture does not define
* HAVE_TLB_FLUSH_REMOVE_TABLES, tlb_flush_remove_tables_local() is an
* empty macro and 'mm' would otherwise be flagged as unused.
*/
struct mm_struct __maybe_unused *mm = arg;
/*
* On most architectures this does nothing. Simply delivering the
* interrupt is enough to prevent races with software page table
* walking like that done in get_user_pages_fast.
*
* See the comment near struct mmu_table_batch.
*/
tlb_flush_remove_tables_local(mm);
}

static void tlb_remove_table_one(void *table)
static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
Expand All @@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
* It is however sufficient for software page-table walkers that rely on
* IRQ disabling. See the comment near struct mmu_table_batch.
*/
smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
__tlb_remove_table(table);
}

Expand All @@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
{
struct mmu_table_batch **batch = &tlb->batch;

tlb_flush_remove_tables(tlb->mm);

if (*batch) {
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
Expand All @@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
tlb_remove_table_one(table);
tlb_remove_table_one(table, tlb);
return;
}
(*batch)->nr = 0;
Expand Down

0 comments on commit 2ff6ddf

Please sign in to comment.