Skip to content

Commit

Permalink
x86/mm: Make MMU_GATHER_RCU_TABLE_FREE unconditional
Browse files Browse the repository at this point in the history
Currently x86 uses CONFIG_MMU_GATHER_TABLE_FREE when using
paravirt, and not when running on bare metal.

There is no real good reason to do things differently for
each setup. Make them all the same.

Currently get_user_pages_fast synchronizes against page table
freeing in two different ways:

 - on bare metal, by blocking IRQs, which block TLB flush IPIs
 - on paravirt, with MMU_GATHER_RCU_TABLE_FREE

This is done because some paravirt TLB flush implementations
handle the TLB flush in the hypervisor, and will do the flush
even when the target CPU has interrupts disabled.

Always handle page table freeing with MMU_GATHER_RCU_TABLE_FREE.
Using RCU synchronization between page table freeing and get_user_pages_fast()
allows bare metal to also do TLB flushing while interrupts are disabled.

Various places in the mm do still block IRQs or disable preemption
as an implicit way to block RCU frees.

That makes it safe to use INVLPGB on AMD CPUs.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Tested-by: Manali Shukla <Manali.Shukla@amd.com>
Tested-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>
Link: https://lore.kernel.org/r/20250213161423.449435-2-riel@surriel.com
  • Loading branch information
Rik van Riel authored and Ingo Molnar committed Feb 21, 2025
1 parent 282f395 commit a372597
Show file tree
Hide file tree
Showing 3 changed files with 6 additions and 40 deletions.
2 changes: 1 addition & 1 deletion arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ config X86
select HAVE_PCI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
select MMU_GATHER_RCU_TABLE_FREE
select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
Expand Down
17 changes: 1 addition & 16 deletions arch/x86/kernel/paravirt.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,21 +59,6 @@ void __init native_pv_lock_init(void)
static_branch_enable(&virt_spin_lock_key);
}

#ifndef CONFIG_PT_RECLAIM
static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct ptdesc *ptdesc = (struct ptdesc *)table;

pagetable_dtor(ptdesc);
tlb_remove_page(tlb, ptdesc_page(ptdesc));
}
#else
static void native_tlb_remove_table(struct mmu_gather *tlb, void *table)
{
tlb_remove_table(tlb, table);
}
#endif

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

Expand Down Expand Up @@ -195,7 +180,7 @@ struct paravirt_patch_template pv_ops = {
.mmu.flush_tlb_kernel = native_flush_tlb_global,
.mmu.flush_tlb_one_user = native_flush_tlb_one_user,
.mmu.flush_tlb_multi = native_flush_tlb_multi,
.mmu.tlb_remove_table = native_tlb_remove_table,
.mmu.tlb_remove_table = tlb_remove_table,

.mmu.exit_mmap = paravirt_nop,
.mmu.notify_page_enc_status_changed = paravirt_nop,
Expand Down
27 changes: 4 additions & 23 deletions arch/x86/mm/pgtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,6 @@ EXPORT_SYMBOL(physical_mask);
#define PGTABLE_HIGHMEM 0
#endif

#ifndef CONFIG_PARAVIRT
#ifndef CONFIG_PT_RECLAIM
static inline
void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
{
struct ptdesc *ptdesc = (struct ptdesc *)table;

pagetable_dtor(ptdesc);
tlb_remove_page(tlb, ptdesc_page(ptdesc));
}
#else
static inline
void paravirt_tlb_remove_table(struct mmu_gather *tlb, void *table)
{
tlb_remove_table(tlb, table);
}
#endif /* !CONFIG_PT_RECLAIM */
#endif /* !CONFIG_PARAVIRT */

gfp_t __userpte_alloc_gfp = GFP_PGTABLE_USER | PGTABLE_HIGHMEM;

pgtable_t pte_alloc_one(struct mm_struct *mm)
Expand Down Expand Up @@ -64,7 +45,7 @@ early_param("userpte", setup_userpte);
void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
paravirt_release_pte(page_to_pfn(pte));
paravirt_tlb_remove_table(tlb, page_ptdesc(pte));
tlb_remove_table(tlb, page_ptdesc(pte));
}

#if CONFIG_PGTABLE_LEVELS > 2
Expand All @@ -78,21 +59,21 @@ void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
#ifdef CONFIG_X86_PAE
tlb->need_flush_all = 1;
#endif
paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pmd));
tlb_remove_table(tlb, virt_to_ptdesc(pmd));
}

#if CONFIG_PGTABLE_LEVELS > 3
void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
paravirt_tlb_remove_table(tlb, virt_to_ptdesc(pud));
tlb_remove_table(tlb, virt_to_ptdesc(pud));
}

#if CONFIG_PGTABLE_LEVELS > 4
void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d)
{
paravirt_release_p4d(__pa(p4d) >> PAGE_SHIFT);
paravirt_tlb_remove_table(tlb, virt_to_ptdesc(p4d));
tlb_remove_table(tlb, virt_to_ptdesc(p4d));
}
#endif /* CONFIG_PGTABLE_LEVELS > 4 */
#endif /* CONFIG_PGTABLE_LEVELS > 3 */
Expand Down

0 comments on commit a372597

Please sign in to comment.