Skip to content

Commit

Permalink
x86/mm: Handle LAM on context switch
Browse files Browse the repository at this point in the history
Linear Address Masking mode for userspace pointers encoded in CR3 bits.
The mode is selected per-process and stored in mm_context_t.

switch_mm_irqs_off() now respects selected LAM mode and constructs CR3
accordingly.

The active LAM mode gets recorded in the tlb_state.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alexander Potapenko <glider@google.com>
Link: https://lore.kernel.org/all/20230312112612.31869-5-kirill.shutemov%40linux.intel.com
  • Loading branch information
Kirill A. Shutemov authored and Dave Hansen committed Mar 16, 2023
1 parent 6449dcb commit 82721d8
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 17 deletions.
5 changes: 5 additions & 0 deletions arch/x86/include/asm/mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,11 @@ typedef struct {
unsigned long flags;
#endif

#ifdef CONFIG_ADDRESS_MASKING
/* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
unsigned long lam_cr3_mask;
#endif

struct mutex lock;
void __user *vdso; /* vdso base address */
const struct vdso_image *vdso_image; /* vdso image in use */
Expand Down
24 changes: 24 additions & 0 deletions arch/x86/include/asm/mmu_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,29 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
}
#endif

#ifdef CONFIG_ADDRESS_MASKING
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
return mm->context.lam_cr3_mask;
}

static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
}

#else

static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
return 0;
}

static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
}
#endif

#define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);

Expand Down Expand Up @@ -169,6 +192,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{
arch_dup_pkeys(oldmm, mm);
paravirt_arch_dup_mmap(oldmm, mm);
dup_lam(oldmm, mm);
return ldt_dup_context(oldmm, mm);
}

Expand Down
38 changes: 37 additions & 1 deletion arch/x86/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H

#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/sched.h>

#include <asm/processor.h>
Expand All @@ -12,6 +12,7 @@
#include <asm/invpcid.h>
#include <asm/pti.h>
#include <asm/processor-flags.h>
#include <asm/pgtable.h>

void __flush_tlb_all(void);

Expand Down Expand Up @@ -101,6 +102,16 @@ struct tlb_state {
*/
bool invalidate_other;

#ifdef CONFIG_ADDRESS_MASKING
/*
* Active LAM mode.
*
* X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
* disabled.
*/
u8 lam;
#endif

/*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we
Expand Down Expand Up @@ -357,6 +368,31 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
}
#define huge_pmd_needs_flush huge_pmd_needs_flush

#ifdef CONFIG_ADDRESS_MASKING
static inline u64 tlbstate_lam_cr3_mask(void)
{
u64 lam = this_cpu_read(cpu_tlbstate.lam);

return lam << X86_CR3_LAM_U57_BIT;
}

static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
this_cpu_write(cpu_tlbstate.lam,
mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
}

#else

static inline u64 tlbstate_lam_cr3_mask(void)
{
return 0;
}

static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
}
#endif
#endif /* !MODULE */

static inline void __native_tlb_flush_global(unsigned long cr4)
Expand Down
53 changes: 37 additions & 16 deletions arch/x86/mm/tlb.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
return ret;
}

static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
{
unsigned long cr3 = __sme_pa(pgd) | lam;

if (static_cpu_has(X86_FEATURE_PCID)) {
return __sme_pa(pgd) | kern_pcid(asid);
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
cr3 |= kern_pcid(asid);
} else {
VM_WARN_ON_ONCE(asid != 0);
return __sme_pa(pgd);
}

return cr3;
}

static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid)
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
unsigned long lam)
{
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
/*
* Use boot_cpu_has() instead of this_cpu_has() as this function
* might be called during early boot. This should work even after
* boot because all CPU's the have same capabilities:
*/
VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH;
return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
}

/*
Expand Down Expand Up @@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
}

static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
bool need_flush)
{
unsigned long new_mm_cr3;

if (need_flush) {
invalidate_user_asid(new_asid);
new_mm_cr3 = build_cr3(pgdir, new_asid);
new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
} else {
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
}

/*
Expand Down Expand Up @@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
{
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
unsigned long new_lam = mm_lam_cr3_mask(next);
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id();
u64 next_tlb_gen;
Expand Down Expand Up @@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free.
*/
#ifdef CONFIG_DEBUG_VM
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) {
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
tlbstate_lam_cr3_mask()))) {
/*
* If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace.
Expand Down Expand Up @@ -552,9 +559,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* instruction.
*/
if (real_prev == next) {
/* Not actually switching mm's */
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id);

/*
* If this races with another thread that enables lam, 'new_lam'
* might not match tlbstate_lam_cr3_mask().
*/

/*
* Even in lazy TLB mode, the CPU should stay set in the
* mm_cpumask. The TLB shootdown code can figure out from
Expand Down Expand Up @@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
barrier();
}

set_tlbstate_lam_mode(next);
if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, true);
load_new_mm_cr3(next->pgd, new_asid, new_lam, true);

trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else {
/* The new ASID is already up to date. */
load_new_mm_cr3(next->pgd, new_asid, false);
load_new_mm_cr3(next->pgd, new_asid, new_lam, false);

trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
}
Expand Down Expand Up @@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
/* Assert that CR3 already references the right mm. */
WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));

/* LAM expected to be disabled */
WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
WARN_ON(mm_lam_cr3_mask(mm));

/*
* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
* doesn't work like other CR4 bits because it can only be set from
Expand All @@ -699,15 +717,16 @@ void initialize_tlbstate_and_flush(void)
WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
!(cr4_read_shadow() & X86_CR4_PCIDE));

/* Force ASID 0 and force a TLB flush. */
write_cr3(build_cr3(mm->pgd, 0));
/* Disable LAM, force ASID 0 and force a TLB flush. */
write_cr3(build_cr3(mm->pgd, 0, 0));

/* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0);
this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
set_tlbstate_lam_mode(mm);

for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
Expand Down Expand Up @@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
*/
unsigned long __get_current_cr3_fast(void)
{
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid));
unsigned long cr3 =
build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid),
tlbstate_lam_cr3_mask());

/* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible());
Expand Down

0 comments on commit 82721d8

Please sign in to comment.