x86/mm: Use INVPCID for __native_flush_tlb_single()
This uses INVPCID to shoot down individual lines of the user mapping
instead of marking the entire user map as invalid. This
could/might/possibly be faster.

This for sure needs tlb_single_page_flush_ceiling to be redetermined;
esp. since INVPCID is _slow_.

A detailed performance analysis is available here:

  https://lkml.kernel.org/r/3062e486-3539-8a1f-5724-16199420be71@intel.com

[ Peterz: Split out from big combo patch ]

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Eduardo Valentin <eduval@amazon.com>
Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: aliguori@amazon.com
Cc: daniel.gruss@iaik.tugraz.at
Cc: hughd@google.com
Cc: keescook@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
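
The invpcid_flush_one() call introduced below is a thin wrapper around the INVPCID instruction. A minimal sketch of such a wrapper, modeled on the kernel helpers of this era (illustrative, not the verbatim source; exact header placement and constant names varied across the series, and INVPCID is privileged, so this faults outside ring 0):

#include <stdint.h>

typedef uint64_t u64;

/*
 * Sketch of an INVPCID wrapper.  The instruction takes a type operand
 * in a register and a 16-byte descriptor in memory:
 *   desc[0] = PCID, desc[1] = linear address.
 */
static inline void __invpcid(unsigned long pcid, unsigned long addr,
			     unsigned long type)
{
	struct { u64 d[2]; } desc = { { pcid, addr } };

	/*
	 * The .byte sequence encodes invpcid (%rcx), %rax; the memory
	 * clobber keeps the compiler from reordering accesses around
	 * the flush.
	 */
	asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
		      : : "m" (desc), "a" (type), "c" (&desc) : "memory");
}

/* Type 0: invalidate one (PCID, address) pair, non-global entries only. */
static inline void invpcid_flush_one(unsigned long pcid, unsigned long addr)
{
	__invpcid(pcid, addr, 0 /* INVPCID_TYPE_INDIV_ADDR */);
}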
Dave Hansen authored and Ingo Molnar committed Dec 23, 2017
1 parent 21e9445 commit 6cff64b
Showing 3 changed files with 60 additions and 28 deletions.
1 change: 1 addition & 0 deletions arch/x86/include/asm/cpufeatures.h
@@ -197,6 +197,7 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
23 changes: 22 additions & 1 deletion arch/x86/include/asm/tlbflush.h
@@ -85,6 +85,18 @@ static inline u16 kern_pcid(u16 asid)
 	return asid + 1;
 }
 
+/*
+ * The user PCID is just the kernel one, plus the "switch bit".
+ */
+static inline u16 user_pcid(u16 asid)
+{
+	u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+	return ret;
+}
+
 struct pgd_t;
 static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
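
To make the PCID arithmetic above concrete: assuming X86_CR3_PTI_SWITCH_BIT is 11 (the value used in the PTI series of this period), ASID 0 maps to kernel PCID 1 and user PCID 2049. A standalone sketch of just the math:

#include <stdint.h>
#include <stdio.h>

#define X86_CR3_PTI_SWITCH_BIT	11	/* assumed value for this era */

/* Mirrors kern_pcid(): ASID 0 maps to PCID 1, and so on. */
static uint16_t kern_pcid(uint16_t asid)
{
	return asid + 1;
}

/* Mirrors user_pcid(): the kernel PCID plus the switch bit. */
static uint16_t user_pcid(uint16_t asid)
{
	return kern_pcid(asid) | (1 << X86_CR3_PTI_SWITCH_BIT);
}

int main(void)
{
	/* Prints: asid 0 -> kern 1, user 2049 */
	printf("asid 0 -> kern %u, user %u\n", kern_pcid(0), user_pcid(0));
	return 0;
}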
@@ -335,6 +347,8 @@ static inline void __native_flush_tlb_global(void)
 		/*
 		 * Using INVPCID is considerably faster than a pair of writes
 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
 		 */
 		invpcid_flush_all();
 		return;
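
The "pair of writes to CR4" that this comment compares against is the non-INVPCID fallback further down the same function. Roughly, as reconstructed from the surrounding code of the era (a fragment, not part of this diff):

	unsigned long cr4, flags;

	/*
	 * Fallback global flush: toggling CR4.PGE flushes the entire
	 * TLB, including global entries, on each write.
	 */
	raw_local_irq_save(flags);
	cr4 = this_cpu_read(cpu_tlbstate.cr4);
	native_write_cr4(cr4 ^ X86_CR4_PGE);	/* toggle PGE off, flush */
	native_write_cr4(cr4);			/* restore PGE, flush again */
	raw_local_irq_restore(flags);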
@@ -368,7 +382,14 @@ static inline void __native_flush_tlb_single(unsigned long addr)
 	if (!static_cpu_has(X86_FEATURE_PTI))
 		return;
 
-	invalidate_user_asid(loaded_mm_asid);
+	/*
+	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+	 * Just use invalidate_user_asid() in case we are called early.
+	 */
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+		invalidate_user_asid(loaded_mm_asid);
+	else
+		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
 
 /*
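Assembled from the two hunks above plus the pre-existing top of the function (the invlpg for the kernel mapping, reconstructed from the surrounding code rather than shown in this diff), the post-patch flush path reads roughly:

static inline void __native_flush_tlb_single(unsigned long addr)
{
	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);

	/* Flush the kernel-mapping TLB entry for this address. */
	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");

	if (!static_cpu_has(X86_FEATURE_PTI))
		return;

	/*
	 * New in this patch: flush the user-mapping entry too, via
	 * INVPCID when safe, else by invalidating the whole user ASID.
	 */
	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
		invalidate_user_asid(loaded_mm_asid);
	else
		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
}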
64 changes: 37 additions & 27 deletions arch/x86/mm/init.c
@@ -203,34 +203,44 @@ static void __init probe_page_size_mask(void)

 static void setup_pcid(void)
 {
-#ifdef CONFIG_X86_64
-	if (boot_cpu_has(X86_FEATURE_PCID)) {
-		if (boot_cpu_has(X86_FEATURE_PGE)) {
-			/*
-			 * This can't be cr4_set_bits_and_update_boot() --
-			 * the trampoline code can't handle CR4.PCIDE and
-			 * it wouldn't do any good anyway.  Despite the name,
-			 * cr4_set_bits_and_update_boot() doesn't actually
-			 * cause the bits in question to remain set all the
-			 * way through the secondary boot asm.
-			 *
-			 * Instead, we brute-force it and set CR4.PCIDE
-			 * manually in start_secondary().
-			 */
-			cr4_set_bits(X86_CR4_PCIDE);
-		} else {
-			/*
-			 * flush_tlb_all(), as currently implemented, won't
-			 * work if PCID is on but PGE is not.  Since that
-			 * combination doesn't exist on real hardware, there's
-			 * no reason to try to fully support it, but it's
-			 * polite to avoid corrupting data if we're on
-			 * an improperly configured VM.
-			 */
-			setup_clear_cpu_cap(X86_FEATURE_PCID);
-		}
-	}
-#endif
+	if (!IS_ENABLED(CONFIG_X86_64))
+		return;
+
+	if (!boot_cpu_has(X86_FEATURE_PCID))
+		return;
+
+	if (boot_cpu_has(X86_FEATURE_PGE)) {
+		/*
+		 * This can't be cr4_set_bits_and_update_boot() -- the
+		 * trampoline code can't handle CR4.PCIDE and it wouldn't
+		 * do any good anyway.  Despite the name,
+		 * cr4_set_bits_and_update_boot() doesn't actually cause
+		 * the bits in question to remain set all the way through
+		 * the secondary boot asm.
+		 *
+		 * Instead, we brute-force it and set CR4.PCIDE manually in
+		 * start_secondary().
+		 */
+		cr4_set_bits(X86_CR4_PCIDE);
+
+		/*
+		 * INVPCID's single-context modes (2/3) only work if we set
+		 * X86_CR4_PCIDE, *and* we have INVPCID support.  It's
+		 * unusable on systems that have X86_CR4_PCIDE clear, or
+		 * that have no INVPCID support at all.
+		 */
+		if (boot_cpu_has(X86_FEATURE_INVPCID))
+			setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+	} else {
+		/*
+		 * flush_tlb_all(), as currently implemented, won't work if
+		 * PCID is on but PGE is not.  Since that combination
+		 * doesn't exist on real hardware, there's no reason to try
+		 * to fully support it, but it's polite to avoid corrupting
+		 * data if we're on an improperly configured VM.
+		 */
+		setup_clear_cpu_cap(X86_FEATURE_PCID);
+	}
 }
 
 #ifdef CONFIG_X86_32
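
The "set CR4.PCIDE manually in start_secondary()" that the first comment refers to lives in arch/x86/kernel/smpboot.c; roughly, as reconstructed from memory of the era's code (a fragment, not part of this diff):

static void notrace start_secondary(void *unused)
{
	/*
	 * Don't put *anything* except direct CPU state initialization
	 * before cpu_init(), SMP booting is too fragile.
	 */
	if (boot_cpu_has(X86_FEATURE_PCID))
		__write_cr4(__read_cr4() | X86_CR4_PCIDE);

	/* ... rest of secondary-CPU bring-up ... */
}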
