diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 05b4eca156cf..f614c0522a0b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -878,6 +878,7 @@ config INTEL_TDX_GUEST depends on X86_64 && CPU_SUP_INTEL depends on X86_X2APIC depends on EFI_STUB + depends on PARAVIRT select ARCH_HAS_CC_PLATFORM select X86_MEM_ENCRYPT select X86_MCE diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 7772b01ab738..aa0eb4057226 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -398,7 +399,7 @@ static int handle_halt(struct ve_info *ve) return ve_instr_len(ve); } -void __cpuidle tdx_safe_halt(void) +void __cpuidle tdx_halt(void) { const bool irq_disabled = false; @@ -409,6 +410,16 @@ void __cpuidle tdx_safe_halt(void) WARN_ONCE(1, "HLT instruction emulation failed\n"); } +static void __cpuidle tdx_safe_halt(void) +{ + tdx_halt(); + /* + * "__cpuidle" section doesn't support instrumentation, so stick + * with raw_* variant that avoids tracing hooks. + */ + raw_local_irq_enable(); +} + static int read_msr(struct pt_regs *regs, struct ve_info *ve) { struct tdx_module_args args = { @@ -1109,6 +1120,19 @@ void __init tdx_early_init(void) x86_platform.guest.enc_kexec_begin = tdx_kexec_begin; x86_platform.guest.enc_kexec_finish = tdx_kexec_finish; + /* + * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that + * will enable interrupts before HLT TDCALL invocation if executed + * in STI-shadow, possibly resulting in missed wakeup events. + * + * Modify all possible HLT execution paths to use TDX specific routines + * that directly execute TDCALL and toggle the interrupt state as + * needed after TDCALL completion. This also reduces HLT related #VEs + * in addition to having a reliable halt logic execution. + */ + pv_ops.irq.safe_halt = tdx_safe_halt; + pv_ops.irq.halt = tdx_halt; + /* * TDX intercepts the RDMSR to read the X2APIC ID in the parallel * bringup low level code. That raises #VE which cannot be handled diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 65394aa9b49f..4a1922ec80cf 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -58,7 +58,7 @@ void tdx_get_ve_info(struct ve_info *ve); bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve); -void tdx_safe_halt(void); +void tdx_halt(void); bool tdx_early_handle_ve(struct pt_regs *regs); @@ -72,7 +72,7 @@ void __init tdx_dump_td_ctls(u64 td_ctls); #else static inline void tdx_early_init(void) { }; -static inline void tdx_safe_halt(void) { }; +static inline void tdx_halt(void) { }; static inline bool tdx_early_handle_ve(struct pt_regs *regs) { return false; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 91f6ff618852..962c3ce39323 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -939,7 +939,7 @@ void __init select_idle_routine(void) static_call_update(x86_idle, mwait_idle); } else if (cpu_feature_enabled(X86_FEATURE_TDX_GUEST)) { pr_info("using TDX aware idle routine\n"); - static_call_update(x86_idle, tdx_safe_halt); + static_call_update(x86_idle, tdx_halt); } else { static_call_update(x86_idle, default_idle); }