KVM: x86: Add support for SVM's Virtual NMI
Add support for SVM's Virtual NMI implementation, which adds proper
tracking of virtual NMI blocking, and an intr_ctrl flag that software can
set to mark a virtual NMI as pending.  Pending virtual NMIs are serviced
by hardware if/when virtual NMIs become unblocked, i.e. act more or less
like real NMIs.

Introduce two new kvm_x86_ops callbacks to support SVM's vNMI, as KVM
needs to treat a pending vNMI as partially injected.  Specifically, if
two NMIs (for L1) arrive concurrently in KVM's software model, KVM's ABI
is to inject one and pend the other.  Without vNMI, KVM manually tracks
the pending NMI and uses NMI windows to detect when the NMI should be
injected.

With vNMI, the pending NMI is simply stuffed into the VMCB and handed
off to hardware.  This means that KVM needs to be able to set a vNMI
pending on-demand, and also query if a vNMI is pending, e.g. to honor the
"at most one NMI pending" rule and to preserve all NMIs across save and
restore.

Warn if KVM attempts to open an NMI window when vNMI is fully enabled,
as the above logic should prevent KVM from ever getting to
kvm_check_and_inject_events() with two NMIs pending _in software_, and
the "at most one NMI pending" logic should prevent having an NMI pending
in hardware and an NMI pending in software if NMIs are also blocked, i.e.
if KVM can't immediately inject the second NMI.
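
For illustration only, here is a minimal userspace C model of the accounting
described above. It is not kernel code; the names (toy_vcpu, hw_vnmi_pending,
and this simplified process_nmi/set_vnmi_pending) are invented for the sketch.
It shows how two NMIs that arrive together end up as one NMI pending in
software and one pended in "hardware" via the vNMI flag, so software never
tracks more than one pending NMI.

```c
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the vNMI accounting; not the actual KVM code. */
struct toy_vcpu {
	unsigned int nmi_pending;     /* NMIs pending injection in software */
	bool         hw_vnmi_pending; /* V_NMI_PENDING set in the "VMCB" */
};

/* Model of set_vnmi_pending(): succeeds only if no vNMI is already pending. */
static bool set_vnmi_pending(struct toy_vcpu *v)
{
	if (v->hw_vnmi_pending)
		return false;
	v->hw_vnmi_pending = true;
	return true;
}

/* Model of process_nmi(): cap software-pending NMIs, offload one to hardware. */
static void process_nmi(struct toy_vcpu *v, unsigned int queued)
{
	unsigned int limit = 2;

	/* A vNMI already pending in hardware consumes one of the two slots. */
	if (v->hw_vnmi_pending)
		limit--;

	v->nmi_pending += queued;
	if (v->nmi_pending > limit)
		v->nmi_pending = limit;

	/* Hand one pending NMI off to "hardware" if possible. */
	if (v->nmi_pending && set_vnmi_pending(v))
		v->nmi_pending--;
}

int main(void)
{
	struct toy_vcpu v = { 0 };

	process_nmi(&v, 2);	/* two NMIs arrive "simultaneously" */
	printf("software pending: %u, hardware vNMI pending: %d\n",
	       v.nmi_pending, v.hw_vnmi_pending);
	return 0;
}
```

Run, the sketch prints "software pending: 1, hardware vNMI pending: 1",
matching the "at most one NMI pending in software" rule described above.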

Signed-off-by: Santosh Shukla <Santosh.Shukla@amd.com>
Co-developed-by: Maxim Levitsky <mlevitsk@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Link: https://lore.kernel.org/r/20230227084016.3368-11-santosh.shukla@amd.com
[sean: rewrite shortlog and changelog, massage code comments]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Santosh Shukla authored and Sean Christopherson committed Mar 23, 2023
1 parent bdedff2 commit fa4c027
Showing 5 changed files with 146 additions and 23 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/kvm-x86-ops.h
@@ -68,6 +68,8 @@ KVM_X86_OP(get_interrupt_shadow)
KVM_X86_OP(patch_hypercall)
KVM_X86_OP(inject_irq)
KVM_X86_OP(inject_nmi)
KVM_X86_OP_OPTIONAL_RET0(is_vnmi_pending)
KVM_X86_OP_OPTIONAL_RET0(set_vnmi_pending)
KVM_X86_OP(inject_exception)
KVM_X86_OP(cancel_injection)
KVM_X86_OP(interrupt_allowed)
11 changes: 10 additions & 1 deletion arch/x86/include/asm/kvm_host.h
@@ -876,7 +876,8 @@ struct kvm_vcpu_arch {
u64 tsc_scaling_ratio; /* current scaling ratio */

atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
unsigned nmi_pending; /* NMI queued after currently running handler */
/* Number of NMIs pending injection, not including hardware vNMIs. */
unsigned int nmi_pending;
bool nmi_injected; /* Trying to inject an NMI this entry */
bool smi_pending; /* SMI queued after currently running handler */
u8 handling_intr_from_guest;
@@ -1621,6 +1622,13 @@ struct kvm_x86_ops {
int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
bool (*get_nmi_mask)(struct kvm_vcpu *vcpu);
void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked);
/* Whether or not a virtual NMI is pending in hardware. */
bool (*is_vnmi_pending)(struct kvm_vcpu *vcpu);
/*
* Attempt to pend a virtual NMI in hardware.  Returns %true on success
* to allow using static_call_ret0 as the fallback.
*/
bool (*set_vnmi_pending)(struct kvm_vcpu *vcpu);
void (*enable_nmi_window)(struct kvm_vcpu *vcpu);
void (*enable_irq_window)(struct kvm_vcpu *vcpu);
void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr);
@@ -2005,6 +2013,7 @@ int kvm_pic_set_irq(struct kvm_pic *pic, int irq, int irq_source_id, int level);
void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id);

void kvm_inject_nmi(struct kvm_vcpu *vcpu);
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu);

void kvm_update_dr7(struct kvm_vcpu *vcpu);

114 changes: 93 additions & 21 deletions arch/x86/kvm/svm/svm.c
@@ -230,6 +230,8 @@ module_param(dump_invalid_vmcb, bool, 0644);
bool intercept_smi = true;
module_param(intercept_smi, bool, 0444);

bool vnmi = true;
module_param(vnmi, bool, 0444);

static bool svm_gp_erratum_intercept = true;

@@ -1311,6 +1313,9 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
if (kvm_vcpu_apicv_active(vcpu))
avic_init_vmcb(svm, vmcb);

if (vnmi)
svm->vmcb->control.int_ctl |= V_NMI_ENABLE_MASK;

if (vgif) {
svm_clr_intercept(svm, INTERCEPT_STGI);
svm_clr_intercept(svm, INTERCEPT_CLGI);
@@ -3525,6 +3530,39 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu)
++vcpu->stat.nmi_injections;
}

static bool svm_is_vnmi_pending(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (!is_vnmi_enabled(svm))
return false;

return !!(svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK);
}

static bool svm_set_vnmi_pending(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (!is_vnmi_enabled(svm))
return false;

if (svm->vmcb->control.int_ctl & V_NMI_PENDING_MASK)
return false;

svm->vmcb->control.int_ctl |= V_NMI_PENDING_MASK;
vmcb_mark_dirty(svm->vmcb, VMCB_INTR);

/*
* Because the pending NMI is serviced by hardware, KVM can't know when
* the NMI is "injected", but for all intents and purposes, passing the
* NMI off to hardware counts as injection.
*/
++vcpu->stat.nmi_injections;

return true;
}

static void svm_inject_irq(struct kvm_vcpu *vcpu, bool reinjected)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3620,6 +3658,35 @@ static void svm_update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
svm_set_intercept(svm, INTERCEPT_CR8_WRITE);
}

static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (is_vnmi_enabled(svm))
return svm->vmcb->control.int_ctl & V_NMI_BLOCKING_MASK;
else
return svm->nmi_masked;
}

static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (is_vnmi_enabled(svm)) {
if (masked)
svm->vmcb->control.int_ctl |= V_NMI_BLOCKING_MASK;
else
svm->vmcb->control.int_ctl &= ~V_NMI_BLOCKING_MASK;

} else {
svm->nmi_masked = masked;
if (masked)
svm_set_iret_intercept(svm);
else
svm_clr_iret_intercept(svm);
}
}

bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3631,8 +3698,10 @@ bool svm_nmi_blocked(struct kvm_vcpu *vcpu)
if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm))
return false;

return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) ||
svm->nmi_masked;
if (svm_get_nmi_mask(vcpu))
return true;

return vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK;
}

static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
@@ -3650,24 +3719,6 @@ static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
return 1;
}

static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
{
return to_svm(vcpu)->nmi_masked;
}

static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (masked) {
svm->nmi_masked = true;
svm_set_iret_intercept(svm);
} else {
svm->nmi_masked = false;
svm_clr_iret_intercept(svm);
}
}

bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3748,7 +3799,16 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);

if (svm->nmi_masked && !svm->awaiting_iret_completion)
/*
* KVM should never request an NMI window when vNMI is enabled, as KVM
* allows at most one to-be-injected NMI and one pending NMI, i.e. if
* two NMIs arrive simultaneously, KVM will inject one and set
* V_NMI_PENDING for the other. WARN, but continue with the standard
* single-step approach to try and salvage the pending NMI.
*/
WARN_ON_ONCE(is_vnmi_enabled(svm));

if (svm_get_nmi_mask(vcpu) && !svm->awaiting_iret_completion)
return; /* IRET will cause a vm exit */

if (!gif_set(svm)) {
@@ -4797,6 +4857,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.patch_hypercall = svm_patch_hypercall,
.inject_irq = svm_inject_irq,
.inject_nmi = svm_inject_nmi,
.is_vnmi_pending = svm_is_vnmi_pending,
.set_vnmi_pending = svm_set_vnmi_pending,
.inject_exception = svm_inject_exception,
.cancel_injection = svm_cancel_injection,
.interrupt_allowed = svm_interrupt_allowed,
@@ -5090,6 +5152,16 @@ static __init int svm_hardware_setup(void)
pr_info("Virtual GIF supported\n");
}

vnmi = vgif && vnmi && boot_cpu_has(X86_FEATURE_VNMI);
if (vnmi)
pr_info("Virtual NMI enabled\n");

if (!vnmi) {
svm_x86_ops.is_vnmi_pending = NULL;
svm_x86_ops.set_vnmi_pending = NULL;
}

if (lbrv) {
if (!boot_cpu_has(X86_FEATURE_LBRV))
lbrv = false;
22 changes: 22 additions & 0 deletions arch/x86/kvm/svm/svm.h
@@ -36,6 +36,7 @@ extern bool npt_enabled;
extern int vgif;
extern bool intercept_smi;
extern bool x2avic_enabled;
extern bool vnmi;

/*
* Clean bits in VMCB.
@@ -548,6 +549,27 @@ static inline bool is_x2apic_msrpm_offset(u32 offset)
(msr < (APIC_BASE_MSR + 0x100));
}

static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
{
if (!vnmi)
return NULL;

if (is_guest_mode(&svm->vcpu))
return NULL;
else
return svm->vmcb01.ptr;
}

static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
{
struct vmcb *vmcb = get_vnmi_vmcb_l1(svm);

if (vmcb)
return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
else
return false;
}

/* svm.c */
#define MSR_INVALID 0xffffffffU

20 changes: 19 additions & 1 deletion arch/x86/kvm/x86.c
@@ -5125,7 +5125,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);

events->nmi.injected = vcpu->arch.nmi_injected;
events->nmi.pending = vcpu->arch.nmi_pending;
events->nmi.pending = kvm_get_nr_pending_nmis(vcpu);
events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);

/* events->sipi_vector is never valid when reporting to user space */
@@ -10158,13 +10158,31 @@ static void process_nmi(struct kvm_vcpu *vcpu)
else
limit = 2;

/*
* Adjust the limit to account for pending virtual NMIs, which aren't
* tracked in vcpu->arch.nmi_pending.
*/
if (static_call(kvm_x86_is_vnmi_pending)(vcpu))
limit--;

vcpu->arch.nmi_pending += atomic_xchg(&vcpu->arch.nmi_queued, 0);
vcpu->arch.nmi_pending = min(vcpu->arch.nmi_pending, limit);

if (vcpu->arch.nmi_pending &&
(static_call(kvm_x86_set_vnmi_pending)(vcpu)))
vcpu->arch.nmi_pending--;

if (vcpu->arch.nmi_pending)
kvm_make_request(KVM_REQ_EVENT, vcpu);
}

/* Return total number of NMIs pending injection to the VM */
int kvm_get_nr_pending_nmis(struct kvm_vcpu *vcpu)
{
return vcpu->arch.nmi_pending +
static_call(kvm_x86_is_vnmi_pending)(vcpu);
}

void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
unsigned long *vcpu_bitmap)
{
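
Usage note (illustrative, not part of the commit): because
kvm_vcpu_ioctl_x86_get_vcpu_events() now reports the count from
kvm_get_nr_pending_nmis(), userspace that already saves and restores NMI state
via KVM_GET_VCPU_EVENTS picks up hardware-pended vNMIs automatically. A minimal
sketch, assuming vcpu_fd is a vCPU file descriptor obtained elsewhere via
KVM_CREATE_VCPU:

```c
#include <linux/kvm.h>
#include <stdio.h>
#include <sys/ioctl.h>

/* Sketch: dump the NMI state of an existing vCPU; vcpu_fd is assumed to
 * come from KVM_CREATE_VCPU elsewhere. */
static int dump_nmi_state(int vcpu_fd)
{
	struct kvm_vcpu_events events;

	if (ioctl(vcpu_fd, KVM_GET_VCPU_EVENTS, &events) < 0) {
		perror("KVM_GET_VCPU_EVENTS");
		return -1;
	}

	/* With this patch, .pending also counts an NMI pended in hardware
	 * via V_NMI_PENDING, so save/restore preserves it. */
	printf("nmi: injected=%d pending=%d masked=%d\n",
	       events.nmi.injected, events.nmi.pending, events.nmi.masked);
	return 0;
}
```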
