Skip to content

Commit

Permalink
KVM: VMX: use preemption timer to force immediate VMExit
Browse files Browse the repository at this point in the history
A VMX preemption timer value of '0' is guaranteed to cause a VMExit
prior to the CPU executing any instructions in the guest.  Use the
preemption timer (if it's supported) to trigger immediate VMExit
in place of the current method of sending a self-IPI.  This ensures
that pending VMExit injection to L1 occurs prior to executing any
instructions in the guest (regardless of nesting level).

When deferring VMExit injection, KVM generates an immediate VMExit
from the (possibly nested) guest by sending itself an IPI.  Because
hardware interrupts are blocked prior to VMEnter and are unblocked
(in hardware) after VMEnter, this results in taking a VMExit(INTR)
before any guest instruction is executed.  But, as this approach
relies on the IPI being received before VMEnter executes, it only
works as intended when KVM is running as L0.  Because there are no
architectural guarantees regarding when IPIs are delivered, when
running nested the INTR may "arrive" long after L2 is running e.g.
L0 KVM doesn't force an immediate switch to L1 to deliver an INTR.

For the most part, this unintended delay is not an issue since the
events being injected to L1 also do not have architectural guarantees
regarding their timing.  The notable exception is the VMX preemption
timer[1], which is architecturally guaranteed to cause a VMExit prior
to executing any instructions in the guest if the timer value is '0'
at VMEnter.  Specifically, the delay in injecting the VMExit causes
the preemption timer KVM unit test to fail when run in a nested guest.

Note: this approach is viable even on CPUs with a broken preemption
timer, as broken in this context only means the timer counts at the
wrong rate.  There are no known errata affecting timer value of '0'.

[1] I/O SMIs also have guarantees on when they arrive, but I have
    no idea if/how those are emulated in KVM.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
[Use a hook for SVM instead of leaving the default in x86.c - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
Sean Christopherson authored and Paolo Bonzini committed Sep 19, 2018
1 parent f459a70 commit d264ee0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 2 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -1055,6 +1055,7 @@ struct kvm_x86_ops {
bool (*umip_emulated)(void);

int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
void (*request_immediate_exit)(struct kvm_vcpu *vcpu);

void (*sched_in)(struct kvm_vcpu *kvm, int cpu);

Expand Down Expand Up @@ -1482,6 +1483,7 @@ extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);

int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu);
int kvm_complete_insn_gp(struct kvm_vcpu *vcpu, int err);
void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu);

int kvm_is_in_guest(void);

Expand Down
2 changes: 2 additions & 0 deletions arch/x86/kvm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -7148,6 +7148,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,

.request_immediate_exit = __kvm_request_immediate_exit,

.sched_in = svm_sched_in,

.pmu_ops = &amd_pmu_ops,
Expand Down
21 changes: 20 additions & 1 deletion arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,8 @@ struct vcpu_vmx {
int ple_window;
bool ple_window_dirty;

bool req_immediate_exit;

/* Support for PML */
#define PML_ENTITY_NUM 512
struct page *pml_pg;
Expand Down Expand Up @@ -2865,6 +2867,8 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
u16 fs_sel, gs_sel;
int i;

vmx->req_immediate_exit = false;

if (vmx->loaded_cpu_state)
return;

Expand Down Expand Up @@ -7967,6 +7971,9 @@ static __init int hardware_setup(void)
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
}

if (!cpu_has_vmx_preemption_timer())
kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;

if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
u64 vmx_msr;

Expand Down Expand Up @@ -9209,7 +9216,8 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)

static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
kvm_lapic_expired_hv_timer(vcpu);
if (!to_vmx(vcpu)->req_immediate_exit)
kvm_lapic_expired_hv_timer(vcpu);
return 1;
}

Expand Down Expand Up @@ -10611,6 +10619,11 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
u64 tscl;
u32 delta_tsc;

if (vmx->req_immediate_exit) {
vmx_arm_hv_timer(vmx, 0);
return;
}

if (vmx->hv_deadline_tsc != -1) {
tscl = rdtsc();
if (vmx->hv_deadline_tsc > tscl)
Expand Down Expand Up @@ -12879,6 +12892,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
return 0;
}

static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)
{
to_vmx(vcpu)->req_immediate_exit = true;
}

static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
{
ktime_t remaining =
Expand Down Expand Up @@ -14135,6 +14153,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.umip_emulated = vmx_umip_emulated,

.check_nested_events = vmx_check_nested_events,
.request_immediate_exit = vmx_request_immediate_exit,

.sched_in = vmx_sched_in,

Expand Down
8 changes: 7 additions & 1 deletion arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -7361,6 +7361,12 @@ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_reload_apic_access_page);

void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu)
{
smp_send_reschedule(vcpu->cpu);
}
EXPORT_SYMBOL_GPL(__kvm_request_immediate_exit);

/*
* Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
Expand Down Expand Up @@ -7565,7 +7571,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)

if (req_immediate_exit) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
smp_send_reschedule(vcpu->cpu);
kvm_x86_ops->request_immediate_exit(vcpu);
}

trace_kvm_entry(vcpu->vcpu_id);
Expand Down

0 comments on commit d264ee0

Please sign in to comment.