Skip to content

Commit

Permalink
KVM: X86: Provide a capability to disable MWAIT intercepts
Browse files Browse the repository at this point in the history
Allowing a guest to execute MWAIT without interception enables a guest
to put a (physical) CPU into a power saving state, where it takes
longer to return from than what may be desired by the host.

Don't give a guest that power over a host by default. (Especially,
since nothing prevents a guest from using MWAIT even when it is not
advertised via CPUID.)

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
  • Loading branch information
Wanpeng Li authored and Paolo Bonzini committed Mar 16, 2018
1 parent 4956aa3 commit 4d5422c
Show file tree
Hide file tree
Showing 8 changed files with 53 additions and 25 deletions.
27 changes: 18 additions & 9 deletions Documentation/virtual/kvm/api.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4358,6 +4358,24 @@ enables QEMU to build error log and branch to guest kernel registered
machine check handling routine. Without this capability KVM will
branch to guests' 0x200 interrupt vector.

7.13 KVM_CAP_X86_DISABLE_EXITS

Architectures: x86
Parameters: args[0] defines which exits are disabled
Returns: 0 on success, -EINVAL when args[0] contains invalid exits

Valid bits in args[0] are

#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)

Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some
workloads, and is suggested when vCPUs are associated to dedicated
physical CPUs. More bits can be added in the future; userspace can
just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
all such vmexits.


8. Other capabilities.
----------------------

Expand Down Expand Up @@ -4470,15 +4488,6 @@ reserved.
Both registers and addresses are 64-bits wide.
It will be possible to run 64-bit or 32-bit guest code.

8.8 KVM_CAP_X86_GUEST_MWAIT

Architectures: x86

This capability indicates that guest using memory monotoring instructions
(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit. As such time
spent while virtual CPU is halted in this way will then be accounted for as
guest running time on the host (as opposed to e.g. HLT).

8.9 KVM_CAP_ARM_USER_IRQ

Architectures: arm, arm64
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -811,6 +811,8 @@ struct kvm_arch {

gpa_t wall_clock;

bool mwait_in_guest;

bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr;

Expand Down
2 changes: 1 addition & 1 deletion arch/x86/kvm/svm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1398,7 +1398,7 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_XSETBV);
set_intercept(svm, INTERCEPT_RSM);

if (!kvm_mwait_in_guest()) {
if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
set_intercept(svm, INTERCEPT_MONITOR);
set_intercept(svm, INTERCEPT_MWAIT);
}
Expand Down
9 changes: 5 additions & 4 deletions arch/x86/kvm/vmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -3746,13 +3746,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;

if (!kvm_mwait_in_guest())
min |= CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING;

opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
Expand Down Expand Up @@ -5544,6 +5542,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
exec_control |= CPU_BASED_CR3_STORE_EXITING |
CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_INVLPG_EXITING;
if (kvm_mwait_in_guest(vmx->vcpu.kvm))
exec_control &= ~(CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING);
return exec_control;
}

Expand Down
24 changes: 20 additions & 4 deletions arch/x86/kvm/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -2813,9 +2813,15 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
return r;
}

static inline bool kvm_can_mwait_in_guest(void)
{
return boot_cpu_has(X86_FEATURE_MWAIT) &&
!boot_cpu_has_bug(X86_BUG_MONITOR);
}

int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
int r;
int r = 0;

switch (ext) {
case KVM_CAP_IRQCHIP:
Expand Down Expand Up @@ -2871,8 +2877,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_GUEST_MWAIT:
r = kvm_mwait_in_guest();
case KVM_CAP_X86_DISABLE_EXITS:
if(kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
Expand Down Expand Up @@ -2913,7 +2920,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_X2APIC_API_VALID_FLAGS;
break;
default:
r = 0;
break;
}
return r;
Expand Down Expand Up @@ -4218,6 +4224,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,

r = 0;
break;
case KVM_CAP_X86_DISABLE_EXITS:
r = -EINVAL;
if (cap->args[0] & ~KVM_X86_DISABLE_VALID_EXITS)
break;

if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
kvm_can_mwait_in_guest())
kvm->arch.mwait_in_guest = true;
r = 0;
break;
default:
r = -EINVAL;
break;
Expand Down
10 changes: 5 additions & 5 deletions arch/x86/kvm/x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H

#include <asm/processor.h>
#include <asm/mwait.h>
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
Expand Down Expand Up @@ -266,10 +264,12 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})

static inline bool kvm_mwait_in_guest(void)
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT)

static inline bool kvm_mwait_in_guest(struct kvm *kvm)
{
return boot_cpu_has(X86_FEATURE_MWAIT) &&
!boot_cpu_has_bug(X86_BUG_MONITOR);
return kvm->arch.mwait_in_guest;
}

#endif
2 changes: 1 addition & 1 deletion include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -929,7 +929,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_GS 140
#define KVM_CAP_S390_AIS 141
#define KVM_CAP_SPAPR_TCE_VFIO 142
#define KVM_CAP_X86_GUEST_MWAIT 143
#define KVM_CAP_X86_DISABLE_EXITS 143
#define KVM_CAP_ARM_USER_IRQ 144
#define KVM_CAP_S390_CMMA_MIGRATION 145
#define KVM_CAP_PPC_FWNMI 146
Expand Down
2 changes: 1 addition & 1 deletion tools/include/uapi/linux/kvm.h
Original file line number Diff line number Diff line change
Expand Up @@ -924,7 +924,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_S390_GS 140
#define KVM_CAP_S390_AIS 141
#define KVM_CAP_SPAPR_TCE_VFIO 142
#define KVM_CAP_X86_GUEST_MWAIT 143
#define KVM_CAP_X86_DISABLE_EXITS 143
#define KVM_CAP_ARM_USER_IRQ 144
#define KVM_CAP_S390_CMMA_MIGRATION 145
#define KVM_CAP_PPC_FWNMI 146
Expand Down

0 comments on commit 4d5422c

Please sign in to comment.