Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Paolo Bonzini:
 "x86 KVM changes:

   - The usual accuracy improvements for nested virtualization

   - The usual round of code cleanups from Sean

   - Added back optimizations that were prematurely removed in 5.2 (the
     bare minimum needed to fix the regression was in 5.3-rc8, here
     comes the rest)

   - Support for UMWAIT/UMONITOR/TPAUSE

   - Direct L2->L0 TLB flushing when L0 is Hyper-V and L1 is KVM

   - Tell Windows guests if SMT is disabled on the host

   - More accurate detection of vmexit cost

   - Revert a pvqspinlock pessimization"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (56 commits)
  KVM: nVMX: cleanup and fix host 64-bit mode checks
  KVM: vmx: fix build warnings in hv_enable_direct_tlbflush() on i386
  KVM: x86: Don't check kvm_rebooting in __kvm_handle_fault_on_reboot()
  KVM: x86: Drop ____kvm_handle_fault_on_reboot()
  KVM: VMX: Add error handling to VMREAD helper
  KVM: VMX: Optimize VMX instruction error and fault handling
  KVM: x86: Check kvm_rebooting in kvm_spurious_fault()
  KVM: selftests: fix ucall on x86
  Revert "locking/pvqspinlock: Don't wait if vCPU is preempted"
  kvm: nvmx: limit atomic switch MSRs
  kvm: svm: Intercept RDPRU
  kvm: x86: Add "significant index" flag to a few CPUID leaves
  KVM: x86/mmu: Skip invalid pages during zapping iff root_count is zero
  KVM: x86/mmu: Explicitly track only a single invalid mmu generation
  KVM: x86/mmu: Revert "KVM: x86/mmu: Remove is_obsolete() call"
  KVM: x86/mmu: Revert "Revert "KVM: MMU: reclaim the zapped-obsolete page first""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: collapse TLB flushes when zap all pages""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: add tracepoint for kvm_mmu_invalidate_all_pages""
  KVM: x86/mmu: Revert "Revert "KVM: MMU: show mmu_valid_gen in shadow page related tracepoints""
  ...
Linus Torvalds committed Sep 27, 2019
2 parents e37e3bc + fd3edd4 commit 8bbe0de
Showing 36 changed files with 906 additions and 468 deletions.
13 changes: 13 additions & 0 deletions Documentation/virt/kvm/api.txt
@@ -5309,3 +5309,16 @@ Architectures: x86
This capability indicates that KVM supports paravirtualized Hyper-V IPI send
hypercalls:
HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH

Architecture: x86

This capability indicates that KVM, when running on top of the Hyper-V
hypervisor, enables Direct TLB flush for its guests, meaning that TLB
flush hypercalls are handled by the Level 0 hypervisor (Hyper-V) and
bypass KVM. Due to the different ABI for hypercall parameters between
Hyper-V and KVM, enabling this capability effectively disables all
hypercall handling by KVM (as some KVM hypercalls may be mistakenly
treated as TLB flush hypercalls by Hyper-V), so userspace should disable
KVM identification in CPUID and expose only the Hyper-V identification.
The guest will then think it's running on Hyper-V and use only Hyper-V
hypercalls.
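A minimal userspace sketch of opting a vCPU into this capability, assuming
the VMM has already created vcpu_fd and arranged Hyper-V-only identification
in CPUID (everything here except the ioctl and capability name is
illustrative):

#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

/* Sketch: per-vCPU KVM_ENABLE_CAP for direct TLB flush. */
static int enable_direct_tlbflush(int vcpu_fd)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_HYPERV_DIRECT_TLBFLUSH;

        return ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap);
}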
31 changes: 26 additions & 5 deletions arch/x86/include/asm/hyperv-tlfs.h
@@ -180,7 +180,15 @@
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)

/*
 * Virtual processor will never share a physical core with another virtual
 * processor, except for virtual processors that are reported as sibling SMT
 * threads.
 */
#define HV_X64_NO_NONARCH_CORESHARING		BIT(18)

/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
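A hedged sketch of probing these bits from a kernel running nested on
Hyper-V; HYPERV_CPUID_NESTED_FEATURES (leaf 0x4000000A, defined elsewhere in
this header) is assumed, and the helper name is illustrative:

#include <asm/processor.h>

/* Sketch: does L0 (Hyper-V) offer direct TLB flush for our guests? */
static bool hv_nested_direct_flush_supported(void)
{
        return cpuid_eax(HYPERV_CPUID_NESTED_FEATURES) &
               HV_X64_NESTED_DIRECT_FLUSH;
}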

@@ -524,14 +532,24 @@ struct hv_timer_message_payload {
	__u64 delivery_time;	/* When the message was delivered */
} __packed;

struct hv_nested_enlightenments_control {
	struct {
		__u32 directhypercall:1;
		__u32 reserved:31;
	} features;
	struct {
		__u32 reserved;
	} hypercallControls;
} __packed;

/* Define virtual processor assist page structure. */
struct hv_vp_assist_page {
	__u32 apic_assist;
-	__u32 reserved;
-	__u64 vtl_control[2];
-	__u64 nested_enlightenments_control[2];
-	__u32 enlighten_vmentry;
-	__u32 padding;
+	__u32 reserved1;
+	__u64 vtl_control[3];
+	struct hv_nested_enlightenments_control nested_control;
+	__u8 enlighten_vmentry;
+	__u8 reserved2[7];
	__u64 current_nested_vmcs;
} __packed;
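Both layouts place current_nested_vmcs at byte 48 (4+4+16+16+4+4 before,
4+4+24+8+1+7 after), so existing consumers of that field keep their offset.
A compile-time check along these lines — a sketch, not part of the patch —
would pin that down:

#include <linux/build_bug.h>
#include <linux/stddef.h>

/* Sketch: the reshuffled assist page must not move current_nested_vmcs. */
static inline void hv_vp_assist_page_layout_check(void)
{
        BUILD_BUG_ON(offsetof(struct hv_vp_assist_page, nested_control) != 32);
        BUILD_BUG_ON(offsetof(struct hv_vp_assist_page,
                              current_nested_vmcs) != 48);
}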

@@ -882,4 +900,7 @@ struct hv_tlb_flush_ex {
	u64 gva_list[];
} __packed;

struct hv_partition_assist_pg {
	u32 tlb_lock_count;
};
#endif
64 changes: 41 additions & 23 deletions arch/x86/include/asm/kvm_host.h
@@ -320,6 +320,7 @@ struct kvm_mmu_page {
	struct list_head link;
	struct hlist_node hash_link;
	bool unsync;
+	u8 mmu_valid_gen;
	bool mmio_cached;

/*
@@ -335,7 +336,6 @@ struct kvm_mmu_page {
	int root_count; /* Currently serving as active root */
	unsigned int unsync_children;
	struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
-	unsigned long mmu_valid_gen;
	DECLARE_BITMAP(unsync_child_bitmap, 512);

#ifdef CONFIG_X86_32
@@ -844,6 +844,8 @@ struct kvm_hv {

	/* How many vCPUs have VP index != vCPU index */
	atomic_t num_mismatched_vp_indexes;

	struct hv_partition_assist_pg *hv_pa_pg;
};

enum kvm_irqchip_mode {
@@ -857,12 +859,13 @@ struct kvm_arch {
	unsigned long n_requested_mmu_pages;
	unsigned long n_max_mmu_pages;
	unsigned int indirect_shadow_pages;
-	unsigned long mmu_valid_gen;
+	u8 mmu_valid_gen;
	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
	/*
	 * Hash table of struct kvm_mmu_page.
	 */
	struct list_head active_mmu_pages;
+	struct list_head zapped_obsolete_pages;
	struct kvm_page_track_notifier_node mmu_sp_tracker;
	struct kvm_page_track_notifier_head track_notifier_head;
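The u8 generation pairs with a helper in mmu.c that this series reinstates;
a sketch of its shape:

/* Sketch: a shadow page is obsolete once the VM-wide generation moves on. */
static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
{
        return unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
}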

@@ -1213,6 +1216,7 @@ struct kvm_x86_ops {
	bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);

	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
};

struct kvm_arch_async_pf {
@@ -1312,18 +1316,42 @@ extern u64 kvm_default_tsc_scaling_ratio;

extern u64 kvm_mce_cap_supported;

enum emulation_result {
	EMULATE_DONE,         /* no further processing */
	EMULATE_USER_EXIT,    /* kvm_run ready for userspace exit */
	EMULATE_FAIL,         /* can't emulate this instruction */
};

/*
 * EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
 *                      userspace I/O) to indicate that the emulation context
 *                      should be reused as is, i.e. skip initialization of
 *                      emulation context, instruction fetch and decode.
 *
 * EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
 *                    Indicates that only select instructions (tagged with
 *                    EmulateOnUD) should be emulated (to minimize the emulator
 *                    attack surface). See also EMULTYPE_TRAP_UD_FORCED.
 *
 * EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
 *                 decode the instruction length. For use *only* by
 *                 kvm_x86_ops->skip_emulated_instruction() implementations.
 *
 * EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
 *                        retry native execution under certain conditions.
 *
 * EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
 *                           triggered by KVM's magic "force emulation" prefix,
 *                           which is opt in via module param (off by default).
 *                           Bypasses EmulateOnUD restriction despite emulating
 *                           due to an intercepted #UD (see EMULTYPE_TRAP_UD).
 *                           Used to test the full emulator from userspace.
 *
 * EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
 *                      backdoor emulation, which is opt in via module param.
 *                      VMware backdoor emulation handles select instructions
 *                      and reinjects the #GP for all other cases.
 */
#define EMULTYPE_NO_DECODE          (1 << 0)
#define EMULTYPE_TRAP_UD            (1 << 1)
#define EMULTYPE_SKIP               (1 << 2)
#define EMULTYPE_ALLOW_RETRY        (1 << 3)
-#define EMULTYPE_NO_UD_ON_FAIL      (1 << 4)
-#define EMULTYPE_VMWARE             (1 << 5)
+#define EMULTYPE_TRAP_UD_FORCED     (1 << 4)
+#define EMULTYPE_VMWARE_GP          (1 << 5)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
					void *insn, int insn_len);
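A sketch of how an intercepted-#UD path drives these flags, loosely modeled
on KVM's handle_ud(); force_emulation_prefix_matched() is a hypothetical
stand-in for the prefix check, and mapping the result to a vmexit return
code is elided:

/* Sketch: emulate a #UD intercept, widening scope only for the test prefix. */
static int handle_ud_sketch(struct kvm_vcpu *vcpu)
{
        int emul_type = EMULTYPE_TRAP_UD;

        /* Hypothetical helper: did the guest use the force-emulation prefix? */
        if (force_emulation_prefix_matched(vcpu))
                emul_type = EMULTYPE_TRAP_UD_FORCED;

        return kvm_emulate_instruction(vcpu, emul_type);
}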
@@ -1506,32 +1534,22 @@ enum {
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)

-asmlinkage void __noreturn kvm_spurious_fault(void);
+asmlinkage void kvm_spurious_fault(void);

/*
 * Hardware virtualization extension instructions may fault if a
 * reboot turns off virtualization while processes are running.
 * Usually after catching the fault we just panic; during reboot
 * instead the instruction is ignored.
 */
-#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
+#define __kvm_handle_fault_on_reboot(insn) \
	"666: \n\t" \
	insn "\n\t" \
	"jmp 668f \n\t" \
	"667: \n\t" \
	"call kvm_spurious_fault \n\t" \
	"668: \n\t" \
-	".pushsection .fixup, \"ax\" \n\t" \
-	"700: \n\t" \
-	cleanup_insn "\n\t" \
-	"cmpb $0, kvm_rebooting\n\t" \
-	"je 667b \n\t" \
-	"jmp 668b \n\t" \
-	".popsection \n\t" \
-	_ASM_EXTABLE(666b, 700b)
-
-#define __kvm_handle_fault_on_reboot(insn) \
-	____kvm_handle_fault_on_reboot(insn, "")
+	_ASM_EXTABLE(666b, 667b)
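In use, VMX wraps individual instructions with this macro so that a fault
(e.g. VMXOFF after reboot has cleared CR4.VMXE) lands in
kvm_spurious_fault() instead of an oops. A sketch, with __ex as the
conventional wrapper name in KVM's VMX code:

#define __ex(insn) __kvm_handle_fault_on_reboot(insn)

/* Sketch: VMXOFF that tolerates faulting while the machine reboots. */
static inline void kvm_cpu_vmxoff_sketch(void)
{
        asm volatile(__ex("vmxoff"));
}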

#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
1 change: 1 addition & 0 deletions arch/x86/include/asm/svm.h
@@ -52,6 +52,7 @@ enum {
	INTERCEPT_MWAIT,
	INTERCEPT_MWAIT_COND,
	INTERCEPT_XSETBV,
	INTERCEPT_RDPRU,
};


2 changes: 2 additions & 0 deletions arch/x86/include/asm/vmx.h
@@ -69,6 +69,7 @@
#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000

#define PIN_BASED_EXT_INTR_MASK 0x00000001
#define PIN_BASED_NMI_EXITING 0x00000008
@@ -110,6 +111,7 @@
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
#define VMX_MISC_ACTIVITY_HLT 0x00000040
#define VMX_MISC_ZERO_LEN_INS 0x40000000
#define VMX_MISC_MSR_LIST_MULTIPLIER 512

/* VMFUNC functions */
#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001
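The multiplier backs the "kvm: nvmx: limit atomic switch MSRs" change in
this pull: bits 27:25 of IA32_VMX_MISC advertise a recommended maximum of
(N + 1) * 512 entries in the VM-entry/VM-exit MSR-load/store lists. A
sketch of the bound, taking the raw MSR value as an assumed parameter:

#include <linux/bits.h>

/* Sketch: max MSR-list entries implied by IA32_VMX_MISC bits 27:25. */
static u64 max_atomic_switch_msrs(u64 vmx_misc)
{
        u64 n = (vmx_misc & GENMASK_ULL(27, 25)) >> 25;

        return (n + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
}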
1 change: 1 addition & 0 deletions arch/x86/include/uapi/asm/svm.h
Expand Up @@ -75,6 +75,7 @@
#define SVM_EXIT_MWAIT 0x08b
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
#define SVM_EXIT_RDPRU 0x08e
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
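Per "kvm: svm: Intercept RDPRU" in this pull, KVM does not virtualize
RDPRU, so the new intercept simply reflects #UD into the guest. A sketch of
the svm.c handler shape:

/* Sketch: RDPRU is not virtualized; raise #UD in the guest. */
static int rdpru_interception(struct vcpu_svm *svm)
{
        kvm_queue_exception(&svm->vcpu, UD_VECTOR);
        return 1;       /* handled; resume the guest */
}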
6 changes: 5 additions & 1 deletion arch/x86/include/uapi/asm/vmx.h
@@ -86,6 +86,8 @@
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
#define EXIT_REASON_UMWAIT 67
#define EXIT_REASON_TPAUSE 68

#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -144,7 +146,9 @@
	{ EXIT_REASON_RDSEED, "RDSEED" }, \
	{ EXIT_REASON_PML_FULL, "PML_FULL" }, \
	{ EXIT_REASON_XSAVES, "XSAVES" }, \
-	{ EXIT_REASON_XRSTORS, "XRSTORS" }
+	{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
+	{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
+	{ EXIT_REASON_TPAUSE, "TPAUSE" }

#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
6 changes: 6 additions & 0 deletions arch/x86/kernel/cpu/umwait.c
@@ -17,6 +17,12 @@
*/
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);

u32 get_umwait_control_msr(void)
{
	return umwait_control_cached;
}
EXPORT_SYMBOL_GPL(get_umwait_control_msr);

/*
 * Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
 * hardware or BIOS before kernel boot.
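KVM's VMX code consumes this export so the guest's IA32_UMWAIT_CONTROL does
not leak into the host. A sketch of the switching logic, modeled on the
vmx.c path; vmx_has_waitpkg() and add_atomic_switch_msr() are vmx.c-internal
helpers whose signatures are assumed here:

/* Sketch: swap IA32_UMWAIT_CONTROL across VM entry/exit via the MSR lists. */
static void atomic_switch_umwait_control_sketch(struct vcpu_vmx *vmx)
{
        if (!vmx_has_waitpkg(vmx))
                return;

        add_atomic_switch_msr(vmx, MSR_IA32_UMWAIT_CONTROL,
                              vmx->msr_ia32_umwait_control,
                              get_umwait_control_msr(), false);
}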
8 changes: 7 additions & 1 deletion arch/x86/kvm/cpuid.c
@@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
	case 7:
	case 0xb:
	case 0xd:
	case 0xf:
	case 0x10:
	case 0x12:
	case 0x14:
	case 0x17:
	case 0x18:
	case 0x1f:
	case 0x8000001d:
		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
		break;
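Userspace sees the flag via KVM_GET_SUPPORTED_CPUID when assembling per-vCPU
CPUID. A sketch of walking the result — note the uapi flag name really is
spelled without the second "I":

#include <linux/kvm.h>
#include <stdio.h>

/* Sketch: list supported CPUID leaves whose ECX index is significant. */
static void dump_significant_index_leaves(struct kvm_cpuid2 *cpuid)
{
        for (__u32 i = 0; i < cpuid->nent; i++) {
                struct kvm_cpuid_entry2 *e = &cpuid->entries[i];

                if (e->flags & KVM_CPUID_FLAG_SIGNIFCANT_INDEX)
                        printf("leaf 0x%x: index 0x%x matters\n",
                               e->function, e->index);
        }
}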
@@ -360,7 +366,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
	F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
	F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
	F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
-	F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
+	F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;

/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
16 changes: 13 additions & 3 deletions arch/x86/kvm/hyperv.c
@@ -23,6 +23,7 @@
#include "ioapic.h"
#include "hyperv.h"

#include <linux/cpu.h>
#include <linux/kvm_host.h>
#include <linux/highmem.h>
#include <linux/sched/cputime.h>
@@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
		.vector = stimer->config.apic_vector
	};

-	return !kvm_apic_set_irq(vcpu, &irq, NULL);
+	if (lapic_in_kernel(vcpu))
+		return !kvm_apic_set_irq(vcpu, &irq, NULL);
+	return 0;
}

static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
@@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,

	ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
	ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
-	ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
+
+	/*
+	 * Direct Synthetic timers only make sense with in-kernel
+	 * LAPIC
+	 */
+	if (lapic_in_kernel(vcpu))
+		ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;

	break;

@@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
	ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
	if (evmcs_ver)
		ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
-
+	if (!cpu_smt_possible())
+		ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
	/*
	 * Default number of spinlock retry attempts, matches
	 * HyperV 2016.
