Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "This is a pretty large diffstat for this time of the release. The main
  culprit is a reorganization of the AMD assembly trampoline, allowing
  percpu variables to be accessed early.

  This is needed for the return stack depth tracking retbleed mitigation
  that will be in 6.2, but it also makes it possible to tighten the IBRS
  restore on vmexit. The latter change is a long tail of the
  spectrev2/retbleed patches (the corresponding Intel change was simpler
  and went in already last June), which is why I am including it right
  now instead of sharing a topic branch with tip.

  Being assembly and being rich in comments makes the line count balloon
  a bit, but I am pretty confident in the change (famous last words)
  because the reorganization actually makes everything simpler and more
  understandable than before. It has also had external review and has
  been tested on the aforementioned 6.2 changes, which explode quite
  brutally without the fix.

  Apart from this, things are pretty normal.

  s390:

   - PCI fix

   - PV clock fix

  x86:

   - Fix clash between PMU MSRs and other MSRs

   - Prepare SVM assembly trampoline for 6.2 retbleed mitigation and
     for...

   - ... tightening IBRS restore on vmexit, moving it before the first
     RET or indirect branch

   - Fix log level for VMSA dump

   - Block all page faults during kvm_zap_gfn_range()

  Tools:

   - kvm_stat: fix incorrect detection of debugfs

   - kvm_stat: update vmexit definitions"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86/mmu: Block all page faults during kvm_zap_gfn_range()
  KVM: x86/pmu: Limit the maximum number of supported AMD GP counters
  KVM: x86/pmu: Limit the maximum number of supported Intel GP counters
  KVM: x86/pmu: Do not speculatively query Intel GP PMCs that don't exist yet
  KVM: SVM: Only dump VMSA to klog at KERN_DEBUG level
  tools/kvm_stat: update exit reasons for vmx/svm/aarch64/userspace
  tools/kvm_stat: fix incorrect detection of debugfs
  x86, KVM: remove unnecessary argument to x86_virt_spec_ctrl and callers
  KVM: SVM: move MSR_IA32_SPEC_CTRL save/restore to assembly
  KVM: SVM: restore host save area from assembly
  KVM: SVM: move guest vmsave/vmload back to assembly
  KVM: SVM: do not allocate struct svm_cpu_data dynamically
  KVM: SVM: remove dead field from struct svm_cpu_data
  KVM: SVM: remove unused field from struct vcpu_svm
  KVM: SVM: retrieve VMCB from assembly
  KVM: SVM: adjust register allocation for __svm_vcpu_run()
  KVM: SVM: replace regs argument of __svm_vcpu_run() with vcpu_svm
  KVM: x86: use a separate asm-offsets.c file
  KVM: s390: pci: Fix allocation size of aift kzdev elements
  KVM: s390: pv: don't allow userspace to set the clock under PV
Linus Torvalds committed Nov 11, 2022
2 parents 5be07b3 + 6d3085e commit 74bd160
Showing 23 changed files with 435 additions and 207 deletions.
3 changes: 3 additions & 0 deletions Documentation/virt/kvm/devices/vm.rst
@@ -215,6 +215,7 @@ KVM_S390_VM_TOD_EXT).
 :Parameters: address of a buffer in user space to store the data (u8) to
 :Returns:    -EFAULT if the given address is not accessible from kernel space;
              -EINVAL if setting the TOD clock extension to != 0 is not supported
+             -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor)
 
 3.2. ATTRIBUTE: KVM_S390_VM_TOD_LOW
 -----------------------------------
@@ -224,6 +225,7 @@ the POP (u64).
 
 :Parameters: address of a buffer in user space to store the data (u64) to
 :Returns:    -EFAULT if the given address is not accessible from kernel space
+             -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor)
 
 3.3. ATTRIBUTE: KVM_S390_VM_TOD_EXT
 -----------------------------------
@@ -237,6 +239,7 @@ it, it is stored as 0 and not allowed to be set to a value != 0.
              (kvm_s390_vm_tod_clock) to
 :Returns:    -EFAULT if the given address is not accessible from kernel space;
              -EINVAL if setting the TOD clock extension to != 0 is not supported
+             -EOPNOTSUPP for a PV guest (TOD managed by the ultravisor)
 
 4. GROUP: KVM_S390_VM_CRYPTO
 ============================
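For orientation, a minimal userspace sketch of exercising this attribute (assuming an s390 build and an already-created VM fd; error handling elided, and the helper name set_tod_low is hypothetical):

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Set the low word of the guest TOD clock via KVM_S390_VM_TOD_LOW. */
static int set_tod_low(int vm_fd, uint64_t tod)
{
	struct kvm_device_attr attr = {
		.group = KVM_S390_VM_TOD,
		.attr  = KVM_S390_VM_TOD_LOW,
		.addr  = (uint64_t)&tod,
	};

	/*
	 * With this series applied, a protected (PV) guest fails here with
	 * errno == EOPNOTSUPP, since the TOD is managed by the ultravisor.
	 */
	return ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
}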
26 changes: 17 additions & 9 deletions arch/s390/kvm/kvm-s390.c
@@ -1207,6 +1207,8 @@ static int kvm_s390_vm_get_migration(struct kvm *kvm,
 	return 0;
 }
 
+static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
+
 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 {
 	struct kvm_s390_vm_tod_clock gtod;
@@ -1216,7 +1218,7 @@ static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
 
 	if (!test_kvm_facility(kvm, 139) && gtod.epoch_idx)
 		return -EINVAL;
-	kvm_s390_set_tod_clock(kvm, &gtod);
+	__kvm_s390_set_tod_clock(kvm, &gtod);
 
 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
 		 gtod.epoch_idx, gtod.tod);
@@ -1247,7 +1249,7 @@ static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
 			   sizeof(gtod.tod)))
 		return -EFAULT;
 
-	kvm_s390_set_tod_clock(kvm, &gtod);
+	__kvm_s390_set_tod_clock(kvm, &gtod);
 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod.tod);
 	return 0;
 }
@@ -1259,6 +1261,16 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 	if (attr->flags)
 		return -EINVAL;
 
+	mutex_lock(&kvm->lock);
+	/*
+	 * For protected guests, the TOD is managed by the ultravisor, so trying
+	 * to change it will never bring the expected results.
+	 */
+	if (kvm_s390_pv_is_protected(kvm)) {
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
 	switch (attr->attr) {
 	case KVM_S390_VM_TOD_EXT:
 		ret = kvm_s390_set_tod_ext(kvm, attr);
@@ -1273,6 +1285,9 @@ static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
 		ret = -ENXIO;
 		break;
 	}
+
+out_unlock:
+	mutex_unlock(&kvm->lock);
 	return ret;
 }
 
@@ -4377,13 +4392,6 @@ static void __kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
 	preempt_enable();
 }
 
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
-{
-	mutex_lock(&kvm->lock);
-	__kvm_s390_set_tod_clock(kvm, gtod);
-	mutex_unlock(&kvm->lock);
-}
-
 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
 {
 	if (!mutex_trylock(&kvm->lock))
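The net effect is that the double-underscore helper now assumes kvm->lock is held by the caller, so kvm_s390_set_tod() can take the lock once for both the PV check and the update. A hedged sketch of the resulting convention, mirroring the surviving kvm_s390_try_set_tod_clock() (the helper name try_set_clock is illustrative):

/*
 * Sketch only: __kvm_s390_set_tod_clock() must be called with kvm->lock
 * held; the locked wrapper that used to re-take the mutex is gone.
 */
static int try_set_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod)
{
	if (!mutex_trylock(&kvm->lock))
		return 0;		/* contended: let the caller retry */
	__kvm_s390_set_tod_clock(kvm, gtod);
	mutex_unlock(&kvm->lock);
	return 1;
}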
1 change: 0 additions & 1 deletion arch/s390/kvm/kvm-s390.h
@@ -363,7 +363,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-void kvm_s390_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
 int kvm_s390_try_set_tod_clock(struct kvm *kvm, const struct kvm_s390_vm_tod_clock *gtod);
 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
2 changes: 1 addition & 1 deletion arch/s390/kvm/pci.c
@@ -126,7 +126,7 @@ int kvm_s390_pci_aen_init(u8 nisc)
 		return -EPERM;
 
 	mutex_lock(&aift->aift_lock);
-	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
+	aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev *),
 			      GFP_KERNEL);
 	if (!aift->kzdev) {
 		rc = -ENOMEM;
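The one-character fix above shrinks the allocation from ZPCI_NR_DEVICES structs to ZPCI_NR_DEVICES pointers: kzdev is an array of pointers, so the old size over-allocated (wasteful rather than corrupting, but still wrong). The usual way to make this class of bug impossible is to size off the dereferenced destination. A small hedged sketch, with hypothetical names:

struct item { long payload[32]; };

struct table {
	struct item **slots;	/* array of pointers, not of structs */
};

static int table_init(struct table *t, unsigned int n)
{
	/*
	 * sizeof(*t->slots) is sizeof(struct item *), i.e. pointer-sized,
	 * and stays correct even if the element type changes. Spelling
	 * out sizeof(struct item) here would allocate far more memory
	 * than needed, which is the bug class fixed above.
	 */
	t->slots = kcalloc(n, sizeof(*t->slots), GFP_KERNEL);
	return t->slots ? 0 : -ENOMEM;
}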
7 changes: 6 additions & 1 deletion arch/x86/include/asm/kvm_host.h
@@ -501,7 +501,12 @@ struct kvm_pmc {
 	bool intr;
 };
 
+/* More counters may conflict with other existing Architectural MSRs */
+#define KVM_INTEL_PMC_MAX_GENERIC	8
+#define MSR_ARCH_PERFMON_PERFCTR_MAX	(MSR_ARCH_PERFMON_PERFCTR0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
+#define MSR_ARCH_PERFMON_EVENTSEL_MAX	(MSR_ARCH_PERFMON_EVENTSEL0 + KVM_INTEL_PMC_MAX_GENERIC - 1)
 #define KVM_PMC_MAX_FIXED	3
+#define KVM_AMD_PMC_MAX_GENERIC	6
 struct kvm_pmu {
 	unsigned nr_arch_gp_counters;
 	unsigned nr_arch_fixed_counters;
@@ -516,7 +521,7 @@ struct kvm_pmu {
 	u64 reserved_bits;
 	u64 raw_event_mask;
 	u8 version;
-	struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC];
+	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
 	struct irq_work irq_work;
 	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
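The new cap exists because the perfctr and eventsel MSRs sit at a fixed stride of one, so a larger counter count walks straight into unrelated architectural MSRs. A hedged back-of-the-envelope check (MSR numbers are the standard msr-index.h values; the specific clash shown is one illustrative example):

#define MSR_ARCH_PERFMON_PERFCTR0	0x000000c1	/* counter n's data MSR is 0xc1 + n */
#define MSR_FSB_FREQ			0x000000cd	/* an unrelated architectural MSR */

/* The old INTEL_PMC_MAX_GENERIC of 32 would have collided: 0xc1 + 12 == 0xcd. */
_Static_assert(MSR_ARCH_PERFMON_PERFCTR0 + 12 == MSR_FSB_FREQ,
	       "counter 12's data MSR would alias MSR_FSB_FREQ");

/* The new cap of 8 keeps the last data MSR (0xc8) safely clear of it. */
_Static_assert(MSR_ARCH_PERFMON_PERFCTR0 + 8 - 1 < MSR_FSB_FREQ,
	       "KVM_INTEL_PMC_MAX_GENERIC stays below unrelated MSRs");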
10 changes: 5 additions & 5 deletions arch/x86/include/asm/spec-ctrl.h
@@ -13,7 +13,7 @@
  * Takes the guest view of SPEC_CTRL MSR as a parameter and also
  * the guest's version of VIRT_SPEC_CTRL, if emulated.
  */
-extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+extern void x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool guest);
 
 /**
  * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
@@ -24,9 +24,9 @@ extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
  * Avoids writing to the MSR if the content/bits are the same
  */
 static inline
-void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_set_guest(u64 guest_virt_spec_ctrl)
 {
-	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+	x86_virt_spec_ctrl(guest_virt_spec_ctrl, true);
 }
 
 /**
@@ -38,9 +38,9 @@ void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
  * Avoids writing to the MSR if the content/bits are the same
  */
 static inline
-void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+void x86_spec_ctrl_restore_host(u64 guest_virt_spec_ctrl)
 {
-	x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+	x86_virt_spec_ctrl(guest_virt_spec_ctrl, false);
 }
 
 /* AMD specific Speculative Store Bypass MSR data */
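For orientation, a hedged sketch of how the SVM side is expected to consume the slimmed-down helpers after this series (names like svm_vcpu_enter_exit() and spec_ctrl_intercepted are taken from the shortlog's SVM patches; exact placement may differ). MSR_IA32_SPEC_CTRL itself is now saved/restored in svm/vmenter.S, so only the virtualized SSBD value still goes through C:

	/* In svm_vcpu_run(), around the world switch (sketch): */
	if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
		x86_spec_ctrl_set_guest(svm->virt_spec_ctrl);

	svm_vcpu_enter_exit(vcpu, spec_ctrl_intercepted);	/* VMRUN lives below here */

	if (!static_cpu_has(X86_FEATURE_V_SPEC_CTRL))
		x86_spec_ctrl_restore_host(svm->virt_spec_ctrl);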
6 changes: 0 additions & 6 deletions arch/x86/kernel/asm-offsets.c
@@ -19,7 +19,6 @@
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 #include <asm/tdx.h>
-#include "../kvm/vmx/vmx.h"
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -108,9 +107,4 @@ static void __used common(void)
 	OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
-
-	if (IS_ENABLED(CONFIG_KVM_INTEL)) {
-		BLANK();
-		OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
-	}
 }
15 changes: 4 additions & 11 deletions arch/x86/kernel/cpu/bugs.c
@@ -196,22 +196,15 @@ void __init check_bugs(void)
 }
 
 /*
- * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
- * done in vmenter.S.
+ * NOTE: This function is *only* called for SVM, since Intel uses
+ * MSR_IA32_SPEC_CTRL for SSBD.
  */
 void
-x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+x86_virt_spec_ctrl(u64 guest_virt_spec_ctrl, bool setguest)
 {
-	u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
+	u64 guestval, hostval;
 	struct thread_info *ti = current_thread_info();
 
-	if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
-		if (hostval != guestval) {
-			msrval = setguest ? guestval : hostval;
-			wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
-		}
-	}
-
 	/*
 	 * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
 	 * MSR_AMD64_LS_CFG or MSR_VIRT_SPEC_CTRL if supported.
2 changes: 2 additions & 0 deletions arch/x86/kvm/.gitignore
@@ -0,0 +1,2 @@
+/kvm-asm-offsets.s
+/kvm-asm-offsets.h
12 changes: 12 additions & 0 deletions arch/x86/kvm/Makefile
@@ -34,3 +34,15 @@ endif
 obj-$(CONFIG_KVM)	+= kvm.o
 obj-$(CONFIG_KVM_INTEL)	+= kvm-intel.o
 obj-$(CONFIG_KVM_AMD)	+= kvm-amd.o
+
+AFLAGS_svm/vmenter.o	:= -iquote $(obj)
+$(obj)/svm/vmenter.o: $(obj)/kvm-asm-offsets.h
+
+AFLAGS_vmx/vmenter.o	:= -iquote $(obj)
+$(obj)/vmx/vmenter.o: $(obj)/kvm-asm-offsets.h
+
+$(obj)/kvm-asm-offsets.h: $(obj)/kvm-asm-offsets.s FORCE
+	$(call filechk,offsets,__KVM_ASM_OFFSETS_H__)
+
+targets += kvm-asm-offsets.s
+clean-files += kvm-asm-offsets.h
29 changes: 29 additions & 0 deletions arch/x86/kvm/kvm-asm-offsets.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed to extract
+ * and format the required data.
+ */
+#define COMPILE_OFFSETS
+
+#include <linux/kbuild.h>
+#include "vmx/vmx.h"
+#include "svm/svm.h"
+
+static void __used common(void)
+{
+	if (IS_ENABLED(CONFIG_KVM_AMD)) {
+		BLANK();
+		OFFSET(SVM_vcpu_arch_regs, vcpu_svm, vcpu.arch.regs);
+		OFFSET(SVM_current_vmcb, vcpu_svm, current_vmcb);
+		OFFSET(SVM_spec_ctrl, vcpu_svm, spec_ctrl);
+		OFFSET(SVM_vmcb01, vcpu_svm, vmcb01);
+		OFFSET(KVM_VMCB_pa, kvm_vmcb_info, pa);
+		OFFSET(SD_save_area_pa, svm_cpu_data, save_area_pa);
+	}
+
+	if (IS_ENABLED(CONFIG_KVM_INTEL)) {
+		BLANK();
+		OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
+	}
+}
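BLANK() and OFFSET() come from <linux/kbuild.h>: the file is only ever compiled to assembly (see the Makefile rule above), each OFFSET() plants the member offset as a magic "->" marker string in that output, and the filechk,offsets rule scrapes the markers back into kvm-asm-offsets.h. Roughly, paraphrasing the kbuild.h mechanism and the shape of the result (the numeric offset below is made up):

/* from <linux/kbuild.h>, slightly abridged */
#define DEFINE(sym, val) \
	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))
#define OFFSET(sym, str, mem) \
	DEFINE(sym, offsetof(struct str, mem))

/* kvm-asm-offsets.h then contains plain defines like (value illustrative): */
#define SVM_spec_ctrl 0x2a0 /* offsetof(struct vcpu_svm, spec_ctrl) */

This is what lets svm/vmenter.S and vmx/vmenter.S address vcpu fields symbolically, which the -iquote flags in the Makefile above make visible from the build directory.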
4 changes: 2 additions & 2 deletions arch/x86/kvm/mmu/mmu.c
@@ -6056,7 +6056,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 
 	write_lock(&kvm->mmu_lock);
 
-	kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end);
+	kvm_mmu_invalidate_begin(kvm, 0, -1ul);
 
 	flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
 
@@ -6070,7 +6070,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 		kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
 						   gfn_end - gfn_start);
 
-	kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end);
+	kvm_mmu_invalidate_end(kvm, 0, -1ul);
 
 	write_unlock(&kvm->mmu_lock);
 }
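Context for the 0/-1ul arguments: the window recorded by kvm_mmu_invalidate_begin() is what the page-fault path checks before installing a mapping, but the fault path compares it against host virtual addresses (it normally comes from the mmu notifier) while kvm_zap_gfn_range() was passing gfns, so a precise range could fail to block the faults it needed to. Widening the window to everything conservatively blocks all page faults for the duration of the zap. A hedged sketch of the gate (field names as in struct kvm around this release; the real check lives in the fault-retry helpers):

/*
 * Sketch only: a fault whose address lands inside the in-progress
 * invalidation window must be retried; with begin(kvm, 0, -1ul),
 * every address is "inside", so every fault waits out the zap.
 */
static bool invalidation_blocks_fault(struct kvm *kvm, unsigned long addr)
{
	return kvm->mmu_invalidate_in_progress &&
	       addr >= kvm->mmu_invalidate_range_start &&
	       addr < kvm->mmu_invalidate_range_end;
}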
2 changes: 1 addition & 1 deletion arch/x86/kvm/pmu.c
@@ -56,7 +56,7 @@ static const struct x86_cpu_id vmx_icl_pebs_cpu[] = {
  * code. Each pmc, stored in kvm_pmc.idx field, is unique across
  * all perf counters (both gp and fixed). The mapping relationship
  * between pmc and perf counters is as the following:
- * * Intel: [0 .. INTEL_PMC_MAX_GENERIC-1] <=> gp counters
+ * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
  *          [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
  * * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
  *          and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
7 changes: 4 additions & 3 deletions arch/x86/kvm/svm/pmu.c
@@ -192,9 +192,10 @@ static void amd_pmu_init(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int i;
 
-	BUILD_BUG_ON(AMD64_NUM_COUNTERS_CORE > INTEL_PMC_MAX_GENERIC);
+	BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > AMD64_NUM_COUNTERS_CORE);
+	BUILD_BUG_ON(KVM_AMD_PMC_MAX_GENERIC > INTEL_PMC_MAX_GENERIC);
 
-	for (i = 0; i < AMD64_NUM_COUNTERS_CORE ; i++) {
+	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC ; i++) {
 		pmu->gp_counters[i].type = KVM_PMC_GP;
 		pmu->gp_counters[i].vcpu = vcpu;
 		pmu->gp_counters[i].idx = i;
@@ -207,7 +208,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	int i;
 
-	for (i = 0; i < AMD64_NUM_COUNTERS_CORE; i++) {
+	for (i = 0; i < KVM_AMD_PMC_MAX_GENERIC; i++) {
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
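The loops above index pmu->gp_counters, which is now sized KVM_INTEL_PMC_MAX_GENERIC (8), so AMD's bound of 6 must never exceed it; the BUILD_BUG_ON pair turns any future violation into a compile error rather than an out-of-bounds write. A minimal hedged illustration of the pattern (names hypothetical):

#include <linux/build_bug.h>

#define CAP	8	/* array size, stands in for KVM_INTEL_PMC_MAX_GENERIC */
#define USED	6	/* iteration bound, stands in for KVM_AMD_PMC_MAX_GENERIC */

static int slots[CAP];

static void init_slots(void)
{
	int i;

	BUILD_BUG_ON(USED > CAP);	/* compile-time check, zero runtime cost */
	for (i = 0; i < USED; i++)
		slots[i] = i;
}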
6 changes: 3 additions & 3 deletions arch/x86/kvm/svm/sev.c
@@ -196,7 +196,7 @@ static void sev_asid_free(struct kvm_sev_info *sev)
 	__set_bit(sev->asid, sev_reclaim_asid_bitmap);
 
 	for_each_possible_cpu(cpu) {
-		sd = per_cpu(svm_data, cpu);
+		sd = per_cpu_ptr(&svm_data, cpu);
 		sd->sev_vmcbs[sev->asid] = NULL;
 	}
 
@@ -605,7 +605,7 @@ static int sev_es_sync_vmsa(struct vcpu_svm *svm)
 	save->dr6 = svm->vcpu.arch.dr6;
 
 	pr_debug("Virtual Machine Save Area (VMSA):\n");
-	print_hex_dump(KERN_CONT, "", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
+	print_hex_dump_debug("", DUMP_PREFIX_NONE, 16, 1, save, sizeof(*save), false);
 
 	return 0;
 }
@@ -2600,7 +2600,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 
 void pre_sev_run(struct vcpu_svm *svm, int cpu)
 {
-	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+	struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu);
 	int asid = sev_get_asid(svm->vcpu.kvm);
 
 	/* Assign the asid allocated with this SEV guest */
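A hedged illustration of the accessor change: per the shortlog ("do not allocate struct svm_cpu_data dynamically"), svm_data is no longer a percpu pointer to a dynamic allocation but the percpu object itself, so callers now take its address with per_cpu_ptr() instead of reading a pointer value with per_cpu():

DEFINE_PER_CPU(struct svm_cpu_data, svm_data);

static struct svm_cpu_data *get_svm_data(int cpu)
{
	/* old layout: struct svm_cpu_data *sd = per_cpu(svm_data, cpu);  (pointer value) */
	return per_cpu_ptr(&svm_data, cpu);	/* new: address of this CPU's copy */
}

Making the per-CPU data static is what allows the assembly trampoline to reach it early, before the dynamic allocation would have been set up.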