Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "This is a bit on the large side, mostly due to two changes:

   - Changes to disable some broken PMU virtualization (see below for
     details under "x86 PMU")

   - Clean up SVM's enter/exit assembly code so that it can be compiled
     without OBJECT_FILES_NON_STANDARD. This fixes a warning "Unpatched
     return thunk in use. This should not happen!" when running KVM
     selftests.

  Everything else is small bugfixes and selftest changes:

   - Fix a mostly benign bug in the gfn_to_pfn_cache infrastructure
     where KVM would allow userspace to refresh the cache with a bogus
     GPA. The bug has existed for quite some time, but was exposed by a
     new sanity check added in 6.9 (to ensure a cache is either
     GPA-based or HVA-based).

   - Drop an unused param from gfn_to_pfn_cache_invalidate_start() that
     got left behind during a 6.9 cleanup.

   - Fix a math goof in x86's hugepage logic for
     KVM_SET_MEMORY_ATTRIBUTES that results in an array overflow
     (detected by KASAN).

   - Fix a bug where KVM incorrectly clears root_role.direct when
     userspace sets guest CPUID.

   - Fix a dirty logging bug in the TDP MMU where KVM fails to write-protect
     SPTEs used by a nested guest, if KVM is using Page-Modification
     Logging and the nested hypervisor is NOT using EPT.

  x86 PMU:

   - Drop support for virtualizing adaptive PEBS, as KVM's
     implementation is architecturally broken without an obvious/easy
     path forward, and because exposing adaptive PEBS can leak host LBRs
     to the guest, i.e. can leak host kernel addresses to the guest.

   - Set the enable bits for general purpose counters in
     PERF_GLOBAL_CTRL at RESET time, as done by both Intel and AMD
     processors.

   - Disable LBR virtualization on CPUs that don't support LBR
     callstacks, as KVM unconditionally uses
     PERF_SAMPLE_BRANCH_CALL_STACK when creating the perf event, and
     would fail on such CPUs.

  Tests:

   - Fix a flaw in the max_guest_memory selftest that results in it
     exhausting the supply of ucall structures when run with more than
     256 vCPUs.

   - Mark KVM_MEM_READONLY as supported for RISC-V in
     set_memory_region_test"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  KVM: Drop unused @may_block param from gfn_to_pfn_cache_invalidate_start()
  KVM: selftests: Add coverage of EPT-disabled to vmx_dirty_log_test
  KVM: x86/mmu: Fix and clarify comments about clearing D-bit vs. write-protecting
  KVM: x86/mmu: Remove function comments above clear_dirty_{gfn_range,pt_masked}()
  KVM: x86/mmu: Write-protect L2 SPTEs in TDP MMU when clearing dirty status
  KVM: x86/mmu: Precisely invalidate MMU root_role during CPUID update
  KVM: VMX: Disable LBR virtualization if the CPU doesn't support LBR callstacks
  perf/x86/intel: Expose existence of callback support to KVM
  KVM: VMX: Snapshot LBR capabilities during module initialization
  KVM: x86/pmu: Do not mask LVTPC when handling a PMI on AMD platforms
  KVM: x86: Snapshot if a vCPU's vendor model is AMD vs. Intel compatible
  KVM: x86: Stop compiling vmenter.S with OBJECT_FILES_NON_STANDARD
  KVM: SVM: Create a stack frame in __svm_sev_es_vcpu_run()
  KVM: SVM: Save/restore args across SEV-ES VMRUN via host save area
  KVM: SVM: Save/restore non-volatile GPRs in SEV-ES VMRUN via host save area
  KVM: SVM: Clobber RAX instead of RBX when discarding spec_ctrl_intercepted
  KVM: SVM: Drop 32-bit "support" from __svm_sev_es_vcpu_run()
  KVM: SVM: Wrap __svm_sev_es_vcpu_run() with #ifdef CONFIG_KVM_AMD_SEV
  KVM: SVM: Create a stack frame in __svm_vcpu_run() for unwinding
  KVM: SVM: Remove a useless zeroing of allocated memory
  ...
Linus Torvalds committed Apr 20, 2024
2 parents e43afae + 44ecfa3 commit 8177722
Showing 25 changed files with 267 additions and 159 deletions.
1 change: 1 addition & 0 deletions arch/x86/events/intel/lbr.c
@@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
lbr->from = x86_pmu.lbr_from;
lbr->to = x86_pmu.lbr_to;
lbr->info = x86_pmu.lbr_info;
lbr->has_callstack = x86_pmu_has_lbr_callstack();
}
EXPORT_SYMBOL_GPL(x86_perf_get_lbr);

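The new has_callstack flag is what lets KVM's VMX code (see the "Disable LBR virtualization if the CPU doesn't support LBR callstacks" commit in the list above) stop advertising LBRs when the host PMU cannot back them. A minimal sketch of that consumer-side gating, assuming a module-init snapshot of the LBR capabilities; the names vmx_lbr_caps and vmx_snapshot_lbr_caps are illustrative, not necessarily the upstream ones:

/* Illustrative sketch, not the verbatim upstream change. */
static struct x86_pmu_lbr vmx_lbr_caps;

static void vmx_snapshot_lbr_caps(void)
{
	x86_perf_get_lbr(&vmx_lbr_caps);

	/*
	 * KVM creates the guest LBR perf event with
	 * PERF_SAMPLE_BRANCH_CALL_STACK; without callstack support that
	 * creation fails, so hide LBRs from the guest entirely.
	 */
	if (!vmx_lbr_caps.has_callstack)
		vmx_lbr_caps.nr = 0;
}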
1 change: 1 addition & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -855,6 +855,7 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 *cpuid_entries;
struct kvm_hypervisor_cpuid kvm_cpuid;
bool is_amd_compatible;

/*
* FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly
1 change: 1 addition & 0 deletions arch/x86/include/asm/perf_event.h
@@ -555,6 +555,7 @@ struct x86_pmu_lbr {
unsigned int from;
unsigned int to;
unsigned int info;
bool has_callstack;
};

extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap);
5 changes: 0 additions & 5 deletions arch/x86/kvm/Makefile
@@ -3,11 +3,6 @@
ccflags-y += -I $(srctree)/arch/x86/kvm
ccflags-$(CONFIG_KVM_WERROR) += -Werror

ifeq ($(CONFIG_FRAME_POINTER),y)
OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y
OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y
endif

include $(srctree)/virt/kvm/Makefile.kvm

kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
1 change: 1 addition & 0 deletions arch/x86/kvm/cpuid.c
@@ -376,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)

kvm_update_pv_runtime(vcpu);

vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu);

10 changes: 10 additions & 0 deletions arch/x86/kvm/cpuid.h
@@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu)
return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx);
}

static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu)
{
return vcpu->arch.is_amd_compatible;
}

static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu)
{
return !guest_cpuid_is_amd_compatible(vcpu);
}

static inline int guest_cpuid_family(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
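These helpers return a value that is cached once per KVM_SET_CPUID (see the cpuid.c hunk above) rather than re-deriving the guest vendor on every call. For contrast, a rough sketch of the uncached style of check they replace in hot paths such as the PMI handler and MMU role computation; this is illustrative only, the exact upstream helper is guest_cpuid_is_amd_or_hygon():

/* Illustrative: an uncached vendor check has to walk the CPUID array. */
static bool guest_vendor_is_amd_like(struct kvm_vcpu *vcpu)
{
	struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 0);

	/* String-matches the vendor signature in CPUID leaf 0. */
	return best && (is_guest_vendor_amd(best->ebx, best->ecx, best->edx) ||
			is_guest_vendor_hygon(best->ebx, best->ecx, best->edx));
}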
3 changes: 2 additions & 1 deletion arch/x86/kvm/lapic.c
@@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;

r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL);
if (r && lvt_type == APIC_LVTPC)
if (r && lvt_type == APIC_LVTPC &&
guest_cpuid_is_intel_compatible(apic->vcpu))
kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED);
return r;
}
11 changes: 6 additions & 5 deletions arch/x86/kvm/mmu/mmu.c
@@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu,
context->cpu_role.base.level, is_efer_nx(context),
guest_can_use(vcpu, X86_FEATURE_GBPAGES),
is_cr4_pse(context),
guest_cpuid_is_amd_or_hygon(vcpu));
guest_cpuid_is_amd_compatible(vcpu));
}

static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check,
@@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu)
* that problem is swept under the rug; KVM's CPUID API is horrific and
* it's all but impossible to solve it without introducing a new API.
*/
vcpu->arch.root_mmu.root_role.word = 0;
vcpu->arch.guest_mmu.root_role.word = 0;
vcpu->arch.nested_mmu.root_role.word = 0;
vcpu->arch.root_mmu.root_role.invalid = 1;
vcpu->arch.guest_mmu.root_role.invalid = 1;
vcpu->arch.nested_mmu.root_role.invalid = 1;
vcpu->arch.root_mmu.cpu_role.ext.valid = 0;
vcpu->arch.guest_mmu.cpu_role.ext.valid = 0;
vcpu->arch.nested_mmu.cpu_role.ext.valid = 0;
@@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm,
* by the memslot, KVM can't use a hugepage due to the
* misaligned address regardless of memory attributes.
*/
if (gfn >= slot->base_gfn) {
if (gfn >= slot->base_gfn &&
gfn + nr_pages <= slot->base_gfn + slot->npages) {
if (hugepage_has_attrs(kvm, slot, gfn, level, attrs))
hugepage_clear_mixed(slot, gfn, level);
else
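The extra bound added to kvm_arch_post_set_memory_attributes() above is the fix for the KASAN-detected overflow called out in the pull message: the hugepage-aligned range around a memslot boundary can end past the slot, and using such a gfn to index the slot's per-hugepage metadata walks off the end of the array. A self-contained sketch of the same guard, with hypothetical types and names, just to show why both ends of the range must be checked:

/* Hypothetical standalone illustration of the bounds check. */
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t gfn_t;

struct slot {
	gfn_t base_gfn;
	uint64_t npages;
	unsigned long *hugepage_info;	/* one entry per aligned huge range */
};

static bool huge_range_in_slot(const struct slot *slot, gfn_t gfn, uint64_t nr_pages)
{
	/*
	 * gfn is aligned down and nr_pages rounded up to the hugepage size,
	 * so the range can start before the slot *or* end after it; either
	 * case would index hugepage_info out of bounds.
	 */
	return gfn >= slot->base_gfn &&
	       gfn + nr_pages <= slot->base_gfn + slot->npages;
}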
51 changes: 22 additions & 29 deletions arch/x86/kvm/mmu/tdp_mmu.c
@@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm,
}
}

/*
* Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
* AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
* If AD bits are not enabled, this will require clearing the writable bit on
* each SPTE. Returns true if an SPTE has been changed and the TLBs need to
* be flushed.
*/
static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp)
{
/*
* All TDP MMU shadow pages share the same role as their root, aside
* from level, so it is valid to key off any shadow page to determine if
* write protection is needed for an entire tree.
*/
return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled();
}

static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t start, gfn_t end)
{
u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK;
const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK :
shadow_dirty_mask;
struct tdp_iter iter;
bool spte_set = false;

@@ -1573,7 +1577,7 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;

KVM_MMU_WARN_ON(kvm_ad_enabled() &&
KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));

if (!(iter.old_spte & dbit))
@@ -1590,11 +1594,9 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
}

/*
* Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If
* AD bits are enabled, this will involve clearing the dirty bit on each SPTE.
* If AD bits are not enabled, this will require clearing the writable bit on
* each SPTE. Returns true if an SPTE has been changed and the TLBs need to
* be flushed.
* Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the
* memslot. Returns true if an SPTE has been changed and the TLBs need to be
* flushed.
*/
bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
const struct kvm_memory_slot *slot)
@@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm,
return spte_set;
}

/*
* Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
* set in mask, starting at gfn. The given memslot is expected to contain all
* the GFNs represented by set bits in the mask. If AD bits are enabled,
* clearing the dirty status will involve clearing the dirty bit on each SPTE
* or, if AD bits are not enabled, clearing the writable bit on each SPTE.
*/
static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
gfn_t gfn, unsigned long mask, bool wrprot)
{
u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK :
shadow_dirty_mask;
const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK :
shadow_dirty_mask;
struct tdp_iter iter;

lockdep_assert_held_write(&kvm->mmu_lock);
@@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;

KVM_MMU_WARN_ON(kvm_ad_enabled() &&
KVM_MMU_WARN_ON(dbit == shadow_dirty_mask &&
spte_ad_need_write_protect(iter.old_spte));

if (iter.level > PG_LEVEL_4K ||
@@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
}

/*
* Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is
* set in mask, starting at gfn. The given memslot is expected to contain all
* the GFNs represented by set bits in the mask. If AD bits are enabled,
* clearing the dirty status will involve clearing the dirty bit on each SPTE
* or, if AD bits are not enabled, clearing the writable bit on each SPTE.
* Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for
* which a bit is set in mask, starting at gfn. The given memslot is expected to
* contain all the GFNs represented by set bits in the mask.
*/
void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
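The net effect of the tdp_mmu_need_write_protect() rework is that the choice between clearing the D-bit and clearing the W-bit is keyed off the shadow-page role instead of the global kvm_ad_enabled() check, so L2 SPTEs get write-protected when the L1 hypervisor is not using EPT even though PML and A/D bits are in use for L1. A simplified, self-contained sketch of that per-SPTE decision (hypothetical constants, not the upstream SPTE code):

/* Simplified illustration of clearing dirty state from one SPTE. */
#include <stdbool.h>
#include <stdint.h>

#define PT_WRITABLE_MASK	(1ull << 1)
#define SHADOW_DIRTY_MASK	(1ull << 9)	/* stand-in for shadow_dirty_mask */

static bool clear_dirty_spte(uint64_t *sptep, bool need_write_protect)
{
	/*
	 * Write-protect (clear W) when dirty state must be tracked via write
	 * faults, e.g. for L2 SPTEs when L1 is not using EPT; otherwise just
	 * clear the D-bit and let hardware/PML set it again on the next write.
	 */
	uint64_t bit = need_write_protect ? PT_WRITABLE_MASK : SHADOW_DIRTY_MASK;

	if (!(*sptep & bit))
		return false;	/* nothing changed, no TLB flush needed */

	*sptep &= ~bit;
	return true;		/* caller must flush TLBs */
}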
16 changes: 14 additions & 2 deletions arch/x86/kvm/pmu.c
@@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->pebs_data_cfg_mask = ~0ull;
bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);

if (vcpu->kvm->arch.enable_pmu)
static_call(kvm_x86_pmu_refresh)(vcpu);
if (!vcpu->kvm->arch.enable_pmu)
return;

static_call(kvm_x86_pmu_refresh)(vcpu);

/*
* At RESET, both Intel and AMD CPUs set all enable bits for general
* purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that
* was written for v1 PMUs doesn't unknowingly leave GP counters disabled
* in the global controls). Emulate that behavior when refreshing the
* PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL.
*/
if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters)
pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0);
}

void kvm_pmu_init(struct kvm_vcpu *vcpu)
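As a worked example of the new RESET behavior, assume a guest PMU with six general-purpose counters (a common Intel arrangement): the mask below sets bits 0-5 of the emulated IA32_PERF_GLOBAL_CTRL and leaves the fixed-counter enable bits clear, matching the behavior described in the comment above, so userspace no longer has to program the MSR by hand.

/* Worked example of the GENMASK_ULL() computation used above. */
#define GENMASK_ULL(h, l) \
	((~0ull << (l)) & (~0ull >> (63 - (h))))

/* nr_arch_gp_counters == 6  ->  GENMASK_ULL(5, 0) == 0x3f */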
2 changes: 1 addition & 1 deletion arch/x86/kvm/svm/sev.c
@@ -434,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
/* Avoid using vmalloc for smaller buffers. */
size = npages * sizeof(struct page *);
if (size > PAGE_SIZE)
pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
pages = __vmalloc(size, GFP_KERNEL_ACCOUNT);
else
pages = kmalloc(size, GFP_KERNEL_ACCOUNT);

17 changes: 10 additions & 7 deletions arch/x86/kvm/svm/svm.c
@@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu)
__free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE));
}

static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd)
{
return page_address(sd->save_area) + 0x400;
}

static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* or subsequent vmload of host save area.
*/
vmsave(sd->save_area_pa);
if (sev_es_guest(vcpu->kvm)) {
struct sev_es_save_area *hostsa;
hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400);

sev_es_prepare_switch_to_guest(hostsa);
}
if (sev_es_guest(vcpu->kvm))
sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd));

if (tsc_scaling)
__svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
@@ -4101,14 +4102,16 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)

static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted)
{
struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu);
struct vcpu_svm *svm = to_svm(vcpu);

guest_state_enter_irqoff();

amd_clear_divider();

if (sev_es_guest(vcpu->kvm))
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted,
sev_es_host_save_area(sd));
else
__svm_vcpu_run(svm, spec_ctrl_intercepted);

3 changes: 2 additions & 1 deletion arch/x86/kvm/svm/svm.h
@@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);

/* vmenter.S */

void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted,
struct sev_es_save_area *hostsa);
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);

#define DEFINE_KVM_GHCB_ACCESSORS(field) \
