Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Rework heuristics for resolving the fault IPA (HPFAR_EL2 v. re-walk
     stage-1 page tables) to align with the architecture. This avoids
     possibly taking an SEA at EL2 on the page table walk or using an
     architecturally UNKNOWN fault IPA

   - Use acquire/release semantics in the KVM FF-A proxy to avoid
     reading a stale value for the FF-A version

   - Fix KVM guest driver to match PV CPUID hypercall ABI

   - Use Inner Shareable Normal Write-Back mappings at stage-1 in KVM
     selftests, which is the only memory type for which atomic
     instructions are architecturally guaranteed to work

  s390:

   - Don't use %pK for debug printing and tracepoints

  x86:

   - Use a separate subclass when acquiring KVM's per-CPU posted
     interrupts wakeup lock in the scheduled-out path, i.e. when adding
     a vCPU to the list of vCPUs to wake, to work around a false
     positive deadlock. The scheduled-out path runs with a scheduler
     lock that the wakeup handler takes in the opposite order; but it
     does so with IRQs disabled and so cannot run concurrently with a
     wakeup (see the sketch following this message)

   - Explicitly zero-initialize on-stack CPUID unions

   - Allow building irqbypass.ko as a module when kvm.ko is a module

   - Wrap a relatively expensive sanity check with KVM_PROVE_MMU

   - Acquire SRCU in KVM_GET_MP_STATE to protect guest memory accesses

  selftests:

   - Add more scenarios to the MONITOR/MWAIT test

   - Add option to rseq test to override /dev/cpu_dma_latency

   - Bring list of exit reasons up to date

   - Clean up the Makefile to list tests that are valid on all
     architectures only once

  Other:

   - Documentation fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (26 commits)
  KVM: arm64: Use acquire/release to communicate FF-A version negotiation
  KVM: arm64: selftests: Explicitly set the page attrs to Inner-Shareable
  KVM: arm64: selftests: Introduce and use hardware-definition macros
  KVM: VMX: Use separate subclasses for PI wakeup lock to squash false positive
  KVM: VMX: Assert that IRQs are disabled when putting vCPU on PI wakeup list
  KVM: x86: Explicitly zero-initialize on-stack CPUID unions
  KVM: Allow building irqbypass.ko as a module when kvm.ko is a module
  KVM: x86/mmu: Wrap sanity check on number of TDP MMU pages with KVM_PROVE_MMU
  KVM: selftests: Add option to rseq test to override /dev/cpu_dma_latency
  KVM: x86: Acquire SRCU in KVM_GET_MP_STATE to protect guest memory accesses
  Documentation: kvm: remove KVM_CAP_MIPS_TE
  Documentation: kvm: organize capabilities in the right section
  Documentation: kvm: fix some definition lists
  Documentation: kvm: drop "Capability" heading from capabilities
  Documentation: kvm: give correct name for KVM_CAP_SPAPR_MULTITCE
  Documentation: KVM: KVM_GET_SUPPORTED_CPUID now exposes TSC_DEADLINE
  selftests: kvm: list once tests that are valid on all architectures
  selftests: kvm: bring list of exit reasons up to date
  selftests: kvm: revamp MONITOR/MWAIT tests
  KVM: arm64: Don't translate FAR if invalid/unsafe
  ...
Linus Torvalds committed Apr 8, 2025
2 parents bec7dcb + c478032 commit 0e88632
Showing 29 changed files with 785 additions and 607 deletions.
789 changes: 383 additions & 406 deletions Documentation/virt/kvm/api.rst

Large diffs are not rendered by default.

44 changes: 42 additions & 2 deletions arch/arm64/include/asm/esr.h
@@ -121,6 +121,15 @@
 #define ESR_ELx_FSC_SEA_TTW(n)	(0x14 + (n))
 #define ESR_ELx_FSC_SECC	(0x18)
 #define ESR_ELx_FSC_SECC_TTW(n)	(0x1c + (n))
+#define ESR_ELx_FSC_ADDRSZ	(0x00)
+
+/*
+ * Annoyingly, the negative levels for Address size faults aren't laid out
+ * contiguously (or in the desired order)
+ */
+#define ESR_ELx_FSC_ADDRSZ_nL(n)	((n) == -1 ? 0x25 : 0x2C)
+#define ESR_ELx_FSC_ADDRSZ_L(n)	((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
+					 (ESR_ELx_FSC_ADDRSZ + (n)))
 
 /* Status codes for individual page table levels */
 #define ESR_ELx_FSC_ACCESS_L(n)	(ESR_ELx_FSC_ACCESS + (n))
@@ -161,8 +170,6 @@
 #define ESR_ELx_Xs_MASK		(GENMASK_ULL(4, 0))
 
 /* ISS field definitions for exceptions taken in to Hyp */
-#define ESR_ELx_FSC_ADDRSZ	(0x00)
-#define ESR_ELx_FSC_ADDRSZ_L(n)	(ESR_ELx_FSC_ADDRSZ + (n))
 #define ESR_ELx_CV		(UL(1) << 24)
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -464,6 +471,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
 	       (esr == ESR_ELx_FSC_ACCESS_L(0));
 }
 
+static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
+{
+	esr &= ESR_ELx_FSC;
+
+	return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) ||
+	       (esr == ESR_ELx_FSC_ADDRSZ_L(2)) ||
+	       (esr == ESR_ELx_FSC_ADDRSZ_L(1)) ||
+	       (esr == ESR_ELx_FSC_ADDRSZ_L(0)) ||
+	       (esr == ESR_ELx_FSC_ADDRSZ_L(-1));
+}
+
+static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
+{
+	esr = esr & ESR_ELx_FSC;
+
+	return (esr == ESR_ELx_FSC_SEA_TTW(3)) ||
+	       (esr == ESR_ELx_FSC_SEA_TTW(2)) ||
+	       (esr == ESR_ELx_FSC_SEA_TTW(1)) ||
+	       (esr == ESR_ELx_FSC_SEA_TTW(0)) ||
+	       (esr == ESR_ELx_FSC_SEA_TTW(-1));
+}
+
+static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
+{
+	esr = esr & ESR_ELx_FSC;
+
+	return (esr == ESR_ELx_FSC_SECC_TTW(3)) ||
+	       (esr == ESR_ELx_FSC_SECC_TTW(2)) ||
+	       (esr == ESR_ELx_FSC_SECC_TTW(1)) ||
+	       (esr == ESR_ELx_FSC_SECC_TTW(0)) ||
+	       (esr == ESR_ELx_FSC_SECC_TTW(-1));
+}
+
 /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
 static inline bool esr_iss_is_eretax(unsigned long esr)
 {
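The new encodings are easy to sanity-check outside the kernel. A small standalone program — macros copied from the hunk above, with the expected values following the ESR_ELx.FSC encodings (level -1 exists with FEAT_LPA2):

#include <stdio.h>

#define ESR_ELx_FSC_ADDRSZ		(0x00)
#define ESR_ELx_FSC_ADDRSZ_nL(n)	((n) == -1 ? 0x25 : 0x2C)
#define ESR_ELx_FSC_ADDRSZ_L(n)		((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
					 (ESR_ELx_FSC_ADDRSZ + (n)))
#define ESR_ELx_FSC_SEA_TTW(n)		(0x14 + (n))

int main(void)
{
	/* Levels 0..3 are contiguous from 0x00; level -1 sits at 0x25. */
	for (int n = -1; n <= 3; n++)
		printf("Address size fault, level %2d -> FSC 0x%02x\n",
		       n, ESR_ELx_FSC_ADDRSZ_L(n));

	/* SEA on table walk, level -1: 0x14 + (-1) = 0x13 */
	printf("SEA on TTW, level -1 -> FSC 0x%02x\n", ESR_ELx_FSC_SEA_TTW(-1));
	return 0;
}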
7 changes: 6 additions & 1 deletion arch/arm64/include/asm/kvm_emulate.h
@@ -305,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc
 
 static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
 {
-	return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8;
+	u64 hpfar = vcpu->arch.fault.hpfar_el2;
+
+	if (unlikely(!(hpfar & HPFAR_EL2_NS)))
+		return INVALID_GPA;
+
+	return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;
 }
 
 static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
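The rewritten accessor folds validity and extraction together: bit 63 (the hijacked NS bit, see the fault.h hunk below) marks the value as valid, and HPFAR_EL2.FIPA (bits 47:4) carries IPA[55:12]. A standalone re-creation of what FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12 computes — FIELD_GET itself comes from include/linux/bitfield.h; here the mask and shift are written out by hand:

#include <stdio.h>
#include <stdint.h>

#define HPFAR_EL2_NS		(1ULL << 63)
#define HPFAR_FIPA_SHIFT	4
#define HPFAR_FIPA_MASK		(((1ULL << 44) - 1) << HPFAR_FIPA_SHIFT) /* bits 47:4 */

int main(void)
{
	/* Example value: FIPA = 0x12345, NS set to mark it valid. */
	uint64_t hpfar = HPFAR_EL2_NS | 0x123450;
	uint64_t fipa  = (hpfar & HPFAR_FIPA_MASK) >> HPFAR_FIPA_SHIFT;

	/* IPA[55:12] = FIPA, so the fault IPA is FIPA << 12. */
	printf("fault IPA = 0x%llx\n", (unsigned long long)(fipa << 12));
	return 0;	/* prints 0x12345000 */
}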
2 changes: 1 addition & 1 deletion arch/arm64/include/asm/kvm_ras.h
@@ -14,7 +14,7 @@
  * Was this synchronous external abort a RAS notification?
  * Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
  */
-static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
+static inline int kvm_handle_guest_sea(void)
 {
 	/* apei_claim_sea(NULL) expects to mask interrupts itself */
 	lockdep_assert_irqs_enabled();
70 changes: 48 additions & 22 deletions arch/arm64/kvm/hyp/include/hyp/fault.h
@@ -12,6 +12,16 @@
 #include <asm/kvm_hyp.h>
 #include <asm/kvm_mmu.h>
 
+static inline bool __fault_safe_to_translate(u64 esr)
+{
+	u64 fsc = esr & ESR_ELx_FSC;
+
+	if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr))
+		return false;
+
+	return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV));
+}
+
 static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
 {
 	int ret;
@@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
 	return true;
 }
 
-static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+/*
+ * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR.
+ */
+static inline bool __hpfar_valid(u64 esr)
 {
-	u64 hpfar, far;
-
-	far = read_sysreg_el2(SYS_FAR);
-
 	/*
-	 * The HPFAR can be invalid if the stage 2 fault did not
-	 * happen during a stage 1 page table walk (the ESR_EL2.S1PTW
-	 * bit is clear) and one of the two following cases are true:
-	 * 1. The fault was due to a permission fault
-	 * 2. The processor carries errata 834220
+	 * CPUs affected by ARM erratum #834220 may incorrectly report a
+	 * stage-2 translation fault when a stage-1 permission fault occurs.
 	 *
-	 * Therefore, for all non S1PTW faults where we either have a
-	 * permission fault or the errata workaround is enabled, we
-	 * resolve the IPA using the AT instruction.
+	 * Re-walk the page tables to determine if a stage-1 fault actually
+	 * occurred.
 	 */
-	if (!(esr & ESR_ELx_S1PTW) &&
-	    (cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
-	     esr_fsc_is_permission_fault(esr))) {
-		if (!__translate_far_to_hpfar(far, &hpfar))
-			return false;
-	} else {
+	if (cpus_have_final_cap(ARM64_WORKAROUND_834220) &&
+	    esr_fsc_is_translation_fault(esr))
+		return false;
+
+	if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr))
+		return true;
+
+	if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr))
+		return true;
+
+	return esr_fsc_is_addr_sz_fault(esr);
+}
+
+static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
+{
+	u64 hpfar;
+
+	fault->far_el2 = read_sysreg_el2(SYS_FAR);
+	fault->hpfar_el2 = 0;
+
+	if (__hpfar_valid(esr))
 		hpfar = read_sysreg(hpfar_el2);
-	}
+	else if (unlikely(!__fault_safe_to_translate(esr)))
+		return true;
+	else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar))
+		return false;
 
-	fault->far_el2 = far;
-	fault->hpfar_el2 = hpfar;
+	/*
+	 * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid
+	 * HPFAR value.
+	 */
+	fault->hpfar_el2 = hpfar | HPFAR_EL2_NS;
 	return true;
 }
 
9 changes: 5 additions & 4 deletions arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
 		hyp_ffa_version = ffa_req_version;
 	}
 
-	if (hyp_ffa_post_init())
+	if (hyp_ffa_post_init()) {
 		res->a0 = FFA_RET_NOT_SUPPORTED;
-	else {
-		has_version_negotiated = true;
+	} else {
+		smp_store_release(&has_version_negotiated, true);
 		res->a0 = hyp_ffa_version;
 	}
 unlock:
@@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
 	if (!is_ffa_call(func_id))
 		return false;
 
-	if (!has_version_negotiated && func_id != FFA_VERSION) {
+	if (func_id != FFA_VERSION &&
+	    !smp_load_acquire(&has_version_negotiated)) {
 		ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS);
 		goto out_handled;
 	}
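The two hunks pair a release store with an acquire load: the write to hyp_ffa_version must be visible to any CPU that observes has_version_negotiated as true. A C11 userspace analogue of the smp_store_release()/smp_load_acquire() pattern (illustrative names; single-threaded driver just to exercise the functions):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static int hyp_ffa_version;
static atomic_bool has_version_negotiated;

static void negotiate(int version)
{
	hyp_ffa_version = version;		/* plain store */
	/* Release: orders the version store before the flag becomes true. */
	atomic_store_explicit(&has_version_negotiated, true,
			      memory_order_release);
}

static bool try_read_version(int *version)
{
	/* Acquire: observing true guarantees the version store is visible. */
	if (!atomic_load_explicit(&has_version_negotiated,
				  memory_order_acquire))
		return false;
	*version = hyp_ffa_version;
	return true;
}

int main(void)
{
	int v;

	negotiate(0x10001);
	if (try_read_version(&v))
		printf("negotiated FF-A version %#x\n", v);
	return 0;
}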
9 changes: 8 additions & 1 deletion arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
 		return;
 	}
 
-	addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;
+
+	/*
+	 * Yikes, we couldn't resolve the fault IPA. This should reinject an
+	 * abort into the host when we figure out how to do that.
+	 */
+	BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
+	addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;
 
 	ret = host_stage2_idmap(addr);
 	BUG_ON(ret && ret != -EAGAIN);
 }
31 changes: 19 additions & 12 deletions arch/arm64/kvm/mmu.c
@@ -1794,9 +1794,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	gfn_t gfn;
 	int ret, idx;
 
+	/* Synchronous External Abort? */
+	if (kvm_vcpu_abt_issea(vcpu)) {
+		/*
+		 * For RAS the host kernel may handle this abort.
+		 * There is no need to pass the error into the guest.
+		 */
+		if (kvm_handle_guest_sea())
+			kvm_inject_vabt(vcpu);
+
+		return 1;
+	}
+
 	esr = kvm_vcpu_get_esr(vcpu);
 
+	/*
+	 * The fault IPA should be reliable at this point as we're not dealing
+	 * with an SEA.
+	 */
 	ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
+	if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm))
+		return -EFAULT;
+
 	is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
 
 	if (esr_fsc_is_translation_fault(esr)) {
@@ -1818,18 +1837,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 		}
 	}
 
-	/* Synchronous External Abort? */
-	if (kvm_vcpu_abt_issea(vcpu)) {
-		/*
-		 * For RAS the host kernel may handle this abort.
-		 * There is no need to pass the error into the guest.
-		 */
-		if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
-			kvm_inject_vabt(vcpu);
-
-		return 1;
-	}
-
 	trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
 			      kvm_vcpu_get_hfar(vcpu), fault_ipa);
 
7 changes: 7 additions & 0 deletions arch/arm64/tools/sysreg
@@ -3536,3 +3536,10 @@ Field	5	F
 Field	4	P
 Field	3:0	Align
 EndSysreg
+
+Sysreg	HPFAR_EL2	3	4	6	0	4
+Field	63	NS
+Res0	62:48
+Field	47:4	FIPA
+Res0	3:0
+EndSysreg
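At build time arch/arm64/tools/gen-sysreg.awk turns this table into C defines; for HPFAR_EL2 the generated header should provide mask-valued field macros along these lines (an assumption about the exact output; the generator may spell the masks with GENMASK() rather than GENMASK_ULL()):

/* From the generated asm/sysreg-defs.h, approximately: */
#define HPFAR_EL2_NS	GENMASK_ULL(63, 63)	/* bit 63 */
#define HPFAR_EL2_FIPA	GENMASK_ULL(47, 4)	/* bits 47:4 */

These mask-valued macros are what let the code above write FIELD_GET(HPFAR_EL2_FIPA, hpfar) and test hpfar & HPFAR_EL2_NS without hand-rolled shifts.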
2 changes: 1 addition & 1 deletion arch/s390/kvm/intercept.c
@@ -95,7 +95,7 @@ static int handle_validity(struct kvm_vcpu *vcpu)
 
 	vcpu->stat.exit_validity++;
 	trace_kvm_s390_intercept_validity(vcpu, viwhy);
-	KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy,
+	KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%p)", viwhy,
 		  current->pid, vcpu->kvm);
 
 	/* do not warn on invalid runtime instrumentation mode */
8 changes: 4 additions & 4 deletions arch/s390/kvm/interrupt.c
@@ -3161,7 +3161,7 @@ void kvm_s390_gisa_clear(struct kvm *kvm)
 	if (!gi->origin)
 		return;
 	gisa_clear_ipm(gi->origin);
-	VM_EVENT(kvm, 3, "gisa 0x%pK cleared", gi->origin);
+	VM_EVENT(kvm, 3, "gisa 0x%p cleared", gi->origin);
 }
 
 void kvm_s390_gisa_init(struct kvm *kvm)
@@ -3177,7 +3177,7 @@ void kvm_s390_gisa_init(struct kvm *kvm)
 	hrtimer_setup(&gi->timer, gisa_vcpu_kicker, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	memset(gi->origin, 0, sizeof(struct kvm_s390_gisa));
 	gi->origin->next_alert = (u32)virt_to_phys(gi->origin);
-	VM_EVENT(kvm, 3, "gisa 0x%pK initialized", gi->origin);
+	VM_EVENT(kvm, 3, "gisa 0x%p initialized", gi->origin);
 }
 
 void kvm_s390_gisa_enable(struct kvm *kvm)
@@ -3218,7 +3218,7 @@ void kvm_s390_gisa_destroy(struct kvm *kvm)
 		process_gib_alert_list();
 	hrtimer_cancel(&gi->timer);
 	gi->origin = NULL;
-	VM_EVENT(kvm, 3, "gisa 0x%pK destroyed", gisa);
+	VM_EVENT(kvm, 3, "gisa 0x%p destroyed", gisa);
 }
 
 void kvm_s390_gisa_disable(struct kvm *kvm)
@@ -3467,7 +3467,7 @@ int __init kvm_s390_gib_init(u8 nisc)
 		}
 	}
 
-	KVM_EVENT(3, "gib 0x%pK (nisc=%d) initialized", gib, gib->nisc);
+	KVM_EVENT(3, "gib 0x%p (nisc=%d) initialized", gib, gib->nisc);
 	goto out;
 
 out_unreg_gal:
10 changes: 5 additions & 5 deletions arch/s390/kvm/kvm-s390.c
@@ -1022,7 +1022,7 @@ static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *att
 		}
 		mutex_unlock(&kvm->lock);
 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
-		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
+		VM_EVENT(kvm, 3, "New guest asce: 0x%p",
			 (void *) kvm->arch.gmap->asce);
 		break;
 	}
@@ -3466,7 +3466,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_s390_gisa_init(kvm);
 	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
 	kvm->arch.pv.set_aside = NULL;
-	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
+	KVM_EVENT(3, "vm 0x%p created by pid %u", kvm, current->pid);
 
 	return 0;
 out_err:
@@ -3529,7 +3529,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	kvm_s390_destroy_adapters(kvm);
 	kvm_s390_clear_float_irqs(kvm);
 	kvm_s390_vsie_destroy(kvm);
-	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
+	KVM_EVENT(3, "vm 0x%p destroyed", kvm);
 }
 
 /* Section: vcpu related */
@@ -3650,7 +3650,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
 
 	free_page((unsigned long)old_sca);
 
-	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
+	VM_EVENT(kvm, 2, "Switched to ESCA (0x%p -> 0x%p)",
		 old_sca, kvm->arch.sca);
 	return 0;
 }
@@ -4027,7 +4027,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		goto out_free_sie_block;
 	}
 
-	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK",
+	VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
		 vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
 	trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
 
4 changes: 2 additions & 2 deletions arch/s390/kvm/trace-s390.h
@@ -56,7 +56,7 @@ TRACE_EVENT(kvm_s390_create_vcpu,
 		__entry->sie_block = sie_block;
 		),
 
-	    TP_printk("create cpu %d at 0x%pK, sie block at 0x%pK",
+	    TP_printk("create cpu %d at 0x%p, sie block at 0x%p",
		      __entry->id, __entry->vcpu, __entry->sie_block)
 	);
 
@@ -255,7 +255,7 @@ TRACE_EVENT(kvm_s390_enable_css,
 		__entry->kvm = kvm;
 		),
 
-	    TP_printk("enabling channel I/O support (kvm @ %pK)\n",
+	    TP_printk("enabling channel I/O support (kvm @ %p)\n",
		      __entry->kvm)
 	);
 
7 changes: 6 additions & 1 deletion arch/x86/include/asm/kvm_host.h
@@ -1472,8 +1472,13 @@ struct kvm_arch {
 	struct once nx_once;
 
 #ifdef CONFIG_X86_64
-	/* The number of TDP MMU pages across all roots. */
+#ifdef CONFIG_KVM_PROVE_MMU
+	/*
+	 * The number of TDP MMU pages across all roots. Used only to sanity
+	 * check that KVM isn't leaking TDP MMU pages.
+	 */
 	atomic64_t tdp_mmu_pages;
+#endif
 
 	/*
 	 * List of struct kvm_mmu_pages being used as roots.
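With the counter compiled out unless CONFIG_KVM_PROVE_MMU=y, every increment and decrement site has to be guarded the same way. A sketch of the resulting pattern — helper names here are assumptions for illustration; the real guarded sites live in arch/x86/kvm/mmu/tdp_mmu.c:

/* Sketch: accounting compiles to nothing without CONFIG_KVM_PROVE_MMU. */
static inline void example_account_tdp_mmu_page(struct kvm *kvm)
{
#ifdef CONFIG_KVM_PROVE_MMU
	atomic64_inc(&kvm->arch.tdp_mmu_pages);
#endif
}

static inline void example_unaccount_tdp_mmu_page(struct kvm *kvm)
{
#ifdef CONFIG_KVM_PROVE_MMU
	atomic64_dec(&kvm->arch.tdp_mmu_pages);
#endif
}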