Skip to content

Commit

Permalink
Merge tag 'kvmarm-fixes-6.15-1' of https://git.kernel.org/pub/scm/lin…
Browse files Browse the repository at this point in the history
…ux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64: First batch of fixes for 6.15

 - Rework heuristics for resolving the fault IPA (HPFAR_EL2 v. re-walk
   stage-1 page tables) to align with the architecture. This avoids
   possibly taking an SEA at EL2 on the page table walk or using an
   architecturally UNKNOWN fault IPA.

 - Use acquire/release semantics in the KVM FF-A proxy to avoid reading
   a stale value for the FF-A version.

 - Fix KVM guest driver to match PV CPUID hypercall ABI.

 - Use Inner Shareable Normal Write-Back mappings at stage-1 in KVM
   selftests, which is the only memory type for which atomic
   instructions are architecturally guaranteed to work.
  • Loading branch information
Paolo Bonzini committed Apr 8, 2025
2 parents c77eee5 + a344e25 commit c478032
Show file tree
Hide file tree
Showing 12 changed files with 234 additions and 78 deletions.
44 changes: 42 additions & 2 deletions arch/arm64/include/asm/esr.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,15 @@
#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n))
#define ESR_ELx_FSC_SECC (0x18)
#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n))
#define ESR_ELx_FSC_ADDRSZ (0x00)

/*
* Annoyingly, the negative levels for Address size faults aren't laid out
* contiguously (or in the desired order)
*/
#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C)
#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \
(ESR_ELx_FSC_ADDRSZ + (n)))

/* Status codes for individual page table levels */
#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n))
Expand Down Expand Up @@ -161,8 +170,6 @@
#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))

/* ISS field definitions for exceptions taken in to Hyp */
#define ESR_ELx_FSC_ADDRSZ (0x00)
#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n))
#define ESR_ELx_CV (UL(1) << 24)
#define ESR_ELx_COND_SHIFT (20)
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
Expand Down Expand Up @@ -464,6 +471,39 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr)
(esr == ESR_ELx_FSC_ACCESS_L(0));
}

static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr)
{
esr &= ESR_ELx_FSC;

return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) ||
(esr == ESR_ELx_FSC_ADDRSZ_L(2)) ||
(esr == ESR_ELx_FSC_ADDRSZ_L(1)) ||
(esr == ESR_ELx_FSC_ADDRSZ_L(0)) ||
(esr == ESR_ELx_FSC_ADDRSZ_L(-1));
}

static inline bool esr_fsc_is_sea_ttw(unsigned long esr)
{
esr = esr & ESR_ELx_FSC;

return (esr == ESR_ELx_FSC_SEA_TTW(3)) ||
(esr == ESR_ELx_FSC_SEA_TTW(2)) ||
(esr == ESR_ELx_FSC_SEA_TTW(1)) ||
(esr == ESR_ELx_FSC_SEA_TTW(0)) ||
(esr == ESR_ELx_FSC_SEA_TTW(-1));
}

static inline bool esr_fsc_is_secc_ttw(unsigned long esr)
{
esr = esr & ESR_ELx_FSC;

return (esr == ESR_ELx_FSC_SECC_TTW(3)) ||
(esr == ESR_ELx_FSC_SECC_TTW(2)) ||
(esr == ESR_ELx_FSC_SECC_TTW(1)) ||
(esr == ESR_ELx_FSC_SECC_TTW(0)) ||
(esr == ESR_ELx_FSC_SECC_TTW(-1));
}

/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */
static inline bool esr_iss_is_eretax(unsigned long esr)
{
Expand Down
7 changes: 6 additions & 1 deletion arch/arm64/include/asm/kvm_emulate.h
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,12 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc

static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu)
{
	u64 hpfar = vcpu->arch.fault.hpfar_el2;

	/*
	 * HPFAR_EL2.NS is repurposed by the fault path as a "value is
	 * valid" flag; a captured HPFAR always has it set.  FIPA holds
	 * bits [51:12] of the faulting IPA.
	 */
	if (likely(hpfar & HPFAR_EL2_NS))
		return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12;

	return INVALID_GPA;
}

static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu)
Expand Down
2 changes: 1 addition & 1 deletion arch/arm64/include/asm/kvm_ras.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* Was this synchronous external abort a RAS notification?
* Returns '0' for errors handled by some RAS subsystem, or -ENOENT.
*/
static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr)
static inline int kvm_handle_guest_sea(void)
{
/* apei_claim_sea(NULL) expects to mask interrupts itself */
lockdep_assert_irqs_enabled();
Expand Down
70 changes: 48 additions & 22 deletions arch/arm64/kvm/hyp/include/hyp/fault.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@
#include <asm/kvm_hyp.h>
#include <asm/kvm_mmu.h>

/*
 * Decide whether it is safe to resolve the fault IPA with an AT
 * instruction (i.e. by re-walking the stage-1 tables).
 */
static inline bool __fault_safe_to_translate(u64 esr)
{
	/* An external abort on the stage-1 walk itself: don't re-walk */
	if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr))
		return false;

	/* External abort with an invalid FAR: nothing sane to translate */
	if ((esr & ESR_ELx_FSC) == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV))
		return false;

	return true;
}

static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
{
int ret;
Expand Down Expand Up @@ -44,34 +54,50 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar)
return true;
}

/*
 * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR.
 */
static inline bool __hpfar_valid(u64 esr)
{
	/*
	 * CPUs affected by ARM erratum #834220 may incorrectly report a
	 * stage-2 translation fault when a stage-1 permission fault occurs.
	 *
	 * Re-walk the page tables to determine if a stage-1 fault actually
	 * occurred.
	 */
	if (cpus_have_final_cap(ARM64_WORKAROUND_834220) &&
	    esr_fsc_is_translation_fault(esr))
		return false;

	/* Translation and access-flag faults always report the IPA */
	if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr))
		return true;

	/* Permission faults taken on a stage-1 walk also report it */
	if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr))
		return true;

	return esr_fsc_is_addr_sz_fault(esr);
}

/*
 * Capture the fault context (FAR_EL2, HPFAR_EL2) for a guest abort.
 *
 * Returns true when the fault info has been captured (a zero
 * fault->hpfar_el2, i.e. NS bit clear, indicates the IPA could not be
 * resolved), false when the vCPU must replay the fault because the
 * stage-1 re-walk raced with another CPU updating the guest's tables.
 */
static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
{
	u64 hpfar;

	fault->far_el2 = read_sysreg_el2(SYS_FAR);
	fault->hpfar_el2 = 0;

	if (__hpfar_valid(esr))
		hpfar = read_sysreg(hpfar_el2);
	else if (unlikely(!__fault_safe_to_translate(esr)))
		return true;
	else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar))
		return false;

	/*
	 * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid
	 * HPFAR value.
	 */
	fault->hpfar_el2 = hpfar | HPFAR_EL2_NS;
	return true;
}

Expand Down
9 changes: 5 additions & 4 deletions arch/arm64/kvm/hyp/nvhe/ffa.c
Original file line number Diff line number Diff line change
Expand Up @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res,
hyp_ffa_version = ffa_req_version;
}

if (hyp_ffa_post_init())
if (hyp_ffa_post_init()) {
res->a0 = FFA_RET_NOT_SUPPORTED;
else {
has_version_negotiated = true;
} else {
smp_store_release(&has_version_negotiated, true);
res->a0 = hyp_ffa_version;
}
unlock:
Expand Down Expand Up @@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id)
if (!is_ffa_call(func_id))
return false;

if (!has_version_negotiated && func_id != FFA_VERSION) {
if (func_id != FFA_VERSION &&
!smp_load_acquire(&has_version_negotiated)) {
ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS);
goto out_handled;
}
Expand Down
9 changes: 8 additions & 1 deletion arch/arm64/kvm/hyp/nvhe/mem_protect.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
return;
}

addr = (fault.hpfar_el2 & HPFAR_MASK) << 8;

/*
* Yikes, we couldn't resolve the fault IPA. This should reinject an
* abort into the host when we figure out how to do that.
*/
BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS));
addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12;

ret = host_stage2_idmap(addr);
BUG_ON(ret && ret != -EAGAIN);
}
Expand Down
31 changes: 19 additions & 12 deletions arch/arm64/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1794,9 +1794,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
gfn_t gfn;
int ret, idx;

/* Synchronous External Abort? */
if (kvm_vcpu_abt_issea(vcpu)) {
/*
* For RAS the host kernel may handle this abort.
* There is no need to pass the error into the guest.
*/
if (kvm_handle_guest_sea())
kvm_inject_vabt(vcpu);

return 1;
}

esr = kvm_vcpu_get_esr(vcpu);

/*
* The fault IPA should be reliable at this point as we're not dealing
* with an SEA.
*/
ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm))
return -EFAULT;

is_iabt = kvm_vcpu_trap_is_iabt(vcpu);

if (esr_fsc_is_translation_fault(esr)) {
Expand All @@ -1818,18 +1837,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
}
}

/* Synchronous External Abort? */
if (kvm_vcpu_abt_issea(vcpu)) {
/*
* For RAS the host kernel may handle this abort.
* There is no need to pass the error into the guest.
*/
if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu)))
kvm_inject_vabt(vcpu);

return 1;
}

trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu),
kvm_vcpu_get_hfar(vcpu), fault_ipa);

Expand Down
7 changes: 7 additions & 0 deletions arch/arm64/tools/sysreg
Original file line number Diff line number Diff line change
Expand Up @@ -3433,3 +3433,10 @@ Field 5 F
Field 4 P
Field 3:0 Align
EndSysreg

Sysreg HPFAR_EL2 3 4 6 0 4
Field 63 NS
Res0 62:48
Field 47:4 FIPA
Res0 3:0
EndSysreg
4 changes: 2 additions & 2 deletions drivers/firmware/smccc/kvm_guest.c
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,15 @@ void __init kvm_arm_target_impl_cpu_init(void)

for (i = 0; i < max_cpus; i++) {
arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID,
i, &res);
i, 0, 0, &res);
if (res.a0 != SMCCC_RET_SUCCESS) {
pr_warn("Discovering target implementation CPUs failed\n");
goto mem_free;
}
target[i].midr = res.a1;
target[i].revidr = res.a2;
target[i].aidr = res.a3;
};
}

if (!cpu_errata_set_target_impl(max_cpus, target)) {
pr_warn("Failed to set target implementation CPUs\n");
Expand Down
2 changes: 1 addition & 1 deletion tools/testing/selftests/kvm/arm64/page_fault_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ static bool guest_set_ha(void)
if (hadbs == 0)
return false;

tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
tcr = read_sysreg(tcr_el1) | TCR_HA;
write_sysreg(tcr, tcr_el1);
isb();

Expand Down
67 changes: 61 additions & 6 deletions tools/testing/selftests/kvm/include/arm64/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,67 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))

/* TCR_EL1 specific flags */
#define TCR_T0SZ_OFFSET 0
#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET)

#define TCR_IRGN0_SHIFT 8
#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT)
#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT)
#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT)
#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT)
#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT)

#define TCR_ORGN0_SHIFT 10
#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT)
#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT)
#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT)
#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT)
#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT)

#define TCR_SH0_SHIFT 12
#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT)
#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT)

#define TCR_TG0_SHIFT 14
#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT)
#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT)
#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT)
#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT)

#define TCR_IPS_SHIFT 32
#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT)
#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT)
#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT)
#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT)
#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT)

#define TCR_HA (UL(1) << 39)
#define TCR_DS (UL(1) << 59)

/*
* AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers).
*/
#define PTE_ATTRINDX(t) ((t) << 2)
#define PTE_ATTRINDX_MASK GENMASK(4, 2)
#define PTE_ATTRINDX_SHIFT 2

#define PTE_VALID BIT(0)
#define PGD_TYPE_TABLE BIT(1)
#define PUD_TYPE_TABLE BIT(1)
#define PMD_TYPE_TABLE BIT(1)
#define PTE_TYPE_PAGE BIT(1)

#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
#define PTE_AF BIT(10)

#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift))
#define PTE_ADDR_51_48 GENMASK(15, 12)
#define PTE_ADDR_51_48_SHIFT 12
#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift))
#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8)
#define PTE_ADDR_51_50_LPA2_SHIFT 8

void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
Expand Down Expand Up @@ -102,12 +163,6 @@ enum {
(v) == VECTOR_SYNC_LOWER_64 || \
(v) == VECTOR_SYNC_LOWER_32)

/* Access flag */
#define PTE_AF (1ULL << 10)

/* Access flag update enable/disable */
#define TCR_EL1_HA (1ULL << 39)

void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
uint32_t *ipa16k, uint32_t *ipa64k);

Expand Down
Loading

0 comments on commit c478032

Please sign in to comment.