From acfcaf90db1fa833236d9f8249b6099cf638e5d1 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Thu, 27 Mar 2025 09:36:15 -0700 Subject: [PATCH 1/8] smccc: kvm_guest: Align with DISCOVER_IMPL_CPUS ABI The ABI of the hypercall requires that R2 and R3 are 0. Explicitly pass 0 for these parameters. Cc: Shameer Kolothum Fixes: 86edf6bdcf05 ("smccc/kvm_guest: Enable errata based on implementation CPUs") Reviewed-by: Shameer Kolothum Link: https://lore.kernel.org/r/20250327163613.2516073-1-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- drivers/firmware/smccc/kvm_guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index 5767aed25cdc..ac2d3cf8a776 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -95,7 +95,7 @@ void __init kvm_arm_target_impl_cpu_init(void) for (i = 0; i < max_cpus; i++) { arm_smccc_1_1_invoke(ARM_SMCCC_VENDOR_HYP_KVM_DISCOVER_IMPL_CPUS_FUNC_ID, - i, &res); + i, 0, 0, &res); if (res.a0 != SMCCC_RET_SUCCESS) { pr_warn("Discovering target implementation CPUs failed\n"); goto mem_free; From 1f5bdd3b0c7000156d99faeed19bd522615b38e3 Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 26 Mar 2025 12:06:59 +0800 Subject: [PATCH 2/8] smccc: kvm_guest: Remove unneeded semicolon Remove unnecessary semicolons reported by Coccinelle/coccicheck and the semantic patch at scripts/coccinelle/misc/semicolon.cocci. Signed-off-by: Chen Ni Link: https://lore.kernel.org/r/20250326040659.1190696-1-nichen@iscas.ac.cn Signed-off-by: Oliver Upton --- drivers/firmware/smccc/kvm_guest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/firmware/smccc/kvm_guest.c b/drivers/firmware/smccc/kvm_guest.c index ac2d3cf8a776..a123c05cbc9e 100644 --- a/drivers/firmware/smccc/kvm_guest.c +++ b/drivers/firmware/smccc/kvm_guest.c @@ -103,7 +103,7 @@ void __init kvm_arm_target_impl_cpu_init(void) target[i].midr = res.a1; target[i].revidr = res.a2; target[i].aidr = res.a3; - }; + } if (!cpu_errata_set_target_impl(max_cpus, target)) { pr_warn("Failed to set target implementation CPUs\n"); From fb8a3eba9c812b67f9cf5531e5b55d13a51e938e Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 2 Apr 2025 13:17:23 -0700 Subject: [PATCH 3/8] KVM: arm64: Only read HPFAR_EL2 when value is architecturally valid KVM's logic for deciding when HPFAR_EL2 is UNKNOWN doesn't align with the architecture. Most notably, KVM assumes HPFAR_EL2 contains the faulting IPA even in the case of an SEA. Align the logic with the architecture rather than attempting to paraphrase it. Additionally, take the opportunity to improve the language around ARM erratum #834220 such that it actually describes the bug. 
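For reference, the conditions under which HPFAR_EL2 holds a valid fault IPA (a rough paraphrase of rule R_FKLWR; the authoritative version is the new __hpfar_valid() helper in the diff below) boil down to the following sketch:

  /* sketch only, not the literal helper */
  valid = esr_fsc_is_translation_fault(esr) ||
          esr_fsc_is_access_flag_fault(esr) ||
          (esr_fsc_is_permission_fault(esr) && (esr & ESR_ELx_S1PTW)) ||
          esr_fsc_is_addr_sz_fault(esr);

with the erratum #834220 workaround forcing reported translation faults to be treated as invalid, since an affected CPU may actually have taken a stage-1 permission fault.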
Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250402201725.2963645-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 22 ++++++++++-- arch/arm64/kvm/hyp/include/hyp/fault.h | 46 ++++++++++++++++---------- 2 files changed, 48 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index d1b1a33f9a8b..92fb26e90840 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -121,6 +121,15 @@ #define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) #define ESR_ELx_FSC_SECC (0x18) #define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) +#define ESR_ELx_FSC_ADDRSZ (0x00) + +/* + * Annoyingly, the negative levels for Address size faults aren't laid out + * contiguously (or in the desired order) + */ +#define ESR_ELx_FSC_ADDRSZ_nL(n) ((n) == -1 ? 0x25 : 0x2C) +#define ESR_ELx_FSC_ADDRSZ_L(n) ((n) < 0 ? ESR_ELx_FSC_ADDRSZ_nL(n) : \ + (ESR_ELx_FSC_ADDRSZ + (n))) /* Status codes for individual page table levels */ #define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) @@ -161,8 +170,6 @@ #define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) /* ISS field definitions for exceptions taken in to Hyp */ -#define ESR_ELx_FSC_ADDRSZ (0x00) -#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) #define ESR_ELx_CV (UL(1) << 24) #define ESR_ELx_COND_SHIFT (20) #define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) @@ -464,6 +471,17 @@ static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) (esr == ESR_ELx_FSC_ACCESS_L(0)); } +static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) +{ + esr &= ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ADDRSZ_L(3)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(2)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(1)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(0)) || + (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); +} + /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ static inline bool esr_iss_is_eretax(unsigned long esr) { diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 17df94570f03..59409685c14f 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -44,31 +44,41 @@ static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) return true; } +/* + * Checks for the conditions when HPFAR_EL2 is written, per ARM ARM R_FKLWR. + */ +static inline bool __hpfar_valid(u64 esr) +{ + /* + * CPUs affected by ARM erratum #834220 may incorrectly report a + * stage-2 translation fault when a stage-1 permission fault occurs. + * + * Re-walk the page tables to determine if a stage-1 fault actually + * occurred. + */ + if (cpus_have_final_cap(ARM64_WORKAROUND_834220) && + esr_fsc_is_translation_fault(esr)) + return false; + + if (esr_fsc_is_translation_fault(esr) || esr_fsc_is_access_flag_fault(esr)) + return true; + + if ((esr & ESR_ELx_S1PTW) && esr_fsc_is_permission_fault(esr)) + return true; + + return esr_fsc_is_addr_sz_fault(esr); +} + static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) { u64 hpfar, far; far = read_sysreg_el2(SYS_FAR); - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. 
- */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - esr_fsc_is_permission_fault(esr))) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { + if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - } + else if (!__translate_far_to_hpfar(far, &hpfar)) + return false; fault->far_el2 = far; fault->hpfar_el2 = hpfar; From 1cf3e126f1528cdcaf77524f48e54ccbcb029473 Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 2 Apr 2025 13:17:24 -0700 Subject: [PATCH 4/8] arm64: Convert HPFAR_EL2 to sysreg table Switch over to the typical sysreg table for HPFAR_EL2 as we're about to start using more fields in the register. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250402201725.2963645-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/kvm_emulate.h | 4 +++- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- arch/arm64/tools/sysreg | 7 +++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index d7cf66573aca..44e3fc6483c8 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -305,7 +305,9 @@ static __always_inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vc static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu *vcpu) { - return ((phys_addr_t)vcpu->arch.fault.hpfar_el2 & HPFAR_MASK) << 8; + u64 hpfar = vcpu->arch.fault.hpfar_el2; + + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } static inline u64 kvm_vcpu_get_disr(const struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index f34f11c720d7..5ce2230054d9 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -578,7 +578,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 2c63662c1a48..31ad9ce2b91c 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3433,3 +3433,10 @@ Field 5 F Field 4 P Field 3:0 Align EndSysreg + +Sysreg HPFAR_EL2 3 4 6 0 4 +Field 63 NS +Res0 62:48 +Field 47:4 FIPA +Res0 3:0 +EndSysreg From 26fbdf36922711f285fd185ad644f0acdf15959f Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Wed, 2 Apr 2025 13:17:25 -0700 Subject: [PATCH 5/8] KVM: arm64: Don't translate FAR if invalid/unsafe Don't re-walk the page tables if an SEA occurred during the faulting page table walk to avoid taking a fatal exception in the hyp. Additionally, check that FAR_EL2 is valid for SEAs not taken on PTW as the architecture doesn't guarantee it contains the fault VA. Finally, fix up the rest of the abort path by checking for SEAs early and bugging the VM if we get further along with an UNKNOWN fault IPA. 
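Concretely, the AT-based re-walk is only attempted when it is both safe and meaningful; the new check (see __fault_safe_to_translate() in the diff below) amounts to this sketch:

  /* sketch only, mirroring __fault_safe_to_translate() */
  safe = !esr_fsc_is_sea_ttw(esr) && !esr_fsc_is_secc_ttw(esr) &&
         !((esr & ESR_ELx_FSC) == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV));

i.e. never walk the guest's page tables when the external abort (or ECC error) hit the translation table walk itself, and never trust FAR_EL2 when ESR_ELx.FnV flags it as invalid.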
Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20250402201725.2963645-4-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/include/asm/esr.h | 22 ++++++++++++++++++ arch/arm64/include/asm/kvm_emulate.h | 3 +++ arch/arm64/include/asm/kvm_ras.h | 2 +- arch/arm64/kvm/hyp/include/hyp/fault.h | 26 ++++++++++++++++----- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 7 ++++++ arch/arm64/kvm/mmu.c | 31 ++++++++++++++++---------- 6 files changed, 73 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 92fb26e90840..e4f77757937e 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -482,6 +482,28 @@ static inline bool esr_fsc_is_addr_sz_fault(unsigned long esr) (esr == ESR_ELx_FSC_ADDRSZ_L(-1)); } +static inline bool esr_fsc_is_sea_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SEA_TTW(3)) || + (esr == ESR_ELx_FSC_SEA_TTW(2)) || + (esr == ESR_ELx_FSC_SEA_TTW(1)) || + (esr == ESR_ELx_FSC_SEA_TTW(0)) || + (esr == ESR_ELx_FSC_SEA_TTW(-1)); +} + +static inline bool esr_fsc_is_secc_ttw(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_SECC_TTW(3)) || + (esr == ESR_ELx_FSC_SECC_TTW(2)) || + (esr == ESR_ELx_FSC_SECC_TTW(1)) || + (esr == ESR_ELx_FSC_SECC_TTW(0)) || + (esr == ESR_ELx_FSC_SECC_TTW(-1)); +} + /* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ static inline bool esr_iss_is_eretax(unsigned long esr) { diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 44e3fc6483c8..bd020fc28aa9 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -307,6 +307,9 @@ static __always_inline phys_addr_t kvm_vcpu_get_fault_ipa(const struct kvm_vcpu { u64 hpfar = vcpu->arch.fault.hpfar_el2; + if (unlikely(!(hpfar & HPFAR_EL2_NS))) + return INVALID_GPA; + return FIELD_GET(HPFAR_EL2_FIPA, hpfar) << 12; } diff --git a/arch/arm64/include/asm/kvm_ras.h b/arch/arm64/include/asm/kvm_ras.h index 87e10d9a635b..9398ade632aa 100644 --- a/arch/arm64/include/asm/kvm_ras.h +++ b/arch/arm64/include/asm/kvm_ras.h @@ -14,7 +14,7 @@ * Was this synchronous external abort a RAS notification? * Returns '0' for errors handled by some RAS subsystem, or -ENOENT. 
*/ -static inline int kvm_handle_guest_sea(phys_addr_t addr, u64 esr) +static inline int kvm_handle_guest_sea(void) { /* apei_claim_sea(NULL) expects to mask interrupts itself */ lockdep_assert_irqs_enabled(); diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h index 59409685c14f..fc573fc767b0 100644 --- a/arch/arm64/kvm/hyp/include/hyp/fault.h +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -12,6 +12,16 @@ #include #include +static inline bool __fault_safe_to_translate(u64 esr) +{ + u64 fsc = esr & ESR_ELx_FSC; + + if (esr_fsc_is_sea_ttw(esr) || esr_fsc_is_secc_ttw(esr)) + return false; + + return !(fsc == ESR_ELx_FSC_EXTABT && (esr & ESR_ELx_FnV)); +} + static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) { int ret; @@ -71,17 +81,23 @@ static inline bool __hpfar_valid(u64 esr) static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) { - u64 hpfar, far; + u64 hpfar; - far = read_sysreg_el2(SYS_FAR); + fault->far_el2 = read_sysreg_el2(SYS_FAR); + fault->hpfar_el2 = 0; if (__hpfar_valid(esr)) hpfar = read_sysreg(hpfar_el2); - else if (!__translate_far_to_hpfar(far, &hpfar)) + else if (unlikely(!__fault_safe_to_translate(esr))) + return true; + else if (!__translate_far_to_hpfar(fault->far_el2, &hpfar)) return false; - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; + /* + * Hijack HPFAR_EL2.NS (RES0 in Non-secure) to indicate a valid + * HPFAR value. + */ + fault->hpfar_el2 = hpfar | HPFAR_EL2_NS; return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 5ce2230054d9..2a5284f749b4 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -578,7 +578,14 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) return; } + + /* + * Yikes, we couldn't resolve the fault IPA. This should reinject an + * abort into the host when we figure out how to do that. + */ + BUG_ON(!(fault.hpfar_el2 & HPFAR_EL2_NS)); addr = FIELD_GET(HPFAR_EL2_FIPA, fault.hpfar_el2) << 12; + ret = host_stage2_idmap(addr); BUG_ON(ret && ret != -EAGAIN); } diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 2feb6c6b63af..754f2fe0cc67 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1794,9 +1794,28 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) gfn_t gfn; int ret, idx; + /* Synchronous External Abort? */ + if (kvm_vcpu_abt_issea(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (kvm_handle_guest_sea()) + kvm_inject_vabt(vcpu); + + return 1; + } + esr = kvm_vcpu_get_esr(vcpu); + /* + * The fault IPA should be reliable at this point as we're not dealing + * with an SEA. + */ ipa = fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + if (KVM_BUG_ON(ipa == INVALID_GPA, vcpu->kvm)) + return -EFAULT; + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (esr_fsc_is_translation_fault(esr)) { @@ -1818,18 +1837,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) } } - /* Synchronous External Abort? */ - if (kvm_vcpu_abt_issea(vcpu)) { - /* - * For RAS the host kernel may handle this abort. - * There is no need to pass the error into the guest. 
- */ - if (kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_esr(vcpu))) - kvm_inject_vabt(vcpu); - - return 1; - } - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_esr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); From d8d78398e550039295e0237eafb703e2d21f7d57 Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Sat, 5 Apr 2025 00:10:41 +0000 Subject: [PATCH 6/8] KVM: arm64: selftests: Introduce and use hardware-definition macros The kvm selftest library for arm64 currently configures the hardware fields, such as shift and mask in the page-table entries and registers, directly with numbers. While it add comments at places, it's better to rewrite them with appropriate macros to improve the readability and reduce the risk of errors. Hence, introduce macros to define the hardware fields and use them in the arm64 processor library. Most of the definitions are primary copied from the Linux's header, arch/arm64/include/asm/pgtable-hwdef.h. No functional change intended. Suggested-by: Oliver Upton Signed-off-by: Raghavendra Rao Ananta Link: https://lore.kernel.org/r/20250405001042.1470552-2-rananta@google.com Signed-off-by: Oliver Upton --- .../selftests/kvm/arm64/page_fault_test.c | 2 +- .../selftests/kvm/include/arm64/processor.h | 66 +++++++++++++++++-- .../selftests/kvm/lib/arm64/processor.c | 57 ++++++++-------- 3 files changed, 92 insertions(+), 33 deletions(-) diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..dc6559dad9d8 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -199,7 +199,7 @@ static bool guest_set_ha(void) if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..7d88ff22013a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,6 +62,66 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define 
TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -102,12 +162,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f3..da5802c8a59c 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + 
PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +171,8 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -293,20 +298,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,35 +324,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); From c8631ea59b6523035ffb607634eef7bacc8947fe Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Sat, 5 Apr 2025 00:10:42 +0000 Subject: [PATCH 7/8] KVM: arm64: selftests: Explicitly set the page attrs to Inner-Shareable Atomic instructions such as 'ldset' in the guest have been observed to cause an EL1 data abort with FSC 0x35 (IMPLEMENTATION DEFINED fault (Unsupported Exclusive or Atomic access)) on Neoverse-N3. Per DDI0487L.a B2.2.6, atomic instructions are only architecturally guaranteed for Inner/Outer Shareable Normal Write-Back memory. 
For anything else, the behavior is IMPLEMENTATION DEFINED and can lose atomicity or, in this case, generate an abort. It would appear that selftests sets up the stage-1 mappings as Non-shareable, leading to the observed abort. Explicitly set the Shareability field to Inner Shareable for non-LPA2 page tables. Note that for the LPA2 page table format, translations for cacheable memory inherit the shareability attribute of the PTW, i.e. TCR_ELx.SH{0,1}.

Suggested-by: Oliver Upton
Signed-off-by: Raghavendra Rao Ananta
Link: https://lore.kernel.org/r/20250405001042.1470552-3-rananta@google.com
[oliver: Rephrase changelog]
Signed-off-by: Oliver Upton
---
 tools/testing/selftests/kvm/include/arm64/processor.h | 1 +
 tools/testing/selftests/kvm/lib/arm64/processor.c | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h
index 7d88ff22013a..b0fc0f945766 100644
--- a/tools/testing/selftests/kvm/include/arm64/processor.h
+++ b/tools/testing/selftests/kvm/include/arm64/processor.h
@@ -113,6 +113,7 @@
 #define PMD_TYPE_TABLE BIT(1)
 #define PTE_TYPE_PAGE BIT(1)
+#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */
 #define PTE_AF BIT(10)
diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c
index da5802c8a59c..9d69904cb608 100644
--- a/tools/testing/selftests/kvm/lib/arm64/processor.c
+++ b/tools/testing/selftests/kvm/lib/arm64/processor.c
@@ -172,6 +172,9 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	}
 	pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID;
+	if (!use_lpa2_pte_format(vm))
+		pg_attr |= PTE_SHARED;
+
 	*ptep = addr_pte(vm, paddr, pg_attr);
 }

From a344e258acb0a7f0e7ed10a795c52d1baf705164 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 7 Apr 2025 16:27:55 +0100
Subject: [PATCH 8/8] KVM: arm64: Use acquire/release to communicate FF-A version negotiation

The pKVM FF-A proxy rejects FF-A requests other than FFA_VERSION until version negotiation is complete, which is signalled by setting the global 'has_version_negotiated' variable. To avoid excessive locking, this variable is checked directly from kvm_host_ffa_handler() in response to an FF-A call. However, this can race against another CPU performing the negotiation, potentially leading to a torn read of the flag (incredibly unlikely for a 'bool') or to a problematic reordering of the accesses to 'has_version_negotiated' and 'hyp_ffa_version', whereby __do_ffa_mem_xfer() could observe a stale version number.

Use acquire/release primitives when writing 'has_version_negotiated' with the version lock held and when reading without the lock held.
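This is the standard message-passing pattern, sketched here in isolation; use() is merely a stand-in for a later reader such as __do_ffa_mem_xfer():

  /* producer: negotiating CPU, version lock held */
  hyp_ffa_version = ffa_req_version;
  smp_store_release(&has_version_negotiated, true);

  /* consumer: CPU handling a subsequent FF-A call, lockless */
  if (smp_load_acquire(&has_version_negotiated))
          use(hyp_ffa_version);   /* cannot observe a stale version */

The release store orders the version update before the flag becomes visible, and the acquire load orders the flag check before any dependent access to 'hyp_ffa_version'.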
Cc: Sebastian Ene Cc: Sudeep Holla Cc: Quentin Perret Cc: Oliver Upton Cc: Marc Zyngier Fixes: c9c012625e12 ("KVM: arm64: Trap FFA_VERSION host call in pKVM") Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20250407152755.1041-1-will@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/hyp/nvhe/ffa.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index e433dfab882a..3369dd0c4009 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -730,10 +730,10 @@ static void do_ffa_version(struct arm_smccc_res *res, hyp_ffa_version = ffa_req_version; } - if (hyp_ffa_post_init()) + if (hyp_ffa_post_init()) { res->a0 = FFA_RET_NOT_SUPPORTED; - else { - has_version_negotiated = true; + } else { + smp_store_release(&has_version_negotiated, true); res->a0 = hyp_ffa_version; } unlock: @@ -809,7 +809,8 @@ bool kvm_host_ffa_handler(struct kvm_cpu_context *host_ctxt, u32 func_id) if (!is_ffa_call(func_id)) return false; - if (!has_version_negotiated && func_id != FFA_VERSION) { + if (func_id != FFA_VERSION && + !smp_load_acquire(&has_version_negotiated)) { ffa_to_smccc_error(&res, FFA_RET_INVALID_PARAMETERS); goto out_handled; }