From 811154e234db72f0a11557a84ba9640f8b3bc823 Mon Sep 17 00:00:00 2001
From: Akihiko Odaki <akihiko.odaki@daynix.com>
Date: Tue, 30 May 2023 11:46:51 +0900
Subject: [PATCH 1/4] KVM: arm64: Populate fault info for watchpoint

When handling ESR_ELx_EC_WATCHPT_LOW, the far_el2 member of struct
kvm_vcpu_fault_info will be copied to the far member of struct
kvm_debug_exit_arch and exposed to userspace. Userspace will see stale
values from older faults if the fault info does not get populated.

Fixes: 8fb2046180a0 ("KVM: arm64: Move early handlers to per-EC handlers")
Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230530024651.10014-1-akihiko.odaki@daynix.com
Cc: stable@vger.kernel.org
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 8 ++++++--
 arch/arm64/kvm/hyp/nvhe/switch.c        | 2 ++
 arch/arm64/kvm/hyp/vhe/switch.c         | 1 +
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index e78a08a72a3c9..5c15c58f90cce 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -412,17 +412,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
 	return false;
 }
 
-static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
 	if (!__populate_fault_info(vcpu))
 		return true;
 
 	return false;
 }
 
+static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+	__alias(kvm_hyp_handle_memory_fault);
+static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
+	__alias(kvm_hyp_handle_memory_fault);
 static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
-	if (!__populate_fault_info(vcpu))
+	if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
 		return true;
 
 	if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 71fa16a0dc775..77791495c9951 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
+	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };
 
@@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
+	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };
 
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index 3d868e84c7a0a..7a1aa511e7da6 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -110,6 +110,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
+	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
 };
 
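For context on how the populated value reaches userspace: the sketch below
is not part of the patch, but shows the consuming side of far, a minimal
KVM_EXIT_DEBUG handler in a VMM. It assumes the usual vCPU run loop with an
mmap'd struct kvm_run (handle_debug_exit is a hypothetical helper), and the
EC constants are written out by hand from the Arm ARM since the KVM UAPI
headers do not export them. Before this fix, the far printed for a
watchpoint could be left over from an unrelated earlier fault.

	#include <stdio.h>
	#include <linux/kvm.h>

	/* ESR_ELx.EC lives in bits [31:26]; 0x34 is "watchpoint, lower EL". */
	#define ESR_EC_SHIFT		26
	#define ESR_EC_WATCHPT_LOW	0x34

	/* Called after KVM_RUN returns; 'run' is the mmap'd vCPU run state. */
	static void handle_debug_exit(struct kvm_run *run)
	{
		unsigned int ec;

		if (run->exit_reason != KVM_EXIT_DEBUG)
			return;

		ec = run->debug.arch.hsr >> ESR_EC_SHIFT;
		if (ec == ESR_EC_WATCHPT_LOW)
			printf("watchpoint hit, FAR=0x%llx\n",
			       (unsigned long long)run->debug.arch.far);
	}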
From f6a27d6dc51b288106adaf053cff9c9b9cc12c4e Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Tue, 30 May 2023 19:32:13 +0000
Subject: [PATCH 2/4] KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()

The reference count on page table allocations is increased for every
'counted' PTE (valid or donated) in the table in addition to the
initial reference from ->zalloc_page(). kvm_pgtable_stage2_free_removed()
fails to drop the last reference on the root of the table walk, meaning
we leak memory.

Fix it by dropping the last reference after the free walker returns,
at which point all references for 'counted' PTEs have been released.

Cc: stable@vger.kernel.org
Fixes: 5c359cca1faf ("KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make")
Reported-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Tested-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230530193213.1663411-1-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index e1eacffbc41f4..95dae02ccc2e6 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1332,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
 	};
 
 	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
+
+	WARN_ON(mm_ops->page_count(pgtable) != 1);
+	mm_ops->put_page(pgtable);
 }
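To make the accounting concrete, here is a self-contained toy model of the
reference-counting scheme described above. Every name in it is hypothetical;
it mirrors the ownership rules, not the kernel's actual kvm_pgtable_mm_ops
machinery.

	#include <assert.h>
	#include <stdio.h>

	/*
	 * Toy model: a table page starts with one reference from
	 * ->zalloc_page(), and each 'counted' (valid or donated) PTE
	 * installed in it takes another.
	 */
	struct toy_table {
		int refcount;
		int counted_ptes;
	};

	static void toy_zalloc_page(struct toy_table *t)
	{
		t->refcount = 1;	/* the initial allocation reference */
		t->counted_ptes = 0;
	}

	static void toy_install_pte(struct toy_table *t)
	{
		t->counted_ptes++;
		t->refcount++;		/* one reference per counted PTE */
	}

	/* Models the free walker: drops the per-PTE references only. */
	static void toy_free_walker(struct toy_table *t)
	{
		while (t->counted_ptes > 0) {
			t->counted_ptes--;
			t->refcount--;
		}
	}

	static void toy_free_removed(struct toy_table *t)
	{
		toy_free_walker(t);
		/* Only the initial reference is left at this point... */
		assert(t->refcount == 1);	/* mirrors the added WARN_ON() */
		/* ...and before the fix, nothing ever dropped it. */
		t->refcount--;			/* the put_page() the fix adds */
	}

	int main(void)
	{
		struct toy_table t;

		toy_zalloc_page(&t);
		toy_install_pte(&t);
		toy_install_pte(&t);
		toy_free_removed(&t);
		printf("refcount after teardown: %d\n", t.refcount); /* 0 */
		return 0;
	}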
From 1c913a1c35aa61cf280173b2bcc133c3953c38fc Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Thu, 25 May 2023 21:27:21 +0000
Subject: [PATCH 3/4] KVM: arm64: Iterate arm_pmus list to probe for default PMU

To date KVM has relied on using a perf event to probe the core PMU at
the time of vPMU initialization. Behind the scenes, perf_init_event()
would iteratively walk the PMUs of the system and return the PMU that
could handle the event. However, an upcoming change in perf core will
drop the iterative walk, thereby breaking the fragile dance we do on
the KVM side.

Avoid the problem altogether by iterating over the list of supported
PMUs maintained in KVM, returning the core PMU that matches the CPU
we were called on.

Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230525212723.3361524-2-oliver.upton@linux.dev
---
 arch/arm64/kvm/pmu-emul.c | 46 ++++++++++++----------------------------------
 1 file changed, 12 insertions(+), 34 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 45727d50d18dd..5deddc49e7454 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -694,45 +694,23 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 
 static struct arm_pmu *kvm_pmu_probe_armpmu(void)
 {
-	struct perf_event_attr attr = { };
-	struct perf_event *event;
-	struct arm_pmu *pmu = NULL;
-
-	/*
-	 * Create a dummy event that only counts user cycles. As we'll never
-	 * leave this function with the event being live, it will never
-	 * count anything. But it allows us to probe some of the PMU
-	 * details. Yes, this is terrible.
-	 */
-	attr.type = PERF_TYPE_RAW;
-	attr.size = sizeof(attr);
-	attr.pinned = 1;
-	attr.disabled = 0;
-	attr.exclude_user = 0;
-	attr.exclude_kernel = 1;
-	attr.exclude_hv = 1;
-	attr.exclude_host = 1;
-	attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
-	attr.sample_period = GENMASK(63, 0);
+	struct arm_pmu *tmp, *pmu = NULL;
+	struct arm_pmu_entry *entry;
+	int cpu;
 
-	event = perf_event_create_kernel_counter(&attr, -1, current,
-						 kvm_pmu_perf_overflow, &attr);
+	mutex_lock(&arm_pmus_lock);
 
-	if (IS_ERR(event)) {
-		pr_err_once("kvm: pmu event creation failed %ld\n",
-			    PTR_ERR(event));
-		return NULL;
-	}
+	cpu = smp_processor_id();
+	list_for_each_entry(entry, &arm_pmus, entry) {
+		tmp = entry->arm_pmu;
 
-	if (event->pmu) {
-		pmu = to_arm_pmu(event->pmu);
-		if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
-		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
-			pmu = NULL;
+		if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
+			pmu = tmp;
+			break;
+		}
 	}
 
-	perf_event_disable(event);
-	perf_event_release_kernel(event);
+	mutex_unlock(&arm_pmus_lock);
 
 	return pmu;
 }

From 40e54cad454076172cc3e2bca60aa924650a3e4b Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Thu, 25 May 2023 21:27:22 +0000
Subject: [PATCH 4/4] KVM: arm64: Document default vPMU behavior on heterogeneous systems

KVM maintains a mask of supported CPUs when a vPMU type is explicitly
selected by userspace, which is then used to reject any attempt to run
the vCPU on an unsupported CPU. This is great, but we're still beholden
to the default behavior, where vCPUs can be scheduled anywhere and
guest counters may silently stop working.

Avoid confusing the next poor sod to look at this code and document the
intended behavior.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230525212723.3361524-3-oliver.upton@linux.dev
---
 arch/arm64/kvm/pmu-emul.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 5deddc49e7454..491ca7eb2a4c6 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -890,7 +890,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 		return -EBUSY;
 
 	if (!kvm->arch.arm_pmu) {
-		/* No PMU set, get the default one */
+		/*
+		 * No PMU set, get the default one.
+		 *
+		 * The observant among you will notice that the supported_cpus
+		 * mask does not get updated for the default PMU even though it
+		 * is quite possible the selected instance supports only a
+		 * subset of cores in the system. This is intentional, and
+		 * upholds the preexisting behavior on heterogeneous systems
+		 * where vCPUs can be scheduled on any core but the guest
+		 * counters could stop working.
+		 */
 		kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
 		if (!kvm->arch.arm_pmu)
 			return -ENODEV;
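For comparison, the strict behavior referenced in the comment kicks in when
userspace selects a PMU instance explicitly via the KVM_ARM_VCPU_PMU_V3_SET_PMU
vCPU attribute. A rough sketch follows, with error handling elided:
vcpu_set_pmu is a hypothetical helper, vcpu_fd is assumed to be an open vCPU
file descriptor, and pmu_id is the perf 'type' value read from the chosen
instance's sysfs directory (for example /sys/devices/armv8_pmuv3_0/type; the
exact path varies by platform).

	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/*
	 * Bind the vPMU to one PMU instance. With an explicit selection,
	 * KVM records supported_cpus and rejects any attempt to run the
	 * vCPU on an unsupported core, instead of letting guest counters
	 * silently stop counting.
	 */
	static int vcpu_set_pmu(int vcpu_fd, int pmu_id)
	{
		struct kvm_device_attr attr = {
			.group	= KVM_ARM_VCPU_PMU_V3_CTRL,
			.attr	= KVM_ARM_VCPU_PMU_V3_SET_PMU,
			.addr	= (__u64)(unsigned long)&pmu_id,
		};

		return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	}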