Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Address some fallout of the locking rework, this time affecting the
     way the vgic is configured

   - Fix an issue where the page table walker frees a subtree and then
     proceeds with walking what it has just freed...

   - Check that a given PA donated to the guest is actually memory (only
     affecting pKVM)

   - Correctly handle MTE CMOs by Set/Way

   - Fix the reported address of a watchpoint forwarded to userspace

   - Fix the freeing of the root of stage-2 page tables

   - Stop creating spurious PMU events to perform detection of the
     default PMU and use the existing PMU list instead

  x86:

   - Fix a memslot lookup bug in the NX recovery thread that could
     theoretically let userspace bypass the NX hugepage mitigation

   - Fix a s/BLOCKING/PENDING bug in SVM's vNMI support

   - Account exit stats for fastpath VM-Exits that never leave the super
     tight run-loop

   - Fix an out-of-bounds bug in the optimized APIC map code, and add a
     regression test for the race"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: selftests: Add test for race in kvm_recalculate_apic_map()
  KVM: x86: Bail from kvm_recalculate_phys_map() if x2APIC ID is out-of-bounds
  KVM: x86: Account fastpath-only VM-Exits in vCPU stats
  KVM: SVM: vNMI pending bit is V_NMI_PENDING_MASK not V_NMI_BLOCKING_MASK
  KVM: x86/mmu: Grab memslot for correct address space in NX recovery worker
  KVM: arm64: Document default vPMU behavior on heterogeneous systems
  KVM: arm64: Iterate arm_pmus list to probe for default PMU
  KVM: arm64: Drop last page ref in kvm_pgtable_stage2_free_removed()
  KVM: arm64: Populate fault info for watchpoint
  KVM: arm64: Reload PTE after invoking walker callback on preorder traversal
  KVM: arm64: Handle trap of tagged Set/Way CMOs
  arm64: Add missing Set/Way CMO encodings
  KVM: arm64: Prevent unconditional donation of unmapped regions from the host
  KVM: arm64: vgic: Fix a comment
  KVM: arm64: vgic: Fix locking comment
  KVM: arm64: vgic: Wrap vgic_its_create() with config_lock
  KVM: arm64: vgic: Fix a circular locking issue
Linus Torvalds committed Jun 4, 2023
2 parents 9455b4b + f211b45 commit b066935
Showing 23 changed files with 248 additions and 95 deletions.
6 changes: 3 additions & 3 deletions arch/arm64/include/asm/kvm_pgtable.h
@@ -632,9 +632,9 @@ int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
*
* The walker will walk the page-table entries corresponding to the input
* address range specified, visiting entries according to the walker flags.
* Invalid entries are treated as leaf entries. Leaf entries are reloaded
* after invoking the walker callback, allowing the walker to descend into
* a newly installed table.
* Invalid entries are treated as leaf entries. The visited page table entry is
* reloaded after invoking the walker callback, allowing the walker to descend
* into a newly installed table.
*
* Returning a negative error code from the walker callback function will
* terminate the walk immediately with the same error code.
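As an aside, here is a minimal sketch (not part of this diff) of a pre-order walker callback that installs a replacement table; the reload described in the comment above is what lets the walker descend into the new table rather than the subtree it just detached. make_new_table() is a hypothetical helper used only for illustration.

static int replace_table_cb(const struct kvm_pgtable_visit_ctx *ctx,
			    enum kvm_pgtable_walk_flags visit)
{
	/* Hypothetical helper: build a PTE pointing at a replacement table. */
	kvm_pte_t new_table = make_new_table(ctx);

	/*
	 * After this callback returns, the walker re-reads the visited PTE,
	 * so the walk continues into new_table instead of the detached
	 * subtree.
	 */
	WRITE_ONCE(*ctx->ptep, new_table);
	return 0;
}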
6 changes: 6 additions & 0 deletions arch/arm64/include/asm/sysreg.h
@@ -115,8 +115,14 @@
#define SB_BARRIER_INSN __SYS_BARRIER_INSN(0, 7, 31)

#define SYS_DC_ISW sys_insn(1, 0, 7, 6, 2)
#define SYS_DC_IGSW sys_insn(1, 0, 7, 6, 4)
#define SYS_DC_IGDSW sys_insn(1, 0, 7, 6, 6)
#define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2)
#define SYS_DC_CGSW sys_insn(1, 0, 7, 10, 4)
#define SYS_DC_CGDSW sys_insn(1, 0, 7, 10, 6)
#define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2)
#define SYS_DC_CIGSW sys_insn(1, 0, 7, 14, 4)
#define SYS_DC_CIGDSW sys_insn(1, 0, 7, 14, 6)

/*
* Automatically generated definitions for system registers, the
8 changes: 6 additions & 2 deletions arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -412,17 +412,21 @@ static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code)
return false;
}

static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
static bool kvm_hyp_handle_memory_fault(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
return true;

return false;
}
static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
__alias(kvm_hyp_handle_memory_fault);
static bool kvm_hyp_handle_watchpt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
__alias(kvm_hyp_handle_memory_fault);

static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (!__populate_fault_info(vcpu))
if (kvm_hyp_handle_memory_fault(vcpu, exit_code))
return true;

if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
14 changes: 7 additions & 7 deletions arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -575,18 +575,15 @@ struct pkvm_mem_donation {

struct check_walk_data {
enum pkvm_page_state desired;
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte);
enum pkvm_page_state (*get_page_state)(kvm_pte_t pte, u64 addr);
};

static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
{
struct check_walk_data *d = ctx->arg;

if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old)))
return -EINVAL;

return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
return d->get_page_state(ctx->old, ctx->addr) == d->desired ? 0 : -EPERM;
}

static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -601,8 +598,11 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
return kvm_pgtable_walk(pgt, addr, size, &walker);
}

static enum pkvm_page_state host_get_page_state(kvm_pte_t pte)
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
{
if (!addr_is_allowed_memory(addr))
return PKVM_NOPAGE;

if (!kvm_pte_valid(pte) && pte)
return PKVM_NOPAGE;

@@ -709,7 +709,7 @@ static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx
return host_stage2_set_owner_locked(addr, size, host_id);
}

static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte, u64 addr)
{
if (!kvm_pte_valid(pte))
return PKVM_NOPAGE;
2 changes: 2 additions & 0 deletions arch/arm64/kvm/hyp/nvhe/switch.c
@@ -186,6 +186,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};

@@ -196,6 +197,7 @@ static const exit_handler_fn pvm_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};

17 changes: 16 additions & 1 deletion arch/arm64/kvm/hyp/pgtable.c
@@ -209,14 +209,26 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
.flags = flags,
};
int ret = 0;
bool reload = false;
kvm_pteref_t childp;
bool table = kvm_pte_table(ctx.old, level);

if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
reload = true;
}

if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
reload = true;
}

/*
* Reload the page table after invoking the walker callback for leaf
* entries or after pre-order traversal, to allow the walker to descend
* into a newly installed or replaced table.
*/
if (reload) {
ctx.old = READ_ONCE(*ptep);
table = kvm_pte_table(ctx.old, level);
}
@@ -1320,4 +1332,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
};

WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));

WARN_ON(mm_ops->page_count(pgtable) != 1);
mm_ops->put_page(pgtable);
}
1 change: 1 addition & 0 deletions arch/arm64/kvm/hyp/vhe/switch.c
@@ -110,6 +110,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
[ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd,
[ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low,
[ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low,
[ESR_ELx_EC_WATCHPT_LOW] = kvm_hyp_handle_watchpt_low,
[ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth,
};

58 changes: 23 additions & 35 deletions arch/arm64/kvm/pmu-emul.c
@@ -694,45 +694,23 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)

static struct arm_pmu *kvm_pmu_probe_armpmu(void)
{
struct perf_event_attr attr = { };
struct perf_event *event;
struct arm_pmu *pmu = NULL;

/*
* Create a dummy event that only counts user cycles. As we'll never
* leave this function with the event being live, it will never
* count anything. But it allows us to probe some of the PMU
* details. Yes, this is terrible.
*/
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = 0;
attr.exclude_user = 0;
attr.exclude_kernel = 1;
attr.exclude_hv = 1;
attr.exclude_host = 1;
attr.config = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
attr.sample_period = GENMASK(63, 0);
struct arm_pmu *tmp, *pmu = NULL;
struct arm_pmu_entry *entry;
int cpu;

event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow, &attr);
mutex_lock(&arm_pmus_lock);

if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return NULL;
}
cpu = smp_processor_id();
list_for_each_entry(entry, &arm_pmus, entry) {
tmp = entry->arm_pmu;

if (event->pmu) {
pmu = to_arm_pmu(event->pmu);
if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
pmu = NULL;
if (cpumask_test_cpu(cpu, &tmp->supported_cpus)) {
pmu = tmp;
break;
}
}

perf_event_disable(event);
perf_event_release_kernel(event);
mutex_unlock(&arm_pmus_lock);

return pmu;
}
@@ -912,7 +890,17 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
return -EBUSY;

if (!kvm->arch.arm_pmu) {
/* No PMU set, get the default one */
/*
* No PMU set, get the default one.
*
* The observant among you will notice that the supported_cpus
* mask does not get updated for the default PMU even though it
* is quite possible the selected instance supports only a
* subset of cores in the system. This is intentional, and
* upholds the preexisting behavior on heterogeneous systems
* where vCPUs can be scheduled on any core but the guest
* counters could stop working.
*/
kvm->arch.arm_pmu = kvm_pmu_probe_armpmu();
if (!kvm->arch.arm_pmu)
return -ENODEV;
19 changes: 19 additions & 0 deletions arch/arm64/kvm/sys_regs.c
@@ -211,6 +211,19 @@ static bool access_dcsw(struct kvm_vcpu *vcpu,
return true;
}

static bool access_dcgsw(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
{
if (!kvm_has_mte(vcpu->kvm)) {
kvm_inject_undefined(vcpu);
return false;
}

/* Treat MTE S/W ops as we treat the classic ones: with contempt */
return access_dcsw(vcpu, p, r);
}

static void get_access_mask(const struct sys_reg_desc *r, u64 *mask, u64 *shift)
{
switch (r->aarch32_map) {
@@ -1756,8 +1769,14 @@ static bool access_spsr(struct kvm_vcpu *vcpu,
*/
static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_DC_ISW), access_dcsw },
{ SYS_DESC(SYS_DC_IGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_IGDSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CSW), access_dcsw },
{ SYS_DESC(SYS_DC_CGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CGDSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CISW), access_dcsw },
{ SYS_DESC(SYS_DC_CIGSW), access_dcgsw },
{ SYS_DESC(SYS_DC_CIGDSW), access_dcgsw },

DBG_BCR_BVR_WCR_WVR_EL1(0),
DBG_BCR_BVR_WCR_WVR_EL1(1),
27 changes: 21 additions & 6 deletions arch/arm64/kvm/vgic/vgic-init.c
@@ -235,9 +235,9 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
* KVM io device for the redistributor that belongs to this VCPU.
*/
if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) {
mutex_lock(&vcpu->kvm->arch.config_lock);
mutex_lock(&vcpu->kvm->slots_lock);
ret = vgic_register_redist_iodev(vcpu);
mutex_unlock(&vcpu->kvm->arch.config_lock);
mutex_unlock(&vcpu->kvm->slots_lock);
}
return ret;
}
Expand Down Expand Up @@ -406,7 +406,7 @@ void kvm_vgic_destroy(struct kvm *kvm)

/**
* vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest
* is a GICv2. A GICv3 must be explicitly initialized by the guest using the
* is a GICv2. A GICv3 must be explicitly initialized by userspace using the
* KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group.
* @kvm: kvm struct pointer
*/
@@ -446,11 +446,13 @@ int kvm_vgic_map_resources(struct kvm *kvm)
int kvm_vgic_map_resources(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
gpa_t dist_base;
int ret = 0;

if (likely(vgic_ready(kvm)))
return 0;

mutex_lock(&kvm->slots_lock);
mutex_lock(&kvm->arch.config_lock);
if (vgic_ready(kvm))
goto out;
@@ -463,13 +465,26 @@ int kvm_vgic_map_resources(struct kvm *kvm)
else
ret = vgic_v3_map_resources(kvm);

if (ret)
if (ret) {
__kvm_vgic_destroy(kvm);
else
dist->ready = true;
goto out;
}
dist->ready = true;
dist_base = dist->vgic_dist_base;
mutex_unlock(&kvm->arch.config_lock);

ret = vgic_register_dist_iodev(kvm, dist_base,
kvm_vgic_global_state.type);
if (ret) {
kvm_err("Unable to register VGIC dist MMIO regions\n");
kvm_vgic_destroy(kvm);
}
mutex_unlock(&kvm->slots_lock);
return ret;

out:
mutex_unlock(&kvm->arch.config_lock);
mutex_unlock(&kvm->slots_lock);
return ret;
}

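To make the locking change above easier to follow, here is a schematic of the ordering it establishes (a sketch distilled from the hunk, not additional diff content): kvm->slots_lock is taken before kvm->arch.config_lock, and the distributor iodev registration, which itself needs slots_lock, only runs once config_lock has been dropped.

	mutex_lock(&kvm->slots_lock);
	mutex_lock(&kvm->arch.config_lock);
	/* ... map GIC resources, latch dist_base, set dist->ready ... */
	mutex_unlock(&kvm->arch.config_lock);

	/* Registers MMIO regions under slots_lock, never under config_lock. */
	ret = vgic_register_dist_iodev(kvm, dist_base, kvm_vgic_global_state.type);
	mutex_unlock(&kvm->slots_lock);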
14 changes: 10 additions & 4 deletions arch/arm64/kvm/vgic/vgic-its.c
@@ -1936,6 +1936,7 @@ void vgic_lpi_translation_cache_destroy(struct kvm *kvm)

static int vgic_its_create(struct kvm_device *dev, u32 type)
{
int ret;
struct vgic_its *its;

if (type != KVM_DEV_TYPE_ARM_VGIC_ITS)
@@ -1945,9 +1946,12 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
if (!its)
return -ENOMEM;

mutex_lock(&dev->kvm->arch.config_lock);

if (vgic_initialized(dev->kvm)) {
int ret = vgic_v4_init(dev->kvm);
ret = vgic_v4_init(dev->kvm);
if (ret < 0) {
mutex_unlock(&dev->kvm->arch.config_lock);
kfree(its);
return ret;
}
@@ -1960,12 +1964,10 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)

/* Yep, even more trickery for lock ordering... */
#ifdef CONFIG_LOCKDEP
mutex_lock(&dev->kvm->arch.config_lock);
mutex_lock(&its->cmd_lock);
mutex_lock(&its->its_lock);
mutex_unlock(&its->its_lock);
mutex_unlock(&its->cmd_lock);
mutex_unlock(&dev->kvm->arch.config_lock);
#endif

its->vgic_its_base = VGIC_ADDR_UNDEF;
@@ -1986,7 +1988,11 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)

dev->private = its;

return vgic_its_set_abi(its, NR_ITS_ABIS - 1);
ret = vgic_its_set_abi(its, NR_ITS_ABIS - 1);

mutex_unlock(&dev->kvm->arch.config_lock);

return ret;
}

static void vgic_its_destroy(struct kvm_device *kvm_dev)
