From d561491ba927cb5634094ff311795e9d618e9b86 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Nov 2024 16:57:54 +0000 Subject: [PATCH 01/11] KVM: arm64: vgic-v3: Sanitise guest writes to GICR_INVLPIR Make sure we filter out non-LPI invalidation when handling writes to GICR_INVLPIR. Fixes: 4645d11f4a553 ("KVM: arm64: vgic-v3: Implement MMIO-based LPI invalidation") Reported-by: Alexander Potapenko Tested-by: Alexander Potapenko Signed-off-by: Marc Zyngier Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20241117165757.247686-2-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 9e50928f5d7df..70a44852cbafe 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -530,6 +530,7 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_irq *irq; + u32 intid; /* * If the guest wrote only to the upper 32bit part of the @@ -541,9 +542,13 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, if ((addr & 4) || !vgic_lpis_enabled(vcpu)) return; + intid = lower_32_bits(val); + if (intid < VGIC_MIN_LPI) + return; + vgic_set_rdist_busy(vcpu, true); - irq = vgic_get_irq(vcpu->kvm, NULL, lower_32_bits(val)); + irq = vgic_get_irq(vcpu->kvm, NULL, intid); if (irq) { vgic_its_inv_lpi(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); From add570b39f9fc4b830e7f4b487bbc16d74c388ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Nov 2024 16:57:55 +0000 Subject: [PATCH 02/11] KVM: arm64: vgic: Make vgic_get_irq() more robust vgic_get_irq() has an awkward signature, as it takes both a kvm *and* a vcpu, where the vcpu is allowed to be NULL if the INTID being looked up is a global interrupt (SPI or LPI). This leads to potentially problematic situations where the INTID passed is a private interrupt, but that there is no vcpu. In order to make things less ambiguous, let have *two* helpers instead: - vgic_get_irq(struct kvm *kvm, u32 intid), which is only concerned with *global* interrupts, as indicated by the lack of vcpu. - vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid), which can return *any* interrupt class, but must have of course a non-NULL vcpu. Most of the code nicely falls under one or the other situations, except for a couple of cases (close to the UABI or in the debug code) where we have to distinguish between the two cases. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241117165757.247686-3-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-debug.c | 5 +++- arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-its.c | 8 +++--- arch/arm64/kvm/vgic/vgic-mmio-v2.c | 12 ++++----- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 8 +++--- arch/arm64/kvm/vgic/vgic-mmio.c | 38 +++++++++++++------------- arch/arm64/kvm/vgic/vgic-v2.c | 2 +- arch/arm64/kvm/vgic/vgic-v3.c | 2 +- arch/arm64/kvm/vgic/vgic-v4.c | 4 +-- arch/arm64/kvm/vgic/vgic.c | 43 +++++++++++++++++++----------- arch/arm64/kvm/vgic/vgic.h | 4 +-- 11 files changed, 71 insertions(+), 57 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index e1397ab2072a5..afb018528bc3b 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -287,7 +287,10 @@ static int vgic_debug_show(struct seq_file *s, void *v) * Expect this to succeed, as iter_mark_lpis() takes a reference on * every LPI to be visited. */ - irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (iter->intid < VGIC_NR_PRIVATE_IRQS) + irq = vgic_get_vcpu_irq(vcpu, iter->intid); + else + irq = vgic_get_irq(kvm, iter->intid); if (WARN_ON_ONCE(!irq)) return -EINVAL; diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 48c952563e85f..bc7e22ab5d812 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -322,7 +322,7 @@ int vgic_init(struct kvm *kvm) goto out; for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - struct vgic_irq *irq = vgic_get_irq(kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); switch (dist->vgic_model) { case KVM_DEV_TYPE_ARM_VGIC_V3: diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 198296933e7eb..79c40708b6646 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -42,7 +42,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, struct kvm_vcpu *vcpu) { struct vgic_dist *dist = &kvm->arch.vgic; - struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + struct vgic_irq *irq = vgic_get_irq(kvm, intid), *oldirq; unsigned long flags; int ret; @@ -419,7 +419,7 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) last_byte_offset = byte_offset; } - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (!irq) continue; @@ -1288,7 +1288,7 @@ int vgic_its_invall(struct kvm_vcpu *vcpu) unsigned long intid; xa_for_each(&dist->lpi_xa, intid, irq) { - irq = vgic_get_irq(kvm, NULL, intid); + irq = vgic_get_irq(kvm, intid); if (!irq) continue; @@ -1354,7 +1354,7 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, return 0; xa_for_each(&dist->lpi_xa, intid, irq) { - irq = vgic_get_irq(kvm, NULL, intid); + irq = vgic_get_irq(kvm, intid); if (!irq) continue; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c index e070cda86e12f..f25fccb1f8e63 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v2.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -148,7 +148,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, if (!(targets & (1U << c))) continue; - irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->pending_latch = true; @@ -167,7 +167,7 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->targets << (i * 8); @@ -191,7 +191,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, return; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, intid + i); int target; raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -213,7 +213,7 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->source << (i * 8); @@ -231,7 +231,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -253,7 +253,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index 70a44852cbafe..ae4c0593d1145 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -194,7 +194,7 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, intid); unsigned long ret = 0; if (!irq) @@ -220,7 +220,7 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, if (addr & 4) return; - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (!irq) return; @@ -548,7 +548,7 @@ static void vgic_mmio_write_invlpi(struct kvm_vcpu *vcpu, vgic_set_rdist_busy(vcpu, true); - irq = vgic_get_irq(vcpu->kvm, NULL, intid); + irq = vgic_get_irq(vcpu->kvm, intid); if (irq) { vgic_its_inv_lpi(vcpu->kvm, irq); vgic_put_irq(vcpu->kvm, irq); @@ -1025,7 +1025,7 @@ int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) static void vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, u32 sgi, bool allow_group1) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, sgi); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, sgi); unsigned long flags; raw_spin_lock_irqsave(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c index cf76523a21945..e416e433baff3 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio.c +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -50,7 +50,7 @@ unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->group) value |= BIT(i); @@ -74,7 +74,7 @@ void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, unsigned long flags; for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->group = !!(val & BIT(i)); @@ -102,7 +102,7 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->enabled) value |= (1U << i); @@ -122,7 +122,7 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid)) { @@ -171,7 +171,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) @@ -193,7 +193,7 @@ int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = true; @@ -214,7 +214,7 @@ int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); irq->enabled = false; @@ -236,7 +236,7 @@ static unsigned long __read_pending(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); unsigned long flags; bool val; @@ -309,7 +309,7 @@ static void __set_pending(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* GICD_ISPENDR0 SGI bits are WI when written from the guest. */ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { @@ -395,7 +395,7 @@ static void __clear_pending(struct kvm_vcpu *vcpu, unsigned long flags; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* GICD_ICPENDR0 SGI bits are WI when written from the guest. */ if (is_vgic_v2_sgi(vcpu, irq) && !is_user) { @@ -494,7 +494,7 @@ static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, /* Loop over all IRQs affected by this read */ for (i = 0; i < len * 8; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); /* * Even for HW interrupts, don't evaluate the HW state as @@ -598,7 +598,7 @@ static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, int i; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); vgic_put_irq(vcpu->kvm, irq); } @@ -635,7 +635,7 @@ static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, int i; for_each_set_bit(i, &val, len * 8) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); vgic_put_irq(vcpu->kvm, irq); } @@ -672,7 +672,7 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, u64 val = 0; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); val |= (u64)irq->priority << (i * 8); @@ -698,7 +698,7 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, unsigned long flags; for (i = 0; i < len; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); /* Narrow the priority range to what we actually support */ @@ -719,7 +719,7 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, int i; for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->config == VGIC_CONFIG_EDGE) value |= (2U << (i * 2)); @@ -750,7 +750,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, if (intid + i < VGIC_NR_PRIVATE_IRQS) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_irq(vcpu->kvm, intid + i); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (test_bit(i * 2 + 1, &val)) @@ -775,7 +775,7 @@ u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_vcpu_irq(vcpu, intid + i); if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) val |= (1U << i); @@ -799,7 +799,7 @@ void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) continue; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + irq = vgic_get_vcpu_irq(vcpu, intid + i); /* * Line level is set irrespective of irq type diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c index ae5a44d5702d1..381673f03c395 100644 --- a/arch/arm64/kvm/vgic/vgic-v2.c +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -72,7 +72,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) kvm_notify_acked_irq(vcpu->kvm, 0, intid - VGIC_NR_PRIVATE_IRQS); - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock(&irq->irq_lock); diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index b217b256853c2..f267bc2486a18 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -65,7 +65,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) kvm_notify_acked_irq(vcpu->kvm, 0, intid - VGIC_NR_PRIVATE_IRQS); - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); if (!irq) /* An LPI could have been unmapped. */ continue; diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 74a67ad87f29d..eedecbbbcf31b 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -123,7 +123,7 @@ static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) * IRQ. The SGI code will do its magic. */ for (i = 0; i < VGIC_NR_SGIS; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); struct irq_desc *desc; unsigned long flags; int ret; @@ -160,7 +160,7 @@ static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) int i; for (i = 0; i < VGIC_NR_SGIS; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, i); struct irq_desc *desc; unsigned long flags; int ret; diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f50274fd55815..ffaa52448b6f8 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -84,17 +84,11 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) * struct vgic_irq. It also increases the refcount, so any caller is expected * to call vgic_put_irq() once it's finished with this IRQ. */ -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid) +struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid) { - /* SGIs and PPIs */ - if (intid <= VGIC_MAX_PRIVATE) { - intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); - return &vcpu->arch.vgic_cpu.private_irqs[intid]; - } - /* SPIs */ - if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (intid >= VGIC_NR_PRIVATE_IRQS && + intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; } @@ -106,6 +100,20 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, return NULL; } +struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) +{ + if (WARN_ON(!vcpu)) + return NULL; + + /* SGIs and PPIs */ + if (intid <= VGIC_MAX_PRIVATE) { + intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); + return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } + + return vgic_get_irq(vcpu->kvm, intid); +} + /* * We can't do anything in here, because we lack the kvm pointer to * lock and remove the item from the lpi_list. So we keep this function @@ -437,7 +445,10 @@ int kvm_vgic_inject_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, trace_vgic_update_irq_pending(vcpu ? vcpu->vcpu_idx : 0, intid, level); - irq = vgic_get_irq(kvm, vcpu, intid); + if (intid < VGIC_NR_PRIVATE_IRQS) + irq = vgic_get_vcpu_irq(vcpu, intid); + else + irq = vgic_get_irq(kvm, intid); if (!irq) return -EINVAL; @@ -499,7 +510,7 @@ static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, u32 vintid, struct irq_ops *ops) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; int ret; @@ -524,7 +535,7 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, */ void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; if (!irq->hw) @@ -547,7 +558,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; - irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + irq = vgic_get_vcpu_irq(vcpu, vintid); BUG_ON(!irq); raw_spin_lock_irqsave(&irq->irq_lock, flags); @@ -560,7 +571,7 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) int kvm_vgic_get_map(struct kvm_vcpu *vcpu, unsigned int vintid) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + struct vgic_irq *irq = vgic_get_vcpu_irq(vcpu, vintid); unsigned long flags; int ret = -1; @@ -596,7 +607,7 @@ int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) return -EINVAL; - irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + irq = vgic_get_vcpu_irq(vcpu, intid); raw_spin_lock_irqsave(&irq->irq_lock, flags); if (irq->owner && irq->owner != owner) ret = -EEXIST; @@ -1008,7 +1019,7 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) if (!vgic_initialized(vcpu->kvm)) return false; - irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + irq = vgic_get_vcpu_irq(vcpu, vintid); raw_spin_lock_irqsave(&irq->irq_lock, flags); map_is_active = irq->hw && irq->active; raw_spin_unlock_irqrestore(&irq->irq_lock, flags); diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 309295f5e1b07..8290f3276cf07 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -202,8 +202,8 @@ int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, const struct vgic_register_region * vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, gpa_t addr, int len); -struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, - u32 intid); +struct vgic_irq *vgic_get_irq(struct kvm *kvm, u32 intid); +struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid); void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); From e7619f2a2f8f9b10feb784ec6b8ea5320ad3b97e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Nov 2024 16:57:56 +0000 Subject: [PATCH 03/11] KVM: arm64: vgic: Kill VGIC_MAX_PRIVATE definition VGIC_MAX_PRIVATE is a pretty useless definition, and is better replaced with VGIC_NR_PRIVATE_IRQS. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241117165757.247686-4-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic.c | 4 ++-- include/kvm/arm_vgic.h | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index ffaa52448b6f8..cc8c6b9b5dd8b 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -106,8 +106,8 @@ struct vgic_irq *vgic_get_vcpu_irq(struct kvm_vcpu *vcpu, u32 intid) return NULL; /* SGIs and PPIs */ - if (intid <= VGIC_MAX_PRIVATE) { - intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); + if (intid < VGIC_NR_PRIVATE_IRQS) { + intid = array_index_nospec(intid, VGIC_NR_PRIVATE_IRQS); return &vcpu->arch.vgic_cpu.private_irqs[intid]; } diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index f5172549f9ba0..3a8ccfda34d29 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -26,7 +26,6 @@ #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1) #define VGIC_MAX_SPI 1019 #define VGIC_MAX_RESERVED 1023 #define VGIC_MIN_LPI 8192 From 3b2c81d5feb250dfdcb0ef5825319f36c29f8336 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 17 Nov 2024 16:57:57 +0000 Subject: [PATCH 04/11] KVM: arm64: vgic-its: Add stronger type-checking to the ITS entry sizes The ITS ABI infrastructure allows for some pretty lax code, where the size of the data doesn't have to match the size of the entry, potentially leading to a collection of interesting bugs. Commit 7fe28d7e68f9 ("KVM: arm64: vgic-its: Add a data length check in vgic_its_save_*") added some checks, but starts by implicitly casting all writes to a 64bit value, hiding some of the issues. Instead, introduce macros that will check the data type actually used for dealing with the table entries. The macros are taking a symbolic entry type that is used to fetch the size of the entry type for the current ABI. This immediately catches a couple of low-impact gotchas (zero values that are implicitly 32bit), easy enough to fix. Given that we currently only have a single ABI, hardcode a couple of BUILD_BUG_ON()s that will fire if we use anything but a 64bit quantity, and some (currently unreachable) fallback code that may become useful one day. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20241117165757.247686-5-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-its.c | 69 ++++++++++++++++++++++++---------- arch/arm64/kvm/vgic/vgic.h | 23 ------------ 2 files changed, 50 insertions(+), 42 deletions(-) diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 79c40708b6646..f4c4494645c34 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -31,6 +31,41 @@ static int vgic_its_commit_v0(struct vgic_its *its); static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, struct kvm_vcpu *filter_vcpu, bool needs_inv); +#define vgic_its_read_entry_lock(i, g, valp, t) \ + ({ \ + int __sz = vgic_its_get_abi(i)->t##_esz; \ + struct kvm *__k = (i)->dev->kvm; \ + int __ret; \ + \ + BUILD_BUG_ON(NR_ITS_ABIS == 1 && \ + sizeof(*(valp)) != ABI_0_ESZ); \ + if (NR_ITS_ABIS > 1 && \ + KVM_BUG_ON(__sz != sizeof(*(valp)), __k)) \ + __ret = -EINVAL; \ + else \ + __ret = kvm_read_guest_lock(__k, (g), \ + valp, __sz); \ + __ret; \ + }) + +#define vgic_its_write_entry_lock(i, g, val, t) \ + ({ \ + int __sz = vgic_its_get_abi(i)->t##_esz; \ + struct kvm *__k = (i)->dev->kvm; \ + typeof(val) __v = (val); \ + int __ret; \ + \ + BUILD_BUG_ON(NR_ITS_ABIS == 1 && \ + sizeof(__v) != ABI_0_ESZ); \ + if (NR_ITS_ABIS > 1 && \ + KVM_BUG_ON(__sz != sizeof(__v), __k)) \ + __ret = -EINVAL; \ + else \ + __ret = vgic_write_guest_lock(__k, (g), \ + &__v, __sz); \ + __ret; \ + }) + /* * Creates a new (reference to a) struct vgic_irq for a given LPI. * If this LPI is already mapped on another ITS, we increase its refcount @@ -794,7 +829,7 @@ static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, its_free_ite(kvm, ite); - return vgic_its_write_entry_lock(its, gpa, 0, ite_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, ite); } return E_ITS_DISCARD_UNMAPPED_INTERRUPT; @@ -1143,7 +1178,6 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, bool valid = its_cmd_get_validbit(its_cmd); u8 num_eventid_bits = its_cmd_get_size(its_cmd); gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); - int dte_esz = vgic_its_get_abi(its)->dte_esz; struct its_device *device; gpa_t gpa; @@ -1168,7 +1202,7 @@ static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, * is an error, so we are done in any case. */ if (!valid) - return vgic_its_write_entry_lock(its, gpa, 0, dte_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, dte); device = vgic_its_alloc_device(its, device_id, itt_addr, num_eventid_bits); @@ -2090,7 +2124,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, - struct its_ite *ite, gpa_t gpa, int ite_esz) + struct its_ite *ite, gpa_t gpa) { u32 next_offset; u64 val; @@ -2101,7 +2135,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, ite->collection->collection_id; val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, gpa, val, ite_esz); + return vgic_its_write_entry_lock(its, gpa, val, ite); } /** @@ -2201,7 +2235,7 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) if (ite->irq->hw && !kvm_vgic_global_state.has_gicv4_1) return -EACCES; - ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + ret = vgic_its_save_ite(its, device, ite, gpa); if (ret) return ret; } @@ -2240,10 +2274,9 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA - * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, - gpa_t ptr, int dte_esz) + gpa_t ptr) { u64 val, itt_addr_field; u32 next_offset; @@ -2256,7 +2289,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, (dev->num_eventid_bits - 1)); val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, ptr, val, dte_esz); + return vgic_its_write_entry_lock(its, ptr, val, dte); } /** @@ -2332,10 +2365,8 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, */ static int vgic_its_save_device_tables(struct vgic_its *its) { - const struct vgic_its_abi *abi = vgic_its_get_abi(its); u64 baser = its->baser_device_table; struct its_device *dev; - int dte_esz = abi->dte_esz; if (!(baser & GITS_BASER_VALID)) return 0; @@ -2354,7 +2385,7 @@ static int vgic_its_save_device_tables(struct vgic_its *its) if (ret) return ret; - ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + ret = vgic_its_save_dte(its, dev, eaddr); if (ret) return ret; } @@ -2435,7 +2466,7 @@ static int vgic_its_restore_device_tables(struct vgic_its *its) static int vgic_its_save_cte(struct vgic_its *its, struct its_collection *collection, - gpa_t gpa, int esz) + gpa_t gpa) { u64 val; @@ -2444,7 +2475,7 @@ static int vgic_its_save_cte(struct vgic_its *its, collection->collection_id); val = cpu_to_le64(val); - return vgic_its_write_entry_lock(its, gpa, val, esz); + return vgic_its_write_entry_lock(its, gpa, val, cte); } /* @@ -2452,7 +2483,7 @@ static int vgic_its_save_cte(struct vgic_its *its, * Return +1 on success, 0 if the entry was invalid (which should be * interpreted as end-of-table), and a negative error value for generic errors. */ -static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa) { struct its_collection *collection; struct kvm *kvm = its->dev->kvm; @@ -2460,7 +2491,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) u64 val; int ret; - ret = vgic_its_read_entry_lock(its, gpa, &val, esz); + ret = vgic_its_read_entry_lock(its, gpa, &val, cte); if (ret) return ret; val = le64_to_cpu(val); @@ -2507,7 +2538,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; list_for_each_entry(collection, &its->collection_list, coll_list) { - ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + ret = vgic_its_save_cte(its, collection, gpa); if (ret) return ret; gpa += cte_esz; @@ -2521,7 +2552,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) * table is not fully filled, add a last dummy element * with valid bit unset */ - return vgic_its_write_entry_lock(its, gpa, 0, cte_esz); + return vgic_its_write_entry_lock(its, gpa, 0ULL, cte); } /* @@ -2546,7 +2577,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; while (read < max_size) { - ret = vgic_its_restore_cte(its, gpa, cte_esz); + ret = vgic_its_restore_cte(its, gpa); if (ret <= 0) break; gpa += cte_esz; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 8290f3276cf07..122d95b4e2845 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -146,29 +146,6 @@ static inline int vgic_write_guest_lock(struct kvm *kvm, gpa_t gpa, return ret; } -static inline int vgic_its_read_entry_lock(struct vgic_its *its, gpa_t eaddr, - u64 *eval, unsigned long esize) -{ - struct kvm *kvm = its->dev->kvm; - - if (KVM_BUG_ON(esize != sizeof(*eval), kvm)) - return -EINVAL; - - return kvm_read_guest_lock(kvm, eaddr, eval, esize); - -} - -static inline int vgic_its_write_entry_lock(struct vgic_its *its, gpa_t eaddr, - u64 eval, unsigned long esize) -{ - struct kvm *kvm = its->dev->kvm; - - if (KVM_BUG_ON(esize != sizeof(eval), kvm)) - return -EINVAL; - - return vgic_write_guest_lock(kvm, eaddr, &eval, esize); -} - /* * This struct provides an intermediate representation of the fields contained * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC From 0f3a0f23f5621b9a5a28c9235c950caf6e2012d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 20 Nov 2024 11:15:16 +0000 Subject: [PATCH 05/11] KVM: arm64: Mark set_sysreg_masks() as inline to avoid build failure When compiling with CONFIG_CC_OPTIMIZE_FOR_SIZE=y, set_sysreg_masks() fails to compile thanks to: BUILD_BUG_ON(!__builtin_constant_p(sr)); as the compiler doesn't identify sr as a constant, despite all the callers passing constants. Fix the issue by always inlining this function, which allows GCC to do the right thing. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202411201857.ZNudtGJl-lkp@intel.com/ Fixes: a0162020095e2 ("KVM: arm64: Extend masking facility to arbitrary registers") Signed-off-by: Marc Zyngier Reviewed-by: Joey Gouly Link: https://lore.kernel.org/r/20241120111516.304250-1-maz@kernel.org Signed-off-by: Oliver Upton --- arch/arm64/kvm/nested.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index aeaa6017ffd89..9b36218b48def 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -951,7 +951,7 @@ u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu, return v; } -static void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) +static __always_inline void set_sysreg_masks(struct kvm *kvm, int sr, u64 res0, u64 res1) { int i = sr - __SANITISED_REG_START__; From 54bbee190d42166209185d89070c58a343bf514b Mon Sep 17 00:00:00 2001 From: Raghavendra Rao Ananta Date: Tue, 19 Nov 2024 16:52:29 -0800 Subject: [PATCH 06/11] KVM: arm64: Ignore PMCNTENSET_EL0 while checking for overflow status DDI0487K.a D13.3.1 describes the PMU overflow condition, which evaluates to true if any counter's global enable (PMCR_EL0.E), overflow flag (PMOVSSET_EL0[n]), and interrupt enable (PMINTENSET_EL1[n]) are all 1. Of note, this does not require a counter to be enabled (i.e. PMCNTENSET_EL0[n] = 1) to generate an overflow. Align kvm_pmu_overflow_status() with the reality of the architecture and stop using PMCNTENSET_EL0 as part of the overflow condition. The bug was discovered while running an SBSA PMU test [*], which only sets PMCR.E, PMOVSSET<0>, PMINTENSET<0>, and expects an overflow interrupt. Cc: stable@vger.kernel.org Fixes: 76d883c4e640 ("arm64: KVM: Add access handler for PMOVSSET and PMOVSCLR register") Link: https://github.com/ARM-software/sbsa-acs/blob/master/test_pool/pmu/operating_system/test_pmu001.c Signed-off-by: Raghavendra Rao Ananta [ oliver: massaged changelog ] Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20241120005230.2335682-2-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/pmu-emul.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 8ad62284fa230..3855cc9d0ca52 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -381,7 +381,6 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); } From 13905f4547b050316262d54a5391d50e83ce613a Mon Sep 17 00:00:00 2001 From: Oliver Upton Date: Tue, 19 Nov 2024 16:52:30 -0800 Subject: [PATCH 07/11] KVM: arm64: Use MDCR_EL2.HPME to evaluate overflow of hyp counters The 'global enable control' (as it is termed in the architecture) for counters reserved by EL2 is MDCR_EL2.HPME. Use that instead of PMCR_EL0.E when evaluating the overflow state for hyp counters. Change the return value to a bool while at it, which better reflects the fact that the overflow state is a shared signal and not a per-counter property. Reviewed-by: Marc Zyngier Link: https://lore.kernel.org/r/20241120005230.2335682-3-oliver.upton@linux.dev Signed-off-by: Oliver Upton --- arch/arm64/kvm/pmu-emul.c | 61 +++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 3855cc9d0ca52..456102bc0b555 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -274,12 +274,23 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) irq_work_sync(&vcpu->arch.pmu.overflow_work); } -bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) +static u64 kvm_pmu_hyp_counter_mask(struct kvm_vcpu *vcpu) { - unsigned int hpmn; + unsigned int hpmn, n; - if (!vcpu_has_nv(vcpu) || idx == ARMV8_PMU_CYCLE_IDX) - return false; + if (!vcpu_has_nv(vcpu)) + return 0; + + hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); + n = vcpu->kvm->arch.pmcr_n; + + /* + * Programming HPMN to a value greater than PMCR_EL0.N is + * CONSTRAINED UNPREDICTABLE. Make the implementation choice that an + * UNKNOWN number of counters (in our case, zero) are reserved for EL2. + */ + if (hpmn >= n) + return 0; /* * Programming HPMN=0 is CONSTRAINED UNPREDICTABLE if FEAT_HPMN0 isn't @@ -288,20 +299,22 @@ bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) * implementation choice that all counters are included in the second * range reserved for EL2/EL3. */ - hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - return idx >= hpmn; + return GENMASK(n - 1, hpmn); +} + +bool kvm_pmu_counter_is_hyp(struct kvm_vcpu *vcpu, unsigned int idx) +{ + return kvm_pmu_hyp_counter_mask(vcpu) & BIT(idx); } u64 kvm_pmu_accessible_counter_mask(struct kvm_vcpu *vcpu) { u64 mask = kvm_pmu_implemented_counter_mask(vcpu); - u64 hpmn; if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu)) return mask; - hpmn = SYS_FIELD_GET(MDCR_EL2, HPMN, __vcpu_sys_reg(vcpu, MDCR_EL2)); - return mask & ~GENMASK(vcpu->kvm->arch.pmcr_n - 1, hpmn); + return mask & ~kvm_pmu_hyp_counter_mask(vcpu); } u64 kvm_pmu_implemented_counter_mask(struct kvm_vcpu *vcpu) @@ -375,14 +388,30 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) } } -static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) +/* + * Returns the PMU overflow state, which is true if there exists an event + * counter where the values of the global enable control, PMOVSSET_EL0[n], and + * PMINTENSET_EL1[n] are all 1. + */ +static bool kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) { - u64 reg = 0; + u64 reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - if ((kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) { - reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); - reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); - } + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); + + /* + * PMCR_EL0.E is the global enable control for event counters available + * to EL0 and EL1. + */ + if (!(kvm_vcpu_read_pmcr(vcpu) & ARMV8_PMU_PMCR_E)) + reg &= kvm_pmu_hyp_counter_mask(vcpu); + + /* + * Otherwise, MDCR_EL2.HPME is the global enable control for event + * counters reserved for EL2. + */ + if (!(vcpu_read_sys_reg(vcpu, MDCR_EL2) & MDCR_EL2_HPME)) + reg &= ~kvm_pmu_hyp_counter_mask(vcpu); return reg; } @@ -395,7 +424,7 @@ static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) if (!kvm_vcpu_has_pmu(vcpu)) return; - overflow = !!kvm_pmu_overflow_status(vcpu); + overflow = kvm_pmu_overflow_status(vcpu); if (pmu->irq_level == overflow) return; From 94a7734d0967e89fac5be1fd5115f5194e4a4017 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 26 Jul 2024 16:49:26 +0800 Subject: [PATCH 08/11] RISC-V: Add Svade and Svadu Extensions Support Svade and Svadu extensions represent two schemes for managing the PTE A/D bits. When the PTE A/D bits need to be set, Svade extension intdicates that a related page fault will be raised. In contrast, the Svadu extension supports hardware updating of PTE A/D bits. Since the Svade extension is mandatory and the Svadu extension is optional in RVA23 profile, by default the M-mode firmware will enable the Svadu extension in the menvcfg CSR when only Svadu is present in DT. This patch detects Svade and Svadu extensions from DT and adds arch_has_hw_pte_young() to enable optimization in MGLRU and __wp_page_copy_user() when we have the PTE A/D bits hardware updating support. Co-developed-by: Jinyu Tang Signed-off-by: Jinyu Tang Signed-off-by: Yong-Xuan Wang Reviewed-by: Andrew Jones Reviewed-by: Alexandre Ghiti Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20240726084931.28924-2-yongxuan.wang@sifive.com Signed-off-by: Anup Patel --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/csr.h | 1 + arch/riscv/include/asm/hwcap.h | 2 ++ arch/riscv/include/asm/pgtable.h | 13 ++++++++++++- arch/riscv/kernel/cpufeature.c | 12 ++++++++++++ 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62545946ecf43..2e499918e15ea 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -32,6 +32,7 @@ config RISCV select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_HW_PTE_YOUNG select ARCH_HAS_KCOV select ARCH_HAS_KERNEL_FPU_SUPPORT if 64BIT && FPU select ARCH_HAS_MEMBARRIER_CALLBACKS diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 25966995da04e..524cd4131c713 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -195,6 +195,7 @@ /* xENVCFG flags */ #define ENVCFG_STCE (_AC(1, ULL) << 63) #define ENVCFG_PBMTE (_AC(1, ULL) << 62) +#define ENVCFG_ADUE (_AC(1, ULL) << 61) #define ENVCFG_CBZE (_AC(1, UL) << 7) #define ENVCFG_CBCFE (_AC(1, UL) << 6) #define ENVCFG_CBIE_SHIFT 4 diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index 46d9de54179ed..7f72789ba3d5d 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -93,6 +93,8 @@ #define RISCV_ISA_EXT_ZCMOP 84 #define RISCV_ISA_EXT_ZAWRS 85 #define RISCV_ISA_EXT_SVVPTC 86 +#define RISCV_ISA_EXT_SVADE 87 +#define RISCV_ISA_EXT_SVADU 88 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e79f15293492d..02aa7ee6def8b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -113,6 +113,7 @@ #include #include #include +#include #define __page_val_to_pfn(_val) (((_val) & _PAGE_PFN_MASK) >> _PAGE_PFN_SHIFT) @@ -284,7 +285,6 @@ static inline pte_t pud_pte(pud_t pud) } #ifdef CONFIG_RISCV_ISA_SVNAPOT -#include static __always_inline bool has_svnapot(void) { @@ -655,6 +655,17 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +/* + * Both Svade and Svadu control the hardware behavior when the PTE A/D bits need to be set. By + * default the M-mode firmware enables the hardware updating scheme when only Svadu is present in + * DT. + */ +#define arch_has_hw_pte_young arch_has_hw_pte_young +static inline bool arch_has_hw_pte_young(void) +{ + return riscv_has_extension_unlikely(RISCV_ISA_EXT_SVADU); +} + /* * THP functions */ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 3a8eeaa9310c3..06ca264d48101 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -132,6 +132,16 @@ static int riscv_ext_zcf_validate(const struct riscv_isa_ext_data *data, return -EPROBE_DEFER; } +static int riscv_ext_svadu_validate(const struct riscv_isa_ext_data *data, + const unsigned long *isa_bitmap) +{ + /* SVADE has already been detected, use SVADE only */ + if (__riscv_isa_extension_available(isa_bitmap, RISCV_ISA_EXT_SVADE)) + return -EOPNOTSUPP; + + return 0; +} + static const unsigned int riscv_zk_bundled_exts[] = { RISCV_ISA_EXT_ZBKB, RISCV_ISA_EXT_ZBKC, @@ -378,6 +388,8 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(ssaia, RISCV_ISA_EXT_SSAIA), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), + __RISCV_ISA_EXT_DATA(svade, RISCV_ISA_EXT_SVADE), + __RISCV_ISA_EXT_DATA_VALIDATE(svadu, RISCV_ISA_EXT_SVADU, riscv_ext_svadu_validate), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), From b8d481671703c4ba24bb250a99225e0e3d8aedac Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 26 Jul 2024 16:49:27 +0800 Subject: [PATCH 09/11] dt-bindings: riscv: Add Svade and Svadu Entries Add entries for the Svade and Svadu extensions to the riscv,isa-extensions property. Signed-off-by: Yong-Xuan Wang Acked-by: Conor Dooley Reviewed-by: Alexandre Ghiti Reviewed-by: Samuel Holland Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20240726084931.28924-3-yongxuan.wang@sifive.com Signed-off-by: Anup Patel --- .../devicetree/bindings/riscv/extensions.yaml | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/Documentation/devicetree/bindings/riscv/extensions.yaml b/Documentation/devicetree/bindings/riscv/extensions.yaml index 2cf2026cff574..c5a8d154bb24e 100644 --- a/Documentation/devicetree/bindings/riscv/extensions.yaml +++ b/Documentation/devicetree/bindings/riscv/extensions.yaml @@ -153,6 +153,34 @@ properties: ratified at commit 3f9ed34 ("Add ability to manually trigger workflow. (#2)") of riscv-time-compare. + - const: svade + description: | + The standard Svade supervisor-level extension for SW-managed PTE A/D + bit updates as ratified in the 20240213 version of the privileged + ISA specification. + + Both Svade and Svadu extensions control the hardware behavior when + the PTE A/D bits need to be set. The default behavior for the four + possible combinations of these extensions in the device tree are: + 1) Neither Svade nor Svadu present in DT => It is technically + unknown whether the platform uses Svade or Svadu. Supervisor + software should be prepared to handle either hardware updating + of the PTE A/D bits or page faults when they need updated. + 2) Only Svade present in DT => Supervisor must assume Svade to be + always enabled. + 3) Only Svadu present in DT => Supervisor must assume Svadu to be + always enabled. + 4) Both Svade and Svadu present in DT => Supervisor must assume + Svadu turned-off at boot time. To use Svadu, supervisor must + explicitly enable it using the SBI FWFT extension. + + - const: svadu + description: | + The standard Svadu supervisor-level extension for hardware updating + of PTE A/D bits as ratified in the 20240528 version of the + privileged ISA specification. Please refer to Svade dt-binding + description for more details. + - const: svinval description: The standard Svinval supervisor-level extension for fine-grained From 97eccf7db4f2e5e59d16bca45f7803ae3aeff6e1 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 26 Jul 2024 16:49:28 +0800 Subject: [PATCH 10/11] RISC-V: KVM: Add Svade and Svadu Extensions Support for Guest/VM We extend the KVM ISA extension ONE_REG interface to allow VMM tools to detect and enable Svade and Svadu extensions for Guest/VM. Since the henvcfg.ADUE is read-only zero if the menvcfg.ADUE is zero, the Svadu extension is available for Guest/VM and the Svade extension is allowed to disabledonly when arch_has_hw_pte_young() is true. Signed-off-by: Yong-Xuan Wang Reviewed-by: Andrew Jones Reviewed-by: Samuel Holland Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20240726084931.28924-4-yongxuan.wang@sifive.com Signed-off-by: Anup Patel --- arch/riscv/include/uapi/asm/kvm.h | 2 ++ arch/riscv/kvm/vcpu.c | 4 ++++ arch/riscv/kvm/vcpu_onereg.c | 15 +++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index e97db3296456e..85bbc472989df 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -175,6 +175,8 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZCF, KVM_RISCV_ISA_EXT_ZCMOP, KVM_RISCV_ISA_EXT_ZAWRS, + KVM_RISCV_ISA_EXT_SVADE, + KVM_RISCV_ISA_EXT_SVADU, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index dc3f76f6e46ce..e048dcc6e65e7 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -551,6 +551,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, ZICBOZ)) cfg->henvcfg |= ENVCFG_CBZE; + if (riscv_isa_extension_available(isa, SVADU) && + !riscv_isa_extension_available(isa, SVADE)) + cfg->henvcfg |= ENVCFG_ADUE; + if (riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN)) { cfg->hstateen0 |= SMSTATEEN0_HSENVCFG; if (riscv_isa_extension_available(isa, SSAIA)) diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index b319c4c13c54c..b3f58908902a7 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #define KVM_RISCV_BASE_ISA_MASK GENMASK(25, 0) @@ -38,6 +39,8 @@ static const unsigned long kvm_isa_ext_arr[] = { KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), @@ -110,6 +113,12 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_SSCOFPMF: /* Sscofpmf depends on interrupt filtering defined in ssaia */ return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); + case KVM_RISCV_ISA_EXT_SVADU: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Guest OS can use Svadu only when host OS enable Svadu. + */ + return arch_has_hw_pte_young(); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -181,6 +190,12 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) /* Extensions which can be disabled using Smstateen */ case KVM_RISCV_ISA_EXT_SSAIA: return riscv_has_extension_unlikely(RISCV_ISA_EXT_SMSTATEEN); + case KVM_RISCV_ISA_EXT_SVADE: + /* + * The henvcfg.ADUE is read-only zero if menvcfg.ADUE is zero. + * Svade is not allowed to disable when the platform use Svade. + */ + return arch_has_hw_pte_young(); default: break; } From c74bfe4ffe8c1ca94e3d60ec7af06cf679e23583 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Fri, 26 Jul 2024 16:49:30 +0800 Subject: [PATCH 11/11] KVM: riscv: selftests: Add Svade and Svadu Extension to get-reg-list test Update the get-reg-list test to test the Svade and Svadu Extensions are available for guest OS. Signed-off-by: Yong-Xuan Wang Reviewed-by: Andrew Jones Acked-by: Palmer Dabbelt Link: https://lore.kernel.org/r/20240726084931.28924-6-yongxuan.wang@sifive.com Signed-off-by: Anup Patel --- tools/testing/selftests/kvm/riscv/get-reg-list.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index 8e34f7fa44e94..aac40652e1817 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -45,6 +45,8 @@ bool filter_reg(__u64 reg) case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADE: + case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVADU: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT: case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT: @@ -418,6 +420,8 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off) KVM_ISA_EXT_ARR(SSAIA), KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), + KVM_ISA_EXT_ARR(SVADE), + KVM_ISA_EXT_ARR(SVADU), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), KVM_ISA_EXT_ARR(SVPBMT), @@ -949,6 +953,8 @@ KVM_ISA_EXT_SIMPLE_CONFIG(h, H); KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN); KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF); KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC); +KVM_ISA_EXT_SIMPLE_CONFIG(svade, SVADE); +KVM_ISA_EXT_SIMPLE_CONFIG(svadu, SVADU); KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL); KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT); KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT); @@ -1012,6 +1018,8 @@ struct vcpu_reg_list *vcpu_configs[] = { &config_smstateen, &config_sscofpmf, &config_sstc, + &config_svade, + &config_svadu, &config_svinval, &config_svnapot, &config_svpbmt,