diff --git a/Documentation/virt/kvm/devices/arm-vgic-v3.rst b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
index 5817edb4e0467..e860498b1e359 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-v3.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-v3.rst
@@ -291,8 +291,18 @@ Groups:
       |    Aff3    |    Aff2    |    Aff1    |    Aff0    |
 
   Errors:
 
     =======  =============================================
     -EINVAL  vINTID is not multiple of 32 or info field is
              not VGIC_LEVEL_INFO_LINE_LEVEL
     =======  =============================================
+
+  KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ
+   Attributes:
+
+    The attr field of kvm_device_attr encodes the following values:
+
+      bits:   | 31  ....  5 | 4 .... 0 |
+      values: |    RES0     |  vINTID  |
+
+    The vINTID specifies which interrupt is generated when the vGIC
+    must generate a maintenance interrupt. This must be a PPI.
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 78ec1ef2cfe82..d4e4212ff5967 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -275,6 +275,19 @@ static __always_inline u64 kvm_vcpu_get_esr(const struct kvm_vcpu *vcpu)
 	return vcpu->arch.fault.esr_el2;
 }
 
+static inline bool guest_hyp_wfx_traps_enabled(const struct kvm_vcpu *vcpu)
+{
+	u64 esr = kvm_vcpu_get_esr(vcpu);
+	bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
+	u64 hcr_el2 = __vcpu_sys_reg(vcpu, HCR_EL2);
+
+	if (!vcpu_has_nv(vcpu) || vcpu_is_el2(vcpu))
+		return false;
+
+	return ((is_wfe && (hcr_el2 & HCR_TWE)) ||
+		(!is_wfe && (hcr_el2 & HCR_TWI)));
+}
+
 static __always_inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu)
 {
 	u64 esr = kvm_vcpu_get_esr(vcpu);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d919557af5e50..a316af8f1e0d5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,14 +44,15 @@
 
 #define KVM_REQ_SLEEP \
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_IRQ_PENDING	KVM_ARCH_REQ(1)
-#define KVM_REQ_VCPU_RESET	KVM_ARCH_REQ(2)
-#define KVM_REQ_RECORD_STEAL	KVM_ARCH_REQ(3)
-#define KVM_REQ_RELOAD_GICv4	KVM_ARCH_REQ(4)
-#define KVM_REQ_RELOAD_PMU	KVM_ARCH_REQ(5)
-#define KVM_REQ_SUSPEND		KVM_ARCH_REQ(6)
-#define KVM_REQ_RESYNC_PMU_EL0	KVM_ARCH_REQ(7)
-#define KVM_REQ_NESTED_S2_UNMAP	KVM_ARCH_REQ(8)
+#define KVM_REQ_IRQ_PENDING		KVM_ARCH_REQ(1)
+#define KVM_REQ_VCPU_RESET		KVM_ARCH_REQ(2)
+#define KVM_REQ_RECORD_STEAL		KVM_ARCH_REQ(3)
+#define KVM_REQ_RELOAD_GICv4		KVM_ARCH_REQ(4)
+#define KVM_REQ_RELOAD_PMU		KVM_ARCH_REQ(5)
+#define KVM_REQ_SUSPEND			KVM_ARCH_REQ(6)
+#define KVM_REQ_RESYNC_PMU_EL0		KVM_ARCH_REQ(7)
+#define KVM_REQ_NESTED_S2_UNMAP		KVM_ARCH_REQ(8)
+#define KVM_REQ_GUEST_HYP_IRQ_PENDING	KVM_ARCH_REQ(9)
 
 #define KVM_DIRTY_LOG_MANUAL_CAPS   (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
 				     KVM_DIRTY_LOG_INITIALLY_SET)
@@ -557,7 +558,33 @@ enum vcpu_sysreg {
 	VNCR(CNTP_CVAL_EL0),
 	VNCR(CNTP_CTL_EL0),
 
+	VNCR(ICH_LR0_EL2),
+	VNCR(ICH_LR1_EL2),
+	VNCR(ICH_LR2_EL2),
+	VNCR(ICH_LR3_EL2),
+	VNCR(ICH_LR4_EL2),
+	VNCR(ICH_LR5_EL2),
+	VNCR(ICH_LR6_EL2),
+	VNCR(ICH_LR7_EL2),
+	VNCR(ICH_LR8_EL2),
+	VNCR(ICH_LR9_EL2),
+	VNCR(ICH_LR10_EL2),
+	VNCR(ICH_LR11_EL2),
+	VNCR(ICH_LR12_EL2),
+	VNCR(ICH_LR13_EL2),
+	VNCR(ICH_LR14_EL2),
+	VNCR(ICH_LR15_EL2),
+
+	VNCR(ICH_AP0R0_EL2),
+	VNCR(ICH_AP0R1_EL2),
+	VNCR(ICH_AP0R2_EL2),
+	VNCR(ICH_AP0R3_EL2),
+	VNCR(ICH_AP1R0_EL2),
+	VNCR(ICH_AP1R1_EL2),
+	VNCR(ICH_AP1R2_EL2),
+	VNCR(ICH_AP1R3_EL2),
 	VNCR(ICH_HCR_EL2),
+	VNCR(ICH_VMCR_EL2),
 
 	NR_SYS_REGS	/* Nothing after this line! */
 };
@@ -919,6 +946,8 @@ struct kvm_vcpu_arch {
 #define PMUSERENR_ON_CPU	__vcpu_single_flag(sflags, BIT(5))
 /* WFI instruction trapped */
 #define IN_WFI			__vcpu_single_flag(sflags, BIT(6))
+/* KVM is currently emulating a nested ERET */
+#define IN_NESTED_ERET		__vcpu_single_flag(sflags, BIT(7))
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
 
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index c838309e4ec47..e6be1f5d0967f 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -76,6 +76,8 @@ DECLARE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
 
+u64 __gic_v3_get_lr(unsigned int lr);
+
 void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if);
 void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if);
 void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if);
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 05ea5223d2d55..9511f3faac462 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -562,9 +562,6 @@
 #define SYS_ICH_VSEIR_EL2		sys_reg(3, 4, 12, 9, 4)
 #define SYS_ICC_SRE_EL2			sys_reg(3, 4, 12, 9, 5)
 
-#define SYS_ICH_HCR_EL2			sys_reg(3, 4, 12, 11, 0)
-#define SYS_ICH_VTR_EL2			sys_reg(3, 4, 12, 11, 1)
-#define SYS_ICH_MISR_EL2		sys_reg(3, 4, 12, 11, 2)
 #define SYS_ICH_EISR_EL2		sys_reg(3, 4, 12, 11, 3)
 #define SYS_ICH_ELRSR_EL2		sys_reg(3, 4, 12, 11, 5)
 #define SYS_ICH_VMCR_EL2		sys_reg(3, 4, 12, 11, 7)
@@ -985,10 +982,6 @@
 #define SYS_MPIDR_SAFE_VAL	(BIT(31))
 
 /* GIC Hypervisor interface registers */
-/* ICH_MISR_EL2 bit definitions */
-#define ICH_MISR_EOI		(1 << 0)
-#define ICH_MISR_U		(1 << 1)
-
 /* ICH_LR*_EL2 bit definitions */
 #define ICH_LR_VIRTUAL_ID_MASK	((1ULL << 32) - 1)
 
@@ -1003,17 +996,6 @@
 #define ICH_LR_PRIORITY_SHIFT	48
 #define ICH_LR_PRIORITY_MASK	(0xffULL << ICH_LR_PRIORITY_SHIFT)
 
-/* ICH_HCR_EL2 bit definitions */
-#define ICH_HCR_EN		(1 << 0)
-#define ICH_HCR_UIE		(1 << 1)
-#define ICH_HCR_NPIE		(1 << 3)
-#define ICH_HCR_TC		(1 << 10)
-#define ICH_HCR_TALL0		(1 << 11)
-#define ICH_HCR_TALL1		(1 << 12)
-#define ICH_HCR_TDIR		(1 << 14)
-#define ICH_HCR_EOIcount_SHIFT	27
-#define ICH_HCR_EOIcount_MASK	(0x1f << ICH_HCR_EOIcount_SHIFT)
-
 /* ICH_VMCR_EL2 bit definitions */
 #define ICH_VMCR_ACK_CTL_SHIFT	2
 #define ICH_VMCR_ACK_CTL_MASK	(1 << ICH_VMCR_ACK_CTL_SHIFT)
@@ -1034,18 +1016,6 @@
 #define ICH_VMCR_ENG1_SHIFT	1
 #define ICH_VMCR_ENG1_MASK	(1 << ICH_VMCR_ENG1_SHIFT)
 
-/* ICH_VTR_EL2 bit definitions */
-#define ICH_VTR_PRI_BITS_SHIFT	29
-#define ICH_VTR_PRI_BITS_MASK	(7 << ICH_VTR_PRI_BITS_SHIFT)
-#define ICH_VTR_ID_BITS_SHIFT	23
-#define ICH_VTR_ID_BITS_MASK	(7 << ICH_VTR_ID_BITS_SHIFT)
-#define ICH_VTR_SEIS_SHIFT	22
-#define ICH_VTR_SEIS_MASK	(1 << ICH_VTR_SEIS_SHIFT)
-#define ICH_VTR_A3V_SHIFT	21
-#define ICH_VTR_A3V_MASK	(1 << ICH_VTR_A3V_SHIFT)
-#define ICH_VTR_TDS_SHIFT	19
-#define ICH_VTR_TDS_MASK	(1 << ICH_VTR_TDS_SHIFT)
-
 /*
  * Permission Indirection Extension (PIE) permission encodings.
  * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
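A note on how the new device group is driven from userspace, since the pieces
are spread across this patch: the group number is defined in the uapi header
below, handled in vgic-kvm-device.c, and given an SBSA default in vgic-init.c.
The following is a minimal userspace sketch, not part of the patch: the
vgic_fd variable is an assumption (a KVM_DEV_TYPE_ARM_VGIC_V3 device fd), and
the INTID travels through kvm_device_attr.addr as a u32, matching the
get_user()/put_user() pair in vgic_v3_attr_regs_access().

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Illustrative only: program the maintenance interrupt INTID. The
 * MAINT_IRQ group sets post_init = false, so KVM only accepts this
 * before the vgic is initialised (-EBUSY afterwards). The value must
 * be a PPI: VGIC_NR_SGIS (16) <= intid < VGIC_NR_PRIVATE_IRQS (32).
 */
static int vgic_set_maint_irq(int vgic_fd, __u32 intid)
{
	struct kvm_device_attr attr = {
		.group = KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ,
		.attr  = 0,
		.addr  = (__u64)(unsigned long)&intid,
	};

	return ioctl(vgic_fd, KVM_SET_DEVICE_ATTR, &attr);
}
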
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 568bf858f3198..fc5a641b3ed6c 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -403,6 +403,7 @@ enum {
 #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS	6
 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO		7
 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS		8
+#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ		9
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT	10
 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
 			(0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 3cf7adb2b5038..209bc76263f10 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,7 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
	 vgic/vgic-v3.o vgic/vgic-v4.o \
	 vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \
	 vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \
-	 vgic/vgic-its.o vgic/vgic-debug.o
+	 vgic/vgic-its.o vgic/vgic-debug.o vgic/vgic-v3-nested.o
 
 kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
 kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f2a65470349a1..f906fa3297cac 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -590,8 +590,12 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 nommu:
 	vcpu->cpu = cpu;
 
-	kvm_vgic_load(vcpu);
+	/*
+	 * The timer must be loaded before the vgic to correctly set up physical
+	 * interrupt deactivation in nested state (e.g. timer interrupt).
+	 */
 	kvm_timer_vcpu_load(vcpu);
+	kvm_vgic_load(vcpu);
 	kvm_vcpu_load_debug(vcpu);
 	if (has_vhe())
 		kvm_vcpu_load_vhe(vcpu);
@@ -829,6 +833,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+	if (vcpu_has_nv(vcpu)) {
+		ret = kvm_vgic_vcpu_nv_init(vcpu);
+		if (ret)
+			return ret;
+	}
+
 	/*
	 * This needs to happen after any restriction has been applied
	 * to the feature set.
@@ -2311,6 +2321,13 @@ static int __init init_subsystems(void)
 		goto out;
 	}
 
+	if (kvm_mode == KVM_MODE_NV &&
+	    !(vgic_present && kvm_vgic_global_state.type == VGIC_V3)) {
+		kvm_err("NV support requires GICv3, giving up\n");
+		err = -EINVAL;
+		goto out;
+	}
+
 	/*
	 * Init HYP architected timer support
	 */
diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 607d37bab70b4..834c587500699 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -412,26 +412,26 @@ static const struct trap_bits coarse_trap_bits[] = {
 	},
 	[CGT_ICH_HCR_TC] = {
 		.index		= ICH_HCR_EL2,
-		.value		= ICH_HCR_TC,
-		.mask		= ICH_HCR_TC,
+		.value		= ICH_HCR_EL2_TC,
+		.mask		= ICH_HCR_EL2_TC,
 		.behaviour	= BEHAVE_FORWARD_RW,
 	},
 	[CGT_ICH_HCR_TALL0] = {
 		.index		= ICH_HCR_EL2,
-		.value		= ICH_HCR_TALL0,
-		.mask		= ICH_HCR_TALL0,
+		.value		= ICH_HCR_EL2_TALL0,
+		.mask		= ICH_HCR_EL2_TALL0,
 		.behaviour	= BEHAVE_FORWARD_RW,
 	},
 	[CGT_ICH_HCR_TALL1] = {
 		.index		= ICH_HCR_EL2,
-		.value		= ICH_HCR_TALL1,
-		.mask		= ICH_HCR_TALL1,
+		.value		= ICH_HCR_EL2_TALL1,
+		.mask		= ICH_HCR_EL2_TALL1,
 		.behaviour	= BEHAVE_FORWARD_RW,
 	},
 	[CGT_ICH_HCR_TDIR] = {
 		.index		= ICH_HCR_EL2,
-		.value		= ICH_HCR_TDIR,
-		.mask		= ICH_HCR_TDIR,
+		.value		= ICH_HCR_EL2_TDIR,
+		.mask		= ICH_HCR_EL2_TDIR,
 		.behaviour	= BEHAVE_FORWARD_RW,
 	},
 };
@@ -2503,6 +2503,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	}
 
 	preempt_disable();
+	vcpu_set_flag(vcpu, IN_NESTED_ERET);
 	kvm_arch_vcpu_put(vcpu);
 
 	if (!esr_iss_is_eretax(esr))
@@ -2514,6 +2515,7 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	*vcpu_cpsr(vcpu) = spsr;
 
 	kvm_arch_vcpu_load(vcpu, smp_processor_id());
+	vcpu_clear_flag(vcpu, IN_NESTED_ERET);
 	preempt_enable();
 
 	kvm_pmu_nested_transition(vcpu);
diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 512d152233ff2..b73dc26bc44b4 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -129,8 +129,12 @@ static int kvm_handle_fpasimd(struct kvm_vcpu *vcpu)
 static int kvm_handle_wfx(struct kvm_vcpu *vcpu)
 {
 	u64 esr = kvm_vcpu_get_esr(vcpu);
+	bool is_wfe = !!(esr & ESR_ELx_WFx_ISS_WFE);
 
-	if (esr & ESR_ELx_WFx_ISS_WFE) {
+	if (guest_hyp_wfx_traps_enabled(vcpu))
+		return kvm_inject_nested_sync(vcpu, kvm_vcpu_get_esr(vcpu));
+
+	if (is_wfe) {
 		trace_kvm_wfx_arm64(*vcpu_pc(vcpu), true);
 		vcpu->stat.wfe_exit_stat++;
 	} else {
diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
index 3f9741e51d41b..ed363aa3027e5 100644
--- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
+++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
@@ -18,7 +18,7 @@
 #define vtr_to_nr_pre_bits(v)	((((u32)(v) >> 26) & 7) + 1)
 #define vtr_to_nr_apr_regs(v)	(1 << (vtr_to_nr_pre_bits(v) - 5))
 
-static u64 __gic_v3_get_lr(unsigned int lr)
+u64 __gic_v3_get_lr(unsigned int lr)
 {
 	switch (lr & 0xf) {
 	case 0:
@@ -218,7 +218,7 @@ void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if)
 
 		elrsr = read_gicreg(ICH_ELRSR_EL2);
 
-		write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2);
+		write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EL2_En, ICH_HCR_EL2);
 
 		for (i = 0; i < used_lrs; i++) {
 			if (elrsr & (1 << i))
@@ -274,7 +274,7 @@ void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if)
	 * system registers to trap to EL1 (duh), force ICC_SRE_EL1.SRE to 1
	 * so that the trap bits can take effect. Yes, we *loves* the GIC.
	 */
-	if (!(cpu_if->vgic_hcr & ICH_HCR_EN)) {
+	if (!(cpu_if->vgic_hcr & ICH_HCR_EL2_En)) {
 		write_gicreg(ICC_SRE_EL1_SRE, ICC_SRE_EL1);
 		isb();
 	} else if (!cpu_if->vgic_sre) {
@@ -752,7 +752,7 @@ static void __vgic_v3_bump_eoicount(void)
 	u32 hcr;
 
 	hcr = read_gicreg(ICH_HCR_EL2);
-	hcr += 1 << ICH_HCR_EOIcount_SHIFT;
+	hcr += 1 << ICH_HCR_EL2_EOIcount_SHIFT;
 	write_gicreg(hcr, ICH_HCR_EL2);
 }
 
@@ -1069,7 +1069,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
 	case SYS_ICC_EOIR0_EL1:
 	case SYS_ICC_HPPIR0_EL1:
 	case SYS_ICC_IAR0_EL1:
-		return ich_hcr & ICH_HCR_TALL0;
+		return ich_hcr & ICH_HCR_EL2_TALL0;
 
 	case SYS_ICC_IGRPEN1_EL1:
 		if (is_read &&
@@ -1090,10 +1090,10 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
 	case SYS_ICC_EOIR1_EL1:
 	case SYS_ICC_HPPIR1_EL1:
 	case SYS_ICC_IAR1_EL1:
-		return ich_hcr & ICH_HCR_TALL1;
+		return ich_hcr & ICH_HCR_EL2_TALL1;
 
 	case SYS_ICC_DIR_EL1:
-		if (ich_hcr & ICH_HCR_TDIR)
+		if (ich_hcr & ICH_HCR_EL2_TDIR)
 			return true;
 
 		fallthrough;
@@ -1101,7 +1101,7 @@ static bool __vgic_v3_check_trap_forwarding(struct kvm_vcpu *vcpu,
 	case SYS_ICC_RPR_EL1:
 	case SYS_ICC_CTLR_EL1:
 	case SYS_ICC_PMR_EL1:
-		return ich_hcr & ICH_HCR_TC;
+		return ich_hcr & ICH_HCR_EL2_TC;
 
 	default:
 		return false;
diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c
index 0c9387d2f5070..722e61e410e2e 100644
--- a/arch/arm64/kvm/nested.c
+++ b/arch/arm64/kvm/nested.c
@@ -1290,6 +1290,15 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
 		res0 |= GENMASK(11, 8);
 	set_sysreg_masks(kvm, CNTHCTL_EL2, res0, res1);
 
+	/* ICH_HCR_EL2 */
+	res0 = ICH_HCR_EL2_RES0;
+	res1 = ICH_HCR_EL2_RES1;
+	if (!(kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_TDS))
+		res0 |= ICH_HCR_EL2_TDIR;
+	/* No GICv4 is presented to the guest */
+	res0 |= ICH_HCR_EL2_DVIM | ICH_HCR_EL2_vSGIEOICount;
+	set_sysreg_masks(kvm, ICH_HCR_EL2, res0, res1);
+
 out:
 	for (enum vcpu_sysreg sr = __SANITISED_REG_START__; sr < NR_SYS_REGS; sr++)
 		(void)__vcpu_sys_reg(vcpu, sr);
@@ -1309,4 +1318,8 @@ void check_nested_vcpu_requests(struct kvm_vcpu *vcpu)
 		}
 		write_unlock(&vcpu->kvm->mmu_lock);
 	}
+
+	/* Must be last, as may switch context! */
+	if (kvm_check_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu))
+		kvm_inject_nested_irq(vcpu);
 }
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 82430c1e1dd02..84248fabd418c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -17,6 +17,7 @@
 #include <linux/bsearch.h>
 #include <linux/cacheinfo.h>
 #include <linux/debugfs.h>
+#include <linux/irqchip/arm-gic-v3.h>
 #include <linux/kvm_host.h>
 #include <linux/mm.h>
 
@@ -531,7 +532,13 @@ static bool access_gic_sre(struct kvm_vcpu *vcpu,
 	if (p->is_write)
 		return ignore_write(vcpu, p);
 
-	p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
+	if (p->Op1 == 4) {	/* ICC_SRE_EL2 */
+		p->regval = (ICC_SRE_EL2_ENABLE | ICC_SRE_EL2_SRE |
+			     ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB);
+	} else {		/* ICC_SRE_EL1 */
+		p->regval = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre;
+	}
+
 	return true;
 }
 
@@ -2426,6 +2433,59 @@ static bool access_zcr_el2(struct kvm_vcpu *vcpu,
 	vq = SYS_FIELD_GET(ZCR_ELx, LEN, p->regval) + 1;
 	vq = min(vq, vcpu_sve_max_vq(vcpu));
 	vcpu_write_sys_reg(vcpu, vq - 1, ZCR_EL2);
+
+	return true;
+}
+
+static bool access_gic_vtr(struct kvm_vcpu *vcpu,
+			   struct sys_reg_params *p,
+			   const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p, r);
+
+	p->regval = kvm_vgic_global_state.ich_vtr_el2;
+	p->regval &= ~(ICH_VTR_EL2_DVIM |
+		       ICH_VTR_EL2_A3V |
+		       ICH_VTR_EL2_IDbits);
+	p->regval |= ICH_VTR_EL2_nV4;
+
+	return true;
+}
+
+static bool access_gic_misr(struct kvm_vcpu *vcpu,
+			    struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p, r);
+
+	p->regval = vgic_v3_get_misr(vcpu);
+
+	return true;
+}
+
+static bool access_gic_eisr(struct kvm_vcpu *vcpu,
+			    struct sys_reg_params *p,
+			    const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p, r);
+
+	p->regval = vgic_v3_get_eisr(vcpu);
+
+	return true;
+}
+
+static bool access_gic_elrsr(struct kvm_vcpu *vcpu,
+			     struct sys_reg_params *p,
+			     const struct sys_reg_desc *r)
+{
+	if (p->is_write)
+		return write_to_read_only(vcpu, p, r);
+
+	p->regval = vgic_v3_get_elrsr(vcpu);
 
 	return true;
 }
@@ -3102,7 +3162,40 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	EL2_REG(RVBAR_EL2, access_rw, reset_val, 0),
 	{ SYS_DESC(SYS_RMR_EL2), undef_access },
 
+	EL2_REG_VNCR(ICH_AP0R0_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP0R1_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP0R2_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP0R3_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP1R0_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP1R1_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP1R2_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_AP1R3_EL2, reset_val, 0),
+
+	{ SYS_DESC(SYS_ICC_SRE_EL2), access_gic_sre },
+
+	EL2_REG_VNCR(ICH_HCR_EL2, reset_val, 0),
+	{ SYS_DESC(SYS_ICH_VTR_EL2), access_gic_vtr },
+	{ SYS_DESC(SYS_ICH_MISR_EL2), access_gic_misr },
+	{ SYS_DESC(SYS_ICH_EISR_EL2), access_gic_eisr },
+	{ SYS_DESC(SYS_ICH_ELRSR_EL2), access_gic_elrsr },
+	EL2_REG_VNCR(ICH_VMCR_EL2, reset_val, 0),
+
+	EL2_REG_VNCR(ICH_LR0_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR1_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR2_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR3_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR4_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR5_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR6_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR7_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR8_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR9_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR10_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR11_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR12_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR13_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR14_EL2, reset_val, 0),
+	EL2_REG_VNCR(ICH_LR15_EL2, reset_val, 0),
+
 	EL2_REG(CONTEXTIDR_EL2, access_rw, reset_val, 0),
 	EL2_REG(TPIDR_EL2, access_rw, reset_val, 0),
 
diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c
index 9e7c486b48c2e..5eacb4b3250a1 100644
--- a/arch/arm64/kvm/vgic-sys-reg-v3.c
+++ b/arch/arm64/kvm/vgic-sys-reg-v3.c
@@ -35,12 +35,12 @@ static int set_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
 
 		vgic_v3_cpu->num_id_bits = host_id_bits;
 
-		host_seis = FIELD_GET(ICH_VTR_SEIS_MASK, kvm_vgic_global_state.ich_vtr_el2);
+		host_seis = FIELD_GET(ICH_VTR_EL2_SEIS, kvm_vgic_global_state.ich_vtr_el2);
 		seis = FIELD_GET(ICC_CTLR_EL1_SEIS_MASK, val);
 		if (host_seis != seis)
 			return -EINVAL;
 
-		host_a3v = FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2);
+		host_a3v = FIELD_GET(ICH_VTR_EL2_A3V, kvm_vgic_global_state.ich_vtr_el2);
 		a3v = FIELD_GET(ICC_CTLR_EL1_A3V_MASK, val);
 		if (host_a3v != a3v)
 			return -EINVAL;
@@ -68,10 +68,10 @@ static int get_gic_ctlr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
 	val |= FIELD_PREP(ICC_CTLR_EL1_PRI_BITS_MASK, vgic_v3_cpu->num_pri_bits - 1);
 	val |= FIELD_PREP(ICC_CTLR_EL1_ID_BITS_MASK, vgic_v3_cpu->num_id_bits);
 	val |= FIELD_PREP(ICC_CTLR_EL1_SEIS_MASK,
-			  FIELD_GET(ICH_VTR_SEIS_MASK,
+			  FIELD_GET(ICH_VTR_EL2_SEIS,
				    kvm_vgic_global_state.ich_vtr_el2));
 	val |= FIELD_PREP(ICC_CTLR_EL1_A3V_MASK,
-		FIELD_GET(ICH_VTR_A3V_MASK, kvm_vgic_global_state.ich_vtr_el2));
+		FIELD_GET(ICH_VTR_EL2_A3V, kvm_vgic_global_state.ich_vtr_el2));
 	/*
	 * The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
	 * Extract it directly using ICC_CTLR_EL1 reg definitions.
diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c
index 775461cf2d2db..1f33e71c2a731 100644
--- a/arch/arm64/kvm/vgic/vgic-init.c
+++ b/arch/arm64/kvm/vgic/vgic-init.c
@@ -198,6 +198,27 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis)
 	return 0;
 }
 
+/* Default GICv3 Maintenance Interrupt INTID, as per SBSA */
+#define DEFAULT_MI_INTID	25
+
+int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu)
+{
+	int ret;
+
+	guard(mutex)(&vcpu->kvm->arch.config_lock);
+
+	/*
+	 * Matching the tradition established with the timers, provide
+	 * a default PPI for the maintenance interrupt. It makes
+	 * things easier to reason about.
+	 */
+	if (vcpu->kvm->arch.vgic.mi_intid == 0)
+		vcpu->kvm->arch.vgic.mi_intid = DEFAULT_MI_INTID;
+	ret = kvm_vgic_set_owner(vcpu, vcpu->kvm->arch.vgic.mi_intid, vcpu);
+
+	return ret;
+}
+
 static int vgic_allocate_private_irqs_locked(struct kvm_vcpu *vcpu, u32 type)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -588,12 +609,20 @@ void kvm_vgic_cpu_down(void)
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
 {
+	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
+
 	/*
	 * We cannot rely on the vgic maintenance interrupt to be
	 * delivered synchronously. This means we can only use it to
	 * exit the VM, and we perform the handling of EOIed
	 * interrupts on the exit path (see vgic_fold_lr_state).
+	 *
+	 * Of course, NV throws a wrench in this plan, and needs
+	 * something special.
	 */
+	if (vcpu && vgic_state_is_nested(vcpu))
+		vgic_v3_handle_nested_maint_irq(vcpu);
+
 	return IRQ_HANDLED;
 }
 
diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c
index 5f4f57aaa23ec..359094f68c23e 100644
--- a/arch/arm64/kvm/vgic/vgic-kvm-device.c
+++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c
@@ -303,6 +303,12 @@ static int vgic_get_common_attr(struct kvm_device *dev,
				       VGIC_NR_PRIVATE_IRQS, uaddr);
 		break;
 	}
+	case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ: {
+		u32 __user *uaddr = (u32 __user *)(long)attr->addr;
+
+		r = put_user(dev->kvm->arch.vgic.mi_intid, uaddr);
+		break;
+	}
 	}
 
 	return r;
@@ -517,7 +523,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
 	struct vgic_reg_attr reg_attr;
 	gpa_t addr;
 	struct kvm_vcpu *vcpu;
-	bool uaccess;
+	bool uaccess, post_init = true;
 	u32 val;
 	int ret;
 
@@ -533,6 +539,9 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
 		/* Sysregs uaccess is performed by the sysreg handling code */
 		uaccess = false;
 		break;
+	case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
+		post_init = false;
+		fallthrough;
 	default:
 		uaccess = true;
 	}
@@ -552,7 +561,7 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
 
 	mutex_lock(&dev->kvm->arch.config_lock);
 
-	if (unlikely(!vgic_initialized(dev->kvm))) {
+	if (post_init != vgic_initialized(dev->kvm)) {
 		ret = -EBUSY;
 		goto out;
 	}
@@ -582,6 +591,19 @@ static int vgic_v3_attr_regs_access(struct kvm_device *dev,
 		}
 		break;
 	}
+	case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
+		if (!is_write) {
+			val = dev->kvm->arch.vgic.mi_intid;
+			ret = 0;
+			break;
+		}
+
+		ret = -EINVAL;
+		if ((val < VGIC_NR_PRIVATE_IRQS) && (val >= VGIC_NR_SGIS)) {
+			dev->kvm->arch.vgic.mi_intid = val;
+			ret = 0;
+		}
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -608,6 +630,7 @@ static int vgic_v3_set_attr(struct kvm_device *dev,
 	case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
 	case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
+	case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
 		return vgic_v3_attr_regs_access(dev, attr, true);
 	default:
 		return vgic_set_common_attr(dev, attr);
@@ -622,6 +645,7 @@ static int vgic_v3_get_attr(struct kvm_device *dev,
 	case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
 	case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO:
+	case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
 		return vgic_v3_attr_regs_access(dev, attr, false);
 	default:
 		return vgic_get_common_attr(dev, attr);
@@ -645,6 +669,7 @@ static int vgic_v3_has_attr(struct kvm_device *dev,
 	case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS:
 		return vgic_v3_has_attr_regs(dev, attr);
 	case KVM_DEV_ARM_VGIC_GRP_NR_IRQS:
+	case KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ:
 		return 0;
 	case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: {
 		if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >>
diff --git a/arch/arm64/kvm/vgic/vgic-v3-nested.c b/arch/arm64/kvm/vgic/vgic-v3-nested.c
new file mode 100644
index 0000000000000..bfa5bde1f1067
--- /dev/null
+++ b/arch/arm64/kvm/vgic/vgic-v3-nested.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/cpu.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+
+#include <kvm/arm_vgic.h>
+
+#include <asm/kvm_arm.h>
+#include <asm/kvm_emulate.h>
+#include <asm/kvm_nested.h>
+
+#include "vgic.h"
+
+#define ICH_LRN(n)	(ICH_LR0_EL2 + (n))
+#define ICH_AP0RN(n)	(ICH_AP0R0_EL2 + (n))
+#define ICH_AP1RN(n)	(ICH_AP1R0_EL2 + (n))
+
+struct mi_state {
+	u16	eisr;
+	u16	elrsr;
+	bool	pend;
+};
+
+/*
+ * The shadow registers loaded to the hardware when running an L2 guest
+ * with the virtual IMO/FMO bits set.
+ */
+struct shadow_if {
+	struct vgic_v3_cpu_if	cpuif;
+	unsigned long		lr_map;
+};
+
+static DEFINE_PER_CPU(struct shadow_if, shadow_if);
+
+/*
+ * Nesting GICv3 support
+ *
+ * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
+ * completely controls the interrupts injected via the list registers.
+ * Consequently, most of the state that is modified by the guest (by ACK-ing
+ * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
+ * keep a semi-consistent view of the interrupts.
+ *
+ * This still applies for an NV guest, but only while "InHost" (either
+ * running at EL2, or at EL0 with HCR_EL2.{E2H,TGE}=={1,1}).
+ *
+ * When running an L2 guest ("not InHost"), things are radically different,
+ * as the L1 guest is in charge of provisioning the interrupts via its own
+ * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
+ * page. This means that the flow described above does work (there is no
+ * state to rebuild in the L0 hypervisor), and that most things happen on L2
+ * load/put:
+ *
+ * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
+ *   per-CPU data structure that is used to populate the actual LRs. This is
+ *   an extra copy that we could avoid, but life is short. In the process,
+ *   we remap any interrupt that has the HW bit set to the mapped interrupt
+ *   on the host, should the host consider it a HW one. This allows the HW
+ *   deactivation to take its course, such as for the timer.
+ *
+ * - on L2 put: perform the inverse transformation, so that the result of L2
+ *   running becomes visible to L1 in the VNCR-accessible registers.
+ *
+ * - there is nothing to do on L2 entry, as everything will have happened
+ *   on load. However, this is the point where we detect that an interrupt
+ *   targeting L1 is pending and prepare the grand switcheroo.
+ *
+ * - on L2 exit: emulate the HW bit, and deactivate the corresponding L1
+ *   interrupt. The L0 active state will be cleared by the HW if the L1
+ *   interrupt was itself backed by a HW interrupt.
+ *
+ * Maintenance Interrupt (MI) management:
+ *
+ * Since the L2 guest runs the vgic in its full glory, MIs get delivered and
+ * used as a handover point between L2 and L1.
+ *
+ * - on delivery of an MI to L0 while L2 is running: make the L1 MI pending,
+ *   and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to
+ *   run and process the MI.
+ *
+ * - L1 MI is a fully virtual interrupt, not linked to the host's MI. Its
+ *   state must be computed at each entry/exit of the guest, much like we do
+ *   it for the PMU interrupt.
+ *
+ * - because most of the ICH_*_EL2 registers live in the VNCR page, the
+ *   quality of emulation is poor: L1 can set up the vgic so that an MI would
+ *   immediately fire, and not observe anything until the next exit. Trying
+ *   to read ICH_MISR_EL2 would do the trick, for example.
+ *
+ * System register emulation:
+ *
+ * We get two classes of registers:
+ *
+ * - those backed by memory (LRs, APRs, HCR, VMCR): L1 can freely access
+ *   them, and L0 doesn't see a thing.
+ *
+ * - those that always trap (ELRSR, EISR, MISR): these are status registers
+ *   that are built on the fly based on the in-memory state.
+ *
+ * Only L1 can access the ICH_*_EL2 registers. A non-NV L2 obviously cannot,
+ * and an NV L2 would either access the VNCR page provided by L1 (memory
+ * based registers), or see the access redirected to L1 (registers that
+ * trap) thanks to NV being set by L1.
+ */
+ */ + +bool vgic_state_is_nested(struct kvm_vcpu *vcpu) +{ + u64 xmo; + + if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) { + xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO); + WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO), + "Separate virtual IRQ/FIQ settings not supported\n"); + + return !!xmo; + } + + return false; +} + +static struct shadow_if *get_shadow_if(void) +{ + return this_cpu_ptr(&shadow_if); +} + +static bool lr_triggers_eoi(u64 lr) +{ + return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI); +} + +static void vgic_compute_mi_state(struct kvm_vcpu *vcpu, struct mi_state *mi_state) +{ + u16 eisr = 0, elrsr = 0; + bool pend = false; + + for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { + u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + + if (lr_triggers_eoi(lr)) + eisr |= BIT(i); + if (!(lr & ICH_LR_STATE)) + elrsr |= BIT(i); + pend |= (lr & ICH_LR_PENDING_BIT); + } + + mi_state->eisr = eisr; + mi_state->elrsr = elrsr; + mi_state->pend = pend; +} + +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu) +{ + struct mi_state mi_state; + + vgic_compute_mi_state(vcpu, &mi_state); + return mi_state.eisr; +} + +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu) +{ + struct mi_state mi_state; + + vgic_compute_mi_state(vcpu, &mi_state); + return mi_state.elrsr; +} + +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu) +{ + struct mi_state mi_state; + u64 reg = 0, hcr, vmcr; + + hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); + vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2); + + vgic_compute_mi_state(vcpu, &mi_state); + + if (mi_state.eisr) + reg |= ICH_MISR_EL2_EOI; + + if (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_UIE) { + int used_lrs = kvm_vgic_global_state.nr_lr; + + used_lrs -= hweight16(mi_state.elrsr); + reg |= (used_lrs <= 1) ? ICH_MISR_EL2_U : 0; + } + + if ((hcr & ICH_HCR_EL2_LRENPIE) && FIELD_GET(ICH_HCR_EL2_EOIcount_MASK, hcr)) + reg |= ICH_MISR_EL2_LRENP; + + if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend) + reg |= ICH_MISR_EL2_NP; + + if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_ENG0_MASK)) + reg |= ICH_MISR_EL2_VGrp0E; + + if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_ENG0_MASK)) + reg |= ICH_MISR_EL2_VGrp0D; + + if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_ENG1_MASK)) + reg |= ICH_MISR_EL2_VGrp1E; + + if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_ENG1_MASK)) + reg |= ICH_MISR_EL2_VGrp1D; + + return reg; +} + +/* + * For LRs which have HW bit set such as timer interrupts, we modify them to + * have the host hardware interrupt number instead of the virtual one programmed + * by the guest hypervisor. 
+ */ +static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu, + struct vgic_v3_cpu_if *s_cpu_if) +{ + unsigned long lr_map = 0; + int index = 0; + + for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) { + u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + struct vgic_irq *irq; + + if (!(lr & ICH_LR_STATE)) + lr = 0; + + if (!(lr & ICH_LR_HW)) + goto next; + + /* We have the HW bit set, check for validity of pINTID */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI ) { + /* There was no real mapping, so nuke the HW bit */ + lr &= ~ICH_LR_HW; + if (irq) + vgic_put_irq(vcpu->kvm, irq); + goto next; + } + + /* It is illegal to have the EOI bit set with HW */ + lr &= ~ICH_LR_EOI; + + /* Translate the virtual mapping to the real one */ + lr &= ~ICH_LR_PHYS_ID_MASK; + lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid); + + vgic_put_irq(vcpu->kvm, irq); + +next: + s_cpu_if->vgic_lr[index] = lr; + if (lr) { + lr_map |= BIT(i); + index++; + } + } + + container_of(s_cpu_if, struct shadow_if, cpuif)->lr_map = lr_map; + s_cpu_if->used_lrs = index; +} + +void vgic_v3_sync_nested(struct kvm_vcpu *vcpu) +{ + struct shadow_if *shadow_if = get_shadow_if(); + int i, index = 0; + + for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { + u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + struct vgic_irq *irq; + + if (!(lr & ICH_LR_HW) || !(lr & ICH_LR_STATE)) + goto next; + + /* + * If we had a HW lr programmed by the guest hypervisor, we + * need to emulate the HW effect between the guest hypervisor + * and the nested guest. + */ + irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr)); + if (WARN_ON(!irq)) /* Shouldn't happen as we check on load */ + goto next; + + lr = __gic_v3_get_lr(index); + if (!(lr & ICH_LR_STATE)) + irq->active = false; + + vgic_put_irq(vcpu->kvm, irq); + next: + index++; + } +} + +static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu, + struct vgic_v3_cpu_if *s_cpu_if) +{ + struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3; + u64 val = 0; + int i; + + /* + * If we're on a system with a broken vgic that requires + * trapping, propagate the trapping requirements. + * + * Ah, the smell of rotten fruits... + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + val = host_if->vgic_hcr & (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | + ICH_HCR_EL2_TC | ICH_HCR_EL2_TDIR); + s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) | val; + s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2); + s_cpu_if->vgic_sre = host_if->vgic_sre; + + for (i = 0; i < 4; i++) { + s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i)); + s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i)); + } + + vgic_v3_create_shadow_lr(vcpu, s_cpu_if); +} + +void vgic_v3_load_nested(struct kvm_vcpu *vcpu) +{ + struct shadow_if *shadow_if = get_shadow_if(); + struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif; + + BUG_ON(!vgic_state_is_nested(vcpu)); + + vgic_v3_create_shadow_state(vcpu, cpu_if); + + __vgic_v3_restore_vmcr_aprs(cpu_if); + __vgic_v3_activate_traps(cpu_if); + + __vgic_v3_restore_state(cpu_if); + + /* + * Propagate the number of used LRs for the benefit of the HYP + * GICv3 emulation code. Yes, this is a pretty sorry hack. 
+ */ + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = cpu_if->used_lrs; +} + +void vgic_v3_put_nested(struct kvm_vcpu *vcpu) +{ + struct shadow_if *shadow_if = get_shadow_if(); + struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif; + u64 val; + int i; + + __vgic_v3_save_vmcr_aprs(s_cpu_if); + __vgic_v3_deactivate_traps(s_cpu_if); + __vgic_v3_save_state(s_cpu_if); + + /* + * Translate the shadow state HW fields back to the virtual ones + * before copying the shadow struct back to the nested one. + */ + val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2); + val &= ~ICH_HCR_EL2_EOIcount_MASK; + val |= (s_cpu_if->vgic_hcr & ICH_HCR_EL2_EOIcount_MASK); + __vcpu_sys_reg(vcpu, ICH_HCR_EL2) = val; + __vcpu_sys_reg(vcpu, ICH_VMCR_EL2) = s_cpu_if->vgic_vmcr; + + for (i = 0; i < 4; i++) { + __vcpu_sys_reg(vcpu, ICH_AP0RN(i)) = s_cpu_if->vgic_ap0r[i]; + __vcpu_sys_reg(vcpu, ICH_AP1RN(i)) = s_cpu_if->vgic_ap1r[i]; + } + + for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) { + val = __vcpu_sys_reg(vcpu, ICH_LRN(i)); + + val &= ~ICH_LR_STATE; + val |= s_cpu_if->vgic_lr[i] & ICH_LR_STATE; + + __vcpu_sys_reg(vcpu, ICH_LRN(i)) = val; + s_cpu_if->vgic_lr[i] = 0; + } + + shadow_if->lr_map = 0; + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0; +} + +/* + * If we exit a L2 VM with a pending maintenance interrupt from the GIC, + * then we need to forward this to L1 so that it can re-sync the appropriate + * LRs and sample level triggered interrupts again. + */ +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu) +{ + bool state = read_sysreg_s(SYS_ICH_MISR_EL2); + + /* This will force a switch back to L1 if the level is high */ + kvm_vgic_inject_irq(vcpu->kvm, vcpu, + vcpu->kvm->arch.vgic.mi_intid, state, vcpu); + + sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0); +} + +void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu) +{ + bool level; + + level = __vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En; + if (level) + level &= vgic_v3_get_misr(vcpu); + kvm_vgic_inject_irq(vcpu->kvm, vcpu, + vcpu->kvm->arch.vgic.mi_intid, level, vcpu); +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index d7233ab982d01..73a8a7df4bd23 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -24,7 +24,7 @@ void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; - cpuif->vgic_hcr |= ICH_HCR_UIE; + cpuif->vgic_hcr |= ICH_HCR_EL2_UIE; } static bool lr_signals_eoi_mi(u64 lr_val) @@ -42,7 +42,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); - cpuif->vgic_hcr &= ~ICH_HCR_UIE; + cpuif->vgic_hcr &= ~ICH_HCR_EL2_UIE; for (lr = 0; lr < cpuif->used_lrs; lr++) { u64 val = cpuif->vgic_lr[lr]; @@ -284,15 +284,13 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_sre = 0; } - vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_ID_BITS_MASK) >> - ICH_VTR_ID_BITS_SHIFT; - vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & - ICH_VTR_PRI_BITS_MASK) >> - ICH_VTR_PRI_BITS_SHIFT) + 1; + vcpu->arch.vgic_cpu.num_id_bits = FIELD_GET(ICH_VTR_EL2_IDbits, + kvm_vgic_global_state.ich_vtr_el2); + vcpu->arch.vgic_cpu.num_pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits, + kvm_vgic_global_state.ich_vtr_el2) + 1; /* Get the show on the road... 
*/ - vgic_v3->vgic_hcr = ICH_HCR_EN; + vgic_v3->vgic_hcr = ICH_HCR_EL2_En; } void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) @@ -301,18 +299,19 @@ void vcpu_set_ich_hcr(struct kvm_vcpu *vcpu) /* Hide GICv3 sysreg if necessary */ if (!kvm_has_gicv3(vcpu->kvm)) { - vgic_v3->vgic_hcr |= ICH_HCR_TALL0 | ICH_HCR_TALL1 | ICH_HCR_TC; + vgic_v3->vgic_hcr |= (ICH_HCR_EL2_TALL0 | ICH_HCR_EL2_TALL1 | + ICH_HCR_EL2_TC); return; } if (group0_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TALL0; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL0; if (group1_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TALL1; if (common_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TC; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TC; if (dir_trap) - vgic_v3->vgic_hcr |= ICH_HCR_TDIR; + vgic_v3->vgic_hcr |= ICH_HCR_EL2_TDIR; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -632,7 +631,7 @@ static const struct midr_range broken_seis[] = { static bool vgic_v3_broken_seis(void) { - return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) && + return ((kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_EL2_SEIS) && is_midr_in_range_list(read_cpuid_id(), broken_seis)); } @@ -706,10 +705,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (vgic_v3_broken_seis()) { kvm_info("GICv3 with broken locally generated SEI\n"); - kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_SEIS_MASK; + kvm_vgic_global_state.ich_vtr_el2 &= ~ICH_VTR_EL2_SEIS; group0_trap = true; group1_trap = true; - if (ich_vtr_el2 & ICH_VTR_TDS_MASK) + if (ich_vtr_el2 & ICH_VTR_EL2_TDS) dir_trap = true; else common_trap = true; @@ -735,6 +734,12 @@ void vgic_v3_load(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + /* If the vgic is nested, perform the full state loading */ + if (vgic_state_is_nested(vcpu)) { + vgic_v3_load_nested(vcpu); + return; + } + if (likely(!is_protected_kvm_enabled())) kvm_call_hyp(__vgic_v3_restore_vmcr_aprs, cpu_if); @@ -748,6 +753,11 @@ void vgic_v3_put(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + if (vgic_state_is_nested(vcpu)) { + vgic_v3_put_nested(vcpu); + return; + } + if (likely(!is_protected_kvm_enabled())) kvm_call_hyp(__vgic_v3_save_vmcr_aprs, cpu_if); WARN_ON(vgic_v4_put(vcpu)); diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 9f51d58b5f8a4..c7de6154627c4 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -336,6 +336,22 @@ void vgic_v4_teardown(struct kvm *kvm) its_vm->vpes = NULL; } +static inline bool vgic_v4_want_doorbell(struct kvm_vcpu *vcpu) +{ + if (vcpu_get_flag(vcpu, IN_WFI)) + return true; + + if (likely(!vcpu_has_nv(vcpu))) + return false; + + /* + * GICv4 hardware is only ever used for the L1. Mark the vPE (i.e. the + * L1 context) nonresident and request a doorbell to kick us out of the + * L2 when an IRQ becomes pending. 
+ */ + return vcpu_get_flag(vcpu, IN_NESTED_ERET); +} + int vgic_v4_put(struct kvm_vcpu *vcpu) { struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; @@ -343,7 +359,7 @@ int vgic_v4_put(struct kvm_vcpu *vcpu) if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) return 0; - return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI)); + return its_make_vpe_non_resident(vpe, vgic_v4_want_doorbell(vcpu)); } int vgic_v4_load(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index cc8c6b9b5dd8b..8f8096d489252 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -872,6 +872,15 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { int used_lrs; + /* If nesting, emulate the HW effect from L0 to L1 */ + if (vgic_state_is_nested(vcpu)) { + vgic_v3_sync_nested(vcpu); + return; + } + + if (vcpu_has_nv(vcpu)) + vgic_v3_nested_update_mi(vcpu); + /* An empty ap_list_head implies used_lrs == 0 */ if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) return; @@ -900,6 +909,35 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu) /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { + /* + * If in a nested state, we must return early. Two possibilities: + * + * - If we have any pending IRQ for the guest and the guest + * expects IRQs to be handled in its virtual EL2 mode (the + * virtual IMO bit is set) and it is not already running in + * virtual EL2 mode, then we have to emulate an IRQ + * exception to virtual EL2. + * + * We do that by placing a request to ourselves which will + * abort the entry procedure and inject the exception at the + * beginning of the run loop. + * + * - Otherwise, do exactly *NOTHING*. The guest state is + * already loaded, and we can carry on with running it. + * + * If we have NV, but are not in a nested state, compute the + * maintenance interrupt state, as it may fire. 
+ */ + if (vgic_state_is_nested(vcpu)) { + if (kvm_vgic_vcpu_pending_irq(vcpu)) + kvm_make_request(KVM_REQ_GUEST_HYP_IRQ_PENDING, vcpu); + + return; + } + + if (vcpu_has_nv(vcpu)) + vgic_v3_nested_update_mi(vcpu); + /* * If there are no virtual interrupts active or pending for this * VCPU, then there is no work to do and we can bail out without diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 122d95b4e2845..0c5a63712702b 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -353,4 +353,10 @@ static inline bool kvm_has_gicv3(struct kvm *kvm) return kvm_has_feat(kvm, ID_AA64PFR0_EL1, GIC, IMP); } +void vgic_v3_sync_nested(struct kvm_vcpu *vcpu); +void vgic_v3_load_nested(struct kvm_vcpu *vcpu); +void vgic_v3_put_nested(struct kvm_vcpu *vcpu); +void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu); +void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg index 762ee084b37c5..2c63662c1a489 100644 --- a/arch/arm64/tools/sysreg +++ b/arch/arm64/tools/sysreg @@ -3035,6 +3035,54 @@ Field 31:16 PhyPARTID29 Field 15:0 PhyPARTID28 EndSysreg +Sysreg ICH_HCR_EL2 3 4 12 11 0 +Res0 63:32 +Field 31:27 EOIcount +Res0 26:16 +Field 15 DVIM +Field 14 TDIR +Field 13 TSEI +Field 12 TALL1 +Field 11 TALL0 +Field 10 TC +Res0 9 +Field 8 vSGIEOICount +Field 7 VGrp1DIE +Field 6 VGrp1EIE +Field 5 VGrp0DIE +Field 4 VGrp0EIE +Field 3 NPIE +Field 2 LRENPIE +Field 1 UIE +Field 0 En +EndSysreg + +Sysreg ICH_VTR_EL2 3 4 12 11 1 +Res0 63:32 +Field 31:29 PRIbits +Field 28:26 PREbits +Field 25:23 IDbits +Field 22 SEIS +Field 21 A3V +Field 20 nV4 +Field 19 TDS +Field 18 DVIM +Res0 17:5 +Field 4:0 ListRegs +EndSysreg + +Sysreg ICH_MISR_EL2 3 4 12 11 2 +Res0 63:8 +Field 7 VGrp1D +Field 6 VGrp1E +Field 5 VGrp0D +Field 4 VGrp0E +Field 3 NP +Field 2 LRENP +Field 1 U +Field 0 EOI +EndSysreg + Sysreg CONTEXTIDR_EL2 3 4 13 0 1 Fields CONTEXTIDR_ELx EndSysreg diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 2b1684c60e3ca..974dc088c8530 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -409,15 +409,15 @@ static void __exception_irq_entry aic_handle_irq(struct pt_regs *regs) * in use, and be cleared when coming back from the handler. */ if (is_kernel_in_hyp_mode() && - (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) && + (read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) && read_sysreg_s(SYS_ICH_MISR_EL2) != 0) { generic_handle_domain_irq(aic_irqc->hw_domain, AIC_FIQ_HWIRQ(AIC_VGIC_MI)); - if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EN) && + if (unlikely((read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_En) && read_sysreg_s(SYS_ICH_MISR_EL2))) { pr_err_ratelimited("vGIC IRQ fired and not handled by KVM, disabling.\n"); - sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0); + sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0); } } } @@ -841,7 +841,7 @@ static int aic_init_cpu(unsigned int cpu) VM_TMR_FIQ_ENABLE_V | VM_TMR_FIQ_ENABLE_P, 0); /* vGIC maintenance IRQ */ - sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EN, 0); + sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0); } /* PMC FIQ */ diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3a8ccfda34d29..714cef854c1c3 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -249,6 +249,9 @@ struct vgic_dist { int nr_spis; + /* The GIC maintenance IRQ for nested hypervisors. 
*/ + u32 mi_intid; + /* base addresses in guest physical address space: */ gpa_t vgic_dist_base; /* distributor */ union { @@ -369,6 +372,7 @@ extern struct static_key_false vgic_v3_cpuif_trap; int kvm_set_legacy_vgic_v2_addr(struct kvm *kvm, struct kvm_arm_device_addr *dev_addr); void kvm_vgic_early_init(struct kvm *kvm); int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu); +int kvm_vgic_vcpu_nv_init(struct kvm_vcpu *vcpu); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); @@ -389,6 +393,10 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); void kvm_vgic_load(struct kvm_vcpu *vcpu); void kvm_vgic_put(struct kvm_vcpu *vcpu); +u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu); +u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu); +u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu); + #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) ((k)->arch.vgic.initialized) #define vgic_ready(k) ((k)->arch.vgic.ready) @@ -433,6 +441,8 @@ int vgic_v4_load(struct kvm_vcpu *vcpu); void vgic_v4_commit(struct kvm_vcpu *vcpu); int vgic_v4_put(struct kvm_vcpu *vcpu); +bool vgic_state_is_nested(struct kvm_vcpu *vcpu); + /* CPU HP callbacks */ void kvm_vgic_cpu_up(void); void kvm_vgic_cpu_down(void); diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 03cd7c19a683b..d5dd969028175 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -246,6 +246,7 @@ struct kvm_vcpu_events { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h index 150416682e2cb..b6c5ece4fdee7 100644 --- a/tools/arch/arm64/include/asm/sysreg.h +++ b/tools/arch/arm64/include/asm/sysreg.h @@ -558,9 +558,6 @@ #define SYS_ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) #define SYS_ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) -#define SYS_ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) -#define SYS_ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) -#define SYS_ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) #define SYS_ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) #define SYS_ICH_ELRSR_EL2 sys_reg(3, 4, 12, 11, 5) #define SYS_ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) @@ -981,10 +978,6 @@ #define SYS_MPIDR_SAFE_VAL (BIT(31)) /* GIC Hypervisor interface registers */ -/* ICH_MISR_EL2 bit definitions */ -#define ICH_MISR_EOI (1 << 0) -#define ICH_MISR_U (1 << 1) - /* ICH_LR*_EL2 bit definitions */ #define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) @@ -999,17 +992,6 @@ #define ICH_LR_PRIORITY_SHIFT 48 #define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) -/* ICH_HCR_EL2 bit definitions */ -#define ICH_HCR_EN (1 << 0) -#define ICH_HCR_UIE (1 << 1) -#define ICH_HCR_NPIE (1 << 3) -#define ICH_HCR_TC (1 << 10) -#define ICH_HCR_TALL0 (1 << 11) -#define ICH_HCR_TALL1 (1 << 12) -#define ICH_HCR_TDIR (1 << 14) -#define ICH_HCR_EOIcount_SHIFT 27 -#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) - /* ICH_VMCR_EL2 bit definitions */ #define ICH_VMCR_ACK_CTL_SHIFT 2 #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) @@ -1030,18 +1012,6 @@ #define ICH_VMCR_ENG1_SHIFT 1 #define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) -/* ICH_VTR_EL2 bit definitions */ 
-#define ICH_VTR_PRI_BITS_SHIFT 29 -#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) -#define ICH_VTR_ID_BITS_SHIFT 23 -#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) -#define ICH_VTR_SEIS_SHIFT 22 -#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) -#define ICH_VTR_A3V_SHIFT 21 -#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) -#define ICH_VTR_TDS_SHIFT 19 -#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) - /* * Permission Indirection Extension (PIE) permission encodings. * Encodings with the _O suffix, have overlays applied (Permission Overlay Extension).
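Finally, a worked example of the ICH_VTR_EL2 decode that the FIELD_GET()
conversions above rely on, using the masks generated from the new sysreg
description. This is a sketch under the assumption that it runs in kernel
context; the function name is made up:

#include <linux/bitfield.h>
#include <linux/printk.h>

/*
 * For VTR = 0x90000004: PRIbits field = 4 -> 5 priority bits, and
 * ListRegs field = 4 -> 5 list registers (both fields encode N-1).
 */
static void vtr_decode_example(u64 vtr)
{
	unsigned int pri_bits = FIELD_GET(ICH_VTR_EL2_PRIbits, vtr) + 1;
	unsigned int id_bits  = FIELD_GET(ICH_VTR_EL2_IDbits, vtr);
	unsigned int nr_lr    = FIELD_GET(ICH_VTR_EL2_ListRegs, vtr) + 1;

	pr_info("vgic: %u LRs, %u priority bits, IDbits code %u\n",
		nr_lr, pri_bits, id_bits);
}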