Merge branch 'kvm-arm64/nv-idregs' into kvmarm/next
* kvm-arm64/nv-idregs:
  : Changes to exposure of NV features, courtesy of Marc Zyngier
  :
  : Apply NV-specific feature restrictions at reset rather than at the point
  : of KVM_RUN. This makes the true feature set visible to userspace, a
  : necessary step towards save/restore support of NV VMs.
  :
  : Add an additional vCPU feature flag for selecting the E2H0 flavor of NV,
  : such that the VHE-ness of the VM can be applied to the feature set.
  KVM: arm64: selftests: Test that TGRAN*_2 fields are writable
  KVM: arm64: Allow userspace to write ID_AA64MMFR0_EL1.TGRAN*_2
  KVM: arm64: Advertise FEAT_ECV when possible
  KVM: arm64: Make ID_AA64MMFR4_EL1.NV_frac writable
  KVM: arm64: Allow userspace to limit NV support to nVHE
  KVM: arm64: Move NV-specific capping to idreg sanitisation
  KVM: arm64: Enforce NV limits on a per-idregs basis
  KVM: arm64: Make ID_REG_LIMIT_FIELD_ENUM() more widely available
  KVM: arm64: Consolidate idreg callbacks
  KVM: arm64: Advertise NV2 in the boot messages
  KVM: arm64: Mark HCR.EL2.{NV*,AT} RES0 when ID_AA64MMFR4_EL1.NV_frac is 0
  KVM: arm64: Mark HCR.EL2.E2H RES0 when ID_AA64MMFR1_EL1.VH is zero
  KVM: arm64: Hide ID_AA64MMFR2_EL1.NV from guest and userspace
  arm64: cpufeature: Handle NV_frac as a synonym of NV2

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Oliver Upton committed Mar 19, 2025
2 parents 56e3e5c + edfd826 commit 13f64f6
Showing 8 changed files with 233 additions and 171 deletions.
1 change: 1 addition & 0 deletions arch/arm64/include/asm/kvm_nested.h
@@ -188,6 +188,7 @@ static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr)
}

int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu);
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val);

#ifdef CONFIG_ARM64_PTR_AUTH
bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
1 change: 1 addition & 0 deletions arch/arm64/include/uapi/asm/kvm.h
@@ -105,6 +105,7 @@ struct kvm_regs {
#define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */
#define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */
#define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */
#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */

struct kvm_vcpu_init {
__u32 target;
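
Usage note (not part of this diff): userspace selects the new flavor through the ordinary vCPU init feature bits. A minimal sketch, assuming a VM and vCPU fd are already in hand and the host supports NV; the helper name is made up for illustration:

/* Hypothetical userspace helper: request EL2 (nested virt) support in its
 * E2H0 (nVHE-only) flavor for a vCPU. Error handling is minimal. */
#include <linux/kvm.h>
#include <string.h>
#include <sys/ioctl.h>

static int vcpu_init_nv_e2h0(int vm_fd, int vcpu_fd)
{
	struct kvm_vcpu_init init;

	memset(&init, 0, sizeof(init));
	/* Ask the kernel for the preferred vCPU target for this host. */
	if (ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &init) < 0)
		return -1;

	/* Request EL2 support, limited to the E2H0 (E2H RES0) flavor. */
	init.features[0] |= 1U << KVM_ARM_VCPU_HAS_EL2;
	init.features[0] |= 1U << KVM_ARM_VCPU_HAS_EL2_E2H0;

	return ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &init);
}

Note that the series also rejects the E2H0 flavor when the host lacks the ARM64_HAS_HCR_NV1 capability (see the kvm_vcpu_init_nested() hunk below), so userspace should be prepared for this init to fail.
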
15 changes: 13 additions & 2 deletions arch/arm64/kernel/cpufeature.c
@@ -497,6 +497,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr3[] = {

static const struct arm64_ftr_bits ftr_id_aa64mmfr4[] = {
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_E2H0_SHIFT, 4, 0),
ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR4_EL1_NV_frac_SHIFT, 4, 0),
ARM64_FTR_END,
};

@@ -2162,7 +2163,7 @@ static bool has_nested_virt_support(const struct arm64_cpu_capabilities *cap,
if (kvm_get_mode() != KVM_MODE_NV)
return false;

if (!has_cpuid_feature(cap, scope)) {
if (!cpucap_multi_entry_cap_matches(cap, scope)) {
pr_warn("unavailable: %s\n", cap->desc);
return false;
}
@@ -2519,7 +2520,17 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.capability = ARM64_HAS_NESTED_VIRT,
.type = ARM64_CPUCAP_SYSTEM_FEATURE,
.matches = has_nested_virt_support,
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
.match_list = (const struct arm64_cpu_capabilities []){
{
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, NV, NV2)
},
{
.matches = has_cpuid_feature,
ARM64_CPUID_FIELDS(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY)
},
{ /* Sentinel */ }
},
},
{
.capability = ARM64_HAS_32BIT_EL0_DO_NOT_USE,
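
With this change, ARM64_HAS_NESTED_VIRT is satisfied by either ID_AA64MMFR2_EL1.NV >= NV2 or ID_AA64MMFR4_EL1.NV_frac >= NV2_ONLY. The match_list is evaluated by cpucap_multi_entry_cap_matches(); a simplified sketch of that logic (not the verbatim kernel helper):

/* Simplified sketch: a multi-entry capability holds if any entry of its
 * sentinel-terminated match_list matches at the given scope. */
static bool multi_entry_cap_matches(const struct arm64_cpu_capabilities *entry,
				    int scope)
{
	const struct arm64_cpu_capabilities *caps;

	for (caps = entry->match_list; caps->matches; caps++)
		if (caps->matches(caps, scope))
			return true;

	return false;
}
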
5 changes: 3 additions & 2 deletions arch/arm64/kvm/arm.c
@@ -2836,11 +2836,12 @@ static __init int kvm_arm_init(void)
if (err)
goto out_hyp;

kvm_info("%s%sVHE mode initialized successfully\n",
kvm_info("%s%sVHE%s mode initialized successfully\n",
in_hyp_mode ? "" : (is_protected_kvm_enabled() ?
"Protected " : "Hyp "),
in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ?
"h" : "n"));
"h" : "n"),
cpus_have_final_cap(ARM64_HAS_NESTED_VIRT) ? "+NV2": "");

/*
* FIXME: Do something reasonable if kvm_init() fails after pKVM
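
With the extra format specifier, a VHE host with nested virt enabled would log something along the lines of "VHE+NV2 mode initialized successfully", while hosts without the ARM64_HAS_NESTED_VIRT capability keep the existing "VHE", "Hyp nVHE", "Hyp hVHE" or "Protected nVHE" strings (examples are illustrative, not captured boot output).
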
285 changes: 152 additions & 133 deletions arch/arm64/kvm/nested.c
@@ -16,9 +16,6 @@

#include "sys_regs.h"

/* Protection against the sysreg repainting madness... */
#define NV_FTR(r, f) ID_AA64##r##_EL1_##f

/*
* Ratio of live shadow S2 MMU per vcpu. This is a trade-off between
* memory usage and potential number of different sets of S2 PTs in
@@ -54,6 +51,10 @@ int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu)
struct kvm_s2_mmu *tmp;
int num_mmus, ret = 0;

if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features) &&
!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
return -EINVAL;

/*
* Let's treat memory allocation failures as benign: If we fail to
* allocate anything, return an error and keep the allocated array
@@ -807,134 +808,151 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
* This list should get updated as new features get added to the NV
* support, and new extension to the architecture.
*/
static void limit_nv_id_regs(struct kvm *kvm)
u64 limit_nv_id_reg(struct kvm *kvm, u32 reg, u64 val)
{
u64 val, tmp;

/* Support everything but TME */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1);
val &= ~NV_FTR(ISAR0, TME);
kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR0_EL1, val);

/* Support everything but Spec Invalidation and LS64 */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1);
val &= ~(NV_FTR(ISAR1, LS64) |
NV_FTR(ISAR1, SPECRES));
kvm_set_vm_id_reg(kvm, SYS_ID_AA64ISAR1_EL1, val);

/* No AMU, MPAM, S-EL2, or RAS */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1);
val &= ~(GENMASK_ULL(55, 52) |
NV_FTR(PFR0, AMU) |
NV_FTR(PFR0, MPAM) |
NV_FTR(PFR0, SEL2) |
NV_FTR(PFR0, RAS) |
NV_FTR(PFR0, EL3) |
NV_FTR(PFR0, EL2) |
NV_FTR(PFR0, EL1) |
NV_FTR(PFR0, EL0));
/* 64bit only at any EL */
val |= FIELD_PREP(NV_FTR(PFR0, EL0), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL1), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL2), 0b0001);
val |= FIELD_PREP(NV_FTR(PFR0, EL3), 0b0001);
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1, val);

/* Only support BTI, SSBS, CSV2_frac */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1);
val &= (NV_FTR(PFR1, BT) |
NV_FTR(PFR1, SSBS) |
NV_FTR(PFR1, CSV2_frac));
kvm_set_vm_id_reg(kvm, SYS_ID_AA64PFR1_EL1, val);

/* Hide ECV, ExS, Secure Memory */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1);
val &= ~(NV_FTR(MMFR0, ECV) |
NV_FTR(MMFR0, EXS) |
NV_FTR(MMFR0, TGRAN4_2) |
NV_FTR(MMFR0, TGRAN16_2) |
NV_FTR(MMFR0, TGRAN64_2) |
NV_FTR(MMFR0, SNSMEM));

/* Disallow unsupported S2 page sizes */
switch (PAGE_SIZE) {
case SZ_64K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0001);
fallthrough;
case SZ_16K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0001);
fallthrough;
case SZ_4K:
/* Support everything */
switch (reg) {
case SYS_ID_AA64ISAR0_EL1:
/* Support everything but TME */
val &= ~ID_AA64ISAR0_EL1_TME;
break;
}
/*
* Since we can't support a guest S2 page size smaller than
* the host's own page size (due to KVM only populating its
* own S2 using the kernel's page size), advertise the
* limitation using FEAT_GTG.
*/
switch (PAGE_SIZE) {
case SZ_4K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN4_2), 0b0010);
fallthrough;
case SZ_16K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN16_2), 0b0010);
fallthrough;
case SZ_64K:
val |= FIELD_PREP(NV_FTR(MMFR0, TGRAN64_2), 0b0010);

case SYS_ID_AA64ISAR1_EL1:
/* Support everything but LS64 and Spec Invalidation */
val &= ~(ID_AA64ISAR1_EL1_LS64 |
ID_AA64ISAR1_EL1_SPECRES);
break;

case SYS_ID_AA64PFR0_EL1:
/* No RME, AMU, MPAM, S-EL2, or RAS */
val &= ~(ID_AA64PFR0_EL1_RME |
ID_AA64PFR0_EL1_AMU |
ID_AA64PFR0_EL1_MPAM |
ID_AA64PFR0_EL1_SEL2 |
ID_AA64PFR0_EL1_RAS |
ID_AA64PFR0_EL1_EL3 |
ID_AA64PFR0_EL1_EL2 |
ID_AA64PFR0_EL1_EL1 |
ID_AA64PFR0_EL1_EL0);
/* 64bit only at any EL */
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL0, IMP);
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL1, IMP);
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL2, IMP);
val |= SYS_FIELD_PREP_ENUM(ID_AA64PFR0_EL1, EL3, IMP);
break;

case SYS_ID_AA64PFR1_EL1:
/* Only support BTI, SSBS, CSV2_frac */
val &= (ID_AA64PFR1_EL1_BT |
ID_AA64PFR1_EL1_SSBS |
ID_AA64PFR1_EL1_CSV2_frac);
break;

case SYS_ID_AA64MMFR0_EL1:
/* Hide ExS, Secure Memory */
val &= ~(ID_AA64MMFR0_EL1_EXS |
ID_AA64MMFR0_EL1_TGRAN4_2 |
ID_AA64MMFR0_EL1_TGRAN16_2 |
ID_AA64MMFR0_EL1_TGRAN64_2 |
ID_AA64MMFR0_EL1_SNSMEM);

/* Hide CNTPOFF if present */
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR0_EL1, ECV, IMP);

/* Disallow unsupported S2 page sizes */
switch (PAGE_SIZE) {
case SZ_64K:
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, NI);
fallthrough;
case SZ_16K:
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, NI);
fallthrough;
case SZ_4K:
/* Support everything */
break;
}

/*
* Since we can't support a guest S2 page size smaller
* than the host's own page size (due to KVM only
* populating its own S2 using the kernel's page
* size), advertise the limitation using FEAT_GTG.
*/
switch (PAGE_SIZE) {
case SZ_4K:
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN4_2, IMP);
fallthrough;
case SZ_16K:
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN16_2, IMP);
fallthrough;
case SZ_64K:
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR0_EL1, TGRAN64_2, IMP);
break;
}

/* Cap PARange to 48bits */
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64MMFR0_EL1, PARANGE, 48);
break;

case SYS_ID_AA64MMFR1_EL1:
val &= (ID_AA64MMFR1_EL1_HCX |
ID_AA64MMFR1_EL1_PAN |
ID_AA64MMFR1_EL1_LO |
ID_AA64MMFR1_EL1_HPDS |
ID_AA64MMFR1_EL1_VH |
ID_AA64MMFR1_EL1_VMIDBits);
/* FEAT_E2H0 implies no VHE */
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features))
val &= ~ID_AA64MMFR1_EL1_VH;
break;

case SYS_ID_AA64MMFR2_EL1:
val &= ~(ID_AA64MMFR2_EL1_BBM |
ID_AA64MMFR2_EL1_TTL |
GENMASK_ULL(47, 44) |
ID_AA64MMFR2_EL1_ST |
ID_AA64MMFR2_EL1_CCIDX |
ID_AA64MMFR2_EL1_VARange);

/* Force TTL support */
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR2_EL1, TTL, IMP);
break;

case SYS_ID_AA64MMFR4_EL1:
/*
* You get EITHER
*
* - FEAT_VHE without FEAT_E2H0
* - FEAT_NV limited to FEAT_NV2
* - HCR_EL2.NV1 being RES0
*
* OR
*
* - FEAT_E2H0 without FEAT_VHE nor FEAT_NV
*
* Life is too short for anything else.
*/
if (test_bit(KVM_ARM_VCPU_HAS_EL2_E2H0, kvm->arch.vcpu_features)) {
val = 0;
} else {
val = SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY);
val |= SYS_FIELD_PREP_ENUM(ID_AA64MMFR4_EL1, E2H0, NI_NV1);
}
break;

case SYS_ID_AA64DFR0_EL1:
/* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
val &= (ID_AA64DFR0_EL1_PMUVer |
ID_AA64DFR0_EL1_WRPs |
ID_AA64DFR0_EL1_BRPs |
ID_AA64DFR0_EL1_DebugVer|
ID_AA64DFR0_EL1_HPMN0);

/* Cap Debug to ARMv8.1 */
val = ID_REG_LIMIT_FIELD_ENUM(val, ID_AA64DFR0_EL1, DebugVer, VHE);
break;
}
/* Cap PARange to 48bits */
tmp = FIELD_GET(NV_FTR(MMFR0, PARANGE), val);
if (tmp > 0b0101) {
val &= ~NV_FTR(MMFR0, PARANGE);
val |= FIELD_PREP(NV_FTR(MMFR0, PARANGE), 0b0101);
}
kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR0_EL1, val);

val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1);
val &= (NV_FTR(MMFR1, HCX) |
NV_FTR(MMFR1, PAN) |
NV_FTR(MMFR1, LO) |
NV_FTR(MMFR1, HPDS) |
NV_FTR(MMFR1, VH) |
NV_FTR(MMFR1, VMIDBits));
kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR1_EL1, val);

val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1);
val &= ~(NV_FTR(MMFR2, BBM) |
NV_FTR(MMFR2, TTL) |
GENMASK_ULL(47, 44) |
NV_FTR(MMFR2, ST) |
NV_FTR(MMFR2, CCIDX) |
NV_FTR(MMFR2, VARange));

/* Force TTL support */
val |= FIELD_PREP(NV_FTR(MMFR2, TTL), 0b0001);
kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR2_EL1, val);

val = 0;
if (!cpus_have_final_cap(ARM64_HAS_HCR_NV1))
val |= FIELD_PREP(NV_FTR(MMFR4, E2H0),
ID_AA64MMFR4_EL1_E2H0_NI_NV1);
kvm_set_vm_id_reg(kvm, SYS_ID_AA64MMFR4_EL1, val);

/* Only limited support for PMU, Debug, BPs, WPs, and HPMN0 */
val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1);
val &= (NV_FTR(DFR0, PMUVer) |
NV_FTR(DFR0, WRPs) |
NV_FTR(DFR0, BRPs) |
NV_FTR(DFR0, DebugVer) |
NV_FTR(DFR0, HPMN0));

/* Cap Debug to ARMv8.1 */
tmp = FIELD_GET(NV_FTR(DFR0, DebugVer), val);
if (tmp > 0b0111) {
val &= ~NV_FTR(DFR0, DebugVer);
val |= FIELD_PREP(NV_FTR(DFR0, DebugVer), 0b0111);
}
kvm_set_vm_id_reg(kvm, SYS_ID_AA64DFR0_EL1, val);

return val;
}

u64 kvm_vcpu_apply_reg_masks(const struct kvm_vcpu *vcpu,
@@ -981,8 +999,6 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
if (!kvm->arch.sysreg_masks)
return -ENOMEM;

limit_nv_id_regs(kvm);

/* VTTBR_EL2 */
res0 = res1 = 0;
if (!kvm_has_feat_enum(kvm, ID_AA64MMFR1_EL1, VMIDBits, 16))
@@ -1021,10 +1037,11 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
res0 |= HCR_FIEN;
if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, FWB, IMP))
res0 |= HCR_FWB;
if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, NV2))
res0 |= HCR_NV2;
if (!kvm_has_feat(kvm, ID_AA64MMFR2_EL1, NV, IMP))
res0 |= (HCR_AT | HCR_NV1 | HCR_NV);
/* Implementation choice: NV2 is the only supported config */
if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY))
res0 |= (HCR_NV2 | HCR_NV | HCR_AT);
if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, NI))
res0 |= HCR_NV1;
if (!(kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_ADDRESS) &&
kvm_vcpu_has_feature(kvm, KVM_ARM_VCPU_PTRAUTH_GENERIC)))
res0 |= (HCR_API | HCR_APK);
@@ -1034,6 +1051,8 @@ int kvm_init_nv_sysregs(struct kvm_vcpu *vcpu)
res0 |= (HCR_TEA | HCR_TERR);
if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, LO, IMP))
res0 |= HCR_TLOR;
if (!kvm_has_feat(kvm, ID_AA64MMFR1_EL1, VH, IMP))
res0 |= HCR_E2H;
if (!kvm_has_feat(kvm, ID_AA64MMFR4_EL1, E2H0, IMP))
res1 |= HCR_E2H;
set_sysreg_masks(kvm, HCR_EL2, res0, res1);
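
The sys_regs.c side of this series (not among the hunks shown above) is what actually calls the new helper during ID-register sanitisation. A rough sketch of that caller, with indicative names rather than the verbatim kernel code:

/* Rough sketch (indicative names): NV-specific capping is applied while an
 * ID register is sanitised at reset, so userspace already sees the limited
 * value instead of having it clamped later, at KVM_RUN time. */
static u64 sanitise_id_reg(struct kvm_vcpu *vcpu,
			   const struct sys_reg_desc *r, u64 val)
{
	if (vcpu_has_nv(vcpu))
		val = limit_nv_id_reg(vcpu->kvm, reg_to_encoding(r), val);

	return val;
}
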