Skip to content

Commit

Permalink
KVM: arm64: nv: Support multiple nested Stage-2 mmu structures
Browse files Browse the repository at this point in the history
Add Stage-2 mmu data structures for virtual EL2 and for nested guests.
We don't yet populate shadow Stage-2 page tables, but we now have a
framework for getting to a shadow Stage-2 pgd.

We allocate twice the number of vcpus as Stage-2 mmu structures because
that's sufficient for each vcpu running two translation regimes without
having to flush the Stage-2 page tables.

Co-developed-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240614144552.2773592-2-maz@kernel.org
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
  • Loading branch information
Marc Zyngier authored and Oliver Upton committed Jun 19, 2024
1 parent 83a7eef commit 4f128f8
Show file tree
Hide file tree
Showing 7 changed files with 349 additions and 21 deletions.
36 changes: 36 additions & 0 deletions arch/arm64/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,33 @@ struct kvm_s2_mmu {
uint64_t split_page_chunk_size;

struct kvm_arch *arch;

/*
* For a shadow stage-2 MMU, the virtual vttbr used by the
* host to parse the guest S2.
* This either contains:
* - the virtual VTTBR programmed by the guest hypervisor with
* CnP cleared
* - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
*
* We also cache the full VTCR which gets used for TLB invalidation,
* taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
* to be cached in a TLB" to the letter.
*/
u64 tlb_vttbr;
u64 tlb_vtcr;

/*
* true when this represents a nested context where virtual
* HCR_EL2.VM == 1
*/
bool nested_stage2_enabled;

/*
* 0: Nobody is currently using this, check vttbr for validity
* >0: Somebody is actively using this.
*/
atomic_t refcnt;
};

struct kvm_arch_memory_slot {
Expand Down Expand Up @@ -256,6 +283,14 @@ struct kvm_arch {
*/
u64 fgu[__NR_FGT_GROUP_IDS__];

/*
* Stage 2 paging state for VMs with nested S2 using a virtual
* VMID.
*/
struct kvm_s2_mmu *nested_mmus;
size_t nested_mmus_size;
int nested_mmus_next;

/* Interrupt controller */
struct vgic_dist vgic;

Expand Down Expand Up @@ -1306,6 +1341,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);

int __init kvm_set_ipa_limit(void);
u32 kvm_get_pa_bits(struct kvm *kvm);

#define __KVM_HAVE_ARCH_VM_ALLOC
struct kvm *kvm_arch_alloc_vm(void);
Expand Down
24 changes: 24 additions & 0 deletions arch/arm64/include/asm/kvm_mmu.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ alternative_cb_end
#include <asm/mmu_context.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_host.h>
#include <asm/kvm_nested.h>

void kvm_update_va_mask(struct alt_instr *alt,
__le32 *origptr, __le32 *updptr, int nr_inst);
Expand Down Expand Up @@ -165,6 +166,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
void __init free_hyp_pgds(void);

void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);

void stage2_unmap_vm(struct kvm *kvm);
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
void kvm_uninit_stage2_mmu(struct kvm *kvm);
Expand Down Expand Up @@ -326,5 +329,26 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
{
return container_of(mmu->arch, struct kvm, arch);
}

static inline u64 get_vmid(u64 vttbr)
{
return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
VTTBR_VMID_SHIFT;
}

static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
{
return !(mmu->tlb_vttbr & VTTBR_CNP_BIT);
}

static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
{
/*
* Be careful, mmu may not be fully initialised so do look at
* *any* of its fields.
*/
return &kvm->arch.mmu != mmu;
}

#endif /* __ASSEMBLY__ */
#endif /* __ARM64_KVM_MMU_H__ */
6 changes: 6 additions & 0 deletions arch/arm64/include/asm/kvm_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
}

extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
extern void kvm_init_nested(struct kvm *kvm);
extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);

int kvm_init_nv_sysregs(struct kvm *kvm);

Expand Down
11 changes: 11 additions & 0 deletions arch/arm64/kvm/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
mutex_unlock(&kvm->lock);
#endif

kvm_init_nested(kvm);

ret = kvm_share_hyp(kvm, kvm + 1);
if (ret)
return ret;
Expand Down Expand Up @@ -551,6 +553,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
struct kvm_s2_mmu *mmu;
int *last_ran;

if (vcpu_has_nv(vcpu))
kvm_vcpu_load_hw_mmu(vcpu);

mmu = vcpu->arch.hw_mmu;
last_ran = this_cpu_ptr(mmu->last_vcpu_ran);

Expand Down Expand Up @@ -601,6 +606,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
kvm_timer_vcpu_put(vcpu);
kvm_vgic_put(vcpu);
kvm_vcpu_pmu_restore_host(vcpu);
if (vcpu_has_nv(vcpu))
kvm_vcpu_put_hw_mmu(vcpu);
kvm_arm_vmid_clear_active();

vcpu_clear_on_unsupported_cpu(vcpu);
Expand Down Expand Up @@ -1459,6 +1466,10 @@ static int kvm_setup_vcpu(struct kvm_vcpu *vcpu)
if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu)
ret = kvm_arm_set_default_pmu(kvm);

/* Prepare for nested if required */
if (!ret && vcpu_has_nv(vcpu))
ret = kvm_vcpu_init_nested(vcpu);

return ret;
}

Expand Down
69 changes: 48 additions & 21 deletions arch/arm64/kvm/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
may_block));
}

static void unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size)
{
__unmap_stage2_range(mmu, start, size, true);
}
Expand Down Expand Up @@ -855,21 +855,9 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
.icache_inval_pou = invalidate_icache_guest_page,
};

/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
* @mmu: The pointer to the s2 MMU structure
* @type: The machine type of the virtual machine
*
* Allocates only the stage-2 HW PGD level table(s).
* Note we don't need locking here as this is only called when the VM is
* created, which can only be done once.
*/
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
static int kvm_init_ipa_range(struct kvm_s2_mmu *mmu, unsigned long type)
{
u32 kvm_ipa_limit = get_kvm_ipa_limit();
int cpu, err;
struct kvm_pgtable *pgt;
u64 mmfr0, mmfr1;
u32 phys_shift;

Expand All @@ -896,11 +884,51 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
mmu->vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);

return 0;
}

/**
* kvm_init_stage2_mmu - Initialise a S2 MMU structure
* @kvm: The pointer to the KVM structure
* @mmu: The pointer to the s2 MMU structure
* @type: The machine type of the virtual machine
*
* Allocates only the stage-2 HW PGD level table(s).
* Note we don't need locking here as this is only called in two cases:
*
* - when the VM is created, which can't race against anything
*
* - when secondary kvm_s2_mmu structures are initialised for NV
* guests, and the caller must hold kvm->lock as this is called on a
* per-vcpu basis.
*/
int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
{
int cpu, err;
struct kvm_pgtable *pgt;

/*
* If we already have our page tables in place, and that the
* MMU context is the canonical one, we have a bug somewhere,
* as this is only supposed to ever happen once per VM.
*
* Otherwise, we're building nested page tables, and that's
* probably because userspace called KVM_ARM_VCPU_INIT more
* than once on the same vcpu. Since that's actually legal,
* don't kick a fuss and leave gracefully.
*/
if (mmu->pgt != NULL) {
if (kvm_is_nested_s2_mmu(kvm, mmu))
return 0;

kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}

err = kvm_init_ipa_range(mmu, type);
if (err)
return err;

pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT);
if (!pgt)
return -ENOMEM;
Expand All @@ -925,6 +953,10 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t

mmu->pgt = pgt;
mmu->pgd_phys = __pa(pgt->pgd);

if (kvm_is_nested_s2_mmu(kvm, mmu))
kvm_init_nested_s2_mmu(mmu);

return 0;

out_destroy_pgtable:
Expand Down Expand Up @@ -976,7 +1008,7 @@ static void stage2_unmap_memslot(struct kvm *kvm,

if (!(vma->vm_flags & VM_PFNMAP)) {
gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
unmap_stage2_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, vm_end - vm_start);
}
hva = vm_end;
} while (hva < reg_end);
Expand Down Expand Up @@ -2022,19 +2054,14 @@ void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen)
{
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
kvm_uninit_stage2_mmu(kvm);
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
gpa_t gpa = slot->base_gfn << PAGE_SHIFT;
phys_addr_t size = slot->npages << PAGE_SHIFT;

write_lock(&kvm->mmu_lock);
unmap_stage2_range(&kvm->arch.mmu, gpa, size);
kvm_stage2_unmap_range(&kvm->arch.mmu, gpa, size);
write_unlock(&kvm->mmu_lock);
}

Expand Down
Loading

0 comments on commit 4f128f8

Please sign in to comment.