Merge branch kvm-arm64/pkvm-np-guest into kvmarm-master/next
* kvm-arm64/pkvm-np-guest:
  : .
  : pKVM support for non-protected guests using the standard MM
  : infrastructure, courtesy of Quentin Perret. From the cover letter:
  :
  : "This series moves the stage-2 page-table management of non-protected
  : guests to EL2 when pKVM is enabled. This is only intended as an
  : incremental step towards a 'feature-complete' pKVM, there is however a
  : lot more that needs to come on top.
  :
  : With that series applied, pKVM provides near-parity with standard KVM
  : from a functional perspective all while Linux no longer touches the
  : stage-2 page-tables itself at EL1. The majority of mm-related KVM
  : features work out of the box, including MMU notifiers, dirty logging,
  : RO memslots and things of that nature. There are however two gotchas:
  :
  :  - We don't support mapping devices into guests: this requires
  :    additional hypervisor support for tracking the 'state' of devices,
  :    which will come in a later series. No device assignment until then.
  :
  :  - Stage-2 mappings are forced to page-granularity even when backed by a
  :    huge page for the sake of simplicity of this series. I'm only aiming
  :    at functional parity-ish (from userspace's PoV) for now, support for
  :    HP can be added on top later as a perf improvement."
  : .
  KVM: arm64: Plumb the pKVM MMU in KVM
  KVM: arm64: Introduce the EL1 pKVM MMU
  KVM: arm64: Introduce __pkvm_tlb_flush_vmid()
  KVM: arm64: Introduce __pkvm_host_mkyoung_guest()
  KVM: arm64: Introduce __pkvm_host_test_clear_young_guest()
  KVM: arm64: Introduce __pkvm_host_wrprotect_guest()
  KVM: arm64: Introduce __pkvm_host_relax_guest_perms()
  KVM: arm64: Introduce __pkvm_host_unshare_guest()
  KVM: arm64: Introduce __pkvm_host_share_guest()
  KVM: arm64: Introduce __pkvm_vcpu_{load,put}()
  KVM: arm64: Add {get,put}_pkvm_hyp_vm() helpers
  KVM: arm64: Make kvm_pgtable_stage2_init() a static inline function
  KVM: arm64: Pass walk flags to kvm_pgtable_stage2_relax_perms
  KVM: arm64: Pass walk flags to kvm_pgtable_stage2_mkyoung
  KVM: arm64: Move host page ownership tracking to the hyp vmemmap
  KVM: arm64: Make hyp_page::order a u8
  KVM: arm64: Move enum pkvm_page_state to memory.h
  KVM: arm64: Change the layout of enum pkvm_page_state

Signed-off-by: Marc Zyngier <maz@kernel.org>

# Conflicts:
#	arch/arm64/kvm/arm.c
Marc Zyngier committed Jan 12, 2025
2 parents b7bf7c8 + fce886a · commit d067012
Showing 19 changed files with 1,006 additions and 145 deletions.
9 changes: 9 additions & 0 deletions arch/arm64/include/asm/kvm_asm.h
@@ -64,6 +64,12 @@ enum __kvm_host_smccc_func {
        /* Hypercalls available after pKVM finalisation */
        __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
        __KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
        __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
        __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
        __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
@@ -78,6 +84,9 @@ enum __kvm_host_smccc_func {
        __KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
        __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
        __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
        __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
        __KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
};

#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]
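The new enum entries above are hypercall identifiers: EL1 selects an EL2 handler by passing the enum value to kvm_call_hyp_nvhe(), which packages it and the arguments into an SMCCC call. A minimal call-site sketch, lifted from the arm.c hunk later in this commit (the vCPU-load hypercall):

        kvm_call_hyp_nvhe(__pkvm_vcpu_load, vcpu->kvm->arch.pkvm.handle,
                          vcpu->vcpu_idx, vcpu->arch.hcr_el2);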
4 changes: 4 additions & 0 deletions arch/arm64/include/asm/kvm_host.h
@@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
struct kvm_hyp_memcache {
        phys_addr_t head;
        unsigned long nr_pages;
        struct pkvm_mapping *mapping; /* only used from EL1 */
};

static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
@@ -775,6 +776,9 @@ struct kvm_vcpu_arch {
        /* Cache some mmu pages needed inside spinlock regions */
        struct kvm_mmu_memory_cache mmu_page_cache;

        /* Pages to top-up the pKVM/EL2 guest pool */
        struct kvm_hyp_memcache pkvm_memcache;

        /* Virtual SError ESR to restore when HCR_EL2.VSE is set */
        u64 vsesr_el2;

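The new pkvm_memcache gives each vCPU a pool of pages that EL1 donates to EL2 so the hypervisor can build guest stage-2 tables without allocating memory itself. A hedged sketch of the expected top-up pattern, assuming the existing topup_hyp_memcache() helper keeps its current signature; the wrapper name and the minimum page count policy are illustrative, not code from this commit:

        /* Illustrative only: refill the per-vCPU EL2 pool before a map request. */
        static int pkvm_topup_guest_memcache(struct kvm_vcpu *vcpu, unsigned long min_pages)
        {
                return topup_hyp_memcache(&vcpu->arch.pkvm_memcache, min_pages);
        }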
16 changes: 16 additions & 0 deletions arch/arm64/include/asm/kvm_mmu.h
@@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
        return &kvm->arch.mmu != mmu;
}

static inline void kvm_fault_lock(struct kvm *kvm)
{
        if (is_protected_kvm_enabled())
                write_lock(&kvm->mmu_lock);
        else
                read_lock(&kvm->mmu_lock);
}

static inline void kvm_fault_unlock(struct kvm *kvm)
{
        if (is_protected_kvm_enabled())
                write_unlock(&kvm->mmu_lock);
        else
                read_unlock(&kvm->mmu_lock);
}

#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
#else
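kvm_fault_lock()/kvm_fault_unlock() let the stage-2 fault path keep a single call site for both modes: regular KVM can resolve faults under the read lock because the page-table walker supports shared (parallel) walks, while pKVM's EL1-side mapping tracking presumably needs exclusive access, hence the write lock. A hedged usage sketch; the function name and fault-handling details are placeholders, not code from this commit:

        static int handle_stage2_fault(struct kvm *kvm, phys_addr_t ipa)
        {
                int ret;

                kvm_fault_lock(kvm);
                ret = 0;        /* ... resolve the fault and install the mapping ... */
                kvm_fault_unlock(kvm);

                return ret;
        }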
38 changes: 25 additions & 13 deletions arch/arm64/include/asm/kvm_pgtable.h
@@ -412,15 +412,20 @@ static inline bool kvm_pgtable_walk_lock_held(void)
 * be used instead of block mappings.
 */
struct kvm_pgtable {
        u32 ia_bits;
        s8 start_level;
        kvm_pteref_t pgd;
        struct kvm_pgtable_mm_ops *mm_ops;

        /* Stage-2 only */
        struct kvm_s2_mmu *mmu;
        enum kvm_pgtable_stage2_flags flags;
        kvm_pgtable_force_pte_cb_t force_pte_cb;
        union {
                struct rb_root pkvm_mappings;
                struct {
                        u32 ia_bits;
                        s8 start_level;
                        kvm_pteref_t pgd;
                        struct kvm_pgtable_mm_ops *mm_ops;

                        /* Stage-2 only */
                        enum kvm_pgtable_stage2_flags flags;
                        kvm_pgtable_force_pte_cb_t force_pte_cb;
                };
        };
        struct kvm_s2_mmu *mmu;
};

/**
@@ -526,8 +531,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                              enum kvm_pgtable_stage2_flags flags,
                              kvm_pgtable_force_pte_cb_t force_pte_cb);

#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
        __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                                          struct kvm_pgtable_mm_ops *mm_ops)
{
        return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
}

/**
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -669,13 +677,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
 * kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
 * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr: Intermediate physical address to identify the page-table entry.
 * @flags: Flags to control the page-table walk (ex. a shared walk)
 *
 * The offset of @addr within a page is ignored.
 *
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * set the access flag in that entry.
 */
void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
                                enum kvm_pgtable_walk_flags flags);

/**
* kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
@@ -705,6 +715,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
 * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
 * @addr: Intermediate physical address to identify the page-table entry.
 * @prot: Additional permissions to grant for the mapping.
 * @flags: Flags to control the page-table walk (ex. a shared walk)
 *
 * The offset of @addr within a page is ignored.
 *
@@ -717,7 +728,8 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
 * Return: 0 on success, negative error code on failure.
 */
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
                                   enum kvm_pgtable_prot prot);
                                   enum kvm_pgtable_prot prot,
                                   enum kvm_pgtable_walk_flags flags);

/**
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
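Both kvm_pgtable_stage2_mkyoung() and kvm_pgtable_stage2_relax_perms() now take an explicit walk-flags argument, so the caller decides whether the walk follows the shared (parallel) rules or the exclusive ones. A hedged caller sketch; the wrapper name is illustrative, while KVM_PGTABLE_WALK_SHARED is the existing flag used for walks done under the read lock:

        static int stage2_make_writable(struct kvm_pgtable *pgt, u64 ipa, bool shared_walk)
        {
                enum kvm_pgtable_walk_flags flags = shared_walk ? KVM_PGTABLE_WALK_SHARED : 0;

                return kvm_pgtable_stage2_relax_perms(pgt, ipa,
                                                      KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W,
                                                      flags);
        }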
26 changes: 26 additions & 0 deletions arch/arm64/include/asm/kvm_pkvm.h
@@ -137,4 +137,30 @@ static inline size_t pkvm_host_sve_state_size(void)
                       SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
}

struct pkvm_mapping {
        struct rb_node node;
        u64 gfn;
        u64 pfn;
};

int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                             struct kvm_pgtable_mm_ops *mm_ops);
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
                            enum kvm_pgtable_prot prot, void *mc,
                            enum kvm_pgtable_walk_flags flags);
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
                                    enum kvm_pgtable_walk_flags flags);
void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
                                 enum kvm_pgtable_walk_flags flags);
int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
                              struct kvm_mmu_memory_cache *mc);
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
                                               enum kvm_pgtable_prot prot, void *mc,
                                               bool force_pte);
#endif /* __ARM64_KVM_PKVM_H__ */
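struct pkvm_mapping is the EL1 view of one guest stage-2 page: the host no longer walks real page tables for pKVM guests, it records which gfn maps to which pfn in the rb-tree rooted at kvm_pgtable::pkvm_mappings (see the kvm_pgtable union above). A hedged sketch of what a gfn-keyed lookup might look like; the helper name and keying discipline are assumptions, the real implementation lives in the pKVM MMU code this series adds:

        static struct pkvm_mapping *pkvm_mapping_find(struct kvm_pgtable *pgt, u64 gfn)
        {
                struct rb_node *node = pgt->pkvm_mappings.rb_node;

                while (node) {
                        struct pkvm_mapping *m = rb_entry(node, struct pkvm_mapping, node);

                        if (gfn < m->gfn)
                                node = node->rb_left;
                        else if (gfn > m->gfn)
                                node = node->rb_right;
                        else
                                return m;
                }

                return NULL;
        }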
23 changes: 22 additions & 1 deletion arch/arm64/kvm/arm.c
@@ -500,7 +500,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
        if (!is_protected_kvm_enabled())
                kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
        else
                free_hyp_memcache(&vcpu->arch.pkvm_memcache);
        kvm_timer_vcpu_terminate(vcpu);
        kvm_pmu_vcpu_destroy(vcpu);
        kvm_vgic_vcpu_destroy(vcpu);
@@ -572,6 +575,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        struct kvm_s2_mmu *mmu;
        int *last_ran;

        if (is_protected_kvm_enabled())
                goto nommu;

        if (vcpu_has_nv(vcpu))
                kvm_vcpu_load_hw_mmu(vcpu);

@@ -592,6 +598,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
                *last_ran = vcpu->vcpu_idx;
        }

nommu:
        vcpu->cpu = cpu;

        kvm_vgic_load(vcpu);
@@ -616,12 +623,26 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)

        vcpu_set_pauth_traps(vcpu);

        if (is_protected_kvm_enabled()) {
                kvm_call_hyp_nvhe(__pkvm_vcpu_load,
                                  vcpu->kvm->arch.pkvm.handle,
                                  vcpu->vcpu_idx, vcpu->arch.hcr_el2);
                kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
                             &vcpu->arch.vgic_cpu.vgic_v3);
        }

        if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
                vcpu_set_on_unsupported_cpu(vcpu);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        if (is_protected_kvm_enabled()) {
                kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
                             &vcpu->arch.vgic_cpu.vgic_v3);
                kvm_call_hyp_nvhe(__pkvm_vcpu_put);
        }

        kvm_vcpu_put_debug(vcpu);
        kvm_arch_vcpu_put_fp(vcpu);
        if (has_vhe())
6 changes: 3 additions & 3 deletions arch/arm64/kvm/hyp/include/nvhe/gfp.h
@@ -7,7 +7,7 @@
#include <nvhe/memory.h>
#include <nvhe/spinlock.h>

#define HYP_NO_ORDER USHRT_MAX
#define HYP_NO_ORDER ((u8)(~0))

struct hyp_pool {
/*
@@ -19,11 +19,11 @@ struct hyp_pool {
        struct list_head free_area[NR_PAGE_ORDERS];
        phys_addr_t range_start;
        phys_addr_t range_end;
        unsigned short max_order;
        u8 max_order;
};

/* Allocation */
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
void *hyp_alloc_pages(struct hyp_pool *pool, u8 order);
void hyp_split_page(struct hyp_page *page);
void hyp_get_page(struct hyp_pool *pool, void *addr);
void hyp_put_page(struct hyp_pool *pool, void *addr);
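Shrinking the order fields to u8 is what lets struct hyp_page (see memory.h below) pack into eight bytes. HYP_NO_ORDER becomes 0xff, which is safe because valid orders are bounded by NR_PAGE_ORDERS, far below 255. A hedged illustration of that invariant; the helper is not part of this commit:

        /* Illustrative only: a valid buddy order never collides with the sentinel. */
        static inline bool hyp_order_is_valid(u8 order)
        {
                return order != HYP_NO_ORDER && order < NR_PAGE_ORDERS;
        }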
39 changes: 8 additions & 31 deletions arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -11,40 +11,10 @@
#include <asm/kvm_mmu.h>
#include <asm/kvm_pgtable.h>
#include <asm/virt.h>
#include <nvhe/memory.h>
#include <nvhe/pkvm.h>
#include <nvhe/spinlock.h>

/*
 * SW bits 0-1 are reserved to track the memory ownership state of each page:
 *   00: The page is owned exclusively by the page-table owner.
 *   01: The page is owned by the page-table owner, but is shared
 *       with another entity.
 *   10: The page is shared with, but not owned by the page-table owner.
 *   11: Reserved for future use (lending).
 */
enum pkvm_page_state {
        PKVM_PAGE_OWNED = 0ULL,
        PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
        PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
        __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
                               KVM_PGTABLE_PROT_SW1,

        /* Meta-states which aren't encoded directly in the PTE's SW bits */
        PKVM_NOPAGE,
};

#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
                                                 enum pkvm_page_state state)
{
        return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
}

static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
{
        return prot & PKVM_PAGE_STATE_PROT_MASK;
}

struct host_mmu {
        struct kvm_arch arch;
        struct kvm_pgtable pgt;
@@ -69,6 +39,13 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
enum kvm_pgtable_prot prot);
int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm);
int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);

bool addr_is_memory(phys_addr_t phys);
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
50 changes: 47 additions & 3 deletions arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -7,9 +7,47 @@

#include <linux/types.h>

/*
 * Bits 0-1 are reserved to track the memory ownership state of each page:
 *   00: The page is owned exclusively by the page-table owner.
 *   01: The page is owned by the page-table owner, but is shared
 *       with another entity.
 *   10: The page is shared with, but not owned by the page-table owner.
 *   11: Reserved for future use (lending).
 */
enum pkvm_page_state {
        PKVM_PAGE_OWNED = 0ULL,
        PKVM_PAGE_SHARED_OWNED = BIT(0),
        PKVM_PAGE_SHARED_BORROWED = BIT(1),
        __PKVM_PAGE_RESERVED = BIT(0) | BIT(1),

        /* Meta-states which aren't encoded directly in the PTE's SW bits */
        PKVM_NOPAGE = BIT(2),
};
#define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED)

#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
                                                 enum pkvm_page_state state)
{
        prot &= ~PKVM_PAGE_STATE_PROT_MASK;
        prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state);
        return prot;
}

static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
{
        return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot);
}

struct hyp_page {
        unsigned short refcount;
        unsigned short order;
        u16 refcount;
        u8 order;

        /* Host (non-meta) state. Guarded by the host stage-2 lock. */
        enum pkvm_page_state host_state : 8;

        u32 host_share_guest_count;
};

extern u64 __hyp_vmemmap;
@@ -29,7 +67,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)

#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT))
#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])

static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
{
        BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64));
        return &hyp_vmemmap[hyp_phys_to_pfn(phys)];
}

#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt))

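With the new layout, struct hyp_page packs into exactly 8 bytes (u16 refcount + u8 order + 8-bit host_state + u32 host_share_guest_count), which is what the BUILD_BUG_ON in hyp_phys_to_page() enforces so the hyp vmemmap stays one u64 per page. The page state still travels in the two software bits of a PTE prot value, now encoded with FIELD_PREP/FIELD_GET, so encode and decode must round-trip. A hedged self-check sketch, not part of this commit:

        static inline void pkvm_page_state_selftest(void)
        {
                enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_RWX;

                prot = pkvm_mkstate(prot, PKVM_PAGE_SHARED_OWNED);
                WARN_ON(pkvm_getstate(prot) != PKVM_PAGE_SHARED_OWNED);
        }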
(Diff for the remaining changed files is not shown here.)
