Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Avoid pKVM finalization if KVM initialization fails

   - Add missing BTI instructions in the hypervisor, fixing an early
     boot failure on BTI systems

   - Handle MMU notifiers correctly for non hugepage-aligned memslots

   - Work around a bug in the architecture where hypervisor timer
     controls have UNKNOWN behavior under nested virt

   - Disable preemption in kvm_arch_hardware_enable(), fixing a kernel
     BUG in cpu hotplug resulting from per-CPU accessor sanity checking

   - Make WFI emulation on GICv4 systems robust w.r.t. preemption,
     consistently requesting a doorbell interrupt on vcpu_put()

   - Uphold RES0 sysreg behavior when emulating older PMU versions

   - Avoid macro expansion when initializing PMU register names,
     ensuring the tracepoints pretty-print the sysreg

  s390:

   - Two fixes for asynchronous destroy

  x86 fixes will come early next week"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: s390: pv: fix index value of replaced ASCE
  KVM: s390: pv: simplify shutdown and fix race
  KVM: arm64: Fix the name of sys_reg_desc related to PMU
  KVM: arm64: Correctly handle RES0 bits PMEVTYPER<n>_EL0.evtCount
  KVM: arm64: vgic-v4: Make the doorbell request robust w.r.t preemption
  KVM: arm64: Add missing BTI instructions
  KVM: arm64: Correctly handle page aging notifiers for unaligned memslot
  KVM: arm64: Disable preemption in kvm_arch_hardware_enable()
  KVM: arm64: Handle kvm_arm_init failure correctly in finalize_pkvm
  KVM: arm64: timers: Use CNTHCTL_EL2 when setting non-CNTKCTL_EL1 bits
Linus Torvalds committed Jul 23, 2023
2 parents 15b593b + 0c18970 commit 269f4a4
Showing 17 changed files with 140 additions and 72 deletions.
2 changes: 2 additions & 0 deletions arch/arm64/include/asm/kvm_host.h
@@ -727,6 +727,8 @@ struct kvm_vcpu_arch {
 #define DBG_SS_ACTIVE_PENDING	__vcpu_single_flag(sflags, BIT(5))
 /* PMUSERENR for the guest EL0 is on physical CPU */
 #define PMUSERENR_ON_CPU	__vcpu_single_flag(sflags, BIT(6))
+/* WFI instruction trapped */
+#define IN_WFI			__vcpu_single_flag(sflags, BIT(7))
 
 
 /* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
26 changes: 9 additions & 17 deletions arch/arm64/include/asm/kvm_pgtable.h
@@ -608,22 +608,26 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
 
 /**
- * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
+ * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
+ *					   flag in a page-table entry.
  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
  * @addr:	Intermediate physical address to identify the page-table entry.
+ * @size:	Size of the address range to visit.
+ * @mkold:	True if the access flag should be cleared.
  *
  * The offset of @addr within a page is ignored.
  *
- * If there is a valid, leaf page-table entry used to translate @addr, then
- * clear the access flag in that entry.
+ * Tests and conditionally clears the access flag for every valid, leaf
+ * page-table entry used to translate the range [@addr, @addr + @size).
  *
  * Note that it is the caller's responsibility to invalidate the TLB after
  * calling this function to ensure that the updated permissions are visible
  * to the CPUs.
  *
- * Return: The old page-table entry prior to clearing the flag, 0 on failure.
+ * Return: True if any of the visited PTEs had the access flag set.
  */
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+					 u64 size, bool mkold);
 
 /**
  * kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
@@ -645,18 +649,6 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
 				   enum kvm_pgtable_prot prot);
 
-/**
- * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
- *				   access flag set.
- * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr:	Intermediate physical address to identify the page-table entry.
- *
- * The offset of @addr within a page is ignored.
- *
- * Return: True if the page-table entry has the access flag set, false otherwise.
- */
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
-
 /**
  * kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
  *				      of Coherency for guest stage-2 address
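A reading aid, not part of the commit: a minimal, hedged sketch of how a caller drives the new API (the arch/arm64/kvm/mmu.c hunk below shows the real call sites). The wrapper name age_ipa_range is hypothetical; the sketch assumes pgt was set up with kvm_pgtable_stage2_init*() and that the caller holds the MMU lock as the walker expects.

    /*
     * Hypothetical wrapper, sketch only: the range need not be
     * hugepage-aligned; the walker visits every leaf PTE translating
     * [ipa, ipa + size).
     */
    static bool age_ipa_range(struct kvm_pgtable *pgt, u64 ipa, u64 size,
    			      bool clear)
    {
    	/* True if any visited leaf PTE had the access flag set. */
    	bool young = kvm_pgtable_stage2_test_clear_young(pgt, ipa, size, clear);
    
    	/* TLB invalidation is deliberately left to the caller (see above). */
    	return young;
    }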
1 change: 1 addition & 0 deletions arch/arm64/include/asm/virt.h
@@ -78,6 +78,7 @@ extern u32 __boot_cpu_mode[2];
 
 void __hyp_set_vectors(phys_addr_t phys_vector_base);
 void __hyp_reset_vectors(void);
+bool is_kvm_arm_initialised(void);
 
 DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 
6 changes: 3 additions & 3 deletions arch/arm64/kvm/arch_timer.c
@@ -827,8 +827,8 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 	assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
 	assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
 
-	/* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
-	sysreg_clear_set(cntkctl_el1, clr, set);
+	/* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
+	sysreg_clear_set(cnthctl_el2, clr, set);
 }
 
 void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -1563,7 +1563,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 void kvm_timer_init_vhe(void)
 {
 	if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
-		sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
+		sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
 }
 
 int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
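For readers unfamiliar with the accessor being swapped here: sysreg_clear_set() is a read-modify-write helper for the named system register, so the fix changes which physical register the RMW targets when the kernel runs at EL2. A rough sketch of the call's effect (not the kernel's exact macro definition):

    /* Approximate effect of sysreg_clear_set(cnthctl_el2, clr, set): */
    u64 val = read_sysreg(cnthctl_el2);	/* current timer trap controls */
    u64 new = (val & ~clr) | set;	/* clear, then set, the requested bits */
    
    if (new != val)			/* skip the write when nothing changes */
    	write_sysreg(new, cnthctl_el2);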
28 changes: 24 additions & 4 deletions arch/arm64/kvm/arm.c
@@ -53,11 +53,16 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
 
-static bool vgic_present;
+static bool vgic_present, kvm_arm_initialised;
 
 static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
 DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
 
+bool is_kvm_arm_initialised(void)
+{
+	return kvm_arm_initialised;
+}
+
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
 {
 	return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -713,13 +718,15 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
 	 */
 	preempt_disable();
 	kvm_vgic_vmcr_sync(vcpu);
-	vgic_v4_put(vcpu, true);
+	vcpu_set_flag(vcpu, IN_WFI);
+	vgic_v4_put(vcpu);
 	preempt_enable();
 
 	kvm_vcpu_halt(vcpu);
 	vcpu_clear_flag(vcpu, IN_WFIT);
 
 	preempt_disable();
+	vcpu_clear_flag(vcpu, IN_WFI);
 	vgic_v4_load(vcpu);
 	preempt_enable();
 }
@@ -787,7 +794,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
 	if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
 		/* The distributor enable bits were changed */
 		preempt_disable();
-		vgic_v4_put(vcpu, false);
+		vgic_v4_put(vcpu);
 		vgic_v4_load(vcpu);
 		preempt_enable();
 	}
@@ -1867,15 +1874,26 @@ static void _kvm_arch_hardware_enable(void *discard)
 
 int kvm_arch_hardware_enable(void)
 {
-	int was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
+	int was_enabled;
+
+	/*
+	 * Most calls to this function are made with migration
+	 * disabled, but not with preemption disabled. The former is
+	 * enough to ensure correctness, but most of the helpers
+	 * expect the latter and will throw a tantrum otherwise.
+	 */
+	preempt_disable();
+
+	was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
 	_kvm_arch_hardware_enable(NULL);
 
 	if (!was_enabled) {
 		kvm_vgic_cpu_up();
 		kvm_timer_cpu_up();
 	}
 
+	preempt_enable();
+
 	return 0;
 }
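Why the "tantrum": with CONFIG_DEBUG_PREEMPT, the this_cpu accessor family checks that the caller cannot be preempted and splats when it can, which is what produced the cpu-hotplug BUG mentioned in the pull message. A simplified model of that check (a sketch, not the kernel's actual implementation; checked_this_cpu_read is hypothetical):

    /*
     * Sketch: migration-disabled is enough for correctness here, but the
     * debug check insists on preemption being disabled too.
     */
    unsigned long checked_this_cpu_read(unsigned long *pcpu_var)
    {
    	if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) && preemptible())
    		WARN(1, "using __this_cpu_read() in preemptible code");
    
    	return *pcpu_var;	/* safe only while the task cannot migrate */
    }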

Expand Down Expand Up @@ -2482,6 +2500,8 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;

kvm_arm_initialised = true;

return 0;

out_subs:
8 changes: 8 additions & 0 deletions arch/arm64/kvm/hyp/hyp-entry.S
@@ -154,6 +154,12 @@ SYM_CODE_END(\label)
 	esb
 	stp	x0, x1, [sp, #-16]!
 662:
+	/*
+	 * The Spectre vectors in __bp_harden_hyp_vecs generate br instructions
+	 * at runtime that jump to offset 8 of __kvm_hyp_vector.
+	 * As hyp .text is a guarded section, it needs a bti j landing pad.
+	 */
+	bti j
 	b	\target
 
 check_preamble_length 661b, 662b
@@ -165,6 +171,8 @@ check_preamble_length 661b, 662b
 	nop
 	stp	x0, x1, [sp, #-16]!
 662:
+	/* Check valid_vect */
+	bti j
 	b	\target
 
 check_preamble_length 661b, 662b
10 changes: 10 additions & 0 deletions arch/arm64/kvm/hyp/nvhe/host.S
@@ -297,3 +297,13 @@ SYM_CODE_START(__kvm_hyp_host_forward_smc)
 
 	ret
 SYM_CODE_END(__kvm_hyp_host_forward_smc)
+
+/*
+ * kvm_host_psci_cpu_entry is called through a br instruction, so it requires
+ * a bti j landing pad: compilers (gcc and llvm) don't insert bti j for
+ * external functions, only bti c.
+ */
+SYM_CODE_START(kvm_host_psci_cpu_entry)
+	bti j
+	b	__kvm_host_psci_cpu_entry
+SYM_CODE_END(kvm_host_psci_cpu_entry)
2 changes: 1 addition & 1 deletion arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -200,7 +200,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
 			  __hyp_pa(init_params), 0);
 }
 
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
 {
 	struct psci_boot_args *boot_args;
 	struct kvm_cpu_context *host_ctxt;
47 changes: 38 additions & 9 deletions arch/arm64/kvm/hyp/pgtable.c
@@ -1195,25 +1195,54 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 	return pte;
 }
 
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
+struct stage2_age_data {
+	bool	mkold;
+	bool	young;
+};
+
+static int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			     enum kvm_pgtable_walk_flags visit)
 {
-	kvm_pte_t pte = 0;
-	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
-				 &pte, NULL, 0);
+	kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF;
+	struct stage2_age_data *data = ctx->arg;
+
+	if (!kvm_pte_valid(ctx->old) || new == ctx->old)
+		return 0;
+
+	data->young = true;
+
+	/*
+	 * stage2_age_walker() is always called while holding the MMU lock for
+	 * write, so this will always succeed. Nonetheless, this deliberately
+	 * follows the race detection pattern of the other stage-2 walkers in
+	 * case the locking mechanics of the MMU notifiers is ever changed.
+	 */
+	if (data->mkold && !stage2_try_set_pte(ctx, new))
+		return -EAGAIN;
+
 	/*
 	 * "But where's the TLBI?!", you scream.
 	 * "Over in the core code", I sigh.
 	 *
 	 * See the '->clear_flush_young()' callback on the KVM mmu notifier.
 	 */
-	return pte;
+	return 0;
 }
 
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+					 u64 size, bool mkold)
 {
-	kvm_pte_t pte = 0;
-	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0);
-	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
+	struct stage2_age_data data = {
+		.mkold = mkold,
+	};
+	struct kvm_pgtable_walker walker = {
+		.cb	= stage2_age_walker,
+		.arg	= &data,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+	};
+
+	WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
+	return data.young;
 }
 
 int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
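The TLBI that the walker's comment alludes to lives in generic KVM code: the ->clear_flush_young() MMU-notifier path flushes once, after the arch callback reports a young page. A hedged sketch of that core-code shape (abridged; clear_flush_young_sketch is hypothetical and the real plumbing differs in detail):

    static int clear_flush_young_sketch(struct kvm *kvm, struct kvm_gfn_range *range)
    {
    	int young = kvm_age_gfn(kvm, range);	/* arch walker clears the AF bits */
    
    	if (young)
    		kvm_flush_remote_tlbs(kvm);	/* one flush for the whole range */
    
    	return young;
    }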
18 changes: 8 additions & 10 deletions arch/arm64/kvm/mmu.c
@@ -1756,27 +1756,25 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	u64 size = (range->end - range->start) << PAGE_SHIFT;
-	kvm_pte_t kpte;
-	pte_t pte;
 
 	if (!kvm->arch.mmu.pgt)
 		return false;
 
-	WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
-
-	kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
-					range->start << PAGE_SHIFT);
-	pte = __pte(kpte);
-	return pte_valid(pte) && pte_young(pte);
+	return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+						   range->start << PAGE_SHIFT,
+						   size, true);
 }
 
 bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
+	u64 size = (range->end - range->start) << PAGE_SHIFT;
+
 	if (!kvm->arch.mmu.pgt)
 		return false;
 
-	return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
-					   range->start << PAGE_SHIFT);
+	return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+						   range->start << PAGE_SHIFT,
+						   size, false);
 }
 
 phys_addr_t kvm_mmu_get_httbr(void)
2 changes: 1 addition & 1 deletion arch/arm64/kvm/pkvm.c
@@ -244,7 +244,7 @@ static int __init finalize_pkvm(void)
 {
 	int ret;
 
-	if (!is_protected_kvm_enabled())
+	if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
 		return 0;
 
 	/*
(Diffs for the remaining 6 changed files are not shown.)