kvm: defer huge page recovery vhost task to later
Some libraries want to ensure they are single threaded before forking,
so making the kernel's kvm huge page recovery process a vhost task of
the user process breaks those. The minijail library used by crosvm is
one such affected application.

Defer the task to after the first VM_RUN call, which occurs after the
parent process has forked all its jailed processes. This needs to happen
only once for the kvm instance, so introduce some general-purpose
infrastructure for that, too. It's similar in concept to pthread_once,
except it is actually usable because the callback takes a parameter.

Cc: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Tested-by: Alyssa Ross <hi@alyssa.is>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Message-ID: <20250123153543.2769928-1-kbusch@meta.com>
[Move call_once API to include/linux. - Paolo]
Cc: stable@vger.kernel.org
Fixes: d96c77b ("KVM: x86: switch hugepage recovery thread to vhost_task")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Keith Busch authored and Paolo Bonzini committed Jan 24, 2025
1 parent 86eb1ae commit 931656b
Showing 4 changed files with 66 additions and 6 deletions.
2 changes: 2 additions & 0 deletions arch/x86/include/asm/kvm_host.h
@@ -27,6 +27,7 @@
 #include <linux/hyperv.h>
 #include <linux/kfifo.h>
 #include <linux/sched/vhost_task.h>
+#include <linux/call_once.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -1466,6 +1467,7 @@ struct kvm_arch {
 	struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
 	struct vhost_task *nx_huge_page_recovery_thread;
 	u64 nx_huge_page_last;
+	struct once nx_once;
 
 #ifdef CONFIG_X86_64
 	/* The number of TDP MMU pages across all roots. */
18 changes: 13 additions & 5 deletions arch/x86/kvm/mmu/mmu.c
@@ -7447,20 +7447,28 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
 	return true;
 }
 
-int kvm_mmu_post_init_vm(struct kvm *kvm)
+static void kvm_mmu_start_lpage_recovery(struct once *once)
 {
-	if (nx_hugepage_mitigation_hard_disabled)
-		return 0;
+	struct kvm_arch *ka = container_of(once, struct kvm_arch, nx_once);
+	struct kvm *kvm = container_of(ka, struct kvm, arch);
 
 	kvm->arch.nx_huge_page_last = get_jiffies_64();
 	kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
 		kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
 		kvm, "kvm-nx-lpage-recovery");
 
+	if (kvm->arch.nx_huge_page_recovery_thread)
+		vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
+}
+
+int kvm_mmu_post_init_vm(struct kvm *kvm)
+{
+	if (nx_hugepage_mitigation_hard_disabled)
+		return 0;
+
+	call_once(&kvm->arch.nx_once, kvm_mmu_start_lpage_recovery);
 	if (!kvm->arch.nx_huge_page_recovery_thread)
 		return -ENOMEM;
 
-	vhost_task_start(kvm->arch.nx_huge_page_recovery_thread);
 	return 0;
 }

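The new callback never receives a struct kvm directly: call_once() hands it only the embedded struct once, and kvm_mmu_start_lpage_recovery() climbs back out with two container_of() steps (once -> kvm_arch -> kvm). Below is a minimal, self-contained userspace sketch of that pattern; it is not part of the commit, and the struct names are stand-ins for the KVM ones.

/* Illustration only: the double container_of() walk used above. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct once { int state; };			/* placeholder for the real struct once */
struct arch { struct once nx_once; };		/* stand-in for struct kvm_arch */
struct vm   { int id; struct arch arch; };	/* stand-in for struct kvm */

static void start_recovery(struct once *once)
{
	struct arch *ka = container_of(once, struct arch, nx_once);
	struct vm *kvm = container_of(ka, struct vm, arch);

	printf("starting recovery for vm %d\n", kvm->id);
}

int main(void)
{
	struct vm vm = { .id = 7 };

	start_recovery(&vm.arch.nx_once);	/* prints: starting recovery for vm 7 */
	return 0;
}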
7 changes: 6 additions & 1 deletion arch/x86/kvm/x86.c
@@ -11471,6 +11471,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 	int r;
 
+	r = kvm_mmu_post_init_vm(vcpu->kvm);
+	if (r)
+		return r;
+
 	vcpu_load(vcpu);
 	kvm_sigset_activate(vcpu);
 	kvm_run->flags = 0;
@@ -12748,7 +12752,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 int kvm_arch_post_init_vm(struct kvm *kvm)
 {
-	return kvm_mmu_post_init_vm(kvm);
+	once_init(&kvm->arch.nx_once);
+	return 0;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
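With the thread creation moved into kvm_arch_vcpu_ioctl_run(), a VMM can create the VM and its vCPUs, sandbox and fork while the process is still single threaded, and only then issue KVM_RUN, which is when the kvm-nx-lpage-recovery vhost task first appears. The rough userspace sketch below shows that ordering; it is illustration only and omits the guest memory, register and kvm_run mmap setup a real KVM_RUN needs, plus all error handling.

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <unistd.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	int vm = ioctl(kvm, KVM_CREATE_VM, 0);
	int vcpu = ioctl(vm, KVM_CREATE_VCPU, 0);

	/* Still no kernel worker task for this VM: safe point for a
	 * minijail-style single-threaded check plus fork(). */
	if (fork() == 0)
		_exit(0);		/* jailed child would run here */

	/* First KVM_RUN: kvm_mmu_post_init_vm() -> call_once() creates and
	 * starts the recovery vhost task exactly once for this VM. */
	ioctl(vcpu, KVM_RUN, 0);
	return 0;
}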
45 changes: 45 additions & 0 deletions include/linux/call_once.h
@@ -0,0 +1,45 @@
#ifndef _LINUX_CALL_ONCE_H
#define _LINUX_CALL_ONCE_H

#include <linux/types.h>
#include <linux/mutex.h>

#define ONCE_NOT_STARTED 0
#define ONCE_RUNNING     1
#define ONCE_COMPLETED   2

struct once {
	atomic_t state;
	struct mutex lock;
};

static inline void __once_init(struct once *once, const char *name,
			       struct lock_class_key *key)
{
	atomic_set(&once->state, ONCE_NOT_STARTED);
	__mutex_init(&once->lock, name, key);
}

#define once_init(once)						\
do {								\
	static struct lock_class_key __key;			\
	__once_init((once), #once, &__key);			\
} while (0)

static inline void call_once(struct once *once, void (*cb)(struct once *))
{
	/* Pairs with atomic_set_release() below. */
	if (atomic_read_acquire(&once->state) == ONCE_COMPLETED)
		return;

	guard(mutex)(&once->lock);
	WARN_ON(atomic_read(&once->state) == ONCE_RUNNING);
	if (atomic_read(&once->state) != ONCE_NOT_STARTED)
		return;

	atomic_set(&once->state, ONCE_RUNNING);
	cb(once);
	atomic_set_release(&once->state, ONCE_COMPLETED);
}

#endif /* _LINUX_CALL_ONCE_H */
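Unlike pthread_once(3), whose init routine takes no arguments, the call_once() callback receives the struct once itself, so the struct can be embedded in a larger object and the callback can recover that object with container_of(). A hedged usage sketch follows; it is not from the commit, and mydev, its fields and helpers are hypothetical.

#include <linux/call_once.h>
#include <linux/container_of.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct mydev {
	struct once setup_once;		/* embedded, like nx_once in kvm_arch */
	void *buf;
};

static void mydev_do_setup(struct once *once)
{
	struct mydev *dev = container_of(once, struct mydev, setup_once);

	dev->buf = kzalloc(4096, GFP_KERNEL);	/* result checked by the caller */
}

static void mydev_init(struct mydev *dev)
{
	once_init(&dev->setup_once);	/* must run before any call_once() */
}

static int mydev_first_use(struct mydev *dev)
{
	/* Racing callers are fine: the callback runs once, everyone else
	 * either waits on the mutex or takes the fast acquire-read path. */
	call_once(&dev->setup_once, mydev_do_setup);

	/* call_once() returns void, so recheck the side effect, exactly as
	 * kvm_mmu_post_init_vm() rechecks nx_huge_page_recovery_thread;
	 * note a failed first attempt is not retried. */
	return dev->buf ? 0 : -ENOMEM;
}

The fast path is a single atomic_read_acquire(), so invoking call_once() on every entry, as kvm_arch_vcpu_ioctl_run() now effectively does via kvm_mmu_post_init_vm(), stays cheap after the first call.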
