
Commit

Merge branch 'kvm-pi-fix-lockdep' into HEAD
Paolo Bonzini committed Apr 4, 2025
2 parents 369348e + c0b8dca commit c77eee5
Showing 1 changed file with 30 additions and 7 deletions.
37 changes: 30 additions & 7 deletions arch/x86/kvm/vmx/posted_intr.c
@@ -31,6 +31,8 @@ static DEFINE_PER_CPU(struct list_head, wakeup_vcpus_on_cpu);
  */
 static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
+#define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING
+
 static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
 	return &(to_vmx(vcpu)->pi_desc);
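
For readers unfamiliar with the new define: SINGLE_DEPTH_NESTING is lockdep's stock subclass constant for a second, deliberate acquisition within one lock class. A minimal sketch of how a subclass is consumed, with made-up locks a and b of the same class (illustrative only, not part of this commit):

#include <linux/spinlock.h>

/*
 * Illustrative sketch: lockdep tracks locks by class, so nesting two locks
 * of the same class normally looks like a self-deadlock.  Passing a distinct
 * subclass via the _nested() variant tells lockdep the nesting is intentional.
 */
static void example_take_two(raw_spinlock_t *a, raw_spinlock_t *b)
{
	raw_spin_lock(a);                               /* subclass 0 */
	raw_spin_lock_nested(b, SINGLE_DEPTH_NESTING);  /* subclass 1 */
	/* ... critical section holding both locks ... */
	raw_spin_unlock(b);
	raw_spin_unlock(a);
}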
@@ -89,9 +91,20 @@ void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
 	 * current pCPU if the task was migrated.
 	 */
 	if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR) {
-		raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+		raw_spinlock_t *spinlock = &per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu);
+
+		/*
+		 * In addition to taking the wakeup lock for the regular/IRQ
+		 * context, tell lockdep it is being taken for the "sched out"
+		 * context as well.  vCPU loads happen in task context, and
+		 * this is taking the lock of the *previous* CPU, i.e. can race
+		 * with both the scheduler and the wakeup handler.
+		 */
+		raw_spin_lock(spinlock);
+		spin_acquire(&spinlock->dep_map, PI_LOCK_SCHED_OUT, 0, _RET_IP_);
 		list_del(&vmx->pi_wakeup_list);
-		raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+		spin_release(&spinlock->dep_map, _RET_IP_);
+		raw_spin_unlock(spinlock);
 	}
 
 	dest = cpu_physical_id(cpu);
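
The hunk above layers a pure lockdep annotation on top of the real lock operation. A hedged sketch of that pattern in isolation (the function name is invented; dep_map exists only under CONFIG_DEBUG_LOCK_ALLOC, and spin_acquire()/spin_release() compile away to nothing without it):

#include <linux/lockdep.h>
#include <linux/spinlock.h>

/*
 * Hypothetical sketch of the dual-context annotation: the lock is physically
 * taken once, in subclass 0, while spin_acquire()/spin_release() merely
 * record an extra nested acquisition in lockdep's dependency graph.  They
 * never touch the lock word itself.
 */
static void example_dual_context(raw_spinlock_t *lock)
{
	raw_spin_lock(lock);		/* real acquisition, subclass 0 */
	spin_acquire(&lock->dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	/* ... section that can race with both contexts ... */
	spin_release(&lock->dep_map, _RET_IP_);
	raw_spin_unlock(lock);		/* real release */
}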
@@ -148,11 +161,23 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	struct pi_desc old, new;
-	unsigned long flags;
 
-	local_irq_save(flags);
+	lockdep_assert_irqs_disabled();
 
-	raw_spin_lock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
+	/*
+	 * Acquire the wakeup lock using the "sched out" context to work around
+	 * a lockdep false positive.  When this is called, schedule() holds
+	 * various per-CPU scheduler locks.  When the wakeup handler runs, it
+	 * holds this CPU's wakeup lock while calling try_to_wake_up(), which
+	 * can eventually take the aforementioned scheduler locks, which causes
+	 * lockdep to assume there is a deadlock.
+	 *
+	 * Deadlock can't actually occur because IRQs are disabled for the
+	 * entirety of the sched_out critical section, i.e. the wakeup handler
+	 * can't run while the scheduler locks are held.
+	 */
+	raw_spin_lock_nested(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu),
+			     PI_LOCK_SCHED_OUT);
 	list_add_tail(&vmx->pi_wakeup_list,
 		      &per_cpu(wakeup_vcpus_on_cpu, vcpu->cpu));
 	raw_spin_unlock(&per_cpu(wakeup_vcpus_on_cpu_lock, vcpu->cpu));
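
Note how the new code pairs lockdep_assert_irqs_disabled() with the _nested acquisition instead of the old local_irq_save()/local_irq_restore() pair: the function now asserts that its sched_out caller already runs with IRQs off rather than disabling them itself. A condensed, invented sketch of the resulting shape (all names except the kernel APIs are hypothetical):

#include <linux/irqflags.h>
#include <linux/list.h>
#include <linux/spinlock.h>

/*
 * Invented example mirroring the shape of the new pi_enable_wakeup_handler()
 * locking: with IRQs already off, the wakeup handler (an IRQ) cannot fire
 * and take the scheduler locks that lockdep would otherwise chain into a
 * false-positive deadlock report.
 */
static void example_sched_out_add(raw_spinlock_t *wakeup_lock,
				  struct list_head *entry,
				  struct list_head *list)
{
	lockdep_assert_irqs_disabled();	/* WARNs under lockdep if IRQs are on */
	raw_spin_lock_nested(wakeup_lock, SINGLE_DEPTH_NESTING);
	list_add_tail(entry, list);
	raw_spin_unlock(wakeup_lock);
}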
@@ -176,8 +201,6 @@ static void pi_enable_wakeup_handler(struct kvm_vcpu *vcpu)
 	 */
 	if (pi_test_on(&new))
 		__apic_send_IPI_self(POSTED_INTR_WAKEUP_VECTOR);
-
-	local_irq_restore(flags);
 }
 
 static bool vmx_needs_pi_wakeup(struct kvm_vcpu *vcpu)
