Skip to content

Commit

Permalink
powerpc/qspinlock: allow propagation of yield CPU down the queue
Browse files Browse the repository at this point in the history
Having all CPUs poll the lock word for the owner CPU that should be
yielded to defeats most of the purpose of using MCS queueing for
scalability. Yet it may be desirable for queued waiters to yield to a
preempted owner.

With this change, queue waiters never sample the owner CPU directly from
the lock word. The queue head (which is spinning on the lock) propagates
the owner CPU back to the next waiter if it finds the owner has been
preempted. That waiter then propagates the owner CPU back to the next
waiter, and so on.

s390 addresses this problem differently, by having queued waiters sample
the lock word to find the owner at a low frequency. That has the
advantage of being simpler; the advantage of propagation is that the
lock word never has to be accessed by queued waiters, and the transfer of
cache lines to transmit the owner data is only required when lock holder
vCPU preemption occurs.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-11-npiggin@gmail.com
  • Loading branch information
Nicholas Piggin authored and Michael Ellerman committed Dec 2, 2022
1 parent b4c3cdc commit 28db61e
Showing 1 changed file with 79 additions and 0 deletions.
79 changes: 79 additions & 0 deletions arch/powerpc/lib/qspinlock.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
/*
 * Queue node used by a CPU while it waits for a qspinlock in the MCS queue.
 */
struct qnode {
/* Following waiter in the queue; reset to NULL when the node is set up. */
struct qnode *next;
/* Lock this node is queued on; assigned when the node is set up. */
struct qspinlock *lock;
/*
 * CPU number handed down by the preceding waiter (it stores through its
 * ->next pointer) that this waiter may yield to; -1 means none. Reset to
 * -1 when the node is set up.
 */
int yield_cpu;
u8 locked; /* 1 if lock acquired */
};

Expand All @@ -28,6 +29,7 @@ static int head_spins __read_mostly = (1 << 8);
/*
 * Paravirt yield tunables. Each one is exposed through a debugfs file named
 * "qspl_<variable name>" by spinlock_debugfs_init().
 */
static bool pv_yield_owner __read_mostly = true;
static bool pv_yield_allow_steal __read_mostly = false;
/* When false, yield_to_prev() skips its yield step and just relaxes. */
static bool pv_yield_prev __read_mostly = true;
/*
 * When true, the owner CPU is passed down the queue via qnode->yield_cpu
 * (see propagate_yield_cpu() and yield_to_prev()); when false both paths
 * skip the propagation logic.
 */
static bool pv_yield_propagate_owner __read_mostly = true;

/* Per-CPU storage for queue nodes. */
static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);

Expand Down Expand Up @@ -232,14 +234,67 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u
__yield_to_locked_owner(lock, val, paravirt, mustq);
}

/*
 * Publish the lock owner's CPU number to the next queued waiter so that the
 * waiter can yield to the owner without reading the lock word itself.
 *
 * @node:          the caller's queue node; its ->next is the waiter to notify
 * @val:           value from which the owner CPU is decoded (get_owner_cpu())
 * @set_yield_cpu: caller-held record of the CPU number last published to the
 *                 next waiter, -1 if nothing has been published; updated here
 * @paravirt:      when false the function does nothing
 *
 * Nothing is published when propagation is disabled, when the owner equals
 * the value already recorded in *set_yield_cpu, or when there is no next
 * waiter yet.
 */
static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
{
	struct qnode *successor;
	int owner_cpu;

	if (!paravirt || !pv_yield_propagate_owner)
		return;

	owner_cpu = get_owner_cpu(val);
	if (owner_cpu == *set_yield_cpu)
		return;	/* this owner has already been handed on */

	successor = READ_ONCE(node->next);
	if (successor == NULL)
		return;

	/*
	 * Publish when the owner vCPU is preempted, or when some value was
	 * published earlier and the owner has changed since. If neither
	 * holds there is nothing the successor needs to know.
	 */
	if (!vcpu_is_preempted(owner_cpu) && *set_yield_cpu == -1)
		return;

	successor->yield_cpu = owner_cpu;
	*set_yield_cpu = owner_cpu;
}

static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
{
int prev_cpu = decode_tail_cpu(val);
u32 yield_count;
int yield_cpu;

if (!paravirt)
goto relax;

if (!pv_yield_propagate_owner)
goto yield_prev;

yield_cpu = READ_ONCE(node->yield_cpu);
if (yield_cpu == -1) {
/* Propagate back the -1 CPU */
if (node->next && node->next->yield_cpu != -1)
node->next->yield_cpu = yield_cpu;
goto yield_prev;
}

yield_count = yield_count_of(yield_cpu);
if ((yield_count & 1) == 0)
goto yield_prev; /* owner vcpu is running */

smp_rmb();

if (yield_cpu == node->yield_cpu) {
if (node->next && node->next->yield_cpu != yield_cpu)
node->next->yield_cpu = yield_cpu;
yield_to_preempted(yield_cpu, yield_count);
return;
}

yield_prev:
if (!pv_yield_prev)
goto relax;

Expand Down Expand Up @@ -293,6 +348,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
u32 val, old, tail;
bool mustq = false;
int idx;
int set_yield_cpu = -1;
int iters = 0;

BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
Expand All @@ -314,6 +370,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
node = &qnodesp->nodes[idx];
node->next = NULL;
node->lock = lock;
node->yield_cpu = -1;
node->locked = 0;

tail = encode_tail_cpu(smp_processor_id());
Expand All @@ -334,6 +391,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
while (!node->locked)
yield_to_prev(lock, node, old, paravirt);

/* Clear out stale propagated yield_cpu */
if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
node->yield_cpu = -1;

smp_rmb(); /* acquire barrier for the mcs lock */
}

Expand All @@ -344,6 +405,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
if (!(val & _Q_LOCKED_VAL))
break;

propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
yield_head_to_locked_owner(lock, val, paravirt);
if (!maybe_stealers)
continue;
Expand Down Expand Up @@ -512,6 +574,22 @@ static int pv_yield_prev_get(void *data, u64 *val)

/* File operations for the pv_yield_prev tunable; value is formatted with "%llu\n". */
DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");

/*
 * Setter for the pv_yield_propagate_owner tunable.
 *
 * @data: unused
 * @val:  any non-zero value enables propagation, zero disables it
 *
 * Always returns 0.
 */
static int pv_yield_propagate_owner_set(void *data, u64 val)
{
	pv_yield_propagate_owner = (val != 0);
	return 0;
}

/*
 * Getter for the pv_yield_propagate_owner tunable.
 *
 * @data: unused
 * @val:  receives 1 when propagation is enabled, 0 otherwise
 *
 * Always returns 0.
 */
static int pv_yield_propagate_owner_get(void *data, u64 *val)
{
	*val = pv_yield_propagate_owner ? 1 : 0;
	return 0;
}

/*
 * File operations for the pv_yield_propagate_owner tunable, built from the
 * getter/setter above; value is formatted with "%llu\n".
 */
DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");

static __init int spinlock_debugfs_init(void)
{
debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
Expand All @@ -520,6 +598,7 @@ static __init int spinlock_debugfs_init(void)
debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
}

return 0;
Expand Down

0 comments on commit 28db61e

Please sign in to comment.