workqueue: Enable unbound cpumask update on ordered workqueues
Ordered workqueues do not currently follow changes made to the
global unbound cpumask because per-pool workqueue changes may break
the ordering guarantee. IOW, a work function in an ordered workqueue
may run on an isolated CPU.

This patch enables ordered workqueues to follow changes made to the
global unbound cpumask by temporarily plugging (suspending) the newly
allocated pool_workqueue, preventing it from executing newly queued
work items until the old pwq has been properly drained. For ordered
workqueues, there should only be one unplugged pwq at any time; the
rest should be plugged.

This lets ordered workqueues follow unbound cpumask changes like
other unbound workqueues, at the expense of some delay in the
execution of work functions during the transition period.

Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
Waiman Long authored and Tejun Heo committed Feb 8, 2024
1 parent 26fb7e3 commit 4c065db
Showing 1 changed file with 59 additions and 10 deletions.
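As a quick illustration of what this change means in practice, here is a
sketch of a hypothetical test module. It is not part of this commit, and the
module and every name in it are invented for illustration: it queues a work
item on an ordered workqueue so the CPU placement can be watched in dmesg
while the unbound cpumask is updated, e.g. by writing to
/sys/devices/virtual/workqueue/cpumask. Before this patch such updates
skipped ordered workqueues; with it, the reported CPU moves into the new
mask once the transition completes.

/* Hypothetical test module -- illustration only, not part of the commit. */
#include <linux/module.h>
#include <linux/smp.h>
#include <linux/workqueue.h>

static struct workqueue_struct *test_wq;
static struct work_struct test_work;

static void test_work_fn(struct work_struct *work)
{
	/* With this patch, the CPU seen here tracks the unbound cpumask. */
	pr_info("ordered work ran on CPU %d\n", raw_smp_processor_id());
}

static int __init test_init(void)
{
	/* Ordered workqueue: one work item at a time, in queueing order. */
	test_wq = alloc_ordered_workqueue("test_ordered_wq", 0);
	if (!test_wq)
		return -ENOMEM;

	INIT_WORK(&test_work, test_work_fn);
	queue_work(test_wq, &test_work);
	return 0;
}

static void __exit test_exit(void)
{
	destroy_workqueue(test_wq);	/* drains remaining work first */
}

module_init(test_init);
module_exit(test_exit);
MODULE_LICENSE("GPL");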
kernel/workqueue.c
@@ -255,6 +255,7 @@ struct pool_workqueue {
 	int			refcnt;		/* L: reference count */
 	int			nr_in_flight[WORK_NR_COLORS];
 						/* L: nr of in_flight works */
+	bool			plugged;	/* L: execution suspended */
 
 	/*
 	 * nr_active management and WORK_STRUCT_INACTIVE:
@@ -1708,6 +1709,9 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
 		goto out;
 	}
 
+	if (unlikely(pwq->plugged))
+		return false;
+
 	/*
 	 * Unbound workqueue uses per-node shared nr_active $nna. If @pwq is
 	 * already waiting on $nna, pwq_dec_nr_active() will maintain the
@@ -1782,6 +1786,43 @@ static bool pwq_activate_first_inactive(struct pool_workqueue *pwq, bool fill)
 	}
 }
 
+/**
+ * unplug_oldest_pwq - restart an oldest plugged pool_workqueue
+ * @wq: workqueue_struct to be restarted
+ *
+ * pwq's are linked into wq->pwqs with the oldest first. For ordered
+ * workqueues, only the oldest pwq is unplugged, the others are plugged to
+ * suspend execution until the oldest one is drained. When this happens, the
+ * next oldest one (first plugged pwq in iteration) will be unplugged to
+ * restart work item execution to ensure proper work item ordering.
+ *
+ *    dfl_pwq --------------+     [P] - plugged
+ *                          |
+ *                          v
+ *    pwqs -> A -> B [P] -> C [P] (newest)
+ *            |    |        |
+ *            1    3        5
+ *            |    |        |
+ *            2    4        6
+ */
+static void unplug_oldest_pwq(struct workqueue_struct *wq)
+{
+	struct pool_workqueue *pwq;
+
+	lockdep_assert_held(&wq->mutex);
+
+	/* Caller should make sure that pwqs isn't empty before calling */
+	pwq = list_first_entry_or_null(&wq->pwqs, struct pool_workqueue,
+				       pwqs_node);
+	raw_spin_lock_irq(&pwq->pool->lock);
+	if (pwq->plugged) {
+		pwq->plugged = false;
+		if (pwq_activate_first_inactive(pwq, true))
+			kick_pool(pwq->pool);
+	}
+	raw_spin_unlock_irq(&pwq->pool->lock);
+}
+
 /**
  * node_activate_pending_pwq - Activate a pending pwq on a wq_node_nr_active
  * @nna: wq_node_nr_active to activate a pending pwq for
@@ -4740,6 +4781,13 @@ static void pwq_release_workfn(struct kthread_work *work)
 		mutex_lock(&wq->mutex);
 		list_del_rcu(&pwq->pwqs_node);
 		is_last = list_empty(&wq->pwqs);
+
+		/*
+		 * For ordered workqueue with a plugged dfl_pwq, restart it now.
+		 */
+		if (!is_last && (wq->flags & __WQ_ORDERED))
+			unplug_oldest_pwq(wq);
+
 		mutex_unlock(&wq->mutex);
 	}

@@ -4966,6 +5014,15 @@ apply_wqattrs_prepare(struct workqueue_struct *wq,
 	cpumask_copy(new_attrs->__pod_cpumask, new_attrs->cpumask);
 	ctx->attrs = new_attrs;
 
+	/*
+	 * For initialized ordered workqueues, there should only be one pwq
+	 * (dfl_pwq). Set the plugged flag of ctx->dfl_pwq to suspend execution
+	 * of newly queued work items until execution of older work items in
+	 * the old pwq's have completed.
+	 */
+	if ((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs))
+		ctx->dfl_pwq->plugged = true;
+
 	ctx->wq = wq;
 	return ctx;

@@ -5006,10 +5063,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
 		return -EINVAL;
 
-	/* creating multiple pwqs breaks ordering guarantee */
-	if (!list_empty(&wq->pwqs) && WARN_ON(wq->flags & __WQ_ORDERED))
-		return -EINVAL;
-
 	ctx = apply_wqattrs_prepare(wq, attrs, wq_unbound_cpumask);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
@@ -6489,9 +6542,6 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
 	list_for_each_entry(wq, &workqueues, list) {
 		if (!(wq->flags & WQ_UNBOUND) || (wq->flags & __WQ_DESTROYING))
 			continue;
-		/* creating multiple pwqs breaks ordering guarantee */
-		if (wq->flags & __WQ_ORDERED)
-			continue;
 
 		ctx = apply_wqattrs_prepare(wq, wq->unbound_attrs, unbound_cpumask);
 		if (IS_ERR(ctx)) {
@@ -7006,9 +7056,8 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
 	int ret;
 
 	/*
-	 * Adjusting max_active or creating new pwqs by applying
-	 * attributes breaks ordering guarantee. Disallow exposing ordered
-	 * workqueues.
+	 * Adjusting max_active breaks ordering guarantee. Disallow exposing
+	 * ordered workqueues.
 	 */
 	if (WARN_ON(wq->flags & __WQ_ORDERED))
 		return -EINVAL;
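To make the ordering guarantee concrete, below is a small userspace C model
of the plug/unplug scheme sketched in the unplug_oldest_pwq() comment above.
It is purely illustrative (all names are invented; this is not kernel code):
pwqs A, B and C hold items 1-6 as in the diagram, B and C start plugged, and
only the list head may execute, so the items run strictly in order even
though B and C were created mid-stream by cpumask updates.

/* Userspace model of the plug/unplug ordering scheme -- illustration only. */
#include <stdbool.h>
#include <stdio.h>

struct pwq {
	const char *name;
	bool plugged;	/* suspended until all older pwqs are drained */
	int next, last;	/* range of work item numbers queued here */
};

/* Mirrors unplug_oldest_pwq(): only the oldest (head) pwq may run. */
static void unplug_oldest(struct pwq *pwqs, int n)
{
	if (n > 0 && pwqs[0].plugged) {
		pwqs[0].plugged = false;
		printf("unplugged %s\n", pwqs[0].name);
	}
}

/* Drain and release the head pwq, then unplug the next oldest. */
static int release_head(struct pwq *pwqs, int n)
{
	while (pwqs[0].next <= pwqs[0].last)
		printf("%s executes item %d\n", pwqs[0].name, pwqs[0].next++);
	printf("released %s\n", pwqs[0].name);
	for (int i = 1; i < n; i++)	/* pop the head of the list */
		pwqs[i - 1] = pwqs[i];
	unplug_oldest(pwqs, n - 1);
	return n - 1;
}

int main(void)
{
	/* A is the old unplugged pwq; B and C were added plugged. */
	struct pwq pwqs[] = {
		{ "A", false, 1, 2 },
		{ "B", true,  3, 4 },
		{ "C", true,  5, 6 },
	};
	int n = 3;

	while (n > 0)
		n = release_head(pwqs, n);
	return 0;	/* items 1..6 execute strictly in order */
}

This mirrors the two halves of the patch: apply_wqattrs_prepare() plugs
ctx->dfl_pwq for an already-initialized ordered workqueue, and
pwq_release_workfn() calls unplug_oldest_pwq() once the old pwq is drained,
which is the event that hands execution to the next pwq in line.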
