Merge branch 'lockless-qdisc-opts'

Yunsheng Lin says:

====================
Some optimizations for lockless qdisc

Patch 1: remove unnecessary seqcount operation.
Patch 2: implement TCQ_F_CAN_BYPASS.
Patch 3: remove qdisc->empty.

Performance data for pktgen in queue_xmit mode + dummy netdev
with pfifo_fast:

 threads    unpatched           patched             delta
    1       2.60Mpps            3.21Mpps             +23%
    2       3.84Mpps            5.56Mpps             +44%
    4       5.52Mpps            5.58Mpps             +1%
    8       2.77Mpps            2.76Mpps             -0.3%
   16       2.24Mpps            2.23Mpps             -0.4%

Performance for IP forwarding testing: 1.05Mpps increases to
1.16Mpps, about a 10% improvement.

V3: Add 'Acked-by' from Jakub and 'Tested-by' from Vladimir,
    and resend based on latest net-next.
V2: Adjust the comment and commit log according to the discussion
    in V1.
V1: Drop the RFC tag, add nolock_qdisc_is_empty() and do the qdisc
    empty check without the protection of qdisc->seqlock to avoid
    doing an unnecessary spin_trylock() in the contended case.
RFC v4: Use STATE_MISSED and STATE_DRAINING to indicate a non-empty
        qdisc, and add patches 1 and 3.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Jun 23, 2021
2 parents 38f7592 + d3e0f57 commit e940eb3
Showing 3 changed files with 59 additions and 22 deletions.
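
Reading aid for the diff below: the series replaces the qdisc->empty boolean
with two state bits whose union means "not empty". The following standalone C
model paraphrases that logic; the bit positions, the non-kernel helper
signature, and main() are illustrative only, but the set/clear sites noted in
the comments come from the hunks that follow.

#include <stdio.h>

/* Illustrative stand-ins for the bits added in include/net/sch_generic.h. */
#define QDISC_STATE_MISSED    (1UL << 2) /* trylock contention, requeue, or exhausted quota */
#define QDISC_STATE_DRAINING  (1UL << 3) /* txq frozen/stopped with work pending */
#define QDISC_STATE_NON_EMPTY (QDISC_STATE_MISSED | QDISC_STATE_DRAINING)

/* Mirrors nolock_qdisc_is_empty(): either bit set means "treat as
 * non-empty", so the TCQ_F_CAN_BYPASS fast path must not be taken.
 */
static int nolock_qdisc_is_empty(unsigned long state)
{
        return !(state & QDISC_STATE_NON_EMPTY);
}

int main(void)
{
        unsigned long state = 0;

        printf("fresh qdisc:  empty=%d\n", nolock_qdisc_is_empty(state)); /* 1 */

        state |= QDISC_STATE_MISSED;  /* e.g. dev_requeue_skb() or quota exhaustion */
        printf("missed:       empty=%d\n", nolock_qdisc_is_empty(state)); /* 0 */

        state = QDISC_STATE_DRAINING; /* e.g. qdisc_maybe_clear_missed() on a stopped txq */
        printf("draining:     empty=%d\n", nolock_qdisc_is_empty(state)); /* 0 */

        state = 0;                    /* pfifo_fast_dequeue() clears both, then retries */
        printf("cleared:      empty=%d\n", nolock_qdisc_is_empty(state)); /* 1 */
        return 0;
}

Compiled with any C99 compiler, this prints empty=1 only when neither bit is
set, which is exactly the condition the new fast path tests.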
include/net/sch_generic.h: 31 changes (18 additions & 13 deletions)
@@ -37,8 +37,15 @@ enum qdisc_state_t {
 	__QDISC_STATE_SCHED,
 	__QDISC_STATE_DEACTIVATED,
 	__QDISC_STATE_MISSED,
+	__QDISC_STATE_DRAINING,
 };
 
+#define QDISC_STATE_MISSED	BIT(__QDISC_STATE_MISSED)
+#define QDISC_STATE_DRAINING	BIT(__QDISC_STATE_DRAINING)
+
+#define QDISC_STATE_NON_EMPTY	(QDISC_STATE_MISSED | \
+				 QDISC_STATE_DRAINING)
+
 struct qdisc_size_table {
 	struct rcu_head rcu;
 	struct list_head list;
@@ -110,8 +117,6 @@ struct Qdisc {
 	spinlock_t		busylock ____cacheline_aligned_in_smp;
 	spinlock_t		seqlock;
 
-	/* for NOLOCK qdisc, true if there are no enqueued skbs */
-	bool			empty;
 	struct rcu_head		rcu;
 
 	/* private data */
@@ -145,6 +150,11 @@ static inline bool qdisc_is_running(struct Qdisc *qdisc)
 	return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
 }
 
+static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+{
+	return !(READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY);
+}
+
 static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 {
 	return q->flags & TCQ_F_CPUSTATS;
@@ -153,15 +163,15 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
 static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
 {
 	if (qdisc_is_percpu_stats(qdisc))
-		return READ_ONCE(qdisc->empty);
+		return nolock_qdisc_is_empty(qdisc);
 	return !READ_ONCE(qdisc->q.qlen);
 }
 
 static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 {
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		if (spin_trylock(&qdisc->seqlock))
-			goto nolock_empty;
+			return true;
 
 		/* If the MISSED flag is set, it means other thread has
 		 * set the MISSED flag before second spin_trylock(), so
@@ -183,11 +193,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 		/* Retry again in case other CPU may not see the new flag
 		 * after it releases the lock at the end of qdisc_run_end().
 		 */
-		if (!spin_trylock(&qdisc->seqlock))
-			return false;
-
-nolock_empty:
-		WRITE_ONCE(qdisc->empty, false);
+		return spin_trylock(&qdisc->seqlock);
 	} else if (qdisc_is_running(qdisc)) {
 		return false;
 	}
@@ -201,15 +207,14 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
 
 static inline void qdisc_run_end(struct Qdisc *qdisc)
 {
-	write_seqcount_end(&qdisc->running);
 	if (qdisc->flags & TCQ_F_NOLOCK) {
 		spin_unlock(&qdisc->seqlock);
 
 		if (unlikely(test_bit(__QDISC_STATE_MISSED,
-				      &qdisc->state))) {
-			clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+				      &qdisc->state)))
 			__netif_schedule(qdisc);
-		}
+	} else {
+		write_seqcount_end(&qdisc->running);
 	}
 }
 
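nolock_qdisc_is_empty() deliberately reads qdisc->state without taking
q->seqlock, so the bypass path pairs an unlocked peek with a re-test once the
lock is held; the net/core/dev.c hunk below does exactly this. A simplified
kernel-style sketch of the pattern (not the verbatim code: the kernel enqueues
and runs the qdisc on a failed re-test rather than bailing out, and
try_direct_xmit() is a hypothetical stand-in for the sch_direct_xmit() call):

static bool try_bypass(struct Qdisc *q, struct sk_buff *skb)
{
        /* Unlocked peek: cheap filter that avoids a doomed trylock. */
        if (!nolock_qdisc_is_empty(q))
                return false;

        /* qdisc_run_begin() takes q->seqlock via spin_trylock() here. */
        if (!qdisc_run_begin(q))
                return false;

        /* Re-test under the lock: a concurrent requeue may have set
         * MISSED or DRAINING between the peek and the trylock.
         */
        if (!nolock_qdisc_is_empty(q)) {
                qdisc_run_end(q);
                return false;
        }

        try_direct_xmit(q, skb);        /* hypothetical stand-in */
        qdisc_run_end(q);
        return true;
}
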
net/core/dev.c: 27 changes (25 additions & 2 deletions)
@@ -3852,10 +3852,33 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	qdisc_calculate_pkt_len(skb, q);
 
 	if (q->flags & TCQ_F_NOLOCK) {
+		if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) &&
+		    qdisc_run_begin(q)) {
+			/* Retest nolock_qdisc_is_empty() within the protection
+			 * of q->seqlock to protect from racing with requeuing.
+			 */
+			if (unlikely(!nolock_qdisc_is_empty(q))) {
+				rc = q->enqueue(skb, q, &to_free) &
+					NET_XMIT_MASK;
+				__qdisc_run(q);
+				qdisc_run_end(q);
+
+				goto no_lock_out;
+			}
+
+			qdisc_bstats_cpu_update(q, skb);
+			if (sch_direct_xmit(skb, q, dev, txq, NULL, true) &&
+			    !nolock_qdisc_is_empty(q))
+				__qdisc_run(q);
+
+			qdisc_run_end(q);
+			return NET_XMIT_SUCCESS;
+		}
+
 		rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
-		if (likely(!netif_xmit_frozen_or_stopped(txq)))
-			qdisc_run(q);
+		qdisc_run(q);
 
+no_lock_out:
 		if (unlikely(to_free))
 			kfree_skb_list(to_free);
 		return rc;
net/sched/sch_generic.c: 23 changes (16 additions & 7 deletions)
@@ -52,6 +52,8 @@ static void qdisc_maybe_clear_missed(struct Qdisc *q,
 	 */
 	if (!netif_xmit_frozen_or_stopped(txq))
 		set_bit(__QDISC_STATE_MISSED, &q->state);
+	else
+		set_bit(__QDISC_STATE_DRAINING, &q->state);
 }
 
 /* Main transmission queue. */
@@ -164,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 
 		skb = next;
 	}
-	if (lock)
+
+	if (lock) {
 		spin_unlock(lock);
-	__netif_schedule(q);
+		set_bit(__QDISC_STATE_MISSED, &q->state);
+	} else {
+		__netif_schedule(q);
+	}
 }
 
 static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -409,7 +415,11 @@ void __qdisc_run(struct Qdisc *q)
 	while (qdisc_restart(q, &packets)) {
 		quota -= packets;
 		if (quota <= 0) {
-			__netif_schedule(q);
+			if (q->flags & TCQ_F_NOLOCK)
+				set_bit(__QDISC_STATE_MISSED, &q->state);
+			else
+				__netif_schedule(q);
+
 			break;
 		}
 	}
Expand Down Expand Up @@ -698,13 +708,14 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
} else if (need_retry &&
test_bit(__QDISC_STATE_MISSED, &qdisc->state)) {
READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
/* Delay clearing the STATE_MISSED here to reduce
* the overhead of the second spin_trylock() in
* qdisc_run_begin() and __netif_schedule() calling
* in qdisc_run_end().
*/
clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);

/* Make sure dequeuing happens after clearing
* STATE_MISSED.
@@ -714,8 +725,6 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
 		need_retry = false;
 
 		goto retry;
-	} else {
-		WRITE_ONCE(qdisc->empty, true);
 	}
 
 	return skb;
@@ -916,7 +925,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	sch->enqueue = ops->enqueue;
 	sch->dequeue = ops->dequeue;
 	sch->dev_queue = dev_queue;
-	sch->empty = true;
 	dev_hold(dev);
 	refcount_set(&sch->refcnt, 1);
 
@@ -1222,6 +1230,7 @@ static void dev_reset_queue(struct net_device *dev,
 	spin_unlock_bh(qdisc_lock(qdisc));
 	if (nolock) {
 		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
 		spin_unlock_bh(&qdisc->seqlock);
 	}
 }
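
The dequeue side of the protocol, condensed from the pfifo_fast_dequeue()
hunks above: MISSED and DRAINING are cleared only after a dequeue comes back
empty, and the dequeue is then retried once so an enqueue racing with the
clear is not lost. A kernel-style sketch under those assumptions (do_dequeue()
is a hypothetical stand-in for the real per-band ring dequeue):

static struct sk_buff *dequeue_with_retry(struct Qdisc *q)
{
        bool need_retry = true;
        struct sk_buff *skb;

retry:
        skb = do_dequeue(q);            /* hypothetical stand-in */
        if (!skb && need_retry &&
            READ_ONCE(q->state) & QDISC_STATE_NON_EMPTY) {
                /* Clearing is delayed until a dequeue comes back empty,
                 * keeping the common qdisc_run_begin()/qdisc_run_end()
                 * paths free of extra atomics.
                 */
                clear_bit(__QDISC_STATE_MISSED, &q->state);
                clear_bit(__QDISC_STATE_DRAINING, &q->state);

                /* Ordering matters: the retry must happen after the
                 * clear (see the "Make sure dequeuing happens after
                 * clearing STATE_MISSED" comment in the hunk above).
                 */
                need_retry = false;
                goto retry;
        }
        return skb;
}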
