Skip to content

Commit

Permalink
net_sched: drop packets after root qdisc lock is released
Browse files Browse the repository at this point in the history
Qdisc performance suffers when packets are dropped at enqueue()
time because drops (kfree_skb()) are done while qdisc lock is held,
delaying a dequeue() draining the queue.

Nominal throughput can be reduced by 50 % when this happens,
at a time we would like the dequeue() to proceed as fast as possible.

Even FQ is vulnerable to this problem, while one of FQ goals was
to provide some flow isolation.

This patch adds a 'struct sk_buff **to_free' parameter to all
qdisc->enqueue(), and in qdisc_drop() helper.

I measured a performance increase of up to 12 %, but this patch
is a prereq so that future batches in enqueue() can fly.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eric Dumazet authored and David S. Miller committed Jun 25, 2016
1 parent 36195d8 commit 520ac30
Showing 28 changed files with 170 additions and 114 deletions.
41 changes: 30 additions & 11 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
@@ -37,8 +37,10 @@ struct qdisc_size_table {
};

struct Qdisc {
int (*enqueue)(struct sk_buff *skb, struct Qdisc *dev);
struct sk_buff * (*dequeue)(struct Qdisc *dev);
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *sch);
unsigned int flags;
#define TCQ_F_BUILTIN 1
#define TCQ_F_INGRESS 2
@@ -160,7 +162,9 @@ struct Qdisc_ops {
char id[IFNAMSIZ];
int priv_size;

int (*enqueue)(struct sk_buff *, struct Qdisc *);
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);

@@ -498,10 +502,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
qdisc_calculate_pkt_len(skb, sch);
return sch->enqueue(skb, sch);
return sch->enqueue(skb, sch, to_free);
}

static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
@@ -626,24 +631,36 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch)
return __qdisc_dequeue_head(sch, &sch->q);
}

/* Instead of calling kfree_skb() while root qdisc lock is held,
* queue the skb for future freeing at end of __dev_xmit_skb()
*/
static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free)
{
skb->next = *to_free;
*to_free = skb;
}

static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch,
struct sk_buff_head *list)
struct sk_buff_head *list,
struct sk_buff **to_free)
{
struct sk_buff *skb = __skb_dequeue(list);

if (likely(skb != NULL)) {
unsigned int len = qdisc_pkt_len(skb);

qdisc_qstats_backlog_dec(sch, skb);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return len;
}

return 0;
}

static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch)
static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch,
struct sk_buff **to_free)
{
return __qdisc_queue_drop_head(sch, &sch->q);
return __qdisc_queue_drop_head(sch, &sch->q, to_free);
}

static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch)
@@ -724,9 +741,11 @@ static inline void rtnl_qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)
qdisc_qstats_drop(sch);
}

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch)

static inline int qdisc_drop(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
kfree_skb(skb);
__qdisc_drop(skb, to_free);
qdisc_qstats_drop(sch);

return NET_XMIT_DROP;
7 changes: 5 additions & 2 deletions net/core/dev.c
Original file line number Diff line number Diff line change
@@ -3070,6 +3070,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
struct sk_buff *to_free = NULL;
bool contended;
int rc;

@@ -3086,7 +3087,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
kfree_skb(skb);
__qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) {
@@ -3109,7 +3110,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,

rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q) & NET_XMIT_MASK;
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -3119,6 +3120,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
}
spin_unlock(root_lock);
if (unlikely(to_free))
kfree_skb_list(to_free);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
9 changes: 5 additions & 4 deletions net/sched/sch_atm.c
Original file line number Diff line number Diff line change
@@ -357,7 +357,8 @@ static struct tcf_proto __rcu **atm_tc_find_tcf(struct Qdisc *sch,

/* --------------------------- Qdisc operations ---------------------------- */

static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
struct atm_flow_data *flow;
@@ -398,10 +399,10 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
switch (result) {
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
case TC_ACT_SHOT:
kfree_skb(skb);
__qdisc_drop(skb, to_free);
goto drop;
case TC_ACT_RECLASSIFY:
if (flow->excess)
@@ -413,7 +414,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#endif
}

ret = qdisc_enqueue(skb, flow->q);
ret = qdisc_enqueue(skb, flow->q, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
5 changes: 3 additions & 2 deletions net/sched/sch_blackhole.c
Original file line number Diff line number Diff line change
@@ -17,9 +17,10 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
return NET_XMIT_SUCCESS;
}

7 changes: 4 additions & 3 deletions net/sched/sch_cbq.c
Original file line number Diff line number Diff line change
@@ -358,7 +358,8 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
int uninitialized_var(ret);
@@ -370,11 +371,11 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return ret;
}

ret = qdisc_enqueue(skb, cl->q);
ret = qdisc_enqueue(skb, cl->q, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
cbq_mark_toplevel(q, cl);
16 changes: 9 additions & 7 deletions net/sched/sch_choke.c
Original file line number Diff line number Diff line change
@@ -115,7 +115,8 @@ static void choke_zap_tail_holes(struct choke_sched_data *q)
}

/* Drop packet from queue array by creating a "hole" */
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
struct sk_buff **to_free)
{
struct choke_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb = q->tab[idx];
@@ -129,7 +130,7 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)

qdisc_qstats_backlog_dec(sch, skb);
qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
--sch->q.qlen;
}

@@ -261,7 +262,8 @@ static bool choke_match_random(const struct choke_sched_data *q,
return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
struct choke_sched_data *q = qdisc_priv(sch);
@@ -288,7 +290,7 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* Draw a packet at random from queue and compare flow */
if (choke_match_random(q, skb, &idx)) {
q->stats.matched++;
choke_drop_by_idx(sch, idx);
choke_drop_by_idx(sch, idx, to_free);
goto congestion_drop;
}

@@ -331,16 +333,16 @@ static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}

q->stats.pdrop++;
return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);

congestion_drop:
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
return NET_XMIT_CN;

other_drop:
if (ret & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return ret;
}

8 changes: 5 additions & 3 deletions net/sched/sch_codel.c
Original file line number Diff line number Diff line change
@@ -82,7 +82,8 @@ static void drop_func(struct sk_buff *skb, void *ctx)
{
struct Qdisc *sch = ctx;

qdisc_drop(skb, sch);
kfree_skb(skb);
qdisc_qstats_drop(sch);
}

static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
@@ -107,7 +108,8 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
return skb;
}

static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct codel_sched_data *q;

@@ -117,7 +119,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
q = qdisc_priv(sch);
q->drop_overlimit++;
return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);
}

static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
7 changes: 4 additions & 3 deletions net/sched/sch_drr.c
Original file line number Diff line number Diff line change
@@ -350,7 +350,8 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
return NULL;
}

static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct drr_sched *q = qdisc_priv(sch);
struct drr_class *cl;
@@ -360,11 +361,11 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (cl == NULL) {
if (err & __NET_XMIT_BYPASS)
qdisc_qstats_drop(sch);
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return err;
}

err = qdisc_enqueue(skb, cl->qdisc);
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
9 changes: 5 additions & 4 deletions net/sched/sch_dsmark.c
Original file line number Diff line number Diff line change
@@ -191,7 +191,8 @@ static inline struct tcf_proto __rcu **dsmark_find_tcf(struct Qdisc *sch,

/* --------------------------- Qdisc operations ---------------------------- */

static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct dsmark_qdisc_data *p = qdisc_priv(sch);
int err;
@@ -234,7 +235,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
#ifdef CONFIG_NET_CLS_ACT
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
kfree_skb(skb);
__qdisc_drop(skb, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;

case TC_ACT_SHOT:
@@ -251,7 +252,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
}
}

err = qdisc_enqueue(skb, p->q);
err = qdisc_enqueue(skb, p->q, to_free);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);
@@ -264,7 +265,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_SUCCESS;

drop:
qdisc_drop(skb, sch);
qdisc_drop(skb, sch, to_free);
return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
}

15 changes: 9 additions & 6 deletions net/sched/sch_fifo.c
Original file line number Diff line number Diff line change
@@ -19,29 +19,32 @@

/* 1 band FIFO pseudo-"scheduler" */

static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
return qdisc_enqueue_tail(skb, sch);

return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);
}

static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);

return qdisc_drop(skb, sch);
return qdisc_drop(skb, sch, to_free);
}

static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch)
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(skb_queue_len(&sch->q) < sch->limit))
return qdisc_enqueue_tail(skb, sch);

/* queue full, remove one skb to fulfill the limit */
__qdisc_queue_drop_head(sch, &sch->q);
__qdisc_queue_drop_head(sch, &sch->q, to_free);
qdisc_qstats_drop(sch);
qdisc_enqueue_tail(skb, sch);

Loading

0 comments on commit 520ac30

Please sign in to comment.