Skip to content

Commit

Permalink
Merge branch 'net_sched-reduce-the-number-of-qdisc-resets'
Browse files Browse the repository at this point in the history
Cong Wang says:

====================
net_sched: reduce the number of qdisc resets

This patchset aims to reduce the number of qdisc resets during
qdisc tear down. Patch 1~3 are preparation for their following
patches, especially patch 2 and patch 3 add a few tracepoints
so that we can observe the whole lifetime of qdisc's. Patch 4
and patch 5 are the ones do the actual work. Please find more
details in each patch description.

Vaclav Zindulka tested this patchset and his large ruleset with
over 13k qdiscs defined got from 22s to 520ms.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 27, 2020
2 parents b3037ac + 759ae57 commit bdad7f9
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 43 deletions.
75 changes: 75 additions & 0 deletions include/trace/events/qdisc.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include <linux/netdevice.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
#include <linux/pkt_sched.h>
#include <net/sch_generic.h>

TRACE_EVENT(qdisc_dequeue,

Expand Down Expand Up @@ -44,6 +46,79 @@ TRACE_EVENT(qdisc_dequeue,
__entry->txq_state, __entry->packets, __entry->skbaddr )
);

TRACE_EVENT(qdisc_reset,

TP_PROTO(struct Qdisc *q),

TP_ARGS(q),

TP_STRUCT__entry(
__string( dev, qdisc_dev(q) )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
),

TP_fast_assign(
__assign_str(dev, qdisc_dev(q));
__assign_str(kind, q->ops->id);
__entry->parent = q->parent;
__entry->handle = q->handle;
),

TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
__get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
);

TRACE_EVENT(qdisc_destroy,

TP_PROTO(struct Qdisc *q),

TP_ARGS(q),

TP_STRUCT__entry(
__string( dev, qdisc_dev(q) )
__string( kind, q->ops->id )
__field( u32, parent )
__field( u32, handle )
),

TP_fast_assign(
__assign_str(dev, qdisc_dev(q));
__assign_str(kind, q->ops->id);
__entry->parent = q->parent;
__entry->handle = q->handle;
),

TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
__get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
);

TRACE_EVENT(qdisc_create,

TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent),

TP_ARGS(ops, dev, parent),

TP_STRUCT__entry(
__string( dev, dev->name )
__string( kind, ops->id )
__field( u32, parent )
),

TP_fast_assign(
__assign_str(dev, dev->name);
__assign_str(kind, ops->id);
__entry->parent = parent;
),

TP_printk("dev=%s kind=%s parent=%x:%x",
__get_str(dev), __get_str(kind),
TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent))
);

#endif /* _TRACE_QDISC_H */

/* This part must be outside protection */
Expand Down
3 changes: 3 additions & 0 deletions net/sched/sch_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>

#include <trace/events/qdisc.h>

/*
Short review.
Expand Down Expand Up @@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
}

qdisc_hash_add(sch, false);
trace_qdisc_create(ops, dev, parent);

return sch;

Expand Down
75 changes: 32 additions & 43 deletions net/sched/sch_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -896,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;

if (!ops->init || ops->init(sch, NULL, extack) == 0)
if (!ops->init || ops->init(sch, NULL, extack) == 0) {
trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
}

qdisc_put(sch);
return NULL;
Expand All @@ -911,6 +913,8 @@ void qdisc_reset(struct Qdisc *qdisc)
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;

trace_qdisc_reset(qdisc);

if (ops->reset)
ops->reset(qdisc);

Expand Down Expand Up @@ -949,31 +953,23 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
struct sk_buff *skb, *tmp;

#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);

qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
if (ops->reset)
ops->reset(qdisc);

qdisc_reset(qdisc);

if (ops->destroy)
ops->destroy(qdisc);

module_put(ops->owner);
dev_put(qdisc_dev(qdisc));

skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
__skb_unlink(skb, &qdisc->gso_skb);
kfree_skb_list(skb);
}

skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
__skb_unlink(skb, &qdisc->skb_bad_txq);
kfree_skb_list(skb);
}
trace_qdisc_destroy(qdisc);

call_rcu(&qdisc->rcu, qdisc_free_cb);
}
Expand Down Expand Up @@ -1132,6 +1128,28 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);

static void qdisc_deactivate(struct Qdisc *qdisc)
{
bool nolock = qdisc->flags & TCQ_F_NOLOCK;

if (qdisc->flags & TCQ_F_BUILTIN)
return;
if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
return;

if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));

set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

qdisc_reset(qdisc);

spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}

static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
Expand All @@ -1141,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev,

qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
bool nolock = qdisc->flags & TCQ_F_NOLOCK;

if (nolock)
spin_lock_bh(&qdisc->seqlock);
spin_lock_bh(qdisc_lock(qdisc));

if (!(qdisc->flags & TCQ_F_BUILTIN))
set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

qdisc_deactivate(qdisc);
rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
qdisc_reset(qdisc);

spin_unlock_bh(qdisc_lock(qdisc));
if (nolock)
spin_unlock_bh(&qdisc->seqlock);
}
}

Expand Down Expand Up @@ -1186,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}

static void dev_qdisc_reset(struct net_device *dev,
struct netdev_queue *dev_queue,
void *none)
{
struct Qdisc *qdisc = dev_queue->qdisc_sleeping;

if (qdisc)
qdisc_reset(qdisc);
}

/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
Expand Down Expand Up @@ -1232,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
/* The new qdisc is assigned at this point so we can safely
* unwind stale skb lists and qdisc statistics
*/
netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
if (dev_ingress_queue(dev))
dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}

Expand Down

0 comments on commit bdad7f9

Please sign in to comment.