Skip to content

Commit

Permalink
pkt_sched: Manage qdisc list inside of root qdisc.
Browse files Browse the repository at this point in the history
Idea is from Patrick McHardy.

Instead of managing the list of qdiscs on the device level, manage it
in the root qdisc of a netdev_queue.  This solves all kinds of
visibility issues during qdisc destruction.

The way to iterate over all qdiscs of a netdev_queue is to visit
the netdev_queue->qdisc, and then traverse it's list.

The only special case is to ignore builting qdiscs at the root when
dumping or doing a qdisc_lookup().  That was not needed previously
because builtin qdiscs were not added to the device's qdisc_list.

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jul 19, 2008
1 parent 72b25a9 commit 3072367
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 56 deletions.
2 changes: 0 additions & 2 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -636,8 +636,6 @@ struct net_device
unsigned int real_num_tx_queues;

unsigned long tx_queue_len; /* Max frames per queue allowed */
spinlock_t qdisc_list_lock;
struct list_head qdisc_list;

/*
* One part is mostly used on xmit path (device)
Expand Down
2 changes: 0 additions & 2 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3888,8 +3888,6 @@ int register_netdevice(struct net_device *dev)
net = dev_net(dev);

spin_lock_init(&dev->addr_list_lock);
spin_lock_init(&dev->qdisc_list_lock);
INIT_LIST_HEAD(&dev->qdisc_list);
netdev_init_queue_locks(dev);

dev->iflink = -1;
Expand Down
175 changes: 131 additions & 44 deletions net/sched/sch_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -185,11 +185,20 @@ EXPORT_SYMBOL(unregister_qdisc);

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
unsigned int i;

for (i = 0; i < dev->num_tx_queues; i++) {
struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
struct Qdisc *q, *txq_root = txq->qdisc;

list_for_each_entry(q, &dev->qdisc_list, list) {
if (q->handle == handle)
return q;
if (!(txq_root->flags & TCQ_F_BUILTIN) &&
txq_root->handle == handle)
return txq_root;

list_for_each_entry(q, &txq_root->list, list) {
if (q->handle == handle)
return q;
}
}
return NULL;
}
Expand Down Expand Up @@ -676,9 +685,8 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
goto err_out3;
}
}
spin_lock_bh(&dev->qdisc_list_lock);
list_add_tail(&sch->list, &dev->qdisc_list);
spin_unlock_bh(&dev->qdisc_list_lock);
if (parent)
list_add_tail(&sch->list, &dev_queue->qdisc->list);

return sch;
}
Expand Down Expand Up @@ -1037,13 +1045,57 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
return -EINVAL;
}

static bool tc_qdisc_dump_ignore(struct Qdisc *q)
{
return (q->flags & TCQ_F_BUILTIN) ? true : false;
}

static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
struct netlink_callback *cb,
int *q_idx_p, int s_q_idx)
{
int ret = 0, q_idx = *q_idx_p;
struct Qdisc *q;

if (!root)
return 0;

q = root;
if (q_idx < s_q_idx) {
q_idx++;
} else {
if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}
list_for_each_entry(q, &root->list, list) {
if (q_idx < s_q_idx) {
q_idx++;
continue;
}
if (!tc_qdisc_dump_ignore(q) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}

out:
*q_idx_p = q_idx;
return ret;
done:
ret = -1;
goto out;
}

static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
int idx, q_idx;
int s_idx, s_q_idx;
struct net_device *dev;
struct Qdisc *q;

if (net != &init_net)
return 0;
Expand All @@ -1053,21 +1105,22 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
read_lock(&dev_base_lock);
idx = 0;
for_each_netdev(&init_net, dev) {
struct netdev_queue *dev_queue;

if (idx < s_idx)
goto cont;
if (idx > s_idx)
s_q_idx = 0;
q_idx = 0;
list_for_each_entry(q, &dev->qdisc_list, list) {
if (q_idx < s_q_idx) {
q_idx++;
continue;
}
if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
goto done;
q_idx++;
}

dev_queue = netdev_get_tx_queue(dev, 0);
if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
goto done;

dev_queue = &dev->rx_queue;
if (tc_dump_qdisc_root(dev_queue->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
goto done;

cont:
idx++;
}
Expand Down Expand Up @@ -1285,15 +1338,62 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk
a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}

static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
int *t_p, int s_t)
{
struct qdisc_dump_args arg;

if (tc_qdisc_dump_ignore(q) ||
*t_p < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
(*t_p)++;
return 0;
}
if (*t_p > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
arg.w.fn = qdisc_class_dump;
arg.skb = skb;
arg.cb = cb;
arg.w.stop = 0;
arg.w.skip = cb->args[1];
arg.w.count = 0;
q->ops->cl_ops->walk(q, &arg.w);
cb->args[1] = arg.w.count;
if (arg.w.stop)
return -1;
(*t_p)++;
return 0;
}

static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
struct tcmsg *tcm, struct netlink_callback *cb,
int *t_p, int s_t)
{
struct Qdisc *q;

if (!root)
return 0;

if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
return -1;

list_for_each_entry(q, &root->list, list) {
if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
return -1;
}

return 0;
}

static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
struct net *net = sock_net(skb->sk);
int t;
int s_t;
struct netdev_queue *dev_queue;
struct net_device *dev;
struct Qdisc *q;
struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
struct qdisc_dump_args arg;
int t, s_t;

if (net != &init_net)
return 0;
Expand All @@ -1306,28 +1406,15 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
s_t = cb->args[0];
t = 0;

list_for_each_entry(q, &dev->qdisc_list, list) {
if (t < s_t || !q->ops->cl_ops ||
(tcm->tcm_parent &&
TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
t++;
continue;
}
if (t > s_t)
memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
arg.w.fn = qdisc_class_dump;
arg.skb = skb;
arg.cb = cb;
arg.w.stop = 0;
arg.w.skip = cb->args[1];
arg.w.count = 0;
q->ops->cl_ops->walk(q, &arg.w);
cb->args[1] = arg.w.count;
if (arg.w.stop)
break;
t++;
}
dev_queue = netdev_get_tx_queue(dev, 0);
if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
goto done;

dev_queue = &dev->rx_queue;
if (tc_dump_tclass_root(dev_queue->qdisc, skb, tcm, cb, &t, s_t) < 0)
goto done;

done:
cb->args[0] = t;

dev_put(dev);
Expand Down
10 changes: 2 additions & 8 deletions net/sched/sch_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -480,15 +480,12 @@ static void __qdisc_destroy(struct rcu_head *head)

void qdisc_destroy(struct Qdisc *qdisc)
{
struct net_device *dev = qdisc_dev(qdisc);

if (qdisc->flags & TCQ_F_BUILTIN ||
!atomic_dec_and_test(&qdisc->refcnt))
return;

spin_lock_bh(&dev->qdisc_list_lock);
list_del(&qdisc->list);
spin_unlock_bh(&dev->qdisc_list_lock);
if (qdisc->parent)
list_del(&qdisc->list);

call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
Expand Down Expand Up @@ -520,9 +517,6 @@ static void attach_one_default_qdisc(struct net_device *dev,
printk(KERN_INFO "%s: activation failed\n", dev->name);
return;
}
spin_lock_bh(&dev->qdisc_list_lock);
list_add_tail(&qdisc->list, &dev->qdisc_list);
spin_unlock_bh(&dev->qdisc_list_lock);
} else {
qdisc = &noqueue_qdisc;
}
Expand Down

0 comments on commit 3072367

Please sign in to comment.