Skip to content

Commit

Permalink
Merge branch 'net-sched-do-not-drop-root-lock-in-tcf_qevent_handle'
Browse files Browse the repository at this point in the history
Petr Machata says:

====================
net: sched: Do not drop root lock in tcf_qevent_handle()

Mirred currently does not mix well with blocks executed after the qdisc
root lock is taken. This includes classification blocks (such as in PRIO,
ETS, DRR qdiscs) and qevents. The locking caused by the packet mirrored by
mirred can cause deadlocks: either when the thread of execution attempts to
take the lock a second time, or when two threads end up waiting on each
other's locks.

The qevent patchset attempted to not introduce further badness of this
sort, and dropped the lock before executing the qevent block. However this
lead to too little locking and races between qdisc configuration and packet
enqueue in the RED qdisc.

Before the deadlock issues are solved in a way that can be applied across
many qdiscs reasonably easily, do for qevents what is done for the
classification blocks and just keep holding the root lock.

That is done in patch #1. Patch #2 then drops the now unnecessary root_lock
argument from Qdisc_ops.enqueue.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Jul 16, 2020
2 parents 89e35f6 + ac5c66f commit 4291dc1
Show file tree
Hide file tree
Showing 37 changed files with 77 additions and 85 deletions.
4 changes: 2 additions & 2 deletions include/net/pkt_cls.h
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch);
int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
struct netlink_ext_ack *extack);
struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
spinlock_t *root_lock, struct sk_buff **to_free, int *ret);
struct sk_buff **to_free, int *ret);
int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe);
#else
static inline int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
Expand All @@ -591,7 +591,7 @@ static inline int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlatt

static inline struct sk_buff *
tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
struct sk_buff **to_free, int *ret)
{
return skb;
}
Expand Down
6 changes: 2 additions & 4 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@ struct qdisc_skb_head {
struct Qdisc {
int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
spinlock_t *root_lock,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *sch);
unsigned int flags;
Expand Down Expand Up @@ -242,7 +241,6 @@ struct Qdisc_ops {

int (*enqueue)(struct sk_buff *skb,
struct Qdisc *sch,
spinlock_t *root_lock,
struct sk_buff **to_free);
struct sk_buff * (*dequeue)(struct Qdisc *);
struct sk_buff * (*peek)(struct Qdisc *);
Expand Down Expand Up @@ -790,11 +788,11 @@ static inline void qdisc_calculate_pkt_len(struct sk_buff *skb,
#endif
}

static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
qdisc_calculate_pkt_len(skb, sch);
return sch->enqueue(skb, sch, root_lock, to_free);
return sch->enqueue(skb, sch, to_free);
}

static inline void _bstats_update(struct gnet_stats_basic_packed *bstats,
Expand Down
4 changes: 2 additions & 2 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3749,7 +3749,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q);

if (q->flags & TCQ_F_NOLOCK) {
rc = q->enqueue(skb, q, NULL, &to_free) & NET_XMIT_MASK;
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
qdisc_run(q);

if (unlikely(to_free))
Expand Down Expand Up @@ -3792,7 +3792,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_run_end(q);
rc = NET_XMIT_SUCCESS;
} else {
rc = q->enqueue(skb, q, root_lock, &to_free) & NET_XMIT_MASK;
rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
Expand Down
8 changes: 1 addition & 7 deletions net/sched/cls_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -3822,7 +3822,7 @@ int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index
EXPORT_SYMBOL(tcf_qevent_validate_change);

struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
spinlock_t *root_lock, struct sk_buff **to_free, int *ret)
struct sk_buff **to_free, int *ret)
{
struct tcf_result cl_res;
struct tcf_proto *fl;
Expand All @@ -3832,9 +3832,6 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru

fl = rcu_dereference_bh(qe->filter_chain);

if (root_lock)
spin_unlock(root_lock);

switch (tcf_classify(skb, fl, &cl_res, false)) {
case TC_ACT_SHOT:
qdisc_qstats_drop(sch);
Expand All @@ -3853,9 +3850,6 @@ struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, stru
return NULL;
}

if (root_lock)
spin_lock(root_lock);

return skb;
}
EXPORT_SYMBOL(tcf_qevent_handle);
Expand Down
4 changes: 2 additions & 2 deletions net/sched/sch_atm.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,

/* --------------------------- Qdisc operations ---------------------------- */

static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct atm_qdisc_data *p = qdisc_priv(sch);
Expand Down Expand Up @@ -432,7 +432,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *ro
#endif
}

ret = qdisc_enqueue(skb, flow->q, root_lock, to_free);
ret = qdisc_enqueue(skb, flow->q, to_free);
if (ret != NET_XMIT_SUCCESS) {
drop: __maybe_unused
if (net_xmit_drop_count(ret)) {
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_blackhole.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include <linux/skbuff.h>
#include <net/pkt_sched.h>

static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
qdisc_drop(skb, sch, to_free);
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_cake.c
Original file line number Diff line number Diff line change
Expand Up @@ -1687,7 +1687,7 @@ static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,

static void cake_reconfigure(struct Qdisc *sch);

static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cake_sched_data *q = qdisc_priv(sch);
Expand Down
4 changes: 2 additions & 2 deletions net/sched/sch_cbq.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
}

static int
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbq_sched_data *q = qdisc_priv(sch);
Expand All @@ -373,7 +373,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
return ret;
}

ret = qdisc_enqueue(skb, cl->q, root_lock, to_free);
ret = qdisc_enqueue(skb, cl->q, to_free);
if (ret == NET_XMIT_SUCCESS) {
sch->q.qlen++;
cbq_mark_toplevel(q, cl);
Expand Down
18 changes: 9 additions & 9 deletions net/sched/sch_cbs.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,21 @@ struct cbs_sched_data {
s64 sendslope; /* in bytes/s */
s64 idleslope; /* in bytes/s */
struct qdisc_watchdog watchdog;
int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free);
struct sk_buff *(*dequeue)(struct Qdisc *sch);
struct Qdisc *qdisc;
struct list_head cbs_list;
};

static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct Qdisc *child, spinlock_t *root_lock,
struct Qdisc *child,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
int err;

err = child->ops->enqueue(skb, child, root_lock, to_free);
err = child->ops->enqueue(skb, child, to_free);
if (err != NET_XMIT_SUCCESS)
return err;

Expand All @@ -101,16 +101,16 @@ static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return NET_XMIT_SUCCESS;
}

static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
struct Qdisc *qdisc = q->qdisc;

return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free);
return cbs_child_enqueue(skb, sch, qdisc, to_free);
}

static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);
Expand All @@ -124,15 +124,15 @@ static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *
q->last = ktime_get_ns();
}

return cbs_child_enqueue(skb, sch, qdisc, root_lock, to_free);
return cbs_child_enqueue(skb, sch, qdisc, to_free);
}

static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct cbs_sched_data *q = qdisc_priv(sch);

return q->enqueue(skb, sch, root_lock, to_free);
return q->enqueue(skb, sch, to_free);
}

/* timediff is in ns, slope is in bytes/s */
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_choke.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ static bool choke_match_random(const struct choke_sched_data *q,
return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct choke_sched_data *q = qdisc_priv(sch);
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_codel.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
return skb;
}

static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct codel_sched_data *q;
Expand Down
4 changes: 2 additions & 2 deletions net/sched/sch_drr.c
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
return NULL;
}

static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
Expand All @@ -355,7 +355,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_
}

first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
Expand Down
4 changes: 2 additions & 2 deletions net/sched/sch_dsmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,

/* --------------------------- Qdisc operations ---------------------------- */

static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
Expand Down Expand Up @@ -267,7 +267,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *ro
}
}

err = qdisc_enqueue(skb, p->q, root_lock, to_free);
err = qdisc_enqueue(skb, p->q, to_free);
if (err != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(err))
qdisc_qstats_drop(sch);
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_etf.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ static void report_sock_error(struct sk_buff *skb, u32 err, u8 code)
}

static int etf_enqueue_timesortedlist(struct sk_buff *nskb, struct Qdisc *sch,
spinlock_t *root_lock, struct sk_buff **to_free)
struct sk_buff **to_free)
{
struct etf_sched_data *q = qdisc_priv(sch);
struct rb_node **p = &q->head.rb_root.rb_node, *parent = NULL;
Expand Down
4 changes: 2 additions & 2 deletions net/sched/sch_ets.c
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ static struct ets_class *ets_classify(struct sk_buff *skb, struct Qdisc *sch,
return &q->classes[band];
}

static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
Expand All @@ -433,7 +433,7 @@ static int ets_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t
}

first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
Expand Down
6 changes: 3 additions & 3 deletions net/sched/sch_fifo.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

/* 1 band FIFO pseudo-"scheduler" */

static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <= sch->limit))
Expand All @@ -25,7 +25,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *roo
return qdisc_drop(skb, sch, to_free);
}

static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
if (likely(sch->q.qlen < sch->limit))
Expand All @@ -34,7 +34,7 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *roo
return qdisc_drop(skb, sch, to_free);
}

static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
unsigned int prev_backlog;
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_fq.c
Original file line number Diff line number Diff line change
Expand Up @@ -439,7 +439,7 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
}

static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct fq_sched_data *q = qdisc_priv(sch);
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_fq_codel.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets,
return idx;
}

static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_fq_pie.c
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow,
skb->next = NULL;
}

static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct fq_pie_sched_data *q = qdisc_priv(sch);
Expand Down
4 changes: 2 additions & 2 deletions net/sched/sch_generic.c
Original file line number Diff line number Diff line change
Expand Up @@ -520,7 +520,7 @@ EXPORT_SYMBOL(netif_carrier_off);
cheaper.
*/

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock,
static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
__qdisc_drop(skb, to_free);
Expand Down Expand Up @@ -614,7 +614,7 @@ static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
return &priv->q[band];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, spinlock_t *root_lock,
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
struct sk_buff **to_free)
{
int band = prio2band[skb->priority & TC_PRIO_MAX];
Expand Down
2 changes: 1 addition & 1 deletion net/sched/sch_gred.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ static bool gred_per_vq_red_flags_used(struct gred_sched *table)
return false;
}

static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
struct gred_sched_data *q = NULL;
Expand Down
6 changes: 3 additions & 3 deletions net/sched/sch_hfsc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1528,8 +1528,8 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
return -1;
}

static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root_lock,
struct sk_buff **to_free)
static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
unsigned int len = qdisc_pkt_len(skb);
struct hfsc_class *cl;
Expand All @@ -1545,7 +1545,7 @@ static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, spinlock_t *root
}

first = !cl->qdisc->q.qlen;
err = qdisc_enqueue(skb, cl->qdisc, root_lock, to_free);
err = qdisc_enqueue(skb, cl->qdisc, to_free);
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
cl->qstats.drops++;
Expand Down
Loading

0 comments on commit 4291dc1

Please sign in to comment.