Skip to content

Commit

Permalink
net: sched: refine software bypass handling in tc_run
Browse files Browse the repository at this point in the history
This patch addresses issues with filter counting in block (tcf_block),
particularly for software bypass scenarios, by introducing a more
accurate mechanism using useswcnt.

Previously, filtercnt and skipswcnt were introduced by:

  Commit 2081fd3 ("net: sched: cls_api: add filter counter") and
  Commit f631ef3 ("net: sched: cls_api: add skip_sw counter")

  filtercnt tracked all tp (tcf_proto) objects added to a block, and
  skipswcnt counted tp objects with the skipsw attribute set.

The problem is: a single tp can contain multiple filters, some with skipsw
and others without. The current implementation fails in the case:

  When the first filter in a tp has skipsw, both skipswcnt and filtercnt
  are incremented, then adding a second filter without skipsw to the same
  tp does not modify these counters because tp->counted is already set.

  This results in bypass software behavior based solely on skipswcnt
  equaling filtercnt, even when the block includes filters without
  skipsw. Consequently, filters without skipsw are inadvertently bypassed.

To address this, the patch introduces useswcnt in block to explicitly count
tp objects containing at least one filter without skipsw. Key changes
include:

  Whenever a filter without skipsw is added, its tp is marked with usesw
  and counted in useswcnt. tc_run() now uses useswcnt to determine software
  bypass, eliminating reliance on filtercnt and skipswcnt.

  This refined approach prevents software bypass for blocks containing
  mixed filters, ensuring correct behavior in tc_run().

Additionally, as atomic operations on useswcnt ensure thread safety and
tp->lock guards access to tp->usesw and tp->counted, the broader lock
down_write(&block->cb_lock) is no longer required in tc_new_tfilter(),
and this resolves a performance regression caused by the filter counting
mechanism during parallel filter insertions.

  The improvement can be demonstrated using the following script:

  # cat insert_tc_rules.sh

    tc qdisc add dev ens1f0np0 ingress
    for i in $(seq 16); do
        taskset -c $i tc -b rules_$i.txt &
    done
    wait

  Each of rules_$i.txt files above includes 100000 tc filter rules to a
  mlx5 driver NIC ens1f0np0.

  Without this patch:

  # time sh insert_tc_rules.sh

    real    0m50.780s
    user    0m23.556s
    sys	    4m13.032s

  With this patch:

  # time sh insert_tc_rules.sh

    real    0m17.718s
    user    0m7.807s
    sys     3m45.050s

Fixes: 047f340 ("net: sched: make skip_sw actually skip software")
Reported-by: Shuang Li <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Tested-by: Asbjørn Sloth Tønnesen <ast@fiberby.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Xin Long authored and David S. Miller committed Jan 20, 2025
1 parent 59372af commit a12c76a
Show file tree
Hide file tree
Showing 8 changed files with 55 additions and 45 deletions.
13 changes: 11 additions & 2 deletions include/net/pkt_cls.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,11 +75,11 @@ static inline bool tcf_block_non_null_shared(struct tcf_block *block)
}

#ifdef CONFIG_NET_CLS_ACT
DECLARE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
DECLARE_STATIC_KEY_FALSE(tcf_sw_enabled_key);

static inline bool tcf_block_bypass_sw(struct tcf_block *block)
{
return block && block->bypass_wanted;
return block && !atomic_read(&block->useswcnt);
}
#endif

Expand Down Expand Up @@ -760,6 +760,15 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common,
cls_common->extack = extack;
}

static inline void tcf_proto_update_usesw(struct tcf_proto *tp, u32 flags)
{
if (tp->usesw)
return;
if (tc_skip_sw(flags) && tc_in_hw(flags))
return;
tp->usesw = true;
}

#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb)
{
Expand Down
5 changes: 2 additions & 3 deletions include/net/sch_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,7 @@ struct tcf_proto {
spinlock_t lock;
bool deleting;
bool counted;
bool usesw;
refcount_t refcnt;
struct rcu_head rcu;
struct hlist_node destroy_ht_node;
Expand Down Expand Up @@ -474,9 +475,7 @@ struct tcf_block {
struct flow_block flow_block;
struct list_head owner_list;
bool keep_dst;
bool bypass_wanted;
atomic_t filtercnt; /* Number of filters */
atomic_t skipswcnt; /* Number of skip_sw filters */
atomic_t useswcnt;
atomic_t offloadcnt; /* Number of oddloaded filters */
unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */
unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */
Expand Down
15 changes: 9 additions & 6 deletions net/core/dev.c
Original file line number Diff line number Diff line change
Expand Up @@ -2248,8 +2248,8 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
#endif

#ifdef CONFIG_NET_CLS_ACT
DEFINE_STATIC_KEY_FALSE(tcf_bypass_check_needed_key);
EXPORT_SYMBOL(tcf_bypass_check_needed_key);
DEFINE_STATIC_KEY_FALSE(tcf_sw_enabled_key);
EXPORT_SYMBOL(tcf_sw_enabled_key);
#endif

DEFINE_STATIC_KEY_FALSE(netstamp_needed_key);
Expand Down Expand Up @@ -4144,10 +4144,13 @@ static int tc_run(struct tcx_entry *entry, struct sk_buff *skb,
if (!miniq)
return ret;

if (static_branch_unlikely(&tcf_bypass_check_needed_key)) {
if (tcf_block_bypass_sw(miniq->block))
return ret;
}
/* Global bypass */
if (!static_branch_likely(&tcf_sw_enabled_key))
return ret;

/* Block-wise bypass */
if (tcf_block_bypass_sw(miniq->block))
return ret;

tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
Expand Down
57 changes: 23 additions & 34 deletions net/sched/cls_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->protocol = protocol;
tp->prio = prio;
tp->chain = chain;
tp->usesw = !tp->ops->reoffload;
spin_lock_init(&tp->lock);
refcount_set(&tp->refcnt, 1);

Expand All @@ -410,39 +411,31 @@ static void tcf_proto_get(struct tcf_proto *tp)
refcount_inc(&tp->refcnt);
}

static void tcf_maintain_bypass(struct tcf_block *block)
static void tcf_proto_count_usesw(struct tcf_proto *tp, bool add)
{
int filtercnt = atomic_read(&block->filtercnt);
int skipswcnt = atomic_read(&block->skipswcnt);
bool bypass_wanted = filtercnt > 0 && filtercnt == skipswcnt;

if (bypass_wanted != block->bypass_wanted) {
#ifdef CONFIG_NET_CLS_ACT
if (bypass_wanted)
static_branch_inc(&tcf_bypass_check_needed_key);
else
static_branch_dec(&tcf_bypass_check_needed_key);
#endif
block->bypass_wanted = bypass_wanted;
struct tcf_block *block = tp->chain->block;
bool counted = false;

if (!add) {
if (tp->usesw && tp->counted) {
if (!atomic_dec_return(&block->useswcnt))
static_branch_dec(&tcf_sw_enabled_key);
tp->counted = false;
}
return;
}
}

static void tcf_block_filter_cnt_update(struct tcf_block *block, bool *counted, bool add)
{
lockdep_assert_not_held(&block->cb_lock);

down_write(&block->cb_lock);
if (*counted != add) {
if (add) {
atomic_inc(&block->filtercnt);
*counted = true;
} else {
atomic_dec(&block->filtercnt);
*counted = false;
}
spin_lock(&tp->lock);
if (tp->usesw && !tp->counted) {
counted = true;
tp->counted = true;
}
tcf_maintain_bypass(block);
up_write(&block->cb_lock);
spin_unlock(&tp->lock);

if (counted && atomic_inc_return(&block->useswcnt) == 1)
static_branch_inc(&tcf_sw_enabled_key);
#endif
}

static void tcf_chain_put(struct tcf_chain *chain);
Expand All @@ -451,7 +444,7 @@ static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
bool sig_destroy, struct netlink_ext_ack *extack)
{
tp->ops->destroy(tp, rtnl_held, extack);
tcf_block_filter_cnt_update(tp->chain->block, &tp->counted, false);
tcf_proto_count_usesw(tp, false);
if (sig_destroy)
tcf_proto_signal_destroyed(tp->chain, tp);
tcf_chain_put(tp->chain);
Expand Down Expand Up @@ -2409,7 +2402,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh);
tcf_block_filter_cnt_update(block, &tp->counted, true);
tcf_proto_count_usesw(tp, true);
/* q pointer is NULL for shared blocks */
if (q)
q->flags &= ~TCQ_F_CAN_BYPASS;
Expand Down Expand Up @@ -3532,8 +3525,6 @@ static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
if (*flags & TCA_CLS_FLAGS_IN_HW)
return;
*flags |= TCA_CLS_FLAGS_IN_HW;
if (tc_skip_sw(*flags))
atomic_inc(&block->skipswcnt);
atomic_inc(&block->offloadcnt);
}

Expand All @@ -3542,8 +3533,6 @@ static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
if (!(*flags & TCA_CLS_FLAGS_IN_HW))
return;
*flags &= ~TCA_CLS_FLAGS_IN_HW;
if (tc_skip_sw(*flags))
atomic_dec(&block->skipswcnt);
atomic_dec(&block->offloadcnt);
}

Expand Down
2 changes: 2 additions & 0 deletions net/sched/cls_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(prog->gen_flags))
prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW;

tcf_proto_update_usesw(tp, prog->gen_flags);

if (oldprog) {
idr_replace(&head->handle_idr, prog, handle);
list_replace_rcu(&oldprog->link, &prog->link);
Expand Down
2 changes: 2 additions & 0 deletions net/sched/cls_flower.c
Original file line number Diff line number Diff line change
Expand Up @@ -2503,6 +2503,8 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(fnew->flags))
fnew->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

tcf_proto_update_usesw(tp, fnew->flags);

spin_lock(&tp->lock);

/* tp was deleted concurrently. -EAGAIN will cause caller to lookup
Expand Down
2 changes: 2 additions & 0 deletions net/sched/cls_matchall.c
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

tcf_proto_update_usesw(tp, new->flags);

*arg = head;
rcu_assign_pointer(tp->root, new);
return 0;
Expand Down
4 changes: 4 additions & 0 deletions net/sched/cls_u32.c
Original file line number Diff line number Diff line change
Expand Up @@ -951,6 +951,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(new->flags))
new->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

tcf_proto_update_usesw(tp, new->flags);

u32_replace_knode(tp, tp_c, new);
tcf_unbind_filter(tp, &n->res);
tcf_exts_get_net(&n->exts);
Expand Down Expand Up @@ -1164,6 +1166,8 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!tc_in_hw(n->flags))
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;

tcf_proto_update_usesw(tp, n->flags);

ins = &ht->ht[TC_U32_HASH(handle)];
for (pins = rtnl_dereference(*ins); pins;
ins = &pins->next, pins = rtnl_dereference(*ins))
Expand Down

0 comments on commit a12c76a

Please sign in to comment.