Skip to content

Commit

Permalink
Merge branch 'net-sched-transition-actions-to-pcpu-stats-and-rcu'
Browse files Browse the repository at this point in the history
Pedro Tammela says:

====================
net/sched: transition actions to pcpu stats and rcu

Following the work done for act_pedit[0], transition the remaining tc
actions to percpu stats and rcu, whenever possible.
Percpu stats make updating the action stats very cheap, while combining
it with rcu action parameters makes it possible to get rid of the per
action lock in the datapath.

For act_connmark and act_nat we run the following tests:
- tc filter add dev ens2f0 ingress matchall action connmark
- tc filter add dev ens2f0 ingress matchall action nat ingress any 10.10.10.10

Our setup consists of a 26 cores Intel CPU and a 25G NIC.
We use TRex to shoot 10mpps TCP packets and take perf measurements.
Both actions improved performance as expected since the datapath lock disappeared.

For act_pedit we move the drop counter to percpu, when available.
For act_gate we move the counters to percpu, when available.

[0] https://lore.kernel.org/all/20230131145149.3776656-1-pctammela@mojatatu.com/
====================

Link: https://lore.kernel.org/r/20230214211534.735718-1-pctammela@mojatatu.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Feb 16, 2023
2 parents 10d1342 + 2d2e75d commit e9ab255
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 84 deletions.
9 changes: 7 additions & 2 deletions include/net/tc_act/tc_connmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,15 @@

#include <net/act_api.h>

struct tcf_connmark_info {
struct tc_action common;
struct tcf_connmark_parms {
struct net *net;
u16 zone;
struct rcu_head rcu;
};

struct tcf_connmark_info {
struct tc_action common;
struct tcf_connmark_parms __rcu *parms;
};

#define to_connmark(a) ((struct tcf_connmark_info *)a)
Expand Down
10 changes: 7 additions & 3 deletions include/net/tc_act/tc_nat.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,17 @@
#include <linux/types.h>
#include <net/act_api.h>

struct tcf_nat {
struct tc_action common;

struct tcf_nat_parms {
__be32 old_addr;
__be32 new_addr;
__be32 mask;
u32 flags;
struct rcu_head rcu;
};

struct tcf_nat {
struct tc_action common;
struct tcf_nat_parms __rcu *parms;
};

#define to_tcf_nat(a) ((struct tcf_nat *)a)
Expand Down
107 changes: 68 additions & 39 deletions net/sched/act_connmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = to_connmark(a);
struct tcf_connmark_parms *parms;
struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;

spin_lock(&ca->tcf_lock);
tcf_lastuse_update(&ca->tcf_tm);
bstats_update(&ca->tcf_bstats, skb);
tcf_action_update_bstats(&ca->common, skb);

parms = rcu_dereference_bh(ca->parms);

switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
Expand All @@ -64,31 +66,29 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
c = nf_ct_get(skb, &ctinfo);
if (c) {
skb->mark = READ_ONCE(c->mark);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
goto out;
goto count;
}

if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
proto, ca->net, &tuple))
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net,
&tuple))
goto out;

zone.id = ca->zone;
zone.id = parms->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;

thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
thash = nf_conntrack_find_get(parms->net, &zone, &tuple);
if (!thash)
goto out;

c = nf_ct_tuplehash_to_ctrack(thash);
/* using overlimits stats to count how many packets marked */
ca->tcf_qstats.overlimits++;
skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);

count:
/* using overlimits stats to count how many packets marked */
tcf_action_inc_overlimit_qstats(&ca->common);
out:
spin_unlock(&ca->tcf_lock);
return ca->tcf_action;
return READ_ONCE(ca->tcf_action);
}

static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
Expand All @@ -101,6 +101,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
struct tcf_connmark_parms *nparms, *oparms;
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
Expand All @@ -120,52 +121,66 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_CONNMARK_PARMS])
return -EINVAL;

nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
if (!nparms)
return -ENOMEM;

parm = nla_data(tb[TCA_CONNMARK_PARMS]);
index = parm->index;
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
ret = tcf_idr_create(tn, index, est, a,
&act_connmark_ops, bind, false, flags);
ret = tcf_idr_create_from_flags(tn, index, est, a,
&act_connmark_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
err = ret;
goto out_free;
}

ci = to_connmark(*a);
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
extack);
if (err < 0)
goto release_idr;
tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->net = net;
ci->zone = parm->zone;

nparms->net = net;
nparms->zone = parm->zone;

ret = ACT_P_CREATED;
} else if (ret > 0) {
ci = to_connmark(*a);
if (bind)
return 0;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
tcf_idr_release(*a, bind);
return -EEXIST;
if (bind) {
err = 0;
goto out_free;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
extack);
if (err < 0)
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
err = -EEXIST;
goto release_idr;
/* replacing action and zone */
spin_lock_bh(&ci->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->zone = parm->zone;
spin_unlock_bh(&ci->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
}

nparms->net = rtnl_dereference(ci->parms)->net;
nparms->zone = parm->zone;

ret = 0;
}

err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;

spin_lock_bh(&ci->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock));
spin_unlock_bh(&ci->tcf_lock);

if (goto_ch)
tcf_chain_put_by_act(goto_ch);

if (oparms)
kfree_rcu(oparms, rcu);

return ret;

release_idr:
tcf_idr_release(*a, bind);
out_free:
kfree(nparms);
return err;
}

Expand All @@ -179,11 +194,14 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
struct tcf_connmark_parms *parms;
struct tcf_t t;

spin_lock_bh(&ci->tcf_lock);
parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock));

opt.action = ci->tcf_action;
opt.zone = ci->zone;
opt.zone = parms->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;

Expand All @@ -201,13 +219,24 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
return -1;
}

static void tcf_connmark_cleanup(struct tc_action *a)
{
struct tcf_connmark_info *ci = to_connmark(a);
struct tcf_connmark_parms *parms;

parms = rcu_dereference_protected(ci->parms, 1);
if (parms)
kfree_rcu(parms, rcu);
}

static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.id = TCA_ID_CONNMARK,
.owner = THIS_MODULE,
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
.cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};

Expand Down
30 changes: 16 additions & 14 deletions net/sched/act_gate.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,35 +119,37 @@ TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);

spin_lock(&gact->tcf_lock);
int action = READ_ONCE(gact->tcf_action);

tcf_lastuse_update(&gact->tcf_tm);
bstats_update(&gact->tcf_bstats, skb);
tcf_action_update_bstats(&gact->common, skb);

spin_lock(&gact->tcf_lock);
if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
spin_unlock(&gact->tcf_lock);
return gact->tcf_action;
return action;
}

if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) {
spin_unlock(&gact->tcf_lock);
goto drop;
}

if (gact->current_max_octets >= 0) {
gact->current_entry_octets += qdisc_pkt_len(skb);
if (gact->current_entry_octets > gact->current_max_octets) {
gact->tcf_qstats.overlimits++;
goto drop;
spin_unlock(&gact->tcf_lock);
goto overlimit;
}
}

spin_unlock(&gact->tcf_lock);

return gact->tcf_action;
drop:
gact->tcf_qstats.drops++;
spin_unlock(&gact->tcf_lock);
return action;

overlimit:
tcf_action_inc_overlimit_qstats(&gact->common);
drop:
tcf_action_inc_drop_qstats(&gact->common);
return TC_ACT_SHOT;
}

Expand Down Expand Up @@ -357,8 +359,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return 0;

if (!err) {
ret = tcf_idr_create(tn, index, est, a,
&act_gate_ops, bind, false, flags);
ret = tcf_idr_create_from_flags(tn, index, est, a,
&act_gate_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
Expand Down
Loading

0 comments on commit e9ab255

Please sign in to comment.