Skip to content

Commit

Permalink
Merge branch 'act_bpf_lockless'
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
act_bpf: remove spinlock in fast path

v1 version had a race condition in cleanup path of bpf_prog.
I tried to fix it by adding new callback 'cleanup_rcu' to 'struct tcf_common'
and call it out of act_api cleanup path, but Daniel noticed
(thanks for the idea!) that most of the classifiers already do action cleanup
out of rcu callback.
So instead this set of patches converts tcindex and rsvp classifiers to call
tcf_exts_destroy() after rcu grace period and since action cleanup logic
in __tcf_hash_release() is only called when bind and refcnt goes to zero,
it's guaranteed that cleanup() callback is called from rcu callback.
More specifically:
patches 1 and 2 - simple fixes
patches 2 and 3 - convert tcf_exts_destroy in tcindex and rsvp to call_rcu
patch 5 - removes spin_lock from act_bpf

The cleanup of actions is now universally done after rcu grace period
and in the future we can drop (now unnecessary) call_rcu from tcf_hash_destroy()
patch 5 is using synchronize_rcu() in act_bpf replacement path, since it's
very rare and alternative of dynamically allocating 'struct tcf_bpf_cfg' just
to pass it to call_rcu looks even less appealing.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Aug 26, 2015
2 parents dc8242f + cff8245 commit 8c5bbe7
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 30 deletions.
1 change: 0 additions & 1 deletion include/net/act_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,6 @@ struct tc_action_ops {
};

int tcf_hash_search(struct tc_action *a, u32 index);
void tcf_hash_destroy(struct tc_action *a);
u32 tcf_hash_new_index(struct tcf_hashinfo *hinfo);
int tcf_hash_check(u32 index, struct tc_action *a, int bind);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
Expand Down
2 changes: 1 addition & 1 deletion include/net/tc_act/tc_bpf.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

struct tcf_bpf {
struct tcf_common common;
struct bpf_prog *filter;
struct bpf_prog __rcu *filter;
union {
u32 bpf_fd;
u16 bpf_num_ops;
Expand Down
3 changes: 1 addition & 2 deletions net/sched/act_api.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static void free_tcf(struct rcu_head *head)
kfree(p);
}

void tcf_hash_destroy(struct tc_action *a)
static void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
struct tcf_hashinfo *hinfo = a->ops->hinfo;
Expand All @@ -52,7 +52,6 @@ void tcf_hash_destroy(struct tc_action *a)
*/
call_rcu(&p->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_destroy);

int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
Expand Down
38 changes: 20 additions & 18 deletions net/sched/act_bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,25 +37,24 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
struct tcf_bpf *prog = act->priv;
struct bpf_prog *filter;
int action, filter_res;
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;

if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;

spin_lock(&prog->tcf_lock);

prog->tcf_tm.lastuse = jiffies;
bstats_update(&prog->tcf_bstats, skb);
tcf_lastuse_update(&prog->tcf_tm);
bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);

/* Needed here for accessing maps. */
rcu_read_lock();
filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
filter_res = BPF_PROG_RUN(prog->filter, skb);
filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
filter_res = BPF_PROG_RUN(prog->filter, skb);
filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();

Expand All @@ -77,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
case TC_ACT_SHOT:
action = filter_res;
prog->tcf_qstats.drops++;
qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break;
case TC_ACT_UNSPEC:
action = prog->tcf_action;
Expand All @@ -87,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
}

spin_unlock(&prog->tcf_lock);
return action;
}

Expand Down Expand Up @@ -263,7 +261,10 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
struct tcf_bpf_cfg *cfg)
{
cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
cfg->filter = prog->filter;
/* updates to prog->filter are prevented, since it's called either
* with rtnl lock or during final cleanup in rcu callback
*/
cfg->filter = rcu_dereference_protected(prog->filter, 1);

cfg->bpf_ops = prog->bpf_ops;
cfg->bpf_name = prog->bpf_name;
Expand Down Expand Up @@ -294,7 +295,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,

if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
sizeof(*prog), bind, false);
sizeof(*prog), bind, true);
if (ret < 0)
return ret;

Expand Down Expand Up @@ -325,9 +326,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
goto out;

prog = to_bpf(act);
spin_lock_bh(&prog->tcf_lock);
ASSERT_RTNL();

if (ret != ACT_P_CREATED)
if (res != ACT_P_CREATED)
tcf_bpf_prog_fill_cfg(prog, &old);

prog->bpf_ops = cfg.bpf_ops;
Expand All @@ -339,14 +340,15 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->bpf_fd = cfg.bpf_fd;

prog->tcf_action = parm->action;
prog->filter = cfg.filter;

spin_unlock_bh(&prog->tcf_lock);
rcu_assign_pointer(prog->filter, cfg.filter);

if (res == ACT_P_CREATED)
if (res == ACT_P_CREATED) {
tcf_hash_insert(act);
else
} else {
/* make sure the program being replaced is no longer executing */
synchronize_rcu();
tcf_bpf_cfg_cleanup(&old);
}

return res;
out:
Expand Down
18 changes: 14 additions & 4 deletions net/sched/cls_rsvp.h
Original file line number Diff line number Diff line change
Expand Up @@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}

static void
rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
static void rsvp_delete_filter_rcu(struct rcu_head *head)
{
tcf_unbind_filter(tp, &f->res);
struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);

tcf_exts_destroy(&f->exts);
kfree_rcu(f, rcu);
kfree(f);
}

static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
{
tcf_unbind_filter(tp, &f->res);
/* all classifiers are required to call tcf_exts_destroy() after rcu
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}

static bool rsvp_destroy(struct tcf_proto *tp, bool force)
Expand Down
29 changes: 25 additions & 4 deletions net/sched/cls_tcindex.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
struct rcu_head rcu;
};

struct tcindex_filter {
Expand Down Expand Up @@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}

static int
tcindex_delete(struct tcf_proto *tp, unsigned long arg)
static void tcindex_destroy_rexts(struct rcu_head *head)
{
struct tcindex_filter_result *r;

r = container_of(head, struct tcindex_filter_result, rcu);
tcf_exts_destroy(&r->exts);
}

static void tcindex_destroy_fexts(struct rcu_head *head)
{
struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);

tcf_exts_destroy(&f->result.exts);
kfree(f);
}

static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
Expand Down Expand Up @@ -162,9 +178,14 @@ tcindex_delete(struct tcf_proto *tp, unsigned long arg)
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
tcf_exts_destroy(&r->exts);
/* all classifiers are required to call tcf_exts_destroy() after rcu
* grace period, since converted-to-rcu actions are relying on that
* in cleanup() callback
*/
if (f)
kfree_rcu(f, rcu);
call_rcu(&f->rcu, tcindex_destroy_fexts);
else
call_rcu(&r->rcu, tcindex_destroy_rexts);
return 0;
}

Expand Down

0 comments on commit 8c5bbe7

Please sign in to comment.