Skip to content

Commit

Permalink
Merge branch 'inet-frags-avoid-possible-races-at-netns-dismantle'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
inet: frags: avoid possible races at netns dismantle

This patch series fixes a race happening on netns dismantle with
frag queues. While rhashtable_free_and_destroy() is running,
concurrent timers might run inet_frag_kill() and attempt
rhashtable_remove_fast() calls. This is not allowed by
rhashtable logic.

Since I do not want to add expensive synchronize_rcu() calls
in the netns dismantle path, I had to no longer inline
netns_frags structures, but dynamically allocate them.

The ten first patches make this preparation, so that
the last patch clearly shows the fix.

As this patch series is not exactly trivial, I chose to
target 5.3. We will backport it once soaked a bit.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed May 26, 2019
2 parents ddf6ddb + 3c8fc87 commit 8fb91c3
Show file tree
Hide file tree
Showing 11 changed files with 181 additions and 167 deletions.
48 changes: 34 additions & 14 deletions include/net/inet_frag.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,22 @@

#include <linux/rhashtable-types.h>

struct netns_frags {
/* Per netns frag queues directory */
struct fqdir {
/* sysctls */
long high_thresh;
long low_thresh;
int timeout;
int max_dist;
struct inet_frags *f;
struct net *net;
bool dead;

struct rhashtable rhashtable ____cacheline_aligned_in_smp;

/* Keep atomic mem on separate cachelines in structs that include it */
atomic_long_t mem ____cacheline_aligned_in_smp;
struct rcu_work destroy_rwork;
};

/**
Expand All @@ -24,11 +28,13 @@ struct netns_frags {
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_HASH_DEAD = BIT(3),
};

struct frag_v4_compare_key {
Expand Down Expand Up @@ -64,7 +70,7 @@ struct frag_v6_compare_key {
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
* @fqdir: pointer to struct fqdir
* @rcu: rcu head for freeing deferall
*/
struct inet_frag_queue {
Expand All @@ -84,7 +90,7 @@ struct inet_frag_queue {
int meat;
__u8 flags;
u16 max_size;
struct netns_frags *net;
struct fqdir *fqdir;
struct rcu_head rcu;
};

Expand All @@ -103,16 +109,30 @@ struct inet_frags {
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);

static inline int inet_frags_init_net(struct netns_frags *nf)
static inline int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f,
struct net *net)
{
atomic_long_set(&nf->mem, 0);
return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
int res;

if (!fqdir)
return -ENOMEM;
fqdir->f = f;
fqdir->net = net;
res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
if (res < 0) {
kfree(fqdir);
return res;
}
*fqdirp = fqdir;
return 0;
}
void inet_frags_exit_net(struct netns_frags *nf);

void fqdir_exit(struct fqdir *fqdir);

void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);

/* Free all skbs in the queue; return the sum of their truesizes. */
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
Expand All @@ -125,19 +145,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)

/* Memory Tracking Functions. */

static inline long frag_mem_limit(const struct netns_frags *nf)
static inline long frag_mem_limit(const struct fqdir *fqdir)
{
return atomic_long_read(&nf->mem);
return atomic_long_read(&fqdir->mem);
}

static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
{
atomic_long_sub(val, &nf->mem);
atomic_long_sub(val, &fqdir->mem);
}

static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
{
atomic_long_add(val, &nf->mem);
atomic_long_add(val, &fqdir->mem);
}

/* RFC 3168 support :
Expand Down
2 changes: 1 addition & 1 deletion include/net/netns/ieee802154_6lowpan.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ struct netns_sysctl_lowpan {

struct netns_ieee802154_lowpan {
struct netns_sysctl_lowpan sysctl;
struct netns_frags frags;
struct fqdir *fqdir;
};

#endif
2 changes: 1 addition & 1 deletion include/net/netns/ipv4.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ struct netns_ipv4 {

struct inet_peer_base *peers;
struct sock * __percpu *tcp_sk;
struct netns_frags frags;
struct fqdir *fqdir;
#ifdef CONFIG_NETFILTER
struct xt_table *iptable_filter;
struct xt_table *iptable_mangle;
Expand Down
4 changes: 2 additions & 2 deletions include/net/netns/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ struct netns_ipv6 {
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
struct inet_peer_base *peers;
struct netns_frags frags;
struct fqdir *fqdir;
#ifdef CONFIG_NETFILTER
struct xt_table *ip6table_filter;
struct xt_table *ip6table_mangle;
Expand Down Expand Up @@ -116,7 +116,7 @@ struct netns_ipv6 {

#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag {
struct netns_frags frags;
struct fqdir *fqdir;
};
#endif

Expand Down
36 changes: 16 additions & 20 deletions net/ieee802154/6lowpan/reassembly.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
key.src = *src;
key.dst = *dst;

q = inet_frag_find(&ieee802154_lowpan->frags, &key);
q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
if (!q)
return NULL;

Expand Down Expand Up @@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
fq->q.flags |= INET_FRAG_FIRST_IN;

fq->q.meat += skb->len;
add_frag_mem_limit(fq->q.net, skb->truesize);
add_frag_mem_limit(fq->q.fqdir, skb->truesize);

if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
Expand Down Expand Up @@ -326,23 +326,18 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
.procname = "6lowpanfrag_time",
.data = &init_net.ieee802154_lowpan.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
Expand Down Expand Up @@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
if (table == NULL)
goto err_alloc;

table[0].data = &ieee802154_lowpan->frags.high_thresh;
table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
table[1].data = &ieee802154_lowpan->frags.low_thresh;
table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
table[2].data = &ieee802154_lowpan->frags.timeout;

/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
}

table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh;
table[1].data = &ieee802154_lowpan->fqdir->low_thresh;
table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh;
table[2].data = &ieee802154_lowpan->fqdir->timeout;

hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
if (hdr == NULL)
goto err_reg;
Expand Down Expand Up @@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net)
net_ieee802154_lowpan(net);
int res;

ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
ieee802154_lowpan->frags.f = &lowpan_frags;

res = inet_frags_init_net(&ieee802154_lowpan->frags);
res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
if (res < 0)
return res;

ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;

res = lowpan_frags_ns_sysctl_register(net);
if (res < 0)
inet_frags_exit_net(&ieee802154_lowpan->frags);
fqdir_exit(ieee802154_lowpan->fqdir);
return res;
}

Expand All @@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);

lowpan_frags_ns_sysctl_unregister(net);
inet_frags_exit_net(&ieee802154_lowpan->frags);
fqdir_exit(ieee802154_lowpan->fqdir);
}

static struct pernet_operations lowpan_frags_ops = {
Expand Down
Loading

0 comments on commit 8fb91c3

Please sign in to comment.