From f8f2eb9de69a1119117d198547c13d7a1123a5a9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Apr 2024 09:51:04 +0000 Subject: [PATCH 1/3] neighbour: add RCU protection to neigh_tables[] In order to remove RTNL protection from neightbl_dump_info() and neigh_dump_info() later, we need to add RCU protection to neigh_tables[]. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 47d07b122f7ab..ccb770539f1a8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1771,7 +1771,7 @@ static void neigh_parms_destroy(struct neigh_parms *parms) static struct lock_class_key neigh_table_proxy_queue_class; -static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly; +static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly; void neigh_table_init(int index, struct neigh_table *tbl) { @@ -1828,13 +1828,19 @@ void neigh_table_init(int index, struct neigh_table *tbl) tbl->last_flush = now; tbl->last_rand = now + tbl->parms.reachable_time * 20; - neigh_tables[index] = tbl; + rcu_assign_pointer(neigh_tables[index], tbl); } EXPORT_SYMBOL(neigh_table_init); +/* + * Only called from ndisc_cleanup(), which means this is dead code + * because we no longer can unload IPv6 module. + */ int neigh_table_clear(int index, struct neigh_table *tbl) { - neigh_tables[index] = NULL; + RCU_INIT_POINTER(neigh_tables[index], NULL); + synchronize_rcu(); + /* It is not clean... Fix it to unload IPv6 module safely */ cancel_delayed_work_sync(&tbl->managed_work); cancel_delayed_work_sync(&tbl->gc_work); @@ -1866,10 +1872,10 @@ static struct neigh_table *neigh_find_table(int family) switch (family) { case AF_INET: - tbl = neigh_tables[NEIGH_ARP_TABLE]; + tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ARP_TABLE]); break; case AF_INET6: - tbl = neigh_tables[NEIGH_ND_TABLE]; + tbl = rcu_dereference_rtnl(neigh_tables[NEIGH_ND_TABLE]); break; } @@ -2333,7 +2339,7 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, ndtmsg = nlmsg_data(nlh); for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { - tbl = neigh_tables[tidx]; + tbl = rcu_dereference_rtnl(neigh_tables[tidx]); if (!tbl) continue; if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) @@ -2521,7 +2527,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) { struct neigh_parms *p; - tbl = neigh_tables[tidx]; + tbl = rcu_dereference_rtnl(neigh_tables[tidx]); if (!tbl) continue; @@ -2881,7 +2887,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; for (t = 0; t < NEIGH_NR_TABLES; t++) { - tbl = neigh_tables[t]; + tbl = rcu_dereference_rtnl(neigh_tables[t]); if (!tbl) continue; @@ -3145,14 +3151,15 @@ int neigh_xmit(int index, struct net_device *dev, const void *addr, struct sk_buff *skb) { int err = -EAFNOSUPPORT; + if (likely(index < NEIGH_NR_TABLES)) { struct neigh_table *tbl; struct neighbour *neigh; - tbl = neigh_tables[index]; - if (!tbl) - goto out; rcu_read_lock(); + tbl = rcu_dereference(neigh_tables[index]); + if (!tbl) + goto out_unlock; if (index == NEIGH_ARP_TABLE) { u32 key = *((u32 *)addr); @@ -3168,6 +3175,7 @@ int neigh_xmit(int index, struct net_device *dev, goto out_kfree_skb; } err = READ_ONCE(neigh->output)(neigh, skb); +out_unlock: rcu_read_unlock(); } else if (index == NEIGH_LINK_TABLE) { From 7e4975f7e7fb0eba3cbb69d9c467750a1c3ce131 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Apr 2024 09:51:05 +0000 Subject: [PATCH 2/3] neighbour: fix neigh_dump_info() return value Change neigh_dump_table() and pneigh_dump_table() to either return 0 or -EMSGSIZE if not enough space was available in the skb. Then neigh_dump_info() can do the same. This allows NLMSG_DONE to be appended to the current skb at the end of a dump, saving a couple of recvmsg() system calls. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index ccb770539f1a8..d8c3ffdee29f1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2715,7 +2715,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, { struct net *net = sock_net(skb->sk); struct neighbour *n; - int rc, h, s_h = cb->args[1]; + int err = 0, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; struct neigh_hash_table *nht; unsigned int flags = NLM_F_MULTI; @@ -2737,23 +2737,20 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || neigh_master_filtered(n->dev, filter->master_idx)) goto next; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, - flags) < 0) { - rc = -1; + err = neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, flags); + if (err < 0) goto out; - } next: idx++; } } - rc = skb->len; out: rcu_read_unlock(); cb->args[1] = h; cb->args[2] = idx; - return rc; + return err; } static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, @@ -2762,7 +2759,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, { struct pneigh_entry *n; struct net *net = sock_net(skb->sk); - int rc, h, s_h = cb->args[3]; + int err = 0, h, s_h = cb->args[3]; int idx, s_idx = idx = cb->args[4]; unsigned int flags = NLM_F_MULTI; @@ -2780,11 +2777,11 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (neigh_ifindex_filtered(n->dev, filter->dev_idx) || neigh_master_filtered(n->dev, filter->master_idx)) goto next; - if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGH, flags, tbl) < 0) { + err = pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWNEIGH, flags, tbl); + if (err < 0) { read_unlock_bh(&tbl->lock); - rc = -1; goto out; } next: @@ -2793,12 +2790,10 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, } read_unlock_bh(&tbl->lock); - rc = skb->len; out: cb->args[3] = h; cb->args[4] = idx; - return rc; - + return err; } static int neigh_valid_dump_req(const struct nlmsghdr *nlh, @@ -2905,7 +2900,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) } cb->args[0] = t; - return skb->len; + return err; } static int neigh_valid_get_req(const struct nlmsghdr *nlh, From ba0f780694237d96a1d6366f931cd716fb0f7ab5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 18 Apr 2024 09:51:06 +0000 Subject: [PATCH 3/3] neighbour: no longer hold RTNL in neigh_dump_info() neigh_dump_table() is already relying on RCU protection. pneigh_dump_table() is using its own protection (tbl->lock) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d8c3ffdee29f1..0805c00c63d43 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2723,7 +2723,6 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (filter->dev_idx || filter->master_idx) flags |= NLM_F_DUMP_FILTERED; - rcu_read_lock(); nht = rcu_dereference(tbl->nht); for (h = s_h; h < (1 << nht->hash_shift); h++) { @@ -2747,7 +2746,6 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, } } out: - rcu_read_unlock(); cb->args[1] = h; cb->args[2] = idx; return err; @@ -2881,8 +2879,9 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; + rcu_read_lock(); for (t = 0; t < NEIGH_NR_TABLES; t++) { - tbl = rcu_dereference_rtnl(neigh_tables[t]); + tbl = rcu_dereference(neigh_tables[t]); if (!tbl) continue; @@ -2898,6 +2897,7 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (err < 0) break; } + rcu_read_unlock(); cb->args[0] = t; return err; @@ -3894,7 +3894,8 @@ static int __init neigh_init(void) { rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0); + rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, + RTNL_FLAG_DUMP_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, 0);