Skip to content

Commit

Permalink
net: bridge: convert multicast to generic rhashtable
Browse files Browse the repository at this point in the history
The bridge multicast code currently uses a custom resizable hashtable
which predates the generic rhashtable interface. It has many
shortcomings compared and duplicates functionality that is presently
available via the generic rhashtable, so this patch removes the custom
rhashtable implementation in favor of the kernel's generic rhashtable.
The hash maximum is kept and the rhashtable's size is used to do a loose
check if it's reached in which case we revert to the old behaviour and
disable further bridge multicast processing. Also now we can support any
hash maximum, doesn't need to be a power of 2.

v3: add non-rcu br_mdb_get variant and use it where multicast_lock is
    held to avoid RCU splat, drop hash_max function and just set it
    directly

v2: handle when IGMP snooping is undefined, add br_mdb_init/uninit
    placeholders

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Nikolay Aleksandrov authored and David S. Miller committed Dec 6, 2018
1 parent ba5dfaf commit 19e3a9c
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 436 deletions.
10 changes: 10 additions & 0 deletions net/bridge/br_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,17 @@ static int br_dev_init(struct net_device *dev)
return err;
}

err = br_mdb_hash_init(br);
if (err) {
free_percpu(br->stats);
br_fdb_hash_fini(br);
return err;
}

err = br_vlan_init(br);
if (err) {
free_percpu(br->stats);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
return err;
}
Expand All @@ -142,6 +150,7 @@ static int br_dev_init(struct net_device *dev)
if (err) {
free_percpu(br->stats);
br_vlan_flush(br);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
}
br_set_lockdep_class(dev);
Expand All @@ -156,6 +165,7 @@ static void br_dev_uninit(struct net_device *dev)
br_multicast_dev_del(br);
br_multicast_uninit_stats(br);
br_vlan_flush(br);
br_mdb_hash_fini(br);
br_fdb_hash_fini(br);
free_percpu(br->stats);
}
Expand Down
120 changes: 51 additions & 69 deletions net/bridge/br_mdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,82 +78,72 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip)
static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb,
struct net_device *dev)
{
int idx = 0, s_idx = cb->args[1], err = 0;
struct net_bridge *br = netdev_priv(dev);
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct nlattr *nest, *nest2;
int i, err = 0;
int idx = 0, s_idx = cb->args[1];

if (!br_opt_get(br, BROPT_MULTICAST_ENABLED))
return 0;

mdb = rcu_dereference(br->mdb);
if (!mdb)
return 0;

nest = nla_nest_start(skb, MDBA_MDB);
if (nest == NULL)
return -EMSGSIZE;

for (i = 0; i < mdb->max; i++) {
struct net_bridge_mdb_entry *mp;
hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) {
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port *port;

hlist_for_each_entry_rcu(mp, &mdb->mhash[i], hlist[mdb->ver]) {
if (idx < s_idx)
goto skip;
if (idx < s_idx)
goto skip;

nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
if (nest2 == NULL) {
err = -EMSGSIZE;
goto out;
}
nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY);
if (!nest2) {
err = -EMSGSIZE;
break;
}

for (pp = &mp->ports;
(p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
struct nlattr *nest_ent;
struct br_mdb_entry e;

port = p->port;
if (!port)
continue;

memset(&e, 0, sizeof(e));
e.ifindex = port->dev->ifindex;
e.vid = p->addr.vid;
__mdb_entry_fill_flags(&e, p->flags);
if (p->addr.proto == htons(ETH_P_IP))
e.addr.u.ip4 = p->addr.u.ip4;
for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL;
pp = &p->next) {
struct nlattr *nest_ent;
struct br_mdb_entry e;

port = p->port;
if (!port)
continue;

memset(&e, 0, sizeof(e));
e.ifindex = port->dev->ifindex;
e.vid = p->addr.vid;
__mdb_entry_fill_flags(&e, p->flags);
if (p->addr.proto == htons(ETH_P_IP))
e.addr.u.ip4 = p->addr.u.ip4;
#if IS_ENABLED(CONFIG_IPV6)
if (p->addr.proto == htons(ETH_P_IPV6))
e.addr.u.ip6 = p->addr.u.ip6;
if (p->addr.proto == htons(ETH_P_IPV6))
e.addr.u.ip6 = p->addr.u.ip6;
#endif
e.addr.proto = p->addr.proto;
nest_ent = nla_nest_start(skb,
MDBA_MDB_ENTRY_INFO);
if (!nest_ent) {
nla_nest_cancel(skb, nest2);
err = -EMSGSIZE;
goto out;
}
if (nla_put_nohdr(skb, sizeof(e), &e) ||
nla_put_u32(skb,
MDBA_MDB_EATTR_TIMER,
br_timer_value(&p->timer))) {
nla_nest_cancel(skb, nest_ent);
nla_nest_cancel(skb, nest2);
err = -EMSGSIZE;
goto out;
}
nla_nest_end(skb, nest_ent);
e.addr.proto = p->addr.proto;
nest_ent = nla_nest_start(skb, MDBA_MDB_ENTRY_INFO);
if (!nest_ent) {
nla_nest_cancel(skb, nest2);
err = -EMSGSIZE;
goto out;
}
nla_nest_end(skb, nest2);
skip:
idx++;
if (nla_put_nohdr(skb, sizeof(e), &e) ||
nla_put_u32(skb,
MDBA_MDB_EATTR_TIMER,
br_timer_value(&p->timer))) {
nla_nest_cancel(skb, nest_ent);
nla_nest_cancel(skb, nest2);
err = -EMSGSIZE;
goto out;
}
nla_nest_end(skb, nest_ent);
}
nla_nest_end(skb, nest2);
skip:
idx++;
}

out:
Expand Down Expand Up @@ -203,8 +193,7 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)

rcu_read_lock();

/* In theory this could be wrapped to 0... */
cb->seq = net->dev_base_seq + br_mdb_rehash_seq;
cb->seq = net->dev_base_seq;

for_each_netdev_rcu(net, dev) {
if (dev->priv_flags & IFF_EBRIDGE) {
Expand Down Expand Up @@ -297,7 +286,6 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
struct br_mdb_complete_info *data = priv;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_port_group *p;
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port *port = data->port;
struct net_bridge *br = port->br;
Expand All @@ -306,8 +294,7 @@ static void br_mdb_complete(struct net_device *dev, int err, void *priv)
goto err;

spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);
mp = br_mdb_ip_get(mdb, &data->ip);
mp = br_mdb_ip_get(br, &data->ip);
if (!mp)
goto out;
for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL;
Expand Down Expand Up @@ -588,14 +575,12 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
struct net_bridge_mdb_htable *mdb;
unsigned long now = jiffies;
int err;

mdb = mlock_dereference(br->mdb, br);
mp = br_mdb_ip_get(mdb, group);
mp = br_mdb_ip_get(br, group);
if (!mp) {
mp = br_multicast_new_group(br, port, group);
mp = br_multicast_new_group(br, group);
err = PTR_ERR_OR_ZERO(mp);
if (err)
return err;
Expand Down Expand Up @@ -696,7 +681,6 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh,

static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
{
struct net_bridge_mdb_htable *mdb;
struct net_bridge_mdb_entry *mp;
struct net_bridge_port_group *p;
struct net_bridge_port_group __rcu **pp;
Expand All @@ -709,9 +693,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
__mdb_entry_to_br_ip(entry, &ip);

spin_lock_bh(&br->multicast_lock);
mdb = mlock_dereference(br->mdb, br);

mp = br_mdb_ip_get(mdb, &ip);
mp = br_mdb_ip_get(br, &ip);
if (!mp)
goto unlock;

Expand Down
Loading

0 comments on commit 19e3a9c

Please sign in to comment.