From b4454bc6a0fbf2f9edcddd08862175085b990856 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean <vladimir.oltean@nxp.com> Date: Wed, 28 Jul 2021 21:27:47 +0300 Subject: [PATCH 1/2] net: bridge: switchdev: replay the entire FDB for each port Currently when a switchdev port joins a bridge, we replay all FDB entries pointing towards that port or towards the bridge. However, this is insufficient in certain situations: (a) DSA, through its assisted_learning_on_cpu_port logic, snoops dynamically learned FDB entries on foreign interfaces. These are FDB entries that are pointing neither towards the newly joined switchdev port, nor towards the bridge. So these addresses would be missed when joining a bridge where a foreign interface has already learned some addresses, and they would also linger on if the DSA port leaves the bridge before the foreign interface forgets them. None of this happens if we replay the entire FDB when the port joins. (b) There is a desire to treat local FDB entries on a port (i.e. the port's termination MAC address) identically to FDB entries pointing towards the bridge itself. More details on the reason behind this in the next patch. The point is that this cannot be done given the current structure of br_fdb_replay() in this situation: ip link set swp0 master br0 # br0 inherits its MAC address from swp0 ip link set swp1 master br0 What is desirable is that when swp1 joins the bridge, br_fdb_replay() also notifies swp1 of br0's MAC address, but this won't in fact happen because the MAC address of br0 does not have fdb->dst == NULL (it doesn't point towards the bridge), but it has fdb->dst == swp0. So our current logic makes it impossible for that address to be replayed. But if we dump the entire FDB instead of just the entries with fdb->dst == swp1 and fdb->dst == NULL, then the inherited MAC address of br0 will be replayed too, which is what we need. A natural question arises: say there is an FDB entry to be replayed, like a MAC address dynamically learned on a foreign interface that belongs to a bridge where no switchdev port has joined yet. If 10 switchdev ports belonging to the same driver join this bridge, one by one, won't every port get notified 10 times of the foreign FDB entry, amounting to a total of 100 notifications for this FDB entry in the switchdev driver? Well, yes, but this is where the "void *ctx" argument for br_fdb_replay is useful: every port of the switchdev driver is notified whenever any other port requests an FDB replay, but because the replay was initiated by a different port, its context is different from the initiating port's context, so it ignores those replays. So the foreign FDB entry will be installed only 10 times, once per port. This is done so that the following 4 code paths are always well balanced: (a) addition of foreign FDB entry is replayed when port joins bridge (b) deletion of foreign FDB entry is replayed when port leaves bridge (c) addition of foreign FDB entry is notified to all ports currently in bridge (c) deletion of foreign FDB entry is notified to all ports currently in bridge Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/bridge/br_fdb.c | 23 +++++++---------------- net/bridge/br_private.h | 4 ++-- net/bridge/br_switchdev.c | 14 ++------------ 3 files changed, 11 insertions(+), 30 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 5b345bb720785..be75889ceeba5 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -732,11 +732,12 @@ static inline size_t fdb_nlmsg_size(void) + nla_total_size(sizeof(u8)); /* NFEA_ACTIVITY_NOTIFY */ } -static int br_fdb_replay_one(struct notifier_block *nb, +static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, const struct net_bridge_fdb_entry *fdb, - struct net_device *dev, unsigned long action, - const void *ctx) + unsigned long action, const void *ctx) { + const struct net_bridge_port *p = READ_ONCE(fdb->dst); + struct net_device *dev = p ? p->dev : br->dev; struct switchdev_notifier_fdb_info item; int err; @@ -752,8 +753,8 @@ static int br_fdb_replay_one(struct notifier_block *nb, return notifier_to_errno(err); } -int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb) +int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding, + struct notifier_block *nb) { struct net_bridge_fdb_entry *fdb; struct net_bridge *br; @@ -766,9 +767,6 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, if (!netif_is_bridge_master(br_dev)) return -EINVAL; - if (!netif_is_bridge_port(dev) && !netif_is_bridge_master(dev)) - return -EINVAL; - br = netdev_priv(br_dev); if (adding) @@ -779,14 +777,7 @@ int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, rcu_read_lock(); hlist_for_each_entry_rcu(fdb, &br->fdb_list, fdb_node) { - const struct net_bridge_port *dst = READ_ONCE(fdb->dst); - struct net_device *dst_dev; - - dst_dev = dst ? dst->dev : br->dev; - if (dst_dev && dst_dev != dev) - continue; - - err = br_fdb_replay_one(nb, fdb, dst_dev, action, ctx); + err = br_fdb_replay_one(br, nb, fdb, action, ctx); if (err) break; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f2d34ea1ea372..c939631428b97 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -777,8 +777,8 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, bool swdev_notify); void br_fdb_offloaded_set(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid, bool offloaded); -int br_fdb_replay(const struct net_device *br_dev, const struct net_device *dev, - const void *ctx, bool adding, struct notifier_block *nb); +int br_fdb_replay(const struct net_device *br_dev, const void *ctx, bool adding, + struct notifier_block *nb); /* br_forward.c */ enum br_pkt_type { diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 9cf9ab320c489..8bc3c7fc415f7 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -287,13 +287,7 @@ static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx, if (err && err != -EOPNOTSUPP) return err; - /* Forwarding and termination FDB entries on the port */ - err = br_fdb_replay(br_dev, dev, ctx, true, atomic_nb); - if (err && err != -EOPNOTSUPP) - return err; - - /* Termination FDB entries on the bridge itself */ - err = br_fdb_replay(br_dev, br_dev, ctx, true, atomic_nb); + err = br_fdb_replay(br_dev, ctx, true, atomic_nb); if (err && err != -EOPNOTSUPP) return err; @@ -312,11 +306,7 @@ static void nbp_switchdev_unsync_objs(struct net_bridge_port *p, br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL); - /* Forwarding and termination FDB entries on the port */ - br_fdb_replay(br_dev, dev, ctx, false, atomic_nb); - - /* Termination FDB entries on the bridge itself */ - br_fdb_replay(br_dev, br_dev, ctx, false, atomic_nb); + br_fdb_replay(br_dev, ctx, false, atomic_nb); } /* Let the bridge know that this port is offloaded, so that it can assign a From 52e4bec15546d58f2a14dc74a6b8be128aa7db0a Mon Sep 17 00:00:00 2001 From: Vladimir Oltean <vladimir.oltean@nxp.com> Date: Wed, 28 Jul 2021 21:27:48 +0300 Subject: [PATCH 2/2] net: bridge: switchdev: treat local FDBs the same as entries towards the bridge Currently the following script: 1. ip link add br0 type bridge vlan_filtering 1 && ip link set br0 up 2. ip link set swp2 up && ip link set swp2 master br0 3. ip link set swp3 up && ip link set swp3 master br0 4. ip link set swp4 up && ip link set swp4 master br0 5. bridge vlan del dev swp2 vid 1 6. bridge vlan del dev swp3 vid 1 7. ip link set swp4 nomaster 8. ip link set swp3 nomaster produces the following output: [ 641.010738] sja1105 spi0.1: port 2 failed to delete 00:1f:7b:63:02:48 vid 1 from fdb: -2 [ swp2, swp3 and br0 all have the same MAC address, the one listed above ] In short, this happens because the number of FDB entry additions notified to switchdev is unbalanced with the number of deletions. At step 1, the bridge has a random MAC address. At step 2, the br_fdb_replay of swp2 receives this initial MAC address. Then the bridge inherits the MAC address of swp2 via br_fdb_change_mac_address(), and it notifies switchdev (only swp2 at this point) of the deletion of the random MAC address and the addition of 00:1f:7b:63:02:48 as a local FDB entry with fdb->dst == swp2, in VLANs 0 and the default_pvid (1). During step 7: del_nbp -> br_fdb_delete_by_port(br, p, vid=0, do_all=1); -> fdb_delete_local(br, p, f); br_fdb_delete_by_port() deletes all entries towards the ports, regardless of vid, because do_all is 1. fdb_delete_local() has logic to migrate local FDB entries deleted from one port to another port which shares the same MAC address and is in the same VLAN, or to the bridge device itself. This migration happens without notifying switchdev of the deletion on the old port and the addition on the new one, just fdb->dst is changed and the added_by_user flag is cleared. In the example above, the del_nbp(swp4) causes the "addr 00:1f:7b:63:02:48 vid 1" local FDB entry with fdb->dst == swp4 that existed up until then to be migrated directly towards the bridge (fdb->dst == NULL). This is because it cannot be migrated to any of the other ports (swp2 and swp3 are not in VLAN 1). After the migration to br0 takes place, swp4 requests a deletion replay of all FDB entries. Since the "addr 00:1f:7b:63:02:48 vid 1" entry now point towards the bridge, a deletion of it is replayed. There was just a prior addition of this address, so the switchdev driver deletes this entry. Then, the del_nbp(swp3) at step 8 triggers another br_fdb_replay, and switchdev is notified again to delete "addr 00:1f:7b:63:02:48 vid 1". But it can't because it no longer has it, so it returns -ENOENT. There are other possibilities to trigger this issue, but this is by far the simplest to explain. To fix this, we must avoid the situation where the addition of an FDB entry is notified to switchdev as a local entry on a port, and the deletion is notified on the bridge itself. Considering that the 2 types of FDB entries are completely equivalent and we cannot have the same MAC address as a local entry on 2 bridge ports, or on a bridge port and pointing towards the bridge at the same time, it makes sense to hide away from switchdev completely the fact that a local FDB entry is associated with a given bridge port at all. Just say that it points towards the bridge, it should make no difference whatsoever to the switchdev driver and should even lead to a simpler overall implementation, will less cases to handle. This also avoids any modification at all to the core bridge driver, just what is reported to switchdev changes. With the local/permanent entries on bridge ports being already reported to user space, it is hard to believe that the bridge behavior can change in any backwards-incompatible way such as making all local FDB entries point towards the bridge. Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/bridge/br_fdb.c | 3 +-- net/bridge/br_switchdev.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index be75889ceeba5..4ff8c67ac88f5 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -737,7 +737,6 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, unsigned long action, const void *ctx) { const struct net_bridge_port *p = READ_ONCE(fdb->dst); - struct net_device *dev = p ? p->dev : br->dev; struct switchdev_notifier_fdb_info item; int err; @@ -746,7 +745,7 @@ static int br_fdb_replay_one(struct net_bridge *br, struct notifier_block *nb, item.added_by_user = test_bit(BR_FDB_ADDED_BY_USER, &fdb->flags); item.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags); item.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags); - item.info.dev = dev; + item.info.dev = item.is_local ? br->dev : p->dev; item.info.ctx = ctx; err = nb->notifier_call(nb, action, &item); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 8bc3c7fc415f7..023de0e958f12 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -127,7 +127,6 @@ br_switchdev_fdb_notify(struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, int type) { const struct net_bridge_port *dst = READ_ONCE(fdb->dst); - struct net_device *dev = dst ? dst->dev : br->dev; struct switchdev_notifier_fdb_info info = { .addr = fdb->key.addr.addr, .vid = fdb->key.vlan_id, @@ -135,6 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br, .is_local = test_bit(BR_FDB_LOCAL, &fdb->flags), .offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags), }; + struct net_device *dev = info.is_local ? br->dev : dst->dev; switch (type) { case RTM_DELNEIGH: