From f307668bfcb7e83b6f62bda6a703e09613a00bd0 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 27 Mar 2017 11:37:30 -0700 Subject: [PATCH 1/5] bonding: split bond_set_slave_link_state into two parts Split the function into two (a) propose (b) commit phase without changing the semantics for the original API. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- include/net/bonding.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/include/net/bonding.h b/include/net/bonding.h index 3c857778a6ca6..fb2dd97857c4f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -153,7 +153,8 @@ struct slave { unsigned long last_link_up; unsigned long last_rx; unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS]; - s8 link; /* one of BOND_LINK_XXXX */ + s8 link; /* one of BOND_LINK_XXXX */ + s8 link_new_state; /* one of BOND_LINK_XXXX */ s8 new_link; u8 backup:1, /* indicates backup slave. Value corresponds with BOND_STATE_ACTIVE and BOND_STATE_BACKUP */ @@ -504,13 +505,17 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } -static inline void bond_set_slave_link_state(struct slave *slave, int state, - bool notify) +static inline void bond_propose_link_state(struct slave *slave, int state) +{ + slave->link_new_state = state; +} + +static inline void bond_commit_link_state(struct slave *slave, bool notify) { - if (slave->link == state) + if (slave->link == slave->link_new_state) return; - slave->link = state; + slave->link = slave->link_new_state; if (notify) { bond_queue_slave_event(slave); bond_lower_state_changed(slave); @@ -523,6 +528,13 @@ static inline void bond_set_slave_link_state(struct slave *slave, int state, } } +static inline void bond_set_slave_link_state(struct slave *slave, int state, + bool notify) +{ + bond_propose_link_state(slave, state); + bond_commit_link_state(slave, notify); +} + static inline void bond_slave_link_notify(struct bonding *bond) { struct list_head *iter; From de77ecd4ef02ca783f7762e04e92b3d0964be66b Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 27 Mar 2017 11:37:33 -0700 Subject: [PATCH 2/5] bonding: improve link-status update in mii-monitoring The primary issue is that mii-inspect phase updates link-state and expects changes to be committed during the mii-commit phase. After the inspect phase if it fails to acquire rtnl-mutex, the commit phase (bond_mii_commit) doesn't get to run. This partially updated state stays and makes the internal-state inconsistent. e.g. setup bond0 => slaves: eth1, eth2 eth1 goes DOWN -> UP mii_monitor() mii-inspect() bond_set_slave_link_state(eth1, UP, DontNotify) rtnl_trylock() <- fails! Next mii-monitor round eth1: No change mii_monitor() mii-inspect() eth1->link == current-status (ethtool_ops->get_link) no-change-detected End result: eth1: Link = BOND_LINK_UP Speed = 0xfffff [SpeedUnknown] Duplex = 0xff [DuplexUnknown] This doesn't always happen but for some unlucky machines in a large set of machines it creates problems. The fix for this is to avoid making changes during inspect phase and postpone them until acquiring the rtnl-mutex / invoking commit phase. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ba934020dfaa0..85999e479916f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2033,8 +2033,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_FAIL, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_FAIL); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -2049,8 +2048,7 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - bond_set_slave_link_state(slave, BOND_LINK_UP, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -2072,8 +2070,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - bond_set_slave_link_state(slave, BOND_LINK_BACK, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_BACK); slave->delay = bond->params.updelay; if (slave->delay) { @@ -2086,9 +2083,7 @@ static int bond_miimon_inspect(struct bonding *bond) /*FALLTHRU*/ case BOND_LINK_BACK: if (!link_state) { - bond_set_slave_link_state(slave, - BOND_LINK_DOWN, - BOND_SLAVE_NOTIFY_LATER); + bond_propose_link_state(slave, BOND_LINK_DOWN); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -2225,6 +2220,8 @@ static void bond_mii_monitor(struct work_struct *work) mii_work.work); bool should_notify_peers = false; unsigned long delay; + struct slave *slave; + struct list_head *iter; delay = msecs_to_jiffies(bond->params.miimon); @@ -2245,6 +2242,9 @@ static void bond_mii_monitor(struct work_struct *work) goto re_arm; } + bond_for_each_slave(bond, slave, iter) { + bond_commit_link_state(slave, BOND_SLAVE_NOTIFY_LATER); + } bond_miimon_commit(bond); rtnl_unlock(); /* might sleep, hold no other locks */ From c4adfc822bf5d8e97660b6114b5a8892530ce8cb Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 27 Mar 2017 11:37:35 -0700 Subject: [PATCH 3/5] bonding: make speed, duplex setting consistent with link state bond_update_speed_duplex() retrieves speed and duplex settings. There is a possibility of failure in retrieving these values but caller has to assume it's always successful. This leads to having inconsistent slave link settings. If these (speed, duplex) values cannot be retrieved, then keeping the link UP causes problems. The updated bond_update_speed_duplex() returns 0 on success if it retrieves sane values for speed and duplex. On failure it returns 1 and marks the link down. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 85999e479916f..ad317bb631936 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -365,9 +365,10 @@ int bond_set_carrier(struct bonding *bond) /* Get link speed and duplex from the slave's base driver * using ethtool. If for some reason the call fails or the * values are invalid, set speed and duplex to -1, - * and return. + * and return. Return 1 if speed or duplex settings are + * UNKNOWN; 0 otherwise. */ -static void bond_update_speed_duplex(struct slave *slave) +static int bond_update_speed_duplex(struct slave *slave) { struct net_device *slave_dev = slave->dev; struct ethtool_link_ksettings ecmd; @@ -377,24 +378,27 @@ static void bond_update_speed_duplex(struct slave *slave) slave->duplex = DUPLEX_UNKNOWN; res = __ethtool_get_link_ksettings(slave_dev, &ecmd); - if (res < 0) - return; - - if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1)) - return; - + if (res < 0) { + slave->link = BOND_LINK_DOWN; + return 1; + } + if (ecmd.base.speed == 0 || ecmd.base.speed == ((__u32)-1)) { + slave->link = BOND_LINK_DOWN; + return 1; + } switch (ecmd.base.duplex) { case DUPLEX_FULL: case DUPLEX_HALF: break; default: - return; + slave->link = BOND_LINK_DOWN; + return 1; } slave->speed = ecmd.base.speed; slave->duplex = ecmd.base.duplex; - return; + return 0; } const char *bond_slave_link_status(s8 link) From b5bf0f5b16b9c316c34df9f31d4be8729eb86845 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 27 Mar 2017 11:37:37 -0700 Subject: [PATCH 4/5] bonding: correctly update link status during mii-commit phase bond_miimon_commit() marks the link UP after attempting to get the speed and duplex settings for the link. There is a possibility that bond_update_speed_duplex() could fail. This is another place where it could result into an inconsistent bonding link state. With this patch the link will be marked UP only if the speed and duplex values retrieved have sane values and processed further. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ad317bb631936..6cea964ab70ad 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2125,7 +2125,12 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - bond_update_speed_duplex(slave); + if (bond_update_speed_duplex(slave)) { + netdev_warn(bond->dev, + "failed to get link speed/duplex for %s\n", + slave->dev->name); + continue; + } bond_set_slave_link_state(slave, BOND_LINK_UP, BOND_SLAVE_NOTIFY_NOW); slave->last_link_up = jiffies; From e292dcae1648cd5b5c80031a154f594aa590667f Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Mon, 27 Mar 2017 11:37:40 -0700 Subject: [PATCH 5/5] bonding: avoid printing while holding a spinlock Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 508713b4e5338..c5fd4259da331 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -2446,9 +2446,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) spin_lock_bh(&slave->bond->mode_lock); ad_update_actor_keys(port, false); + spin_unlock_bh(&slave->bond->mode_lock); netdev_dbg(slave->bond->dev, "Port %d slave %s changed speed/duplex\n", port->actor_port_number, slave->dev->name); - spin_unlock_bh(&slave->bond->mode_lock); } /** @@ -2492,12 +2492,12 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) agg = __get_first_agg(port); ad_agg_selection_logic(agg, &dummy); + spin_unlock_bh(&slave->bond->mode_lock); + netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n", port->actor_port_number, link == BOND_LINK_UP ? "UP" : "DOWN"); - spin_unlock_bh(&slave->bond->mode_lock); - /* RTNL is held and mode_lock is released so it's safe * to update slave_array here. */