Skip to content

Commit

Permalink
Merge branch 'bonding-fix-bond-recovery-in-mode-2'
Browse files Browse the repository at this point in the history
Jonathan Toppins says:

====================
bonding: fix bond recovery in mode 2

When a bond is configured with a non-zero updelay and in mode 2 the bond
never recovers after all slaves lose link. The first patch adds
selftests that demonstrate the issue and the second patch fixes the
issue by ignoring the updelay when there are no usable slaves.
====================

Link: https://lore.kernel.org/r/cover.1669147951.git.jtoppins@redhat.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Nov 24, 2022
2 parents 64a8f8f + f8a65ab commit 170d977
Show file tree
Hide file tree
Showing 6 changed files with 210 additions and 3 deletions.
11 changes: 10 additions & 1 deletion drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2529,7 +2529,16 @@ static int bond_miimon_inspect(struct bonding *bond)
struct slave *slave;
bool ignore_updelay;

ignore_updelay = !rcu_dereference(bond->curr_active_slave);
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) {
ignore_updelay = !rcu_dereference(bond->curr_active_slave);
} else {
struct bond_up_slave *usable_slaves;

usable_slaves = rcu_dereference(bond->usable_slaves);

if (usable_slaves && usable_slaves->count == 0)
ignore_updelay = true;
}

bond_for_each_slave_rcu(bond, slave, iter) {
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
Expand Down
4 changes: 3 additions & 1 deletion tools/testing/selftests/drivers/net/bonding/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ TEST_PROGS := \
bond-arp-interval-causes-panic.sh \
bond-break-lacpdu-tx.sh \
bond-lladdr-target.sh \
dev_addr_lists.sh
dev_addr_lists.sh \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh

TEST_FILES := \
lag_lib.sh \
Expand Down
106 changes: 106 additions & 0 deletions tools/testing/selftests/drivers/net/bonding/lag_lib.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

NAMESPACES=""

# Test that a link aggregation device (bonding, team) removes the hardware
# addresses that it adds on its underlying devices.
test_LAG_cleanup()
Expand Down Expand Up @@ -59,3 +61,107 @@ test_LAG_cleanup()

log_test "$driver cleanup mode $mode"
}

# Build a generic 2 node net namespace with 2 connections
# between the namespaces
#
# +-----------+ +-----------+
# | node1 | | node2 |
# | | | |
# | | | |
# | eth0 +-------+ eth0 |
# | | | |
# | eth1 +-------+ eth1 |
# | | | |
# +-----------+ +-----------+
lag_setup2x2()
{
local state=${1:-down}
local namespaces="lag_node1 lag_node2"

# create namespaces
for n in ${namespaces}; do
ip netns add ${n}
done

# wire up namespaces
ip link add name lag1 type veth peer name lag1-end
ip link set dev lag1 netns lag_node1 $state name eth0
ip link set dev lag1-end netns lag_node2 $state name eth0

ip link add name lag1 type veth peer name lag1-end
ip link set dev lag1 netns lag_node1 $state name eth1
ip link set dev lag1-end netns lag_node2 $state name eth1

NAMESPACES="${namespaces}"
}

# cleanup all lag related namespaces and remove the bonding module
lag_cleanup()
{
for n in ${NAMESPACES}; do
ip netns delete ${n} >/dev/null 2>&1 || true
done
modprobe -r bonding
}

SWITCH="lag_node1"
CLIENT="lag_node2"
CLIENTIP="172.20.2.1"
SWITCHIP="172.20.2.2"

lag_setup_network()
{
lag_setup2x2 "down"

# create switch
ip netns exec ${SWITCH} ip link add br0 up type bridge
ip netns exec ${SWITCH} ip link set eth0 master br0 up
ip netns exec ${SWITCH} ip link set eth1 master br0 up
ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0
}

lag_reset_network()
{
ip netns exec ${CLIENT} ip link del bond0
ip netns exec ${SWITCH} ip link set eth0 up
ip netns exec ${SWITCH} ip link set eth1 up
}

create_bond()
{
# create client
ip netns exec ${CLIENT} ip link set eth0 down
ip netns exec ${CLIENT} ip link set eth1 down

ip netns exec ${CLIENT} ip link add bond0 type bond $@
ip netns exec ${CLIENT} ip link set eth0 master bond0
ip netns exec ${CLIENT} ip link set eth1 master bond0
ip netns exec ${CLIENT} ip link set bond0 up
ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0
}

test_bond_recovery()
{
RET=0

create_bond $@

# verify connectivity
ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1
check_err $? "No connectivity"

# force the links of the bond down
ip netns exec ${SWITCH} ip link set eth0 down
sleep 2
ip netns exec ${SWITCH} ip link set eth0 up
ip netns exec ${SWITCH} ip link set eth1 down

# re-verify connectivity
ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1

local rc=$?
check_err $rc "Bond failed to recover"
log_test "$1 ($2) bond recovery"
lag_reset_network
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0

# Regression Test:
# When the bond is configured with down/updelay and the link state of
# slave members flaps if there are no remaining members up the bond
# should immediately select a member to bring up. (from bonding.txt
# section 13.1 paragraph 4)
#
# +-------------+ +-----------+
# | client | | switch |
# | | | |
# | +--------| link1 |-----+ |
# | | +-------+ | |
# | | | | | |
# | | +-------+ | |
# | | bond | link2 | Br0 | |
# +-------------+ +-----------+
# 172.20.2.1 172.20.2.2


REQUIRE_MZ=no
REQUIRE_JQ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
source "$lib_dir"/net_forwarding_lib.sh
source "$lib_dir"/lag_lib.sh

cleanup()
{
lag_cleanup
}

trap cleanup 0 1 2

lag_setup_network
test_bond_recovery mode 1 miimon 100 updelay 0
test_bond_recovery mode 1 miimon 100 updelay 200
test_bond_recovery mode 1 miimon 100 updelay 500
test_bond_recovery mode 1 miimon 100 updelay 1000
test_bond_recovery mode 1 miimon 100 updelay 2000
test_bond_recovery mode 1 miimon 100 updelay 5000
test_bond_recovery mode 1 miimon 100 updelay 10000

exit "$EXIT_STATUS"
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0

# Regression Test:
# When the bond is configured with down/updelay and the link state of
# slave members flaps if there are no remaining members up the bond
# should immediately select a member to bring up. (from bonding.txt
# section 13.1 paragraph 4)
#
# +-------------+ +-----------+
# | client | | switch |
# | | | |
# | +--------| link1 |-----+ |
# | | +-------+ | |
# | | | | | |
# | | +-------+ | |
# | | bond | link2 | Br0 | |
# +-------------+ +-----------+
# 172.20.2.1 172.20.2.2


REQUIRE_MZ=no
REQUIRE_JQ=no
NUM_NETIFS=0
lib_dir=$(dirname "$0")
source "$lib_dir"/net_forwarding_lib.sh
source "$lib_dir"/lag_lib.sh

cleanup()
{
lag_cleanup
}

trap cleanup 0 1 2

lag_setup_network
test_bond_recovery mode 2 miimon 100 updelay 0
test_bond_recovery mode 2 miimon 100 updelay 200
test_bond_recovery mode 2 miimon 100 updelay 500
test_bond_recovery mode 2 miimon 100 updelay 1000
test_bond_recovery mode 2 miimon 100 updelay 2000
test_bond_recovery mode 2 miimon 100 updelay 5000
test_bond_recovery mode 2 miimon 100 updelay 10000

exit "$EXIT_STATUS"
2 changes: 1 addition & 1 deletion tools/testing/selftests/drivers/net/bonding/settings
Original file line number Diff line number Diff line change
@@ -1 +1 @@
timeout=60
timeout=120

0 comments on commit 170d977

Please sign in to comment.