Skip to content

Commit

Permalink
bonding: add some slack to arp monitoring time limits
Browse files Browse the repository at this point in the history
Currently, all the time limits in the bonding ARP monitor are in
multiples of arp_interval -- the time interval at which the ARP
monitor is periodically scheduled.

With a fast network round-trip and a little scheduling latency
of the ARP monitor work, a limit of n*delta_in_ticks may
effectively mean (n-1)*delta_in_ticks.

This is fatal in case of n==1  (the link will stay down
forever) and makes the behaviour non-deterministic in all the
other cases.

Add a delta_in_ticks/2 time slack to all the time limits.

Signed-off-by: Jiri Bohac <jbohac@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Jiri Bohac authored and David S. Miller committed Aug 31, 2012
1 parent eb7e057 commit da210f5
Showing 1 changed file with 20 additions and 11 deletions.
31 changes: 20 additions & 11 deletions drivers/net/bonding/bond_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2811,12 +2811,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
arp_work.work);
struct slave *slave, *oldcurrent;
int do_failover = 0;
int delta_in_ticks;
int delta_in_ticks, extra_ticks;
int i;

read_lock(&bond->lock);

delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
extra_ticks = delta_in_ticks / 2;

if (bond->slave_cnt == 0)
goto re_arm;
Expand All @@ -2839,10 +2840,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
if (slave->link != BOND_LINK_UP) {
if (time_in_range(jiffies,
trans_start - delta_in_ticks,
trans_start + delta_in_ticks) &&
trans_start + delta_in_ticks + extra_ticks) &&
time_in_range(jiffies,
slave->dev->last_rx - delta_in_ticks,
slave->dev->last_rx + delta_in_ticks)) {
slave->dev->last_rx + delta_in_ticks + extra_ticks)) {

slave->link = BOND_LINK_UP;
bond_set_active_slave(slave);
Expand Down Expand Up @@ -2872,10 +2873,10 @@ void bond_loadbalance_arp_mon(struct work_struct *work)
*/
if (!time_in_range(jiffies,
trans_start - delta_in_ticks,
trans_start + 2 * delta_in_ticks) ||
trans_start + 2 * delta_in_ticks + extra_ticks) ||
!time_in_range(jiffies,
slave->dev->last_rx - delta_in_ticks,
slave->dev->last_rx + 2 * delta_in_ticks)) {
slave->dev->last_rx + 2 * delta_in_ticks + extra_ticks)) {

slave->link = BOND_LINK_DOWN;
bond_set_backup_slave(slave);
Expand Down Expand Up @@ -2933,14 +2934,22 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
struct slave *slave;
int i, commit = 0;
unsigned long trans_start;
int extra_ticks;

/* All the time comparisons below need some extra time. Otherwise, on
* fast networks the ARP probe/reply may arrive within the same jiffy
* as it was sent. Then, the next time the ARP monitor is run, one
* arp_interval will already have passed in the comparisons.
*/
extra_ticks = delta_in_ticks / 2;

bond_for_each_slave(bond, slave, i) {
slave->new_link = BOND_LINK_NOCHANGE;

if (slave->link != BOND_LINK_UP) {
if (time_in_range(jiffies,
slave_last_rx(bond, slave) - delta_in_ticks,
slave_last_rx(bond, slave) + delta_in_ticks)) {
slave_last_rx(bond, slave) + delta_in_ticks + extra_ticks)) {

slave->new_link = BOND_LINK_UP;
commit++;
Expand All @@ -2956,7 +2965,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
*/
if (time_in_range(jiffies,
slave->jiffies - delta_in_ticks,
slave->jiffies + 2 * delta_in_ticks))
slave->jiffies + 2 * delta_in_ticks + extra_ticks))
continue;

/*
Expand All @@ -2976,7 +2985,7 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
!bond->current_arp_slave &&
!time_in_range(jiffies,
slave_last_rx(bond, slave) - delta_in_ticks,
slave_last_rx(bond, slave) + 3 * delta_in_ticks)) {
slave_last_rx(bond, slave) + 3 * delta_in_ticks + extra_ticks)) {

slave->new_link = BOND_LINK_DOWN;
commit++;
Expand All @@ -2992,10 +3001,10 @@ static int bond_ab_arp_inspect(struct bonding *bond, int delta_in_ticks)
if (bond_is_active_slave(slave) &&
(!time_in_range(jiffies,
trans_start - delta_in_ticks,
trans_start + 2 * delta_in_ticks) ||
trans_start + 2 * delta_in_ticks + extra_ticks) ||
!time_in_range(jiffies,
slave_last_rx(bond, slave) - delta_in_ticks,
slave_last_rx(bond, slave) + 2 * delta_in_ticks))) {
slave_last_rx(bond, slave) + 2 * delta_in_ticks + extra_ticks))) {

slave->new_link = BOND_LINK_DOWN;
commit++;
Expand Down Expand Up @@ -3027,7 +3036,7 @@ static void bond_ab_arp_commit(struct bonding *bond, int delta_in_ticks)
if ((!bond->curr_active_slave &&
time_in_range(jiffies,
trans_start - delta_in_ticks,
trans_start + delta_in_ticks)) ||
trans_start + delta_in_ticks + delta_in_ticks / 2)) ||
bond->curr_active_slave != slave) {
slave->link = BOND_LINK_UP;
if (bond->current_arp_slave) {
Expand Down

0 comments on commit da210f5

Please sign in to comment.