Skip to content

Commit

Permalink
bnx2x: Revising locking scheme for MAC configuration
Browse files Browse the repository at this point in the history
On very rare occasions, repeated load/unload stress test in the presence of
our storage driver (bnx2i/bnx2fc) causes a kernel panic in bnx2x code
(NULL pointer dereference). Stack traces indicate the issue happens during MAC
configuration; thorough code review showed that indeed several races exist
in which one thread can iterate over the list of configured MACs while another
deletes entries from the same list.

This patch adds a variant of the single-writer/multiple-reader lock mechanism -
it utilizes an already existing bottom-half lock, using it so that whenever
a writer is unable to continue due to the existence of another writer/reader,
it pends its request for later execution.
The writer / last reader will check for the existence of such requests and
perform them on behalf of the original initiator.
This prevents the writer from having to sleep while waiting for the lock
to be accessible, which might cause deadlocks given the locks already
held by the writer.

Another result of this patch is that setting of the Rx mode is now done in
sleepable context. Setting of the Rx mode is invoked under a bottom-half lock,
which was always nontrivial for the bnx2x driver, as the HW/FW configuration
requires waiting for completions.
Since sleeping was impossible (due to the non-sleepable context), various
mechanisms were used to prevent the calling thread from sleeping, but in truth,
when the calling thread (i.e., the one calling ndo_set_rx_mode()) returned, the
Rx mode was still not set in HW/FW.

bnx2x_set_rx_mode() will now explicitly schedule the Rx changes to be
configured by the sp_rtnl_task, which holds the RTNL lock and runs in
sleepable context.

Signed-off-by: Yuval Mintz <yuvalmin@broadcom.com>
Signed-off-by: Ariel Elior <ariele@broadcom.com>
Signed-off-by: Eilon Greenstein <eilong@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Yuval Mintz authored and David S. Miller committed Aug 1, 2013
1 parent 4beac02 commit 8b09be5
Show file tree
Hide file tree
Showing 7 changed files with 340 additions and 60 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
Original file line number Diff line number Diff line change
Expand Up @@ -1331,7 +1331,7 @@ enum {
BNX2X_SP_RTNL_ENABLE_SRIOV,
BNX2X_SP_RTNL_VFPF_MCAST,
BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN,
BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
BNX2X_SP_RTNL_RX_MODE,
BNX2X_SP_RTNL_HYPERVISOR_VLAN,
};

Expand Down
16 changes: 9 additions & 7 deletions drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
Original file line number Diff line number Diff line change
Expand Up @@ -2060,7 +2060,11 @@ void bnx2x_squeeze_objects(struct bnx2x *bp)
rparam.mcast_obj = &bp->mcast_obj;
__set_bit(RAMROD_DRV_CLR_ONLY, &rparam.ramrod_flags);

/* Add a DEL command... */
/* Add a DEL command... - Since we're doing a driver cleanup only,
* we take a lock surrounding both the initial send and the CONTs,
* as we don't want a true completion to disrupt us in the middle.
*/
netif_addr_lock_bh(bp->dev);
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL);
if (rc < 0)
BNX2X_ERR("Failed to add a new DEL command to a multi-cast object: %d\n",
Expand All @@ -2072,11 +2076,13 @@ void bnx2x_squeeze_objects(struct bnx2x *bp)
if (rc < 0) {
BNX2X_ERR("Failed to clean multi-cast object: %d\n",
rc);
netif_addr_unlock_bh(bp->dev);
return;
}

rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT);
}
netif_addr_unlock_bh(bp->dev);
}

#ifndef BNX2X_STOP_ON_ERROR
Expand Down Expand Up @@ -2432,9 +2438,7 @@ int bnx2x_load_cnic(struct bnx2x *bp)
}

/* Initialize Rx filter. */
netif_addr_lock_bh(bp->dev);
bnx2x_set_rx_mode(bp->dev);
netif_addr_unlock_bh(bp->dev);
bnx2x_set_rx_mode_inner(bp);

/* re-read iscsi info */
bnx2x_get_iscsi_info(bp);
Expand Down Expand Up @@ -2704,9 +2708,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
/* Start fast path */

/* Initialize Rx filter. */
netif_addr_lock_bh(bp->dev);
bnx2x_set_rx_mode(bp->dev);
netif_addr_unlock_bh(bp->dev);
bnx2x_set_rx_mode_inner(bp);

/* Start the Tx */
switch (load_mode) {
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,7 @@ int bnx2x_set_eth_mac(struct bnx2x *bp, bool set);
* netif_addr_lock_bh()
*/
void bnx2x_set_rx_mode(struct net_device *dev);
void bnx2x_set_rx_mode_inner(struct bnx2x *bp);

/**
* bnx2x_set_storm_rx_mode - configure MAC filtering rules in a FW.
Expand Down
56 changes: 33 additions & 23 deletions drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -9628,11 +9628,9 @@ static void bnx2x_sp_rtnl_task(struct work_struct *work)
}
}

if (test_and_clear_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
&bp->sp_rtnl_state)) {
DP(BNX2X_MSG_SP,
"sending set storm rx mode vf pf channel message from rtnl sp-task\n");
bnx2x_vfpf_storm_rx_mode(bp);
if (test_and_clear_bit(BNX2X_SP_RTNL_RX_MODE, &bp->sp_rtnl_state)) {
DP(BNX2X_MSG_SP, "Handling Rx Mode setting\n");
bnx2x_set_rx_mode_inner(bp);
}

if (test_and_clear_bit(BNX2X_SP_RTNL_HYPERVISOR_VLAN,
Expand Down Expand Up @@ -11849,34 +11847,48 @@ static int bnx2x_set_mc_list(struct bnx2x *bp)
void bnx2x_set_rx_mode(struct net_device *dev)
{
struct bnx2x *bp = netdev_priv(dev);
u32 rx_mode = BNX2X_RX_MODE_NORMAL;

if (bp->state != BNX2X_STATE_OPEN) {
DP(NETIF_MSG_IFUP, "state is %x, returning\n", bp->state);
return;
} else {
/* Schedule an SP task to handle rest of change */
DP(NETIF_MSG_IFUP, "Scheduling an Rx mode change\n");
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_RX_MODE, &bp->sp_rtnl_state);
smp_mb__after_clear_bit();
schedule_delayed_work(&bp->sp_rtnl_task, 0);
}
}

void bnx2x_set_rx_mode_inner(struct bnx2x *bp)
{
u32 rx_mode = BNX2X_RX_MODE_NORMAL;

DP(NETIF_MSG_IFUP, "dev->flags = %x\n", bp->dev->flags);

if (dev->flags & IFF_PROMISC)
netif_addr_lock_bh(bp->dev);

if (bp->dev->flags & IFF_PROMISC) {
rx_mode = BNX2X_RX_MODE_PROMISC;
else if ((dev->flags & IFF_ALLMULTI) ||
((netdev_mc_count(dev) > BNX2X_MAX_MULTICAST) &&
CHIP_IS_E1(bp)))
} else if ((bp->dev->flags & IFF_ALLMULTI) ||
((netdev_mc_count(bp->dev) > BNX2X_MAX_MULTICAST) &&
CHIP_IS_E1(bp))) {
rx_mode = BNX2X_RX_MODE_ALLMULTI;
else {
} else {
if (IS_PF(bp)) {
/* some multicasts */
if (bnx2x_set_mc_list(bp) < 0)
rx_mode = BNX2X_RX_MODE_ALLMULTI;

/* release bh lock, as bnx2x_set_uc_list might sleep */
netif_addr_unlock_bh(bp->dev);
if (bnx2x_set_uc_list(bp) < 0)
rx_mode = BNX2X_RX_MODE_PROMISC;
netif_addr_lock_bh(bp->dev);
} else {
/* configuring mcast to a vf involves sleeping (when we
* wait for the pf's response). Since this function is
* called from non sleepable context we must schedule
* a work item for this purpose
* wait for the pf's response).
*/
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_VFPF_MCAST,
Expand All @@ -11894,22 +11906,20 @@ void bnx2x_set_rx_mode(struct net_device *dev)
/* Schedule the rx_mode command */
if (test_bit(BNX2X_FILTER_RX_MODE_PENDING, &bp->sp_state)) {
set_bit(BNX2X_FILTER_RX_MODE_SCHED, &bp->sp_state);
netif_addr_unlock_bh(bp->dev);
return;
}

if (IS_PF(bp)) {
bnx2x_set_storm_rx_mode(bp);
netif_addr_unlock_bh(bp->dev);
} else {
/* configuring rx mode to storms in a vf involves sleeping (when
* we wait for the pf's response). Since this function is
* called from non sleepable context we must schedule
* a work item for this purpose
/* VF will need to request the PF to make this change, and so
* the VF needs to release the bottom-half lock prior to the
* request (as it will likely require sleep on the VF side)
*/
smp_mb__before_clear_bit();
set_bit(BNX2X_SP_RTNL_VFPF_STORM_RX_MODE,
&bp->sp_rtnl_state);
smp_mb__after_clear_bit();
schedule_delayed_work(&bp->sp_rtnl_task, 0);
netif_addr_unlock_bh(bp->dev);
bnx2x_vfpf_storm_rx_mode(bp);
}
}

Expand Down
Loading

0 comments on commit 8b09be5

Please sign in to comment.