Skip to content

Commit

Permalink
net/mlx4_core: Enable device recovery flow with SRIOV
Browse files Browse the repository at this point in the history
In SRIOV, both the PF and the VF may attempt device recovery whenever they
assume that the device is not functioning.  When the PF driver resets the
device, the VF should detect this and attempt to reinitialize itself.

The VF must be able to reset itself under all circumstances, even
if the PF is not responsive.

The VF shall reset itself in the following cases:

1. Commands are not processed within a reasonable time over the communication channel.
The handling takes the device state into account and returns the correct code for
each command, as is done in native mode; this is implemented in the next patch.

2. The VF driver receives an internal error event reported by the PF on the
communication channel. This occurs when the PF driver resets the device or
when the VF is out of sync with the PF.

Add 'VF reset' capability, which allows the VF to reinitialize itself even when the
PF is not responsive.

As the PF and the VF may run their reset flows simultaneously, several cases
are handled:
- Prevent freeing VF resources upon FLR, when PF is in its unloading stage.
- Prevent PF getting VF commands before it has finished initializing its resources.
- Upon VF startup, check that comm-channel is online before sending
  commands to the PF and getting timed-out.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Yishai Hadas authored and David S. Miller committed Jan 25, 2015
1 parent 2ba5fbd commit 55ad359
Show file tree
Hide file tree
Showing 8 changed files with 292 additions and 70 deletions.
120 changes: 103 additions & 17 deletions drivers/net/ethernet/mellanox/mlx4/catas.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,7 @@ enum {
int mlx4_internal_err_reset = 1;
module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644);
MODULE_PARM_DESC(internal_err_reset,
"Reset device on internal errors if non-zero"
" (default 1, in SRIOV mode default is 0)");
"Reset device on internal errors if non-zero (default 1)");

static int read_vendor_id(struct mlx4_dev *dev)
{
Expand All @@ -71,6 +70,9 @@ static int mlx4_reset_master(struct mlx4_dev *dev)
{
int err = 0;

if (mlx4_is_master(dev))
mlx4_report_internal_err_comm_event(dev);

if (!pci_channel_offline(dev->persist->pdev)) {
err = read_vendor_id(dev);
/* If PCI can't be accessed to read vendor ID we assume that its
Expand All @@ -87,6 +89,81 @@ static int mlx4_reset_master(struct mlx4_dev *dev)
return err;
}

static int mlx4_reset_slave(struct mlx4_dev *dev)
{
#define COM_CHAN_RST_REQ_OFFSET 0x10
#define COM_CHAN_RST_ACK_OFFSET 0x08

u32 comm_flags;
u32 rst_req;
u32 rst_ack;
unsigned long end;
struct mlx4_priv *priv = mlx4_priv(dev);

if (pci_channel_offline(dev->persist->pdev))
return 0;

comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
MLX4_COMM_CHAN_FLAGS));
if (comm_flags == 0xffffffff) {
mlx4_err(dev, "VF reset is not needed\n");
return 0;
}

if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) {
mlx4_err(dev, "VF reset is not supported\n");
return -EOPNOTSUPP;
}

rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
COM_CHAN_RST_REQ_OFFSET;
rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
COM_CHAN_RST_ACK_OFFSET;
if (rst_req != rst_ack) {
mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n");
return -EIO;
}

rst_req ^= 1;
mlx4_warn(dev, "VF is sending reset request to Firmware\n");
comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET;
__raw_writel((__force u32)cpu_to_be32(comm_flags),
(__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS);
/* Make sure that our comm channel write doesn't
* get mixed in with writes from another CPU.
*/
mmiowb();

end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies;
while (time_before(jiffies, end)) {
comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm +
MLX4_COMM_CHAN_FLAGS));
rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >>
COM_CHAN_RST_ACK_OFFSET;

/* Reading rst_req again since the communication channel can
* be reset at any time by the PF and all its bits will be
* set to zero.
*/
rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >>
COM_CHAN_RST_REQ_OFFSET;

if (rst_ack == rst_req) {
mlx4_warn(dev, "VF Reset succeed\n");
return 0;
}
cond_resched();
}
mlx4_err(dev, "Fail to send reset over the communication channel\n");
return -ETIMEDOUT;
}

/*
 * mlx4_comm_internal_err - test a slave_read word for the internal error event.
 * @slave_read: value to examine.
 *
 * Return: 1 when every bit of COMM_CHAN_EVENT_INTERNAL_ERR is set in
 * @slave_read, 0 otherwise.
 */
static int mlx4_comm_internal_err(u32 slave_read)
{
	const u32 err_mask = (u32)COMM_CHAN_EVENT_INTERNAL_ERR;

	return (slave_read & err_mask) == err_mask;
}

void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)
{
int err;
Expand All @@ -101,7 +178,10 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist)

dev = persist->dev;
mlx4_err(dev, "device is going to be reset\n");
err = mlx4_reset_master(dev);
if (mlx4_is_slave(dev))
err = mlx4_reset_slave(dev);
else
err = mlx4_reset_master(dev);
BUG_ON(err != 0);

dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR;
Expand Down Expand Up @@ -148,8 +228,15 @@ static void poll_catas(unsigned long dev_ptr)
{
struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr;
struct mlx4_priv *priv = mlx4_priv(dev);

if (readl(priv->catas_err.map)) {
u32 slave_read;

if (mlx4_is_slave(dev)) {
slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
if (mlx4_comm_internal_err(slave_read)) {
mlx4_warn(dev, "Internal error detected on the communication channel\n");
goto internal_err;
}
} else if (readl(priv->catas_err.map)) {
dump_err_buf(dev);
goto internal_err;
}
Expand Down Expand Up @@ -182,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
phys_addr_t addr;

/*If we are in SRIOV the default of the module param must be 0*/
if (mlx4_is_mfunc(dev))
mlx4_internal_err_reset = 0;

INIT_LIST_HEAD(&priv->catas_err.list);
init_timer(&priv->catas_err.timer);
priv->catas_err.map = NULL;

addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) +
priv->fw.catas_offset;

priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
(unsigned long long) addr);
return;
if (!mlx4_is_slave(dev)) {
addr = pci_resource_start(dev->persist->pdev,
priv->fw.catas_bar) +
priv->fw.catas_offset;

priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4);
if (!priv->catas_err.map) {
mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n",
(unsigned long long)addr);
return;
}
}

priv->catas_err.timer.data = (unsigned long) dev;
Expand Down
77 changes: 58 additions & 19 deletions drivers/net/ethernet/mellanox/mlx4/cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <linux/mlx4/device.h>
#include <linux/semaphore.h>
#include <rdma/ib_smi.h>
#include <linux/delay.h>

#include <asm/io.h>

Expand Down Expand Up @@ -729,7 +730,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
EXPORT_SYMBOL_GPL(__mlx4_cmd);


static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev)
{
return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL,
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
Expand Down Expand Up @@ -1945,8 +1946,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,
break;
case MLX4_COMM_CMD_VHCR_POST:
if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) &&
(slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST))
(slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) {
mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n",
slave, cmd, slave_state[slave].last_cmd);
goto reset_slave;
}

mutex_lock(&priv->cmd.slave_cmd_mutex);
if (mlx4_master_process_vhcr(dev, slave, NULL)) {
Expand Down Expand Up @@ -1980,7 +1984,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd,

reset_slave:
/* cleanup any slave resources */
mlx4_delete_all_resources_for_slave(dev, slave);
if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP)
mlx4_delete_all_resources_for_slave(dev, slave);

if (cmd != MLX4_COMM_CMD_RESET) {
mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n",
slave, cmd);
/* Turn on internal error letting slave reset itself immediately,
* otherwise it might take until the command timeout has passed
*/
reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR);
}

spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
if (!slave_state[slave].is_slave_going_down)
slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET;
Expand Down Expand Up @@ -2056,17 +2071,28 @@ void mlx4_master_comm_channel(struct work_struct *work)
static int sync_toggles(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int wr_toggle;
int rd_toggle;
u32 wr_toggle;
u32 rd_toggle;
unsigned long end;

wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
end = jiffies + msecs_to_jiffies(5000);
wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write));
if (wr_toggle == 0xffffffff)
end = jiffies + msecs_to_jiffies(30000);
else
end = jiffies + msecs_to_jiffies(5000);

while (time_before(jiffies, end)) {
rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
if (rd_toggle == wr_toggle) {
priv->cmd.comm_toggle = rd_toggle;
rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read));
if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) {
/* PCI might be offline */
msleep(100);
wr_toggle = swab32(readl(&priv->mfunc.comm->
slave_write));
continue;
}

if (rd_toggle >> 31 == wr_toggle >> 31) {
priv->cmd.comm_toggle = rd_toggle >> 31;
return 0;
}

Expand Down Expand Up @@ -2172,13 +2198,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
if (mlx4_init_resource_tracker(dev))
goto err_thread;

err = mlx4_ARM_COMM_CHANNEL(dev);
if (err) {
mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
err);
goto err_resource;
}

} else {
err = sync_toggles(dev);
if (err) {
Expand All @@ -2188,8 +2207,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
}
return 0;

err_resource:
mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL);
err_thread:
flush_workqueue(priv->mfunc.master.comm_wq);
destroy_workqueue(priv->mfunc.master.comm_wq);
Expand Down Expand Up @@ -2266,6 +2283,27 @@ int mlx4_cmd_init(struct mlx4_dev *dev)
return -ENOMEM;
}

void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
int slave;
u32 slave_read;

/* Report an internal error event to all
* communication channels.
*/
for (slave = 0; slave < dev->num_slaves; slave++) {
slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read));
slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR;
__raw_writel((__force u32)cpu_to_be32(slave_read),
&priv->mfunc.comm[slave].slave_read);
/* Make sure that our comm channel write doesn't
* get mixed in with writes from another CPU.
*/
mmiowb();
}
}

void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
Expand All @@ -2281,6 +2319,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
kfree(priv->mfunc.master.slave_state);
kfree(priv->mfunc.master.vf_admin);
kfree(priv->mfunc.master.vf_oper);
dev->num_slaves = 0;
}

iounmap(priv->mfunc.comm);
Expand Down
10 changes: 8 additions & 2 deletions drivers/net/ethernet/mellanox/mlx4/eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work)
if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) {
mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n",
i);

mlx4_delete_all_resources_for_slave(dev, i);
/* In case of 'Reset flow', an FLR can be generated for
* a slave before mlx4_load_one is done.
* Make sure the interface is up before trying to delete
* slave resources which weren't allocated yet.
*/
if (dev->persist->interface_state &
MLX4_INTERFACE_STATE_UP)
mlx4_delete_all_resources_for_slave(dev, i);
/*return the slave to running mode*/
spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags);
slave_state[i].last_cmd = MLX4_COMM_CMD_RESET;
Expand Down
6 changes: 2 additions & 4 deletions drivers/net/ethernet/mellanox/mlx4/intf.c
Original file line number Diff line number Diff line change
Expand Up @@ -144,8 +144,7 @@ int mlx4_register_device(struct mlx4_dev *dev)
mlx4_add_device(intf, priv);

mutex_unlock(&intf_mutex);
if (!mlx4_is_slave(dev))
mlx4_start_catas_poll(dev);
mlx4_start_catas_poll(dev);

return 0;
}
Expand All @@ -155,8 +154,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev)
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_interface *intf;

if (!mlx4_is_slave(dev))
mlx4_stop_catas_poll(dev);
mlx4_stop_catas_poll(dev);
mutex_lock(&intf_mutex);

list_for_each_entry(intf, &intf_list, list)
Expand Down
Loading

0 comments on commit 55ad359

Please sign in to comment.