diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0eb141c41416..a31e031afd87 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_vfs + 1) + if (slave_id >= dev->dev->persist->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 82a7dd87089b..c7619716c31d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->ib_dev = &dev->ib_dev; for (i = 0; - i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i < min(dev->dev->caps.sqp_demux, + (u16)(dev->dev->persist->num_vfs + 1)); i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev->dev, i); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 57ecc5b204f3..b4fa6f658800 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = dev->dev->pdev->device; + props->vendor_part_id = dev->dev->persist->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "MT%d\n", dev->dev->pdev->device); + return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, @@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) int i; if (mlx4_is_master(ibdev->dev)) { - for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (slave = 0; slave <= ibdev->dev->persist->num_vfs; + ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; @@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + i, j, dev->persist->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { - dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "Device struct alloc failed\n"); return NULL; } @@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; - ibdev->ib_dev.dma_device = &dev->pdev->dev; + ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "bit map alloc failed\n"); goto err_steer_qp_release; } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index c36ccbd9a644..e0d271782d0a 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device if (!mfrpl->ibfrpl.page_list) goto err_free; - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> + pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) @@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); - dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + dma_free_coherent(&dev->dev->persist->pdev->dev, size, + mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index cb4c66e723b5..d10c2b8a5dad 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) char base_name[9]; /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->pdev), max); + strlcpy(name, pci_name(dev->dev->persist->pdev), max); strncpy(base_name, name, 8); /*till xxxx:yy:*/ base_name[8] = '\0'; /* with no ARI only 3 last bits are used so when the fn is higher than 8 @@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return 0; - for (i = 0; i <= device->dev->num_vfs; ++i) + for (i = 0; i <= device->dev->persist->num_vfs; ++i) register_one_pkey_tree(device, i); return 0; @@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return; - for (slave = device->dev->num_vfs; slave >= 0; --slave) { + for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) { list_for_each_entry_safe(p, t, &device->pkeys.pkey_port_list[slave], entry) { diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 963dd7e6d547..a716c26e0d99 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -657,7 +658,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + dma_free_coherent(&dev->persist->pdev->dev, size, + buf->direct.buf, buf->direct.map); else { if (BITS_PER_LONG == 64 && buf->direct.buf) @@ -665,7 +667,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) set_bit(i, db->u.pgdir->bits[o]); if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); kfree(db->u.pgdir); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 9c656fe4983d..715de8affcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -40,16 +40,177 @@ enum { MLX4_CATAS_POLL_INTERVAL = 5 * HZ, }; -static DEFINE_SPINLOCK(catas_lock); -static LIST_HEAD(catas_list); -static struct work_struct catas_work; -static int internal_err_reset = 1; -module_param(internal_err_reset, int, 0644); +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, - "Reset device on internal errors if non-zero" - " (default 1, in SRIOV mode default is 0)"); + "Reset device on internal errors if non-zero (default 1)"); + +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. + */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +static int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!mlx4_internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + if (mlx4_is_slave(dev)) + err = mlx4_reset_slave(dev); + else + err = mlx4_reset_master(dev); + BUG_ON(err != 0); + + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mlx4_err(dev, "device was reset successfully\n"); + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + int err = 0; + + mlx4_enter_error_state(persist); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); +} static void dump_err_buf(struct mlx4_dev *dev) { @@ -67,58 +228,40 @@ static void poll_catas(unsigned long dev_ptr) { struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); + u32 slave_read; - if (readl(priv->catas_err.map)) { - /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->pdev)) - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); - else { - dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); - } + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; } - } else - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + } else if (readl(priv->catas_err.map)) { + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (mlx4_internal_err_reset) + queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } static void catas_reset(struct work_struct *work) { - struct mlx4_priv *priv, *tmppriv; - struct mlx4_dev *dev; + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); - LIST_HEAD(tlist); - int ret; - - spin_lock_irq(&catas_lock); - list_splice_init(&catas_list, &tlist); - spin_unlock_irq(&catas_lock); - - list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.pdev; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - continue; - - ret = mlx4_restart_one(priv->dev.pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else { - dev = pci_get_drvdata(pdev); - mlx4_dbg(dev, "Reset succeeded\n"); - } - } + mlx4_handle_error_state(persist); } void mlx4_start_catas_poll(struct mlx4_dev *dev) @@ -126,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); phys_addr_t addr; - /*If we are in SRIOV the default of the module param must be 0*/ - if (mlx4_is_mfunc(dev)) - internal_err_reset = 0; - INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + - priv->fw.catas_offset; + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; - priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", - (unsigned long long) addr); - return; + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } } priv->catas_err.timer.data = (unsigned long) dev; @@ -157,15 +299,29 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } - spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); - spin_unlock_irq(&catas_lock); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); } -void __init mlx4_catas_init(void) +int mlx4_catas_init(struct mlx4_dev *dev) { - INIT_WORK(&catas_work, catas_reset); + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; +} + +void mlx4_catas_end(struct mlx4_dev *dev) +{ + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5c93d1451c44..2b48932855e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -182,6 +183,72 @@ static u8 mlx4_errno_to_status(int errno) } } +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -190,16 +257,30 @@ static int comm_pending(struct mlx4_dev *dev) return (swab32(status) >> 31) != priv->cmd.comm_toggle; } -static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) +static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) { struct mlx4_priv *priv = mlx4_priv(dev); u32 val; + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the function was rest, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + mutex_lock(&dev->persist->device_state_mutex); + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mutex_unlock(&dev->persist->device_state_mutex); + return -EIO; + } + priv->cmd.comm_toggle ^= 1; val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); mmiowb(); + mutex_unlock(&dev->persist->device_state_mutex); + return 0; } static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, @@ -219,7 +300,13 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, /* Write command */ down(&priv->cmd.poll_sem); - mlx4_comm_cmd_post(dev, cmd, param); + if (mlx4_comm_cmd_post(dev, cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) @@ -231,18 +318,23 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, * is MLX4_DELAY_RESET_SLAVE*/ if ((MLX4_COMM_CMD_RESET == cmd)) { err = MLX4_DELAY_RESET_SLAVE; + goto out; } else { - mlx4_warn(dev, "Communication channel timed out\n"); - err = -ETIMEDOUT; + mlx4_warn(dev, "Communication channel command 0x%x timed out\n", + cmd); + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); } } + if (err) + mlx4_enter_error_state(dev->persist); +out: up(&priv->cmd.poll_sem); return err; } -static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, - u16 param, unsigned long timeout) +static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd, + u16 param, u16 op, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; @@ -258,34 +350,49 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, cmd->free_head = context->next; spin_unlock(&cmd->context_lock); - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_comm_cmd_post(dev, op, param); + if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { - mlx4_warn(dev, "communication channel command 0x%x timed out\n", - op); - err = -EBUSY; - goto out; + mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n", + vhcr_cmd, op); + goto out_reset; } err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", - op, context->fw_status); - goto out; + vhcr_cmd, context->fw_status); + if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; } -out: /* wait for comm channel ready * this is necessary for prevention the race * when switching between event to polling mode + * Skipping this section in case the device is in FATAL_ERROR state, + * In this state, no commands are sent via the comm channel until + * the device has returned from reset. */ - end = msecs_to_jiffies(timeout) + jiffies; - while (comm_pending(dev) && time_before(jiffies, end)) - cond_resched(); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + } + goto out; +out_reset: + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + mlx4_enter_error_state(dev->persist); +out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; @@ -296,10 +403,13 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, } int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout) + u16 op, unsigned long timeout) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + if (mlx4_priv(dev)->cmd.use_events) - return mlx4_comm_cmd_wait(dev, cmd, param, timeout); + return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout); return mlx4_comm_cmd_poll(dev, cmd, param, timeout); } @@ -307,7 +417,7 @@ static int cmd_pending(struct mlx4_dev *dev) { u32 status; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); @@ -323,17 +433,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; - int ret = -EAGAIN; + int ret = -EIO; unsigned long end; - mutex_lock(&cmd->hcr_mutex); - - if (pci_channel_offline(dev->pdev)) { + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -342,12 +456,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -391,7 +504,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: - mutex_unlock(&cmd->hcr_mutex); + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + return ret; } @@ -428,8 +545,11 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } ret = mlx4_status_to_errno(vhcr->status); } + if (ret && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, op_modifier); } else { - ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, + ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op, MLX4_COMM_TIME + timeout); if (!ret) { if (out_is_imm) { @@ -443,9 +563,14 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } } ret = mlx4_status_to_errno(vhcr->status); - } else - mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", - op); + } else { + if (dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, + op_modifier); + else + mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op); + } } mutex_unlock(&priv->cmd.slave_cmd_mutex); @@ -464,12 +589,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->pdev)) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { /* * Device is going through error recovery * and cannot accept commands. */ - err = -EIO; + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -483,16 +608,21 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) - goto out; + goto out_reset; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. */ err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -502,8 +632,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -ETIMEDOUT; - goto out; + err = -EIO; + goto out_reset; } if (out_is_imm) @@ -515,10 +645,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); - if (err) + if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: up(&priv->cmd.poll_sem); return err; @@ -565,17 +702,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, - in_modifier, op_modifier, op, context->token, 1); + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EBUSY; - goto out; + err = -EIO; + goto out_reset; } err = context->result; @@ -592,12 +731,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, else mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + goto out; } if (out_is_imm) *out_param = context->out_param; +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; @@ -612,10 +759,13 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { - if (pci_channel_offline(dev->pdev)) - return -EIO; + if (pci_channel_offline(dev->persist->pdev)) + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, @@ -631,7 +781,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, EXPORT_SYMBOL_GPL(__mlx4_cmd); -static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -1460,8 +1610,10 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, ALIGN(sizeof(struct mlx4_vhcr_cmd), MLX4_ACCESS_MEM_ALIGN), 1); if (ret) { - mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", - __func__, ret); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", + __func__, ret); kfree(vhcr); return ret; } @@ -1500,11 +1652,14 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, goto out_status; } - if (mlx4_ACCESS_MEM(dev, inbox->dma, slave, - vhcr->in_param, - MLX4_MAILBOX_SIZE, 1)) { - mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", - __func__, cmd->opcode); + ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave, + vhcr->in_param, + MLX4_MAILBOX_SIZE, 1); + if (ret) { + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", + __func__, cmd->opcode); vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; goto out_status; } @@ -1552,8 +1707,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } if (err) { - mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", - vhcr->op, slave, vhcr->errno, err); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", + vhcr->op, slave, vhcr->errno, err); vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } @@ -1568,7 +1724,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, /* If we failed to write back the outbox after the *command was successfully executed, we must fail this * slave, as it is now in undefined state */ - mlx4_err(dev, "%s:Failed writing outbox\n", __func__); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s:Failed writing outbox\n", __func__); goto out; } } @@ -1847,8 +2005,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && - (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); goto reset_slave; + } mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { @@ -1882,7 +2043,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ - mlx4_delete_all_resources_for_slave(dev, slave); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; @@ -1958,17 +2130,28 @@ void mlx4_master_comm_channel(struct work_struct *work) static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int wr_toggle; - int rd_toggle; + u32 wr_toggle; + u32 rd_toggle; unsigned long end; - wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; - end = jiffies + msecs_to_jiffies(5000); + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { - rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; - if (rd_toggle == wr_toggle) { - priv->cmd.comm_toggle = rd_toggle; + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; return 0; } @@ -1997,11 +2180,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_is_master(dev)) priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector\n"); @@ -2073,13 +2257,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - err = mlx4_ARM_COMM_CHANNEL(dev); - if (err) { - mlx4_err(dev, " Failed to arm comm channel eq: %x\n", - err); - goto err_resource; - } - } else { err = sync_toggles(dev); if (err) { @@ -2089,8 +2266,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } return 0; -err_resource: - mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); @@ -2107,9 +2282,9 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) err_comm: iounmap(priv->mfunc.comm); err_vhcr: - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, - priv->mfunc.vhcr_dma); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, + priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } @@ -2120,7 +2295,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { - mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2130,8 +2304,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { - priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + - MLX4_HCR_BASE, MLX4_HCR_SIZE); + priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev, + 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); goto err; @@ -2140,7 +2314,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) @@ -2150,7 +2325,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!priv->cmd.pool) { - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + priv->cmd.pool = pci_pool_create("mlx4_cmd", + dev->persist->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) @@ -2166,6 +2342,27 @@ int mlx4_cmd_init(struct mlx4_dev *dev) return -ENOMEM; } +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* Report an internal error event to all + * communication channels. + */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + } +} + void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2181,6 +2378,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; } iounmap(priv->mfunc.comm); @@ -2202,7 +2400,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) } if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } @@ -2229,6 +2427,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; + /* To support fatal error flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. + */ + init_completion(&priv->cmd.context[i].done); } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; @@ -2306,8 +2509,9 @@ u32 mlx4_comm_get_version(void) static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) { - if ((vf < 0) || (vf >= dev->num_vfs)) { - mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + if ((vf < 0) || (vf >= dev->persist->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", + vf, dev->persist->num_vfs); return -EINVAL; } @@ -2316,7 +2520,7 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) { - if (slave < 1 || slave > dev->num_vfs) { + if (slave < 1 || slave > dev->persist->num_vfs) { mlx4_err(dev, "Bad slave number:%d (number of activated slaves: %lu)\n", slave, dev->num_slaves); @@ -2325,6 +2529,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) return slave - 1; } +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) { struct mlx4_active_ports actv_ports; @@ -2388,7 +2611,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, if (port <= 0 || port > dev->caps.num_ports) return slaves_pport; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (test_bit(port - 1, actv_ports.ports)) @@ -2408,7 +2631,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (bitmap_equal(crit_ports->ports, actv_ports.ports, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 82322b1c8411..22da4d0d0f05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 90e0f045a6bc..569eda9e83d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); - strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), + strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 9f16f754137b..c643d2bbb7b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -241,8 +241,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) spin_lock_init(&mdev->uar_lock); mdev->dev = dev; - mdev->dma_device = &(dev->pdev->dev); - mdev->pdev = dev->pdev; + mdev->dma_device = &dev->persist->pdev->dev; + mdev->pdev = dev->persist->pdev; mdev->device_up = false; mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d0d6dc1b8e46..43a3f9822f74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2457,7 +2457,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, prof->tx_ring_num); netif_set_real_num_rx_queues(dev, prof->rx_ring_num); - SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); + SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); dev->dev_port = port - 1; /* diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a0474eb94aa3..2ba5d368edce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -387,10 +387,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->rx_info, tmp); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 359bb1286eb5..55f9f5c5344e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 3d275fbaf0eb..2f2e6067426d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -237,7 +237,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -255,7 +255,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -310,7 +310,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - for (i = 0; i < dev->num_vfs + 1; i++) + for (i = 0; i < dev->persist->num_vfs + 1; i++) if (test_bit(i, slaves_pport.slaves)) set_and_calc_slave_port_state(dev, i, port, event, &gen_event); @@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", i); - - mlx4_delete_all_resources_for_slave(dev, i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; @@ -560,7 +566,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { @@ -596,7 +603,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; + i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (i == mlx4_master_func_num(dev)) @@ -865,7 +874,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + ((eq->eqn / 4) << PAGE_SHIFT), PAGE_SIZE); if (!priv->eq_table.uar_map[index]) { @@ -928,8 +937,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, - PAGE_SIZE, &t, GFP_KERNEL); + eq->page_list[i].buf = dma_alloc_coherent(&dev->persist-> + pdev->dev, + PAGE_SIZE, &t, + GFP_KERNEL); if (!eq->page_list[i].buf) goto err_out_free_pages; @@ -995,7 +1006,7 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); @@ -1044,9 +1055,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].map); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); kfree(eq->page_list); mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); @@ -1060,7 +1071,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) int i, vec; if (eq_table->have_irq) - free_irq(dev->pdev->irq, dev); + free_irq(dev->persist->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { @@ -1089,7 +1100,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + + priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clr_int_bar) + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); if (!priv->clr_base) { mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n"); @@ -1212,13 +1224,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-comp-%d@pci:%s", i, - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } else { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-async@pci:%s", - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } eq_name = priv->eq_table.irq_names + @@ -1235,8 +1247,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, DRV_NAME "@pci:%s", - pci_name(dev->pdev)); - err = request_irq(dev->pdev->irq, mlx4_interrupt, + pci_name(dev->persist->pdev)); + err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) goto err_out_async; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 97c9b1db1d27..2a9dd460a95f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) @@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * int i; for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + dma_free_coherent(&dev->persist->pdev->dev, + chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } @@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, --cur_order; if (coherent) - ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, + ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, &chunk->mem[chunk->npages], cur_order, gfp_mask); else @@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); @@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 116895ac8b35..68d2bad325d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -138,13 +138,13 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_lock(&intf_mutex); + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); - if (!mlx4_is_slave(dev)) - mlx4_start_catas_poll(dev); + mlx4_start_catas_poll(dev); return 0; } @@ -154,14 +154,14 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - if (!mlx4_is_slave(dev)) - mlx4_stop_catas_poll(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) mlx4_remove_device(intf, priv); list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; mutex_unlock(&intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 03e9eb0dc761..9c7ef0bffb52 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -318,10 +320,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return -ENODEV; } - if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { + if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev_cap->uar_size, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); return -ENODEV; } @@ -541,8 +544,10 @@ static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1); - err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2); + err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, + &lnkcap1); + err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, + &lnkcap2); if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) *speed = PCIE_SPEED_8_0GT; @@ -587,7 +592,7 @@ static void mlx4_check_pcie_caps(struct mlx4_dev *dev) return; } - err = pcie_get_minimum_link(dev->pdev, &speed, &width); + err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); if (err || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { mlx4_warn(dev, @@ -837,10 +842,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > - pci_resource_len(dev->pdev, 2)) { + pci_resource_len(dev->persist->pdev, + 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev->caps.uar_page_size * dev->caps.num_uars, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); goto err_mem; } @@ -1477,7 +1484,8 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, + MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } @@ -1492,9 +1500,9 @@ static int map_bf_area(struct mlx4_dev *dev) if (!dev->caps.bf_reg_size) return -ENXIO; - bf_start = pci_resource_start(dev->pdev, 2) + + bf_start = pci_resource_start(dev->persist->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); - bf_len = pci_resource_len(dev->pdev, 2) - + bf_len = pci_resource_len(dev->persist->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) @@ -1536,7 +1544,8 @@ static int map_internal_clock(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = - ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) @@ -1573,6 +1582,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev) } } +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. + */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1588,9 +1641,15 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, - MLX4_COMM_TIME); + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { @@ -1615,22 +1674,24 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: - mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); + mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0); +err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -1705,7 +1766,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size <= 0 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= + (dev->persist->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = @@ -2288,7 +2350,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) for (i = 0; i < nreq; ++i) entries[i].entry = i; - nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq); + nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, + nreq); if (nreq < 0) { kfree(entries); @@ -2316,7 +2379,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) - priv->eq_table.eq[i].irq = dev->pdev->irq; + priv->eq_table.eq[i].irq = dev->persist->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2344,7 +2407,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_attr); + err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; @@ -2361,10 +2424,12 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); + err = device_create_file(&dev->persist->pdev->dev, + &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); - device_remove_file(&info->dev->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_attr); info->port = -1; } @@ -2376,8 +2441,9 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) if (info->port < 0) return; - device_remove_file(&info->dev->pdev->dev, &info->port_attr); - device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); + device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -2444,10 +2510,11 @@ static int mlx4_get_ownership(struct mlx4_dev *dev) void __iomem *owner; u32 ret; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2463,10 +2530,11 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2481,11 +2549,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) !!((flags) & MLX4_FLAG_MASTER)) static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, - u8 total_vfs, int existing_vfs) + u8 total_vfs, int existing_vfs, int reset_flow) { u64 dev_flags = dev->flags; int err = 0; + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + atomic_inc(&pf_loading); if (dev->flags & MLX4_FLAG_SRIOV) { if (existing_vfs != total_vfs) { @@ -2514,13 +2590,14 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, dev_flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev_flags &= ~MLX4_FLAG_SLAVE; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } return dev_flags; disable_sriov: atomic_dec(&pf_loading); - dev->num_vfs = 0; +free_mem: + dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; } @@ -2544,7 +2621,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap } static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, - int total_vfs, int *nvfs, struct mlx4_priv *priv) + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) { struct mlx4_dev *dev; unsigned sum = 0; @@ -2607,10 +2685,15 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, existing_vfs = pci_num_vf(pdev); if (existing_vfs) dev->flags |= MLX4_FLAG_SRIOV; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } } + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + slave_start: err = mlx4_cmd_init(dev); if (err) { @@ -2661,8 +2744,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, goto err_fw; if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; @@ -2704,7 +2789,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, if (dev->flags & MLX4_FLAG_SRIOV) { if (!existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev) && !reset_flow) atomic_dec(&pf_loading); dev->flags &= ~MLX4_FLAG_SRIOV; } @@ -2718,7 +2803,8 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); @@ -2771,12 +2857,14 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, dev->caps.num_ports); goto err_close; } - memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs)); + memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); - for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) { + for (i = 0; + i < sizeof(dev->persist->nvfs)/ + sizeof(dev->persist->nvfs[0]); i++) { unsigned j; - for (j = 0; j < dev->nvfs[i]; ++sum, ++j) { + for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; dev->dev_vfs[sum].n_ports = i < 2 ? 1 : dev->caps.num_ports; @@ -2828,6 +2916,17 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, goto err_steer; mlx4_init_quotas(dev); + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -2846,7 +2945,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, priv->removed = 0; - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2905,10 +3004,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: - if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3049,11 +3150,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, } } - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_catas_init(&priv->dev); if (err) goto err_release_regions; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); + if (err) + goto err_catas; + return 0; +err_catas: + mlx4_catas_end(&priv->dev); + err_release_regions: pci_release_regions(pdev); @@ -3076,38 +3185,60 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; dev = &priv->dev; - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); + dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); + if (!dev->persist) { + kfree(priv); + return -ENOMEM; + } + dev->persist->pdev = pdev; + dev->persist->dev = dev; + pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) + if (ret) { + kfree(dev->persist); kfree(priv); + } else { + pci_save_state(pdev); + } return ret; } +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; + priv->dev.flags = flags; +} + static void mlx4_unload_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; - int p; - int active_vfs = 0; + int p, i; if (priv->removed) return; + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. + possible_type[i + 1]; + } + pci_dev_data = priv->pci_dev_data; - /* Disabling SR-IOV is not allowed while there are active vf's */ - if (mlx4_is_master(dev)) { - active_vfs = mlx4_how_many_lives_vf(dev); - if (active_vfs) { - pr_warn("Removing PF when there are active VF's !!\n"); - pr_warn("Will not disable SR-IOV.\n"); - } - } mlx4_stop_sense(dev); mlx4_unregister_device(dev); @@ -3151,12 +3282,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - dev->flags &= ~MLX4_FLAG_SRIOV; - dev->num_vfs = 0; - } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -3168,42 +3293,96 @@ static void mlx4_unload_one(struct pci_dev *pdev) kfree(dev->caps.qp1_proxy); kfree(dev->dev_vfs); - memset(priv, 0, sizeof(*priv)); + mlx4_clean_dev(dev); priv->pci_dev_data = pci_dev_data; priv->removed = 1; } static void mlx4_remove_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + int active_vfs = 0; + + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); + mlx4_catas_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } - mlx4_unload_one(pdev); pci_release_regions(pdev); pci_disable_device(pdev); + kfree(dev->persist); kfree(priv); pci_set_drvdata(pdev, NULL); } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; int pci_dev_data, err, total_vfs; pci_dev_data = priv->pci_dev_data; - total_vfs = dev->num_vfs; - memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs)); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); if (err) { mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", __func__, pci_name(pdev), err); return err; } + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + return err; } @@ -3258,23 +3437,79 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - mlx4_unload_one(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); + mlx4_enter_error_state(persist); - return state == pci_channel_io_perm_failure ? - PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + + mutex_unlock(&persist->interface_state_mutex); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + pci_disable_device(pdev); + return PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int ret; + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int total_vfs; + + mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); + ret = pci_enable_device(pdev); + if (ret) { + mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + mutex_lock(&persist->interface_state_mutex); + if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { + ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, + priv, 1); + if (ret) { + mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", + __func__, ret); + goto end; + } - ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv); + ret = restore_current_port_types(dev, dev->persist-> + curr_port_type, dev->persist-> + curr_port_poss_type); + if (ret) + mlx4_err(dev, "could not restore original port types (%d)\n", ret); + } +end: + mutex_unlock(&persist->interface_state_mutex); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } +static void mlx4_shutdown(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_info(persist->dev, "mlx4_shutdown was called\n"); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + mutex_unlock(&persist->interface_state_mutex); +} + static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, @@ -3284,7 +3519,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, - .shutdown = mlx4_unload_one, + .shutdown = mlx4_shutdown, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; @@ -3336,7 +3571,6 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index a3867e7ef885..bd9ea0d01aae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1318,6 +1318,9 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; return err; } @@ -1347,6 +1350,9 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && !attach && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index bdd4eea2247c..096a81c16a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -85,7 +85,9 @@ enum { MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, - MLX4_CLOCK_SIZE = 0x00008 + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc }; enum { @@ -120,6 +122,10 @@ enum mlx4_mpt_state { }; #define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 +#define MLX4_COMM_CMD_NA_OP 0x0 + + enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, @@ -221,19 +227,21 @@ extern int mlx4_debug_level; #define mlx4_dbg(mdev, format, ...) \ do { \ if (mlx4_debug_level) \ - dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \ + dev_printk(KERN_DEBUG, \ + &(mdev)->persist->pdev->dev, format, \ ##__VA_ARGS__); \ } while (0) #define mlx4_err(mdev, format, ...) \ - dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_info(mdev, format, ...) \ - dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_warn(mdev, format, ...) \ - dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; #define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) #define ALL_SLAVES 0xff @@ -606,7 +614,6 @@ struct mlx4_mgm { struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; - struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; @@ -994,7 +1001,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -void mlx4_catas_init(void); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); @@ -1160,13 +1168,14 @@ enum { int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout); + u16 op, unsigned long timeout); void mlx4_cq_tasklet_cb(unsigned long data); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn); @@ -1176,7 +1185,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); -void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 7094a9c70fd5..8dbdf1d29357 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -708,13 +708,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, if (!mtts) return -ENOMEM; - dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); return 0; @@ -1020,13 +1020,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list /* Make sure MPT status is visible before writing MTT entries */ wmb(); - dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); fmr->mpt->key = cpu_to_be32(key); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 74216071201f..a42b4c0a9ed9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) return -ENOMEM; if (mlx4_is_slave(dev)) - offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / + offset = uar->index % ((int)pci_resource_len(dev->persist->pdev, + 2) / dev->caps.uar_page_size); else offset = uar->index; - uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; + uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT) + + offset; uar->map = NULL; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 30eb1ead0fe6..9f268f05290a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; @@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= gids % vfs) return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); @@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) int num_eth_ports, err; int i; - if (slave < 0 || slave > dev->num_vfs) + if (slave < 0 || slave > dev->persist->num_vfs) return; actv_ports = mlx4_get_active_ports(dev, slave); @@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, return -EINVAL; slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + num_vfs = bitmap_weight(slaves_pport.slaves, + dev->persist->num_vfs + 1) - 1; for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid, @@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); num_vfs_before += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } /* candidate_slave_gid isn't necessarily the correct slave, but @@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); slave_gid += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } } *slave_id = slave_gid; diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c index ea1c6d092145..0076d88587ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/reset.c +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - pcie_cap = pci_pcie_cap(dev->pdev); + pcie_cap = pci_pcie_cap(dev->persist->pdev); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; - if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { + if (pci_read_config_dword(dev->persist->pdev, i * 4, + hca_header + i)) { err = -ENODEV; mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n"); goto out; } } - reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, + reset = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_RESET_BASE, MLX4_RESET_SIZE); if (!reset) { err = -ENOMEM; @@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev) end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { - if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && - vendor != 0xffff) + if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID, + &vendor) && vendor != 0xffff) break; msleep(1); @@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev) /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n"); @@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev) if (i * 4 == PCI_COMMAND) continue; - if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { + if (pci_write_config_dword(dev->persist->pdev, i * 4, + hca_header[i])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n", i); @@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev) } } - if (pci_write_config_dword(dev->pdev, PCI_COMMAND, + if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4efbd1eca611..3e93879bccce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, int allocated, free, reserved, guaranteed, from_free; int from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return -EINVAL; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; @@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, if (!err) { /* grant the request */ if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; res_alloc->res_port_rsvd[port - 1] -= from_rsvd; } else { @@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, &priv->mfunc.master.res_tracker.res_alloc[res_type]; int allocated, guaranteed, from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; guaranteed = res_alloc->guaranteed[slave]; @@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, } if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; res_alloc->res_port_rsvd[port - 1] += from_rsvd; } else { @@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev, enum mlx4_resource res_type, int vf, int num_instances) { - res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->guaranteed[vf] = num_instances / + (2 * (dev->persist->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; @@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; - res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); - res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * - (dev->num_vfs + 1) * sizeof(int), - GFP_KERNEL); + (dev->persist->num_vfs + + 1) * + sizeof(int), GFP_KERNEL); else - res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->allocated = kzalloc((dev->persist-> + num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); - for (t = 0; t < dev->num_vfs + 1; t++) { + for (t = 0; t < dev->persist->num_vfs + 1; t++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, t); switch (i) { diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 64d25941b329..c989442ffc6a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -279,6 +279,8 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); +void mlx4_cmd_wake_completions(struct mlx4_dev *dev); +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) @@ -288,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) +#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17) #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f1e41b33462f..5ef54e145e4d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,10 @@ enum { MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; +enum { + MLX4_VF_CAP_FLAG_RESET = 1 << 0 +}; + /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, @@ -411,6 +415,16 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +enum { + MLX4_DEVICE_STATE_UP = 1 << 0, + MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, +}; + +enum { + MLX4_INTERFACE_STATE_UP = 1 << 0, + MLX4_INTERFACE_STATE_DELETION = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -535,6 +549,7 @@ struct mlx4_caps { u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + u32 vf_caps; }; struct mlx4_buf_list { @@ -744,8 +759,23 @@ struct mlx4_vf_dev { u8 n_ports; }; -struct mlx4_dev { +struct mlx4_dev_persistent { struct pci_dev *pdev; + struct mlx4_dev *dev; + int nvfs[MLX4_MAX_PORTS + 1]; + int num_vfs; + enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; + enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; + struct work_struct catas_work; + struct workqueue_struct *catas_wq; + struct mutex device_state_mutex; /* protect HW state */ + u8 state; + struct mutex interface_state_mutex; /* protect SW state */ + u8 interface_state; +}; + +struct mlx4_dev { + struct mlx4_dev_persistent *persist; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; @@ -754,13 +784,11 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; - int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; - int nvfs[MLX4_MAX_PORTS + 1]; }; struct mlx4_eqe {