From 872bf2fb69d90e3619befee842fc26db39d8e475 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:35 +0200 Subject: [PATCH 1/9] net/mlx4_core: Maintain a persistent memory for mlx4 device Maintain a persistent memory that should survive reset flow/PCI error. This comes as a preparation for coming series to support above flows. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 3 +- drivers/infiniband/hw/mlx4/main.c | 17 +-- drivers/infiniband/hw/mlx4/mr.c | 6 +- drivers/infiniband/hw/mlx4/sysfs.c | 6 +- drivers/net/ethernet/mellanox/mlx4/alloc.c | 15 ++- drivers/net/ethernet/mellanox/mlx4/catas.c | 13 ++- drivers/net/ethernet/mellanox/mlx4/cmd.c | 46 ++++---- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 +- .../net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_main.c | 4 +- .../net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 4 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 42 ++++--- drivers/net/ethernet/mellanox/mlx4/icm.c | 11 +- drivers/net/ethernet/mellanox/mlx4/main.c | 106 +++++++++++------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 9 +- drivers/net/ethernet/mellanox/mlx4/mr.c | 8 +- drivers/net/ethernet/mellanox/mlx4/pd.c | 6 +- drivers/net/ethernet/mellanox/mlx4/port.c | 17 +-- drivers/net/ethernet/mellanox/mlx4/reset.c | 23 ++-- .../ethernet/mellanox/mlx4/resource_tracker.c | 36 +++--- include/linux/mlx4/device.h | 11 +- 24 files changed, 234 insertions(+), 163 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0eb141c41416..a31e031afd87 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_vfs + 1) + if (slave_id >= dev->dev->persist->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 82a7dd87089b..c7619716c31d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->ib_dev = &dev->ib_dev; for (i = 0; - i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i < min(dev->dev->caps.sqp_demux, + (u16)(dev->dev->persist->num_vfs + 1)); i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev->dev, i); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 57ecc5b204f3..b4fa6f658800 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = dev->dev->pdev->device; + props->vendor_part_id = dev->dev->persist->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "MT%d\n", dev->dev->pdev->device); + return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, @@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) int i; if (mlx4_is_master(ibdev->dev)) { - for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (slave = 0; slave <= ibdev->dev->persist->num_vfs; + ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; @@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + i, j, dev->persist->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { - dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "Device struct alloc failed\n"); return NULL; } @@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; - ibdev->ib_dev.dma_device = &dev->pdev->dev; + ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "bit map alloc failed\n"); goto err_steer_qp_release; } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index c36ccbd9a644..e0d271782d0a 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device if (!mfrpl->ibfrpl.page_list) goto err_free; - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> + pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) @@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); - dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + dma_free_coherent(&dev->dev->persist->pdev->dev, size, + mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index cb4c66e723b5..d10c2b8a5dad 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) char base_name[9]; /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->pdev), max); + strlcpy(name, pci_name(dev->dev->persist->pdev), max); strncpy(base_name, name, 8); /*till xxxx:yy:*/ base_name[8] = '\0'; /* with no ARI only 3 last bits are used so when the fn is higher than 8 @@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return 0; - for (i = 0; i <= device->dev->num_vfs; ++i) + for (i = 0; i <= device->dev->persist->num_vfs; ++i) register_one_pkey_tree(device, i); return 0; @@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return; - for (slave = device->dev->num_vfs; slave >= 0; --slave) { + for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) { list_for_each_entry_safe(p, t, &device->pkeys.pkey_port_list[slave], entry) { diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 963dd7e6d547..a716c26e0d99 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -657,7 +658,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + dma_free_coherent(&dev->persist->pdev->dev, size, + buf->direct.buf, buf->direct.map); else { if (BITS_PER_LONG == 64 && buf->direct.buf) @@ -665,7 +667,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) set_bit(i, db->u.pgdir->bits[o]); if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); kfree(db->u.pgdir); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 9c656fe4983d..1a102c9bac99 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -70,7 +70,7 @@ static void poll_catas(unsigned long dev_ptr) if (readl(priv->catas_err.map)) { /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) mod_timer(&priv->catas_err.timer, round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); else { @@ -94,6 +94,7 @@ static void catas_reset(struct work_struct *work) { struct mlx4_priv *priv, *tmppriv; struct mlx4_dev *dev; + struct mlx4_dev_persistent *persist; LIST_HEAD(tlist); int ret; @@ -103,20 +104,20 @@ static void catas_reset(struct work_struct *work) spin_unlock_irq(&catas_lock); list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.pdev; + struct pci_dev *pdev = priv->dev.persist->pdev; /* If the device is off-line, we cannot reset it */ if (pci_channel_offline(pdev)) continue; - ret = mlx4_restart_one(priv->dev.pdev); + ret = mlx4_restart_one(priv->dev.persist->pdev); /* 'priv' now is not valid */ if (ret) pr_err("mlx4 %s: Reset failed (%d)\n", pci_name(pdev), ret); else { - dev = pci_get_drvdata(pdev); - mlx4_dbg(dev, "Reset succeeded\n"); + persist = pci_get_drvdata(pdev); + mlx4_dbg(persist->dev, "Reset succeeded\n"); } } } @@ -134,7 +135,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + + addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) + priv->fw.catas_offset; priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5c93d1451c44..7cd90e6a4272 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -307,7 +307,7 @@ static int cmd_pending(struct mlx4_dev *dev) { u32 status; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); @@ -328,7 +328,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, mutex_lock(&cmd->hcr_mutex); - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -342,7 +342,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -464,7 +464,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -487,7 +487,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -612,7 +612,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { @@ -1997,11 +1997,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_is_master(dev)) priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector\n"); @@ -2107,9 +2108,9 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) err_comm: iounmap(priv->mfunc.comm); err_vhcr: - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, - priv->mfunc.vhcr_dma); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, + priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } @@ -2130,8 +2131,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { - priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + - MLX4_HCR_BASE, MLX4_HCR_SIZE); + priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev, + 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); goto err; @@ -2140,7 +2141,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) @@ -2150,7 +2152,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!priv->cmd.pool) { - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + priv->cmd.pool = pci_pool_create("mlx4_cmd", + dev->persist->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) @@ -2202,7 +2205,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) } if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } @@ -2306,8 +2309,9 @@ u32 mlx4_comm_get_version(void) static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) { - if ((vf < 0) || (vf >= dev->num_vfs)) { - mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + if ((vf < 0) || (vf >= dev->persist->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", + vf, dev->persist->num_vfs); return -EINVAL; } @@ -2316,7 +2320,7 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) { - if (slave < 1 || slave > dev->num_vfs) { + if (slave < 1 || slave > dev->persist->num_vfs) { mlx4_err(dev, "Bad slave number:%d (number of activated slaves: %lu)\n", slave, dev->num_slaves); @@ -2388,7 +2392,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, if (port <= 0 || port > dev->caps.num_ports) return slaves_pport; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (test_bit(port - 1, actv_ports.ports)) @@ -2408,7 +2412,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (bitmap_equal(crit_ports->ports, actv_ports.ports, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 82322b1c8411..22da4d0d0f05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 90e0f045a6bc..569eda9e83d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); - strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), + strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 9f16f754137b..c643d2bbb7b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -241,8 +241,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) spin_lock_init(&mdev->uar_lock); mdev->dev = dev; - mdev->dma_device = &(dev->pdev->dev); - mdev->pdev = dev->pdev; + mdev->dma_device = &dev->persist->pdev->dev; + mdev->pdev = dev->persist->pdev; mdev->device_up = false; mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d0d6dc1b8e46..43a3f9822f74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2457,7 +2457,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, prof->tx_ring_num); netif_set_real_num_rx_queues(dev, prof->rx_ring_num); - SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); + SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); dev->dev_port = port - 1; /* diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a0474eb94aa3..2ba5d368edce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -387,10 +387,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->rx_info, tmp); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 359bb1286eb5..55f9f5c5344e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 3d275fbaf0eb..7538c9ce98a9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -237,7 +237,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -255,7 +255,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -310,7 +310,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - for (i = 0; i < dev->num_vfs + 1; i++) + for (i = 0; i < dev->persist->num_vfs + 1; i++) if (test_bit(i, slaves_pport.slaves)) set_and_calc_slave_port_state(dev, i, port, event, &gen_event); @@ -560,7 +560,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { @@ -596,7 +597,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; + i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (i == mlx4_master_func_num(dev)) @@ -865,7 +868,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + ((eq->eqn / 4) << PAGE_SHIFT), PAGE_SIZE); if (!priv->eq_table.uar_map[index]) { @@ -928,8 +931,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, - PAGE_SIZE, &t, GFP_KERNEL); + eq->page_list[i].buf = dma_alloc_coherent(&dev->persist-> + pdev->dev, + PAGE_SIZE, &t, + GFP_KERNEL); if (!eq->page_list[i].buf) goto err_out_free_pages; @@ -995,7 +1000,7 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); @@ -1044,9 +1049,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].map); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); kfree(eq->page_list); mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); @@ -1060,7 +1065,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) int i, vec; if (eq_table->have_irq) - free_irq(dev->pdev->irq, dev); + free_irq(dev->persist->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { @@ -1089,7 +1094,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + + priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clr_int_bar) + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); if (!priv->clr_base) { mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n"); @@ -1212,13 +1218,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-comp-%d@pci:%s", i, - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } else { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-async@pci:%s", - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } eq_name = priv->eq_table.irq_names + @@ -1235,8 +1241,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, DRV_NAME "@pci:%s", - pci_name(dev->pdev)); - err = request_irq(dev->pdev->irq, mlx4_interrupt, + pci_name(dev->persist->pdev)); + err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) goto err_out_async; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 97c9b1db1d27..2a9dd460a95f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) @@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * int i; for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + dma_free_coherent(&dev->persist->pdev->dev, + chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } @@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, --cur_order; if (coherent) - ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, + ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, &chunk->mem[chunk->npages], cur_order, gfp_mask); else @@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); @@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 03e9eb0dc761..abcee61f8a47 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -318,10 +318,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return -ENODEV; } - if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { + if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev_cap->uar_size, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); return -ENODEV; } @@ -541,8 +542,10 @@ static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1); - err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2); + err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, + &lnkcap1); + err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, + &lnkcap2); if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) *speed = PCIE_SPEED_8_0GT; @@ -587,7 +590,7 @@ static void mlx4_check_pcie_caps(struct mlx4_dev *dev) return; } - err = pcie_get_minimum_link(dev->pdev, &speed, &width); + err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); if (err || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { mlx4_warn(dev, @@ -837,10 +840,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > - pci_resource_len(dev->pdev, 2)) { + pci_resource_len(dev->persist->pdev, + 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev->caps.uar_page_size * dev->caps.num_uars, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); goto err_mem; } @@ -1492,9 +1497,9 @@ static int map_bf_area(struct mlx4_dev *dev) if (!dev->caps.bf_reg_size) return -ENXIO; - bf_start = pci_resource_start(dev->pdev, 2) + + bf_start = pci_resource_start(dev->persist->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); - bf_len = pci_resource_len(dev->pdev, 2) - + bf_len = pci_resource_len(dev->persist->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) @@ -1536,7 +1541,8 @@ static int map_internal_clock(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = - ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) @@ -1705,7 +1711,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size <= 0 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - (dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= + (dev->persist->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = @@ -2288,7 +2295,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) for (i = 0; i < nreq; ++i) entries[i].entry = i; - nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq); + nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, + nreq); if (nreq < 0) { kfree(entries); @@ -2316,7 +2324,7 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) - priv->eq_table.eq[i].irq = dev->pdev->irq; + priv->eq_table.eq[i].irq = dev->persist->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2344,7 +2352,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_attr); + err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; @@ -2361,10 +2369,12 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); + err = device_create_file(&dev->persist->pdev->dev, + &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); - device_remove_file(&info->dev->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_attr); info->port = -1; } @@ -2376,8 +2386,9 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) if (info->port < 0) return; - device_remove_file(&info->dev->pdev->dev, &info->port_attr); - device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); + device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -2444,10 +2455,11 @@ static int mlx4_get_ownership(struct mlx4_dev *dev) void __iomem *owner; u32 ret; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2463,10 +2475,11 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2514,13 +2527,13 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, dev_flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev_flags &= ~MLX4_FLAG_SLAVE; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } return dev_flags; disable_sriov: atomic_dec(&pf_loading); - dev->num_vfs = 0; + dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; } @@ -2607,7 +2620,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, existing_vfs = pci_num_vf(pdev); if (existing_vfs) dev->flags |= MLX4_FLAG_SRIOV; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } } @@ -2771,12 +2784,14 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, dev->caps.num_ports); goto err_close; } - memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs)); + memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); - for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) { + for (i = 0; + i < sizeof(dev->persist->nvfs)/ + sizeof(dev->persist->nvfs[0]); i++) { unsigned j; - for (j = 0; j < dev->nvfs[i]; ++sum, ++j) { + for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; dev->dev_vfs[sum].n_ports = i < 2 ? 1 : dev->caps.num_ports; @@ -2846,7 +2861,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, priv->removed = 0; - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2908,7 +2923,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3076,20 +3091,28 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; dev = &priv->dev; - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); + dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); + if (!dev->persist) { + kfree(priv); + return -ENOMEM; + } + dev->persist->pdev = pdev; + dev->persist->dev = dev; + pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) + if (ret) { + kfree(dev->persist); kfree(priv); - + } return ret; } static void mlx4_unload_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; int p; @@ -3155,7 +3178,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); dev->flags &= ~MLX4_FLAG_SRIOV; - dev->num_vfs = 0; + dev->persist->num_vfs = 0; } if (!mlx4_is_slave(dev)) @@ -3175,26 +3198,29 @@ static void mlx4_unload_one(struct pci_dev *pdev) static void mlx4_remove_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); mlx4_unload_one(pdev); pci_release_regions(pdev); pci_disable_device(pdev); + kfree(dev->persist); kfree(priv); pci_set_drvdata(pdev, NULL); } int mlx4_restart_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; int pci_dev_data, err, total_vfs; pci_dev_data = priv->pci_dev_data; - total_vfs = dev->num_vfs; - memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs)); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index bdd4eea2247c..faa37ab75a9d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -221,16 +221,17 @@ extern int mlx4_debug_level; #define mlx4_dbg(mdev, format, ...) \ do { \ if (mlx4_debug_level) \ - dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \ + dev_printk(KERN_DEBUG, \ + &(mdev)->persist->pdev->dev, format, \ ##__VA_ARGS__); \ } while (0) #define mlx4_err(mdev, format, ...) \ - dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_info(mdev, format, ...) \ - dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_warn(mdev, format, ...) \ - dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 7094a9c70fd5..8dbdf1d29357 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -708,13 +708,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, if (!mtts) return -ENOMEM; - dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); return 0; @@ -1020,13 +1020,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list /* Make sure MPT status is visible before writing MTT entries */ wmb(); - dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); fmr->mpt->key = cpu_to_be32(key); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 74216071201f..a42b4c0a9ed9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) return -ENOMEM; if (mlx4_is_slave(dev)) - offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / + offset = uar->index % ((int)pci_resource_len(dev->persist->pdev, + 2) / dev->caps.uar_page_size); else offset = uar->index; - uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; + uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT) + + offset; uar->map = NULL; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 30eb1ead0fe6..9f268f05290a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; @@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= gids % vfs) return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); @@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) int num_eth_ports, err; int i; - if (slave < 0 || slave > dev->num_vfs) + if (slave < 0 || slave > dev->persist->num_vfs) return; actv_ports = mlx4_get_active_ports(dev, slave); @@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, return -EINVAL; slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + num_vfs = bitmap_weight(slaves_pport.slaves, + dev->persist->num_vfs + 1) - 1; for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid, @@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); num_vfs_before += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } /* candidate_slave_gid isn't necessarily the correct slave, but @@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); slave_gid += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } } *slave_id = slave_gid; diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c index ea1c6d092145..0076d88587ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/reset.c +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - pcie_cap = pci_pcie_cap(dev->pdev); + pcie_cap = pci_pcie_cap(dev->persist->pdev); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; - if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { + if (pci_read_config_dword(dev->persist->pdev, i * 4, + hca_header + i)) { err = -ENODEV; mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n"); goto out; } } - reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, + reset = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_RESET_BASE, MLX4_RESET_SIZE); if (!reset) { err = -ENOMEM; @@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev) end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { - if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && - vendor != 0xffff) + if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID, + &vendor) && vendor != 0xffff) break; msleep(1); @@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev) /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Device Control register, aborting\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n"); @@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev) if (i * 4 == PCI_COMMAND) continue; - if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { + if (pci_write_config_dword(dev->persist->pdev, i * 4, + hca_header[i])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n", i); @@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev) } } - if (pci_write_config_dword(dev->pdev, PCI_COMMAND, + if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4efbd1eca611..3e93879bccce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, int allocated, free, reserved, guaranteed, from_free; int from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return -EINVAL; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; @@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, if (!err) { /* grant the request */ if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; res_alloc->res_port_rsvd[port - 1] -= from_rsvd; } else { @@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, &priv->mfunc.master.res_tracker.res_alloc[res_type]; int allocated, guaranteed, from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; guaranteed = res_alloc->guaranteed[slave]; @@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, } if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; res_alloc->res_port_rsvd[port - 1] += from_rsvd; } else { @@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev, enum mlx4_resource res_type, int vf, int num_instances) { - res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->guaranteed[vf] = num_instances / + (2 * (dev->persist->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; @@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; - res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); - res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * - (dev->num_vfs + 1) * sizeof(int), - GFP_KERNEL); + (dev->persist->num_vfs + + 1) * + sizeof(int), GFP_KERNEL); else - res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->allocated = kzalloc((dev->persist-> + num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); - for (t = 0; t < dev->num_vfs + 1; t++) { + for (t = 0; t < dev->persist->num_vfs + 1; t++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, t); switch (i) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f1e41b33462f..1069ce65e8b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -744,8 +744,15 @@ struct mlx4_vf_dev { u8 n_ports; }; -struct mlx4_dev { +struct mlx4_dev_persistent { struct pci_dev *pdev; + struct mlx4_dev *dev; + int nvfs[MLX4_MAX_PORTS + 1]; + int num_vfs; +}; + +struct mlx4_dev { + struct mlx4_dev_persistent *persist; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; @@ -754,13 +761,11 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; - int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; - int nvfs[MLX4_MAX_PORTS + 1]; }; struct mlx4_eqe { From dd0eefe3abbf47442db296bf68f27eb2860c1cdf Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:36 +0200 Subject: [PATCH 2/9] net/mlx4_core: Set device configuration data to be persistent across reset When an HCA enters an internal error state, this is detected by the driver. The driver then should reset the HCA and restart the software stack. Keep ports information and some SRIOV configuration in a persistent area to have it valid across reset. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 45 ++++++++++++++++++++++- include/linux/mlx4/device.h | 2 + 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index abcee61f8a47..2c5a555dff89 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3109,18 +3109,34 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; +} + static void mlx4_unload_one(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; - int p; + int p, i; int active_vfs = 0; if (priv->removed) return; + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. + possible_type[i + 1]; + } + pci_dev_data = priv->pci_dev_data; /* Disabling SR-IOV is not allowed while there are active vf's */ @@ -3191,7 +3207,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) kfree(dev->caps.qp1_proxy); kfree(dev->dev_vfs); - memset(priv, 0, sizeof(*priv)); + mlx4_clean_dev(dev); priv->pci_dev_data = pci_dev_data; priv->removed = 1; } @@ -3210,6 +3226,25 @@ static void mlx4_remove_one(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); @@ -3230,6 +3265,12 @@ int mlx4_restart_one(struct pci_dev *pdev) return err; } + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + return err; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1069ce65e8b4..8c3837ac1a2d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -749,6 +749,8 @@ struct mlx4_dev_persistent { struct mlx4_dev *dev; int nvfs[MLX4_MAX_PORTS + 1]; int num_vfs; + enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; + enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; }; struct mlx4_dev { From ad9a0bf08ffbf32b8f292c3bb78ca0f24bb8f6b2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:37 +0200 Subject: [PATCH 3/9] net/mlx4_core: Refactor the catas flow to work per device Using a WQ per device instead of a single global WQ, this allows independent reset handling per device even when SRIOV is used. This comes as a pre-patch for supporting chip reset for both native and SRIOV. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 75 ++++++++++------------ drivers/net/ethernet/mellanox/mlx4/main.c | 12 +++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +- include/linux/mlx4/device.h | 2 + 4 files changed, 48 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 1a102c9bac99..5bb9aa6e281d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -40,10 +40,7 @@ enum { MLX4_CATAS_POLL_INTERVAL = 5 * HZ, }; -static DEFINE_SPINLOCK(catas_lock); -static LIST_HEAD(catas_list); -static struct work_struct catas_work; static int internal_err_reset = 1; module_param(internal_err_reset, int, 0644); @@ -77,13 +74,9 @@ static void poll_catas(unsigned long dev_ptr) dump_err_buf(dev); mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); - } + if (internal_err_reset) + queue_work(dev->persist->catas_wq, + &dev->persist->catas_work); } } else mod_timer(&priv->catas_err.timer, @@ -92,34 +85,23 @@ static void poll_catas(unsigned long dev_ptr) static void catas_reset(struct work_struct *work) { - struct mlx4_priv *priv, *tmppriv; - struct mlx4_dev *dev; - struct mlx4_dev_persistent *persist; - - LIST_HEAD(tlist); + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); + struct pci_dev *pdev = persist->pdev; int ret; - spin_lock_irq(&catas_lock); - list_splice_init(&catas_list, &tlist); - spin_unlock_irq(&catas_lock); - - list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.persist->pdev; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - continue; + /* If the device is off-line, we cannot reset it */ + if (pci_channel_offline(pdev)) + return; - ret = mlx4_restart_one(priv->dev.persist->pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else { - persist = pci_get_drvdata(pdev); - mlx4_dbg(persist->dev, "Reset succeeded\n"); - } - } + ret = mlx4_restart_one(pdev); + /* 'priv' now is not valid */ + if (ret) + pr_err("mlx4 %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else + mlx4_dbg(persist->dev, "Reset succeeded\n"); } void mlx4_start_catas_poll(struct mlx4_dev *dev) @@ -158,15 +140,26 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } +} - spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); - spin_unlock_irq(&catas_lock); +int mlx4_catas_init(struct mlx4_dev *dev) +{ + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; } -void __init mlx4_catas_init(void) +void mlx4_catas_end(struct mlx4_dev *dev) { - INIT_WORK(&catas_work, catas_reset); + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2c5a555dff89..a61694cc1476 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3064,11 +3064,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, } } - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_catas_init(&priv->dev); if (err) goto err_release_regions; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + if (err) + goto err_catas; + return 0; +err_catas: + mlx4_catas_end(&priv->dev); + err_release_regions: pci_release_regions(pdev); @@ -3219,6 +3227,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_priv *priv = mlx4_priv(dev); mlx4_unload_one(pdev); + mlx4_catas_end(dev); pci_release_regions(pdev); pci_disable_device(pdev); kfree(dev->persist); @@ -3403,7 +3412,6 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index faa37ab75a9d..d41af84f965b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -995,7 +995,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -void mlx4_catas_init(void); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8c3837ac1a2d..da425d2f3708 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -751,6 +751,8 @@ struct mlx4_dev_persistent { int num_vfs; enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; + struct work_struct catas_work; + struct workqueue_struct *catas_wq; }; struct mlx4_dev { From f6bc11e42646e661e699a5593cbd1e9dba7191d0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:38 +0200 Subject: [PATCH 4/9] net/mlx4_core: Enhance the catas flow to support device reset This includes: - resetting the chip when a fatal error is detected (the current code does not do this). - exposing the ability to enter error state from outside the catas code by calling its functionality. (E.g. FW Command timeout, AER error). - managing a persistent device state. This is needed to sync between reset flow cases. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 122 ++++++++++++++++----- drivers/net/ethernet/mellanox/mlx4/main.c | 6 + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- include/linux/mlx4/device.h | 7 ++ 4 files changed, 108 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 5bb9aa6e281d..588d6b5e1211 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -48,6 +48,83 @@ MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero" " (default 1, in SRIOV mode default is 0)"); +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + err = mlx4_reset_master(dev); + BUG_ON(err != 0); + + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mlx4_err(dev, "device was reset successfully\n"); + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + int err = 0; + + mlx4_enter_error_state(persist); + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); +} + static void dump_err_buf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -66,21 +143,22 @@ static void poll_catas(unsigned long dev_ptr) struct mlx4_priv *priv = mlx4_priv(dev); if (readl(priv->catas_err.map)) { - /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->persist->pdev)) - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); - else { - dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - - if (internal_err_reset) - queue_work(dev->persist->catas_wq, - &dev->persist->catas_work); - } - } else - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (internal_err_reset) + queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } static void catas_reset(struct work_struct *work) @@ -88,20 +166,8 @@ static void catas_reset(struct work_struct *work) struct mlx4_dev_persistent *persist = container_of(work, struct mlx4_dev_persistent, catas_work); - struct pci_dev *pdev = persist->pdev; - int ret; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - return; - ret = mlx4_restart_one(pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else - mlx4_dbg(persist->dev, "Reset succeeded\n"); + mlx4_handle_error_state(persist); } void mlx4_start_catas_poll(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a61694cc1476..dc2d910fcc88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2624,6 +2624,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } } + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + slave_start: err = mlx4_cmd_init(dev); if (err) { @@ -3108,6 +3113,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->persist->dev = dev; pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d41af84f965b..aa1ecbc5a606 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1178,7 +1178,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); -void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index da425d2f3708..7d5d317cb7a6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -411,6 +411,11 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +enum { + MLX4_DEVICE_STATE_UP = 1 << 0, + MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -753,6 +758,8 @@ struct mlx4_dev_persistent { enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; struct work_struct catas_work; struct workqueue_struct *catas_wq; + struct mutex device_state_mutex; /* protect HW state */ + u8 state; }; struct mlx4_dev { From f5aef5aa35063f2b45c3605871cd525d0cb7fb7a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:39 +0200 Subject: [PATCH 5/9] net/mlx4_core: Activate reset flow upon fatal command cases We activate reset flow upon command fatal errors, when the device enters an erroneous state, and must be reset. The cases below are assumed to be fatal: FW command timed-out, an error from FW on closing commands, pci is offline when posting/pending a command. In those cases we place the device into an error state: chip is reset, pending commands are awakened and completed immediately. Subsequent commands will return immediately. The return code in the above cases will depend on the command. Commands which free and close resources will return success (because the chip was reset, so callers may safely free their kernel resources). Other commands will return -EIO. Since the device's state was marked as error, the catas poller will detect this and restart the device's software stack (as is done when a FW internal error is directly detected). The device state is protected by a persistent mutex lives on its mlx4_dev, as such no need any more for the hcr_mutex which is removed. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 11 +- drivers/net/ethernet/mellanox/mlx4/cmd.c | 163 ++++++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/mcg.c | 3 + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- include/linux/mlx4/cmd.h | 1 + 5 files changed, 153 insertions(+), 27 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 588d6b5e1211..63f14ffcc906 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -42,8 +42,8 @@ enum { -static int internal_err_reset = 1; -module_param(internal_err_reset, int, 0644); +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero" " (default 1, in SRIOV mode default is 0)"); @@ -92,7 +92,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) int err; struct mlx4_dev *dev; - if (!internal_err_reset) + if (!mlx4_internal_err_reset) return; mutex_lock(&persist->device_state_mutex); @@ -110,6 +110,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) /* At that step HW was already reset, now notify clients */ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); return; out: @@ -157,7 +158,7 @@ static void poll_catas(unsigned long dev_ptr) return; internal_err: - if (internal_err_reset) + if (mlx4_internal_err_reset) queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } @@ -177,7 +178,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) /*If we are in SRIOV the default of the module param must be 0*/ if (mlx4_is_mfunc(dev)) - internal_err_reset = 0; + mlx4_internal_err_reset = 0; INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 7cd90e6a4272..3895b2b5fc92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -182,6 +182,72 @@ static u8 mlx4_errno_to_status(int errno) } } +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -258,7 +324,7 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, cmd->free_head = context->next; spin_unlock(&cmd->context_lock); - init_completion(&context->done); + reinit_completion(&context->done); mlx4_comm_cmd_post(dev, op, param); @@ -323,17 +389,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; - int ret = -EAGAIN; + int ret = -EIO; unsigned long end; - mutex_lock(&cmd->hcr_mutex); - - if (pci_channel_offline(dev->persist->pdev)) { + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -347,7 +417,6 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -391,7 +460,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: - mutex_unlock(&cmd->hcr_mutex); + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + return ret; } @@ -464,12 +537,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->persist->pdev)) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { /* * Device is going through error recovery * and cannot accept commands. */ - err = -EIO; + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -483,7 +556,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) - goto out; + goto out_reset; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { @@ -493,6 +566,11 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * and cannot accept commands. */ err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -502,8 +580,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -ETIMEDOUT; - goto out; + err = -EIO; + goto out_reset; } if (out_is_imm) @@ -515,10 +593,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); - if (err) + if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: up(&priv->cmd.poll_sem); return err; @@ -565,17 +650,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, - in_modifier, op_modifier, op, context->token, 1); + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EBUSY; - goto out; + err = -EIO; + goto out_reset; } err = context->result; @@ -592,12 +679,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, else mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + goto out; } if (out_is_imm) *out_param = context->out_param; +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; @@ -613,9 +708,12 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, u16 op, unsigned long timeout, int native) { if (pci_channel_offline(dev->persist->pdev)) - return -EIO; + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, @@ -2121,7 +2219,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { - mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2232,6 +2329,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; + /* To support fatal error flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. + */ + init_completion(&priv->cmd.context[i].done); } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; @@ -2329,6 +2431,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) return slave - 1; } +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) { struct mlx4_active_ports actv_ports; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index a3867e7ef885..d22d9283d2cd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1318,6 +1318,9 @@ int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index aa1ecbc5a606..5c772ea4473b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -235,6 +235,7 @@ do { \ extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; #define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) #define ALL_SLAVES 0xff @@ -607,7 +608,6 @@ struct mlx4_mgm { struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; - struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 64d25941b329..e7543844cc7a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -279,6 +279,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); +void mlx4_cmd_wake_completions(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) From c69453e294c9f16da977b68e658a8028b854c209 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:40 +0200 Subject: [PATCH 6/9] net/mlx4_core: Manage interface state for Reset flow cases We need to manage interface state to sync between reset flow and some other relative cases such as remove_one. This has to be done to prevent certain races. For example in case software stack is down as a result of unload call, the remove_one should skip the unload phase. Implement the remove_one case, handling AER and other cases comes next. The interface can be up/down, upon remove_one, the state will include an extra bit indicating that the device is cleaned-up, forcing other tasks to finish before the final cleanup. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 13 +++++++++++-- drivers/net/ethernet/mellanox/mlx4/intf.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/main.c | 13 ++++++++++++- include/linux/mlx4/device.h | 7 +++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 63f14ffcc906..3fcf3cfaedfc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -122,8 +122,14 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) int err = 0; mlx4_enter_error_state(persist); - err = mlx4_restart_one(persist->pdev); - mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); } static void dump_err_buf(struct mlx4_dev *dev) @@ -211,6 +217,9 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) iounmap(priv->catas_err.map); priv->catas_err.map = NULL; } + + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); } int mlx4_catas_init(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 116895ac8b35..fba0b96a6f28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -138,6 +138,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_lock(&intf_mutex); + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx4_add_device(intf, priv); @@ -162,6 +163,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) mlx4_remove_device(intf, priv); list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; mutex_unlock(&intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index dc2d910fcc88..d59cae5da3f0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3114,6 +3114,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); if (ret) { @@ -3232,7 +3233,17 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); - mlx4_unload_one(pdev); + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); mlx4_catas_end(dev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7d5d317cb7a6..33f9ca71925c 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -416,6 +416,11 @@ enum { MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, }; +enum { + MLX4_INTERFACE_STATE_UP = 1 << 0, + MLX4_INTERFACE_STATE_DELETION = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -760,6 +765,8 @@ struct mlx4_dev_persistent { struct workqueue_struct *catas_wq; struct mutex device_state_mutex; /* protect HW state */ u8 state; + struct mutex interface_state_mutex; /* protect SW state */ + u8 interface_state; }; struct mlx4_dev { From 2ba5fbd62b2534335f4e3b844ecc7860115525a3 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:41 +0200 Subject: [PATCH 7/9] net/mlx4_core: Handle AER flow properly Fix AER callbacks to work properly, it includes: - Refractoring AER to be aligned with Reset flow support. - Sync with concurrent catas flow. In addition, fix the shutdown PCI callback to sync with concurrent catas flow. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 71 +++++++++++++++++++++-- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index d59cae5da3f0..6bb0fca137cd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3120,7 +3120,10 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) { kfree(dev->persist); kfree(priv); + } else { + pci_save_state(pdev); } + return ret; } @@ -3351,23 +3354,79 @@ MODULE_DEVICE_TABLE(pci, mlx4_pci_table); static pci_ers_result_t mlx4_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state) { - mlx4_unload_one(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_err(persist->dev, "mlx4_pci_err_detected was called\n"); + mlx4_enter_error_state(persist); - return state == pci_channel_io_perm_failure ? - PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET; + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + + mutex_unlock(&persist->interface_state_mutex); + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + pci_disable_device(pdev); + return PCI_ERS_RESULT_NEED_RESET; } static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int ret; + int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; + int total_vfs; - ret = __mlx4_init_one(pdev, priv->pci_dev_data, priv); + mlx4_err(dev, "mlx4_pci_slot_reset was called\n"); + ret = pci_enable_device(pdev); + if (ret) { + mlx4_err(dev, "Can not re-enable device, ret=%d\n", ret); + return PCI_ERS_RESULT_DISCONNECT; + } + + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); + + mutex_lock(&persist->interface_state_mutex); + if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { + ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, + priv); + if (ret) { + mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", + __func__, ret); + goto end; + } + + ret = restore_current_port_types(dev, dev->persist-> + curr_port_type, dev->persist-> + curr_port_poss_type); + if (ret) + mlx4_err(dev, "could not restore original port types (%d)\n", ret); + } +end: + mutex_unlock(&persist->interface_state_mutex); return ret ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } +static void mlx4_shutdown(struct pci_dev *pdev) +{ + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + + mlx4_info(persist->dev, "mlx4_shutdown was called\n"); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + mutex_unlock(&persist->interface_state_mutex); +} + static const struct pci_error_handlers mlx4_err_handler = { .error_detected = mlx4_pci_err_detected, .slot_reset = mlx4_pci_slot_reset, @@ -3377,7 +3436,7 @@ static struct pci_driver mlx4_driver = { .name = DRV_NAME, .id_table = mlx4_pci_table, .probe = mlx4_init_one, - .shutdown = mlx4_unload_one, + .shutdown = mlx4_shutdown, .remove = mlx4_remove_one, .err_handler = &mlx4_err_handler, }; From 55ad359225b2232b9b8f04a0dfa169bd3a7d86d2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:42 +0200 Subject: [PATCH 8/9] net/mlx4_core: Enable device recovery flow with SRIOV In SRIOV, both the PF and the VF may attempt device recovery whenever they assume that the device is not functioning. When the PF driver resets the device, the VF should detect this and attempt to reinitialize itself. The VF must be able to reset itself under all circumstances, even if the PF is not responsive. The VF shall reset itself in the following cases: 1. Commands are not processed within reasonable time over the communication channel. This is done considering device state and the correct return code based on the command as was done in the native mode, done in the next patch. 2. The VF driver receives an internal error event reported by the PF on the communication channel. This occurs when the PF driver resets the device or when VF is out of sync with the PF. Add 'VF reset' capability, which allows the VF to reinitialize itself even when the PF is not responsive. As PF and VF may run their reset flow simulantanisly, there are several cases that are handled: - Prevent freeing VF resources upon FLR, when PF is in its unloading stage. - Prevent PF getting VF commands before it has finished initializing its resources. - Upon VF startup, check that comm-channel is online before sending commands to the PF and getting timed-out. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 120 +++++++++++++++--- drivers/net/ethernet/mellanox/mlx4/cmd.c | 77 +++++++++--- drivers/net/ethernet/mellanox/mlx4/eq.c | 10 +- drivers/net/ethernet/mellanox/mlx4/intf.c | 6 +- drivers/net/ethernet/mellanox/mlx4/main.c | 135 ++++++++++++++++----- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 +- include/linux/mlx4/cmd.h | 2 + include/linux/mlx4/device.h | 5 + 8 files changed, 292 insertions(+), 70 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 3fcf3cfaedfc..715de8affcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -45,8 +45,7 @@ enum { int mlx4_internal_err_reset = 1; module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, - "Reset device on internal errors if non-zero" - " (default 1, in SRIOV mode default is 0)"); + "Reset device on internal errors if non-zero (default 1)"); static int read_vendor_id(struct mlx4_dev *dev) { @@ -71,6 +70,9 @@ static int mlx4_reset_master(struct mlx4_dev *dev) { int err = 0; + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + if (!pci_channel_offline(dev->persist->pdev)) { err = read_vendor_id(dev); /* If PCI can't be accessed to read vendor ID we assume that its @@ -87,6 +89,81 @@ static int mlx4_reset_master(struct mlx4_dev *dev) return err; } +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. + */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +static int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; +} + void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) { int err; @@ -101,7 +178,10 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) dev = persist->dev; mlx4_err(dev, "device is going to be reset\n"); - err = mlx4_reset_master(dev); + if (mlx4_is_slave(dev)) + err = mlx4_reset_slave(dev); + else + err = mlx4_reset_master(dev); BUG_ON(err != 0); dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; @@ -148,8 +228,15 @@ static void poll_catas(unsigned long dev_ptr) { struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); - - if (readl(priv->catas_err.map)) { + u32 slave_read; + + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; + } + } else if (readl(priv->catas_err.map)) { dump_err_buf(dev); goto internal_err; } @@ -182,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); phys_addr_t addr; - /*If we are in SRIOV the default of the module param must be 0*/ - if (mlx4_is_mfunc(dev)) - mlx4_internal_err_reset = 0; - INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) + - priv->fw.catas_offset; - - priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", - (unsigned long long) addr); - return; + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; + + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } } priv->catas_err.timer.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3895b2b5fc92..7652eed4bbc8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -729,7 +730,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, EXPORT_SYMBOL_GPL(__mlx4_cmd); -static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -1945,8 +1946,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && - (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); goto reset_slave; + } mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { @@ -1980,7 +1984,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ - mlx4_delete_all_resources_for_slave(dev, slave); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; @@ -2056,17 +2071,28 @@ void mlx4_master_comm_channel(struct work_struct *work) static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int wr_toggle; - int rd_toggle; + u32 wr_toggle; + u32 rd_toggle; unsigned long end; - wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; - end = jiffies + msecs_to_jiffies(5000); + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { - rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; - if (rd_toggle == wr_toggle) { - priv->cmd.comm_toggle = rd_toggle; + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; return 0; } @@ -2172,13 +2198,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - err = mlx4_ARM_COMM_CHANNEL(dev); - if (err) { - mlx4_err(dev, " Failed to arm comm channel eq: %x\n", - err); - goto err_resource; - } - } else { err = sync_toggles(dev); if (err) { @@ -2188,8 +2207,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } return 0; -err_resource: - mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); @@ -2266,6 +2283,27 @@ int mlx4_cmd_init(struct mlx4_dev *dev) return -ENOMEM; } +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* Report an internal error event to all + * communication channels. + */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + } +} + void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2281,6 +2319,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; } iounmap(priv->mfunc.comm); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 7538c9ce98a9..2f2e6067426d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", i); - - mlx4_delete_all_resources_for_slave(dev, i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index fba0b96a6f28..68d2bad325d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -144,8 +144,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); - if (!mlx4_is_slave(dev)) - mlx4_start_catas_poll(dev); + mlx4_start_catas_poll(dev); return 0; } @@ -155,8 +154,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - if (!mlx4_is_slave(dev)) - mlx4_stop_catas_poll(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 6bb0fca137cd..1baf1f1e2866 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -1579,6 +1581,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev) } } +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. + */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1594,6 +1640,12 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); @@ -1637,6 +1689,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); +err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -2494,11 +2547,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) !!((flags) & MLX4_FLAG_MASTER)) static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, - u8 total_vfs, int existing_vfs) + u8 total_vfs, int existing_vfs, int reset_flow) { u64 dev_flags = dev->flags; int err = 0; + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + atomic_inc(&pf_loading); if (dev->flags & MLX4_FLAG_SRIOV) { if (existing_vfs != total_vfs) { @@ -2533,6 +2594,7 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, disable_sriov: atomic_dec(&pf_loading); +free_mem: dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; @@ -2557,7 +2619,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap } static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, - int total_vfs, int *nvfs, struct mlx4_priv *priv) + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) { struct mlx4_dev *dev; unsigned sum = 0; @@ -2679,8 +2742,10 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, goto err_fw; if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; @@ -2722,7 +2787,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, if (dev->flags & MLX4_FLAG_SRIOV) { if (!existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev) && !reset_flow) atomic_dec(&pf_loading); dev->flags &= ~MLX4_FLAG_SRIOV; } @@ -2736,7 +2801,8 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); @@ -2848,6 +2914,17 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, goto err_steer; mlx4_init_quotas(dev); + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -2866,7 +2943,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, priv->removed = 0; - if (mlx4_is_master(dev) && dev->persist->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2925,10 +3002,12 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: - if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } - if (mlx4_is_master(dev) && dev->persist->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3073,7 +3152,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, if (err) goto err_release_regions; - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); if (err) goto err_catas; @@ -3131,9 +3210,11 @@ static void mlx4_clean_dev(struct mlx4_dev *dev) { struct mlx4_dev_persistent *persist = dev->persist; struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); memset(priv, 0, sizeof(*priv)); priv->dev.persist = persist; + priv->dev.flags = flags; } static void mlx4_unload_one(struct pci_dev *pdev) @@ -3143,7 +3224,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; int p, i; - int active_vfs = 0; if (priv->removed) return; @@ -3157,14 +3237,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) pci_dev_data = priv->pci_dev_data; - /* Disabling SR-IOV is not allowed while there are active vf's */ - if (mlx4_is_master(dev)) { - active_vfs = mlx4_how_many_lives_vf(dev); - if (active_vfs) { - pr_warn("Removing PF when there are active VF's !!\n"); - pr_warn("Will not disable SR-IOV.\n"); - } - } mlx4_stop_sense(dev); mlx4_unregister_device(dev); @@ -3208,12 +3280,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - dev->flags &= ~MLX4_FLAG_SRIOV; - dev->persist->num_vfs = 0; - } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -3235,11 +3301,21 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + int active_vfs = 0; mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + /* device marked to be under deletion running now without the lock * letting other tasks to be terminated */ @@ -3248,6 +3324,11 @@ static void mlx4_remove_one(struct pci_dev *pdev) else mlx4_info(dev, "%s: interface is down\n", __func__); mlx4_catas_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } + pci_release_regions(pdev); pci_disable_device(pdev); kfree(dev->persist); @@ -3287,7 +3368,7 @@ int mlx4_restart_one(struct pci_dev *pdev) memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); if (err) { mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", __func__, pci_name(pdev), err); @@ -3397,7 +3478,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) mutex_lock(&persist->interface_state_mutex); if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, - priv); + priv, 1); if (ret) { mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", __func__, ret); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 5c772ea4473b..2a15b8248e77 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -85,7 +85,9 @@ enum { MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, - MLX4_CLOCK_SIZE = 0x00008 + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc }; enum { @@ -120,6 +122,8 @@ enum mlx4_mpt_state { }; #define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 + enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, @@ -1162,6 +1166,7 @@ enum { int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e7543844cc7a..c989442ffc6a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -280,6 +280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); void mlx4_cmd_wake_completions(struct mlx4_dev *dev); +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) @@ -289,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) +#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17) #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 33f9ca71925c..5ef54e145e4d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,10 @@ enum { MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; +enum { + MLX4_VF_CAP_FLAG_RESET = 1 << 0 +}; + /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. * Currently, this only includes QPs used by the ETH interface, @@ -545,6 +549,7 @@ struct mlx4_caps { u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + u32 vf_caps; }; struct mlx4_buf_list { From 0cd9302734111abc0b5912b695336f2ee63cb22b Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:43 +0200 Subject: [PATCH 9/9] net/mlx4_core: Reset flow activation upon SRIOV fatal command cases When SRIOV commands are executed over the comm-channel and get a fatal error (e.g. timeout, closing command failure) the VF enters into error state and reset flow is activated. To be able to recognize whether the failure was on a closing command, the operational code for the given VHCR command is used. Once the device entered into an error state we prevent redundant error messages from being printed. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 125 ++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/main.c | 16 +-- drivers/net/ethernet/mellanox/mlx4/mcg.c | 3 + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 4 +- 4 files changed, 107 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 7652eed4bbc8..2b48932855e7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -257,16 +257,30 @@ static int comm_pending(struct mlx4_dev *dev) return (swab32(status) >> 31) != priv->cmd.comm_toggle; } -static void mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) +static int mlx4_comm_cmd_post(struct mlx4_dev *dev, u8 cmd, u16 param) { struct mlx4_priv *priv = mlx4_priv(dev); u32 val; + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the function was rest, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + mutex_lock(&dev->persist->device_state_mutex); + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mutex_unlock(&dev->persist->device_state_mutex); + return -EIO; + } + priv->cmd.comm_toggle ^= 1; val = param | (cmd << 16) | (priv->cmd.comm_toggle << 31); __raw_writel((__force u32) cpu_to_be32(val), &priv->mfunc.comm->slave_write); mmiowb(); + mutex_unlock(&dev->persist->device_state_mutex); + return 0; } static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, @@ -286,7 +300,13 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, /* Write command */ down(&priv->cmd.poll_sem); - mlx4_comm_cmd_post(dev, cmd, param); + if (mlx4_comm_cmd_post(dev, cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } end = msecs_to_jiffies(timeout) + jiffies; while (comm_pending(dev) && time_before(jiffies, end)) @@ -298,18 +318,23 @@ static int mlx4_comm_cmd_poll(struct mlx4_dev *dev, u8 cmd, u16 param, * is MLX4_DELAY_RESET_SLAVE*/ if ((MLX4_COMM_CMD_RESET == cmd)) { err = MLX4_DELAY_RESET_SLAVE; + goto out; } else { - mlx4_warn(dev, "Communication channel timed out\n"); - err = -ETIMEDOUT; + mlx4_warn(dev, "Communication channel command 0x%x timed out\n", + cmd); + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); } } + if (err) + mlx4_enter_error_state(dev->persist); +out: up(&priv->cmd.poll_sem); return err; } -static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, - u16 param, unsigned long timeout) +static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 vhcr_cmd, + u16 param, u16 op, unsigned long timeout) { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; struct mlx4_cmd_context *context; @@ -327,32 +352,47 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, reinit_completion(&context->done); - mlx4_comm_cmd_post(dev, op, param); + if (mlx4_comm_cmd_post(dev, vhcr_cmd, param)) { + /* Only in case the device state is INTERNAL_ERROR, + * mlx4_comm_cmd_post returns with an error + */ + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + goto out; + } if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { - mlx4_warn(dev, "communication channel command 0x%x timed out\n", - op); - err = -EBUSY; - goto out; + mlx4_warn(dev, "communication channel command 0x%x (op=0x%x) timed out\n", + vhcr_cmd, op); + goto out_reset; } err = context->result; if (err && context->fw_status != CMD_STAT_MULTI_FUNC_REQ) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", - op, context->fw_status); - goto out; + vhcr_cmd, context->fw_status); + if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; } -out: /* wait for comm channel ready * this is necessary for prevention the race * when switching between event to polling mode + * Skipping this section in case the device is in FATAL_ERROR state, + * In this state, no commands are sent via the comm channel until + * the device has returned from reset. */ - end = msecs_to_jiffies(timeout) + jiffies; - while (comm_pending(dev) && time_before(jiffies, end)) - cond_resched(); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { + end = msecs_to_jiffies(timeout) + jiffies; + while (comm_pending(dev) && time_before(jiffies, end)) + cond_resched(); + } + goto out; +out_reset: + err = mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + mlx4_enter_error_state(dev->persist); +out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; cmd->free_head = context - cmd->context; @@ -363,10 +403,13 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, } int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout) + u16 op, unsigned long timeout) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + if (mlx4_priv(dev)->cmd.use_events) - return mlx4_comm_cmd_wait(dev, cmd, param, timeout); + return mlx4_comm_cmd_wait(dev, cmd, param, op, timeout); return mlx4_comm_cmd_poll(dev, cmd, param, timeout); } @@ -502,8 +545,11 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } ret = mlx4_status_to_errno(vhcr->status); } + if (ret && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, op_modifier); } else { - ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, + ret = mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_POST, 0, op, MLX4_COMM_TIME + timeout); if (!ret) { if (out_is_imm) { @@ -517,9 +563,14 @@ static int mlx4_slave_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, } } ret = mlx4_status_to_errno(vhcr->status); - } else - mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", - op); + } else { + if (dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR) + ret = mlx4_internal_err_ret_value(dev, op, + op_modifier); + else + mlx4_err(dev, "failed execution of VHCR_POST command opcode 0x%x\n", op); + } } mutex_unlock(&priv->cmd.slave_cmd_mutex); @@ -1559,8 +1610,10 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, ALIGN(sizeof(struct mlx4_vhcr_cmd), MLX4_ACCESS_MEM_ALIGN), 1); if (ret) { - mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", - __func__, ret); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading vhcr ret: 0x%x\n", + __func__, ret); kfree(vhcr); return ret; } @@ -1599,11 +1652,14 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, goto out_status; } - if (mlx4_ACCESS_MEM(dev, inbox->dma, slave, - vhcr->in_param, - MLX4_MAILBOX_SIZE, 1)) { - mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", - __func__, cmd->opcode); + ret = mlx4_ACCESS_MEM(dev, inbox->dma, slave, + vhcr->in_param, + MLX4_MAILBOX_SIZE, 1); + if (ret) { + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s: Failed reading inbox (cmd:0x%x)\n", + __func__, cmd->opcode); vhcr_cmd->status = CMD_STAT_INTERNAL_ERR; goto out_status; } @@ -1651,8 +1707,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, } if (err) { - mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", - vhcr->op, slave, vhcr->errno, err); + if (!(dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_warn(dev, "vhcr command:0x%x slave:%d failed with error:%d, status %d\n", + vhcr->op, slave, vhcr->errno, err); vhcr_cmd->status = mlx4_errno_to_status(err); goto out_status; } @@ -1667,7 +1724,9 @@ static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, /* If we failed to write back the outbox after the *command was successfully executed, we must fail this * slave, as it is now in undefined state */ - mlx4_err(dev, "%s:Failed writing outbox\n", __func__); + if (!(dev->persist->state & + MLX4_DEVICE_STATE_INTERNAL_ERROR)) + mlx4_err(dev, "%s:Failed writing outbox\n", __func__); goto out; } } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 1baf1f1e2866..9c7ef0bffb52 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1484,7 +1484,8 @@ static void mlx4_slave_exit(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); mutex_lock(&priv->cmd.slave_cmd_mutex); - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, + MLX4_COMM_TIME)) mlx4_warn(dev, "Failed to close slave function\n"); mutex_unlock(&priv->cmd.slave_cmd_mutex); } @@ -1648,7 +1649,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, - MLX4_COMM_TIME); + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME); /* if we are in the middle of flr the slave will try * NUM_OF_RESET_RETRIES times before leaving.*/ if (ret_from_reset) { @@ -1673,22 +1674,23 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mlx4_warn(dev, "Sending vhcr0\n"); if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16, - MLX4_COMM_TIME)) + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; - if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME)) + if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, + MLX4_COMM_CMD_NA_OP, MLX4_COMM_TIME)) goto err; mutex_unlock(&priv->cmd.slave_cmd_mutex); return 0; err: - mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); + mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_CMD_NA_OP, 0); err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index d22d9283d2cd..bd9ea0d01aae 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1350,6 +1350,9 @@ static int mlx4_QP_ATTACH(struct mlx4_dev *dev, struct mlx4_qp *qp, MLX4_CMD_WRAPPED); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && !attach && + dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 2a15b8248e77..096a81c16a9b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -123,6 +123,8 @@ enum mlx4_mpt_state { #define MLX4_COMM_TIME 10000 #define MLX4_COMM_OFFLINE_TIME_OUT 30000 +#define MLX4_COMM_CMD_NA_OP 0x0 + enum { MLX4_COMM_CMD_RESET, @@ -1173,7 +1175,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev); void mlx4_cmd_use_polling(struct mlx4_dev *dev); int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param, - unsigned long timeout); + u16 op, unsigned long timeout); void mlx4_cq_tasklet_cb(unsigned long data); void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);