Skip to content

Commit

Permalink
Merge branch 'mlx4-next'
Browse files Browse the repository at this point in the history
Or Gerlitz says:

====================
mlx4: Fix and enhance the device reset flow

This series from Yishai Hadas fixes the device reset flow and adds SRIOV support.

Reset flows are required whenever a device experiences errors, is unresponsive,
or is not in a deterministic state. In such cases, the driver is expected to
reset the HW and continue operation. When SRIOV is enabled, these requirements
apply both to PF and VF devices.

Currently, the mlx4 reset flow doesn't work properly: when a fatal error is
detected on the FW internal buffer the chip is not reset and stays in its
bad state. There are also cases that are assumed to be fatal, such as non-responsive
FW or errors from closing commands, but they are not handled today.

The AER mechanism should also be fixed:
- It should use mlx4_load_one instead of __mlx4_init_one which is done
  upon HCA probing.
- It must be aligned with concurrent catas flow, mark device to be in
  an error state, reset chip, etc.
- Port types should be restored to their original values before error occurred.

In addition, the SRIOV use-case isn't supported.

In above cases when the device state becomes fatal we must act as follows:
1) Reset the chip and mark the HW device state as in fatal error.
2) Wake up any pending commands and prevent new ones from coming in.
3) Restart the software stack.

We also address the SRIOV mode as follows: In case the PF detects a fatal error,
it lets VFs know about that, then both itself and VFs are restarted asynchronously.
However, in case only a VF encounters a fatal error or is forced to reset, it
resets its own VF state and then restarts its software.

changes from V0:

No need to call pci_disable_device upon permanent PCI error. This will
be done as part of mlx4_remove_one which is called later once we
return PCI_ERS_RESULT_DISCONNECT from the pci error handler.

Initial toggle value should use only the T bit and not the whole byte value.
Not doing so sometimes broke SRIOV, because the junk value was seen by the VF
as a non-ready comm channel.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Jan 25, 2015
2 parents 7aee42c + 0cd9302 commit bc579ae
Show file tree
Hide file tree
Showing 27 changed files with 1,046 additions and 347 deletions.
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/mlx4/alias_GUID.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev,
continue;

slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ;
if (slave_id >= dev->dev->num_vfs + 1)
if (slave_id >= dev->dev->persist->num_vfs + 1)
return;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
Expand Down
3 changes: 2 additions & 1 deletion drivers/infiniband/hw/mlx4/mad.c
Original file line number Diff line number Diff line change
Expand Up @@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
ctx->ib_dev = &dev->ib_dev;

for (i = 0;
i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1));
i < min(dev->dev->caps.sqp_demux,
(u16)(dev->dev->persist->num_vfs + 1));
i++) {
struct mlx4_active_ports actv_ports =
mlx4_get_active_ports(dev->dev, i);
Expand Down
17 changes: 10 additions & 7 deletions drivers/infiniband/hw/mlx4/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,

props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) &
0xffffff;
props->vendor_part_id = dev->dev->pdev->device;
props->vendor_part_id = dev->dev->persist->pdev->device;
props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32));
memcpy(&props->sys_image_guid, out_mad->data + 4, 8);

Expand Down Expand Up @@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
{
struct mlx4_ib_dev *dev =
container_of(device, struct mlx4_ib_dev, ib_dev.dev);
return sprintf(buf, "MT%d\n", dev->dev->pdev->device);
return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device);
}

static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
Expand Down Expand Up @@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev)
int i;

if (mlx4_is_master(ibdev->dev)) {
for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) {
for (slave = 0; slave <= ibdev->dev->persist->num_vfs;
++slave) {
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) {
for (i = 0;
i < ibdev->dev->phys_caps.pkey_phys_table_len[port];
Expand Down Expand Up @@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev)
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) {
for (j = 0; j < eq_per_port; j++) {
snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s",
i, j, dev->pdev->bus->name);
i, j, dev->persist->pdev->bus->name);
/* Set IRQ for specific name (per ring) */
if (mlx4_assign_eq(dev, name, NULL,
&ibdev->eq_table[eq])) {
Expand Down Expand Up @@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)

ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev);
if (!ibdev) {
dev_err(&dev->pdev->dev, "Device struct alloc failed\n");
dev_err(&dev->persist->pdev->dev,
"Device struct alloc failed\n");
return NULL;
}

Expand All @@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
ibdev->num_ports = num_ports;
ibdev->ib_dev.phys_port_cnt = ibdev->num_ports;
ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors;
ibdev->ib_dev.dma_device = &dev->pdev->dev;
ibdev->ib_dev.dma_device = &dev->persist->pdev->dev;

if (dev->caps.userspace_caps)
ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION;
Expand Down Expand Up @@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
sizeof(long),
GFP_KERNEL);
if (!ibdev->ib_uc_qpns_bitmap) {
dev_err(&dev->pdev->dev, "bit map alloc failed\n");
dev_err(&dev->persist->pdev->dev,
"bit map alloc failed\n");
goto err_steer_qp_release;
}

Expand Down
6 changes: 4 additions & 2 deletions drivers/infiniband/hw/mlx4/mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device
if (!mfrpl->ibfrpl.page_list)
goto err_free;

mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev,
mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist->
pdev->dev,
size, &mfrpl->map,
GFP_KERNEL);
if (!mfrpl->mapped_page_list)
Expand All @@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
int size = page_list->max_page_list_len * sizeof (u64);

dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list,
dma_free_coherent(&dev->dev->persist->pdev->dev, size,
mfrpl->mapped_page_list,
mfrpl->map);
kfree(mfrpl->ibfrpl.page_list);
kfree(mfrpl);
Expand Down
6 changes: 3 additions & 3 deletions drivers/infiniband/hw/mlx4/sysfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max)
char base_name[9];

/* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */
strlcpy(name, pci_name(dev->dev->pdev), max);
strlcpy(name, pci_name(dev->dev->persist->pdev), max);
strncpy(base_name, name, 8); /*till xxxx:yy:*/
base_name[8] = '\0';
/* with no ARI only 3 last bits are used so when the fn is higher than 8
Expand Down Expand Up @@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return 0;

for (i = 0; i <= device->dev->num_vfs; ++i)
for (i = 0; i <= device->dev->persist->num_vfs; ++i)
register_one_pkey_tree(device, i);

return 0;
Expand All @@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device)
if (!mlx4_is_master(device->dev))
return;

for (slave = device->dev->num_vfs; slave >= 0; --slave) {
for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) {
list_for_each_entry_safe(p, t,
&device->pkeys.pkey_port_list[slave],
entry) {
Expand Down
15 changes: 9 additions & 6 deletions drivers/net/ethernet/mellanox/mlx4/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
buf->nbufs = 1;
buf->npages = 1;
buf->page_shift = get_order(size) + PAGE_SHIFT;
buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev,
buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev,
size, &t, gfp);
if (!buf->direct.buf)
return -ENOMEM;
Expand All @@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,

for (i = 0; i < buf->nbufs; ++i) {
buf->page_list[i].buf =
dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE,
dma_alloc_coherent(&dev->persist->pdev->dev,
PAGE_SIZE,
&t, gfp);
if (!buf->page_list[i].buf)
goto err_free;
Expand Down Expand Up @@ -657,15 +658,17 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf)
int i;

if (buf->nbufs == 1)
dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf,
dma_free_coherent(&dev->persist->pdev->dev, size,
buf->direct.buf,
buf->direct.map);
else {
if (BITS_PER_LONG == 64 && buf->direct.buf)
vunmap(buf->direct.buf);

for (i = 0; i < buf->nbufs; ++i)
if (buf->page_list[i].buf)
dma_free_coherent(&dev->pdev->dev, PAGE_SIZE,
dma_free_coherent(&dev->persist->pdev->dev,
PAGE_SIZE,
buf->page_list[i].buf,
buf->page_list[i].map);
kfree(buf->page_list);
Expand Down Expand Up @@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp
if (!mlx4_alloc_db_from_pgdir(pgdir, db, order))
goto out;

pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp);
pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp);
if (!pgdir) {
ret = -ENOMEM;
goto out;
Expand Down Expand Up @@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db)
set_bit(i, db->u.pgdir->bits[o]);

if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) {
dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE,
db->u.pgdir->db_page, db->u.pgdir->db_dma);
list_del(&db->u.pgdir->list);
kfree(db->u.pgdir);
Expand Down
Loading

0 comments on commit bc579ae

Please sign in to comment.