Skip to content

Commit

Permalink
Merge branch 'mlx4-HA-LAG-SRIOV-VF'
Browse files Browse the repository at this point in the history
Or Gerlitz says:

====================
Add HA and LAG support for mlx4 SRIOV VFs

This series is built upon the code added in commit ce388ff "Merge branch
'mlx4-next'" which added HA and LAG support to mlx4 RoCE and SRIOV services.

We add HA and Link Aggregation support to single ported mlx4 Ethernet VFs.

In this case, the PF Ethernet interfaces are bonded, the VFs see single
port HW devices (already supported) -- however, now this port is highly
available. This means that all VF HW QPs (both VF Ethernet driver and VF
RoCE / RAW QPs) are subject to the V2P (Virtual-To-Physical) mapping which
is managed by the PF driver, and hence resilient across link failures and
such events.

When bonding operates in Dynamic link aggregation (802.3ad) mode, traffic
from each VF will go over the VF "base port" (the port this VF is assigned
to by the admin) as long as this port is up. When the port fails, traffic
from all VFs that are defined on this port will move to the other port, and
be back to their base-port when it recovers.

Moni and Or.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Dec 7, 2015
2 parents 0d76d6e + e57968a commit 0fe3e20
Show file tree
Hide file tree
Showing 9 changed files with 915 additions and 50 deletions.
17 changes: 14 additions & 3 deletions drivers/infiniband/hw/mlx4/mad.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <linux/gfp.h>
#include <rdma/ib_pma.h>

#include <linux/mlx4/driver.h>
#include "mlx4_ib.h"

enum {
Expand Down Expand Up @@ -606,8 +607,8 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
struct ib_mad *mad)
{
struct mlx4_ib_dev *dev = to_mdev(ibdev);
int err;
int slave;
int err, other_port;
int slave = -1;
u8 *slave_id;
int is_eth = 0;

Expand All @@ -625,7 +626,17 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
return -EINVAL;
}
if (mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave)) {
err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave);
if (err && mlx4_is_mf_bonded(dev->dev)) {
other_port = (port == 1) ? 2 : 1;
err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave);
if (!err) {
port = other_port;
pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
slave, grh->dgid.raw, port, other_port);
}
}
if (err) {
mlx4_ib_warn(ibdev, "failed matching grh\n");
return -ENOENT;
}
Expand Down
24 changes: 20 additions & 4 deletions drivers/net/ethernet/mellanox/mlx4/eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,17 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
eqe = next_slave_event_eqe(slave_eq)) {
slave = eqe->slave_id;

if (eqe->type == MLX4_EVENT_TYPE_PORT_CHANGE &&
eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN &&
mlx4_is_bonded(dev)) {
struct mlx4_port_cap port_cap;

if (!mlx4_QUERY_PORT(dev, 1, &port_cap) && port_cap.link_state)
goto consume;

if (!mlx4_QUERY_PORT(dev, 2, &port_cap) && port_cap.link_state)
goto consume;
}
/* All active slaves need to receive the event */
if (slave == ALL_SLAVES) {
for (i = 0; i <= dev->persist->num_vfs; i++) {
Expand All @@ -174,6 +185,7 @@ void mlx4_gen_slave_eqe(struct work_struct *work)
mlx4_warn(dev, "Failed to generate event for slave %d\n",
slave);
}
consume:
++slave_eq->cons;
}
}
Expand Down Expand Up @@ -594,7 +606,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
break;
for (i = 0; i < dev->persist->num_vfs + 1;
i++) {
if (!test_bit(i, slaves_port.slaves))
int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);

if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
continue;
if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) {
if (i == mlx4_master_func_num(dev))
Expand All @@ -606,7 +620,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
eqe->event.port_change.port =
cpu_to_be32(
(be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
| (mlx4_phys_to_slave_port(dev, i, port) << 28));
| (reported_port << 28));
mlx4_slave_event(dev, i, eqe);
}
} else { /* IB port */
Expand Down Expand Up @@ -636,7 +650,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
for (i = 0;
i < dev->persist->num_vfs + 1;
i++) {
if (!test_bit(i, slaves_port.slaves))
int reported_port = mlx4_is_bonded(dev) ? 1 : mlx4_phys_to_slave_port(dev, i, port);

if (!test_bit(i, slaves_port.slaves) && !mlx4_is_bonded(dev))
continue;
if (i == mlx4_master_func_num(dev))
continue;
Expand All @@ -645,7 +661,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
eqe->event.port_change.port =
cpu_to_be32(
(be32_to_cpu(eqe->event.port_change.port) & 0xFFFFFFF)
| (mlx4_phys_to_slave_port(dev, i, port) << 28));
| (reported_port << 28));
mlx4_slave_event(dev, i, eqe);
}
}
Expand Down
12 changes: 11 additions & 1 deletion drivers/net/ethernet/mellanox/mlx4/fw.c
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,7 @@ int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_c
goto out;

MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
port_cap->link_state = (field & 0x80) >> 7;
port_cap->supported_port_types = field & 3;
port_cap->suggested_type = (field >> 3) & 1;
port_cap->default_sense = (field >> 4) & 1;
Expand Down Expand Up @@ -1310,6 +1311,15 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
port_type |= MLX4_PORT_LINK_UP_MASK;
else if (IFLA_VF_LINK_STATE_DISABLE == admin_link_state)
port_type &= ~MLX4_PORT_LINK_UP_MASK;
else if (IFLA_VF_LINK_STATE_AUTO == admin_link_state && mlx4_is_bonded(dev)) {
int other_port = (port == 1) ? 2 : 1;
struct mlx4_port_cap port_cap;

err = mlx4_QUERY_PORT(dev, other_port, &port_cap);
if (err)
goto out;
port_type |= (port_cap.link_state << 7);
}

MLX4_PUT(outbox->buf, port_type,
QUERY_PORT_SUPPORTED_TYPE_OFFSET);
Expand All @@ -1325,7 +1335,7 @@ int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
MLX4_PUT(outbox->buf, short_field,
QUERY_PORT_CUR_MAX_PKEY_OFFSET);
}

out:
return err;
}

Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlx4/fw.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct mlx4_mod_stat_cfg {
};

struct mlx4_port_cap {
u8 link_state;
u8 supported_port_types;
u8 suggested_type;
u8 default_sense;
Expand Down
107 changes: 97 additions & 10 deletions drivers/net/ethernet/mellanox/mlx4/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1221,23 +1221,100 @@ static ssize_t set_port_ib_mtu(struct device *dev,
return err ? err : count;
}

/* bond for multi-function device */
#define MAX_MF_BOND_ALLOWED_SLAVES 63
static int mlx4_mf_bond(struct mlx4_dev *dev)
{
int err = 0;
struct mlx4_slaves_pport slaves_port1;
struct mlx4_slaves_pport slaves_port2;
DECLARE_BITMAP(slaves_port_1_2, MLX4_MFUNC_MAX);

slaves_port1 = mlx4_phys_to_slaves_pport(dev, 1);
slaves_port2 = mlx4_phys_to_slaves_pport(dev, 2);
bitmap_and(slaves_port_1_2,
slaves_port1.slaves, slaves_port2.slaves,
dev->persist->num_vfs + 1);

/* only single port vfs are allowed */
if (bitmap_weight(slaves_port_1_2, dev->persist->num_vfs + 1) > 1) {
mlx4_warn(dev, "HA mode unsupported for dual ported VFs\n");
return -EINVAL;
}

/* limit on maximum allowed VFs */
if ((bitmap_weight(slaves_port1.slaves, dev->persist->num_vfs + 1) +
bitmap_weight(slaves_port2.slaves, dev->persist->num_vfs + 1)) >
MAX_MF_BOND_ALLOWED_SLAVES)
return -EINVAL;

if (dev->caps.steering_mode != MLX4_STEERING_MODE_DEVICE_MANAGED) {
mlx4_warn(dev, "HA mode unsupported for NON DMFS steering\n");
return -EINVAL;
}

err = mlx4_bond_mac_table(dev);
if (err)
return err;
err = mlx4_bond_vlan_table(dev);
if (err)
goto err1;
err = mlx4_bond_fs_rules(dev);
if (err)
goto err2;

return 0;
err2:
(void)mlx4_unbond_vlan_table(dev);
err1:
(void)mlx4_unbond_mac_table(dev);
return err;
}

static int mlx4_mf_unbond(struct mlx4_dev *dev)
{
int ret, ret1;

ret = mlx4_unbond_fs_rules(dev);
if (ret)
mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret);
ret1 = mlx4_unbond_mac_table(dev);
if (ret1) {
mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1);
ret = ret1;
}
ret1 = mlx4_unbond_vlan_table(dev);
if (ret1) {
mlx4_warn(dev, "multifunction unbond for VLAN table failed (%d)\n", ret1);
ret = ret1;
}
return ret;
}

int mlx4_bond(struct mlx4_dev *dev)
{
int ret = 0;
struct mlx4_priv *priv = mlx4_priv(dev);

mutex_lock(&priv->bond_mutex);

if (!mlx4_is_bonded(dev))
if (!mlx4_is_bonded(dev)) {
ret = mlx4_do_bond(dev, true);
else
ret = 0;
if (ret)
mlx4_err(dev, "Failed to bond device: %d\n", ret);
if (!ret && mlx4_is_master(dev)) {
ret = mlx4_mf_bond(dev);
if (ret) {
mlx4_err(dev, "bond for multifunction failed\n");
mlx4_do_bond(dev, false);
}
}
}

mutex_unlock(&priv->bond_mutex);
if (ret)
mlx4_err(dev, "Failed to bond device: %d\n", ret);
else
if (!ret)
mlx4_dbg(dev, "Device is bonded\n");

return ret;
}
EXPORT_SYMBOL_GPL(mlx4_bond);
Expand All @@ -1249,14 +1326,24 @@ int mlx4_unbond(struct mlx4_dev *dev)

mutex_lock(&priv->bond_mutex);

if (mlx4_is_bonded(dev))
if (mlx4_is_bonded(dev)) {
int ret2 = 0;

ret = mlx4_do_bond(dev, false);
if (ret)
mlx4_err(dev, "Failed to unbond device: %d\n", ret);
if (mlx4_is_master(dev))
ret2 = mlx4_mf_unbond(dev);
if (ret2) {
mlx4_warn(dev, "Failed to unbond device for multifunction (%d)\n", ret2);
ret = ret2;
}
}

mutex_unlock(&priv->bond_mutex);
if (ret)
mlx4_err(dev, "Failed to unbond device: %d\n", ret);
else
if (!ret)
mlx4_dbg(dev, "Device is unbonded\n");

return ret;
}
EXPORT_SYMBOL_GPL(mlx4_unbond);
Expand Down
8 changes: 8 additions & 0 deletions drivers/net/ethernet/mellanox/mlx4/mlx4.h
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,7 @@ struct mlx4_catas_err {
struct mlx4_mac_table {
__be64 entries[MLX4_MAX_MAC_NUM];
int refs[MLX4_MAX_MAC_NUM];
bool is_dup[MLX4_MAX_MAC_NUM];
struct mutex mutex;
int total;
int max;
Expand All @@ -758,6 +759,7 @@ struct mlx4_roce_gid_table {
struct mlx4_vlan_table {
__be32 entries[MLX4_MAX_VLAN_NUM];
int refs[MLX4_MAX_VLAN_NUM];
int is_dup[MLX4_MAX_VLAN_NUM];
struct mutex mutex;
int total;
int max;
Expand Down Expand Up @@ -1225,6 +1227,10 @@ void mlx4_init_roce_gid_table(struct mlx4_dev *dev,
struct mlx4_roce_gid_table *table);
void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
int mlx4_bond_vlan_table(struct mlx4_dev *dev);
int mlx4_unbond_vlan_table(struct mlx4_dev *dev);
int mlx4_bond_mac_table(struct mlx4_dev *dev);
int mlx4_unbond_mac_table(struct mlx4_dev *dev);

int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port, int pkey_tbl_sz);
/* resource tracker functions*/
Expand Down Expand Up @@ -1385,6 +1391,8 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port);
int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave);
int mlx4_config_mad_demux(struct mlx4_dev *dev);
int mlx4_do_bond(struct mlx4_dev *dev, bool enable);
int mlx4_bond_fs_rules(struct mlx4_dev *dev);
int mlx4_unbond_fs_rules(struct mlx4_dev *dev);

enum mlx4_zone_flags {
MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0,
Expand Down
Loading

0 comments on commit 0fe3e20

Please sign in to comment.