Skip to content

Commit

Permalink
Merge tag 'mlx5e-updates-2018-12-14' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5e-updates-2018-12-14 (VF Lag)

From Aviv Heller,

Subsequent patches introduce VF LAG, which provides load-balancing and
high-availability capabilities for VFs associated with different
physical ports of the same Connect-X card.

This series consists of the following:
 - mlx5 devcom, driver infrastructure that facilitates operations that involve
   both core devices (physical functions) of the same card, to synchronize and
   communicate between two driver instances of the same card.
 - Infrastructure for TC rule duplication.
 - Changes to LAG logic to enable its use when SR-IOV is enabled
 - Only PFs in switchdev mode are currently supported.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Dec 15, 2018
2 parents bedf3b3 + 9582466 commit 63de273
Show file tree
Hide file tree
Showing 19 changed files with 1,048 additions and 174 deletions.
11 changes: 10 additions & 1 deletion drivers/infiniband/hw/mlx5/ib_rep.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,21 @@ static const struct mlx5_ib_profile rep_profile = {
/*
 * Runs __mlx5_ib_add() on the mlx5_ib device obtained from @rep via
 * mlx5_ib_rep_to_dev(), using the profile already stored in that device.
 *
 * @dev is not referenced by this function.
 *
 * Returns 0 on success, or -EINVAL when __mlx5_ib_add() returns NULL.
 */
static int
mlx5_ib_nic_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
struct mlx5_ib_dev *ibdev;

ibdev = mlx5_ib_rep_to_dev(rep);
/* __mlx5_ib_add() returns a pointer; NULL is treated as failure here. */
if (!__mlx5_ib_add(ibdev, ibdev->profile))
return -EINVAL;
return 0;
}

static void
mlx5_ib_nic_rep_unload(struct mlx5_eswitch_rep *rep)
{
rep->rep_if[REP_IB].priv = NULL;
struct mlx5_ib_dev *ibdev;

ibdev = mlx5_ib_rep_to_dev(rep);
__mlx5_ib_remove(ibdev, ibdev->profile, MLX5_IB_STAGE_MAX);
}

static int
Expand Down Expand Up @@ -89,6 +97,7 @@ mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
dev = mlx5_ib_rep_to_dev(rep);
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
rep->rep_if[REP_IB].priv = NULL;
ib_dealloc_device(&dev->ib_dev);
}

static void *mlx5_ib_vport_get_proto_dev(struct mlx5_eswitch_rep *rep)
Expand Down
42 changes: 17 additions & 25 deletions drivers/infiniband/hw/mlx5/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
if (!ndev)
goto out;

if (mlx5_lag_is_active(dev->mdev)) {
if (dev->lag_active) {
rcu_read_lock();
upper = netdev_master_upper_dev_get_rcu(ndev);
if (upper) {
Expand Down Expand Up @@ -1848,7 +1848,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
context->lib_caps = req.lib_caps;
print_lib_caps(dev, context->lib_caps);

if (mlx5_lag_is_active(dev->mdev)) {
if (dev->lag_active) {
u8 port = mlx5_core_native_port_num(dev->mdev);

atomic_set(&context->tx_port_affinity,
Expand Down Expand Up @@ -4841,7 +4841,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
struct mlx5_flow_table *ft;
int err;

if (!ns || !mlx5_lag_is_active(mdev))
if (!ns || !mlx5_lag_is_roce(mdev))
return 0;

err = mlx5_cmd_create_vport_lag(mdev);
Expand All @@ -4855,6 +4855,7 @@ static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev)
}

dev->flow_db->lag_demux_ft = ft;
dev->lag_active = true;
return 0;

err_destroy_vport_lag:
Expand All @@ -4866,7 +4867,9 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev)
{
struct mlx5_core_dev *mdev = dev->mdev;

if (dev->flow_db->lag_demux_ft) {
if (dev->lag_active) {
dev->lag_active = false;

mlx5_destroy_flow_table(dev->flow_db->lag_demux_ft);
dev->flow_db->lag_demux_ft = NULL;

Expand Down Expand Up @@ -6173,7 +6176,7 @@ int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
const char *name;

rdma_set_device_sysfs_group(&dev->ib_dev, &mlx5_attr_group);
if (!mlx5_lag_is_active(dev->mdev))
if (!mlx5_lag_is_roce(dev->mdev))
name = "mlx5_%d";
else
name = "mlx5_bond_%d";
Expand Down Expand Up @@ -6207,18 +6210,6 @@ static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev)
cancel_delay_drop(dev);
}

/*
 * Init handler used for the MLX5_IB_STAGE_REP_REG profile stage: calls
 * mlx5_ib_register_vport_reps() for @dev.
 *
 * Always returns 0.
 */
static int mlx5_ib_stage_rep_reg_init(struct mlx5_ib_dev *dev)
{
mlx5_ib_register_vport_reps(dev);

return 0;
}

/*
 * Cleanup handler used for the MLX5_IB_STAGE_REP_REG profile stage: calls
 * mlx5_ib_unregister_vport_reps() for @dev.
 */
static void mlx5_ib_stage_rep_reg_cleanup(struct mlx5_ib_dev *dev)
{
mlx5_ib_unregister_vport_reps(dev);
}

static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev)
{
dev->mdev_events.notifier_call = mlx5_ib_event;
Expand Down Expand Up @@ -6257,8 +6248,6 @@ void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
if (profile->stage[stage].cleanup)
profile->stage[stage].cleanup(dev);
}

ib_dealloc_device((struct ib_device *)dev);
}

void *__mlx5_ib_add(struct mlx5_ib_dev *dev,
Expand Down Expand Up @@ -6392,9 +6381,6 @@ static const struct mlx5_ib_profile nic_rep_profile = {
STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
mlx5_ib_stage_post_ib_reg_umr_init,
NULL),
STAGE_CREATE(MLX5_IB_STAGE_REP_REG,
mlx5_ib_stage_rep_reg_init,
mlx5_ib_stage_rep_reg_cleanup),
};

static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)
Expand Down Expand Up @@ -6462,8 +6448,9 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
if (MLX5_ESWITCH_MANAGER(mdev) &&
mlx5_ib_eswitch_mode(mdev->priv.eswitch) == SRIOV_OFFLOADS) {
dev->rep = mlx5_ib_vport_rep(mdev->priv.eswitch, 0);

return __mlx5_ib_add(dev, &nic_rep_profile);
dev->profile = &nic_rep_profile;
mlx5_ib_register_vport_reps(dev);
return dev;
}

return __mlx5_ib_add(dev, &pf_profile);
Expand All @@ -6485,7 +6472,12 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
}

dev = context;
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
if (dev->profile == &nic_rep_profile)
mlx5_ib_unregister_vport_reps(dev);
else
__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);

ib_dealloc_device((struct ib_device *)dev);
}

static struct mlx5_interface mlx5_ib_interface = {
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/mlx5/mlx5_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -790,7 +790,6 @@ enum mlx5_ib_stages {
MLX5_IB_STAGE_POST_IB_REG_UMR,
MLX5_IB_STAGE_DELAY_DROP,
MLX5_IB_STAGE_CLASS_ATTR,
MLX5_IB_STAGE_REP_REG,
MLX5_IB_STAGE_MAX,
};

Expand Down Expand Up @@ -937,6 +936,7 @@ struct mlx5_ib_dev {
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
struct mlx5_eswitch_rep *rep;
int lag_active;

struct mlx5_ib_lb_state lb;
u8 umr_fence;
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/mlx5/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3258,7 +3258,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
(ibqp->qp_type == IB_QPT_RAW_PACKET) ||
(ibqp->qp_type == IB_QPT_XRC_INI) ||
(ibqp->qp_type == IB_QPT_XRC_TGT)) {
if (mlx5_lag_is_active(dev->mdev)) {
if (dev->lag_active) {
u8 p = mlx5_core_native_port_num(dev->mdev);
tx_affinity = get_tx_affinity(dev, pd, base, p);
context->flags |= cpu_to_be32(tx_affinity << 24);
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \
fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
diag/fs_tracepoint.o diag/fw_tracer.o
lib/devcom.o diag/fs_tracepoint.o diag/fw_tracer.o

#
# Netdev basic
Expand Down
41 changes: 31 additions & 10 deletions drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
u8 *out_ttl)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_rep_priv *uplink_rpriv;
struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
struct rtable *rt;
struct neighbour *n = NULL;

Expand All @@ -28,10 +29,20 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
#else
return -EOPNOTSUPP;
#endif
uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
/* if the egress device isn't on the same HW e-switch, we use the uplink */
if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev))
*out_dev = uplink_rpriv->netdev;

uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
rt->dst.dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));

/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev) ||
dst_is_lag_dev)
*out_dev = uplink_dev;
else
*out_dev = rt->dst.dev;

Expand Down Expand Up @@ -65,8 +76,9 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
struct dst_entry *dst;

#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
struct mlx5e_rep_priv *uplink_rpriv;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_dev, *uplink_upper;
bool dst_is_lag_dev;
int ret;

ret = ipv6_stub->ipv6_dst_lookup(dev_net(mirred_dev), NULL, &dst,
Expand All @@ -77,10 +89,19 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
if (!(*out_ttl))
*out_ttl = ip6_dst_hoplimit(dst);

uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
/* if the egress device isn't on the same HW e-switch, we use the uplink */
if (!switchdev_port_same_parent_id(priv->netdev, dst->dev))
*out_dev = uplink_rpriv->netdev;
uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
dst_is_lag_dev = (uplink_upper &&
netif_is_lag_master(uplink_upper) &&
dst->dev == uplink_upper &&
mlx5_lag_is_sriov(priv->mdev));

/* if the egress device isn't on the same HW e-switch or
* it's a LAG device, use the uplink
*/
if (!switchdev_port_same_parent_id(priv->netdev, dst->dev) ||
dst_is_lag_dev)
*out_dev = uplink_dev;
else
*out_dev = dst->dev;
#else
Expand Down
20 changes: 17 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
Original file line number Diff line number Diff line change
Expand Up @@ -297,17 +297,31 @@ static const struct ethtool_ops mlx5e_rep_ethtool_ops = {
int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr)
{
struct mlx5e_priv *priv = netdev_priv(dev);
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct net_device *uplink_upper = NULL;
struct mlx5e_priv *uplink_priv = NULL;
struct net_device *uplink_dev;

if (esw->mode == SRIOV_NONE)
return -EOPNOTSUPP;

uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
if (uplink_dev) {
uplink_upper = netdev_master_upper_dev_get(uplink_dev);
uplink_priv = netdev_priv(uplink_dev);
}

switch (attr->id) {
case SWITCHDEV_ATTR_ID_PORT_PARENT_ID:
attr->u.ppid.id_len = ETH_ALEN;
ether_addr_copy(attr->u.ppid.id, rep->hw_id);
if (uplink_upper && mlx5_lag_is_sriov(uplink_priv->mdev)) {
ether_addr_copy(attr->u.ppid.id, uplink_upper->dev_addr);
} else {
struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_eswitch_rep *rep = rpriv->rep;

ether_addr_copy(attr->u.ppid.id, rep->hw_id);
}
break;
default:
return -EOPNOTSUPP;
Expand Down
Loading

0 comments on commit 63de273

Please sign in to comment.