Skip to content

Commit

Permalink
net/mlx5: Separate between E-Switch and MPFS
Browse files Browse the repository at this point in the history
Multi-Physical Function Switch (MPFS) is required when a multi-PF
configuration is enabled, to allow passing user-configured unicast MAC
addresses to the requesting PF.

Before this patch, eswitch.c managed the HW MPFS L2 table: the E-Switch
always (regardless of SRIOV) enabled vport(0) (NIC PF) vport context
updates on unicast MAC address list changes, in order to populate the PF's
MPFS L2 table accordingly.

In a downstream patch we would like to allow compiling the driver without
E-Switch functionality. To that end, we move the MPFS L2 table logic out
of eswitch.c into its own file, and provide a Kconfig flag (MLX5_MPFS) to
allow compiling out MPFS for those who don't want Multi-PF support.

The NIC PF netdevice will now directly update the MPFS L2 table via the new
MPFS API. A VF netdevice has no access to the MPFS L2 table, so the E-Switch
remains responsible for updating the MPFS L2 table on behalf of its VFs.

Due to this change, we also no longer need to enable vport(0) (PF vport)
unicast MAC change events when SRIOV is not enabled. This means the
E-Switch is now activated only on SRIOV activation, and is not
required otherwise.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Cc: Jes Sorensen <jsorensen@fb.com>
Cc: kernel-team@fb.com
  • Loading branch information
Saeed Mahameed committed Aug 7, 2017
1 parent a9f7705 commit eeb66cd
Show file tree
Hide file tree
Showing 9 changed files with 377 additions and 221 deletions.
10 changes: 10 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,16 @@ config MLX5_CORE_EN
---help---
Ethernet support in Mellanox Technologies ConnectX-4 NIC.

config MLX5_MPFS
bool "Mellanox Technologies MLX5 MPFS support"
depends on MLX5_CORE_EN
default y
---help---
Mellanox Technologies Ethernet Multi-Physical Function Switch (MPFS)
support in ConnectX NIC. MPFS is required when a multi-PF configuration
is enabled, to allow passing user-configured unicast MAC addresses to the
requesting PF.

config MLX5_CORE_EN_DCB
bool "Data Center Bridging (DCB) Support"
default y
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += eswitch.o eswitch_offloads.o \
en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o

mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o

mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o

mlx5_core-$(CONFIG_MLX5_CORE_IPOIB) += ipoib/ipoib.o ipoib/ethtool.o
Expand Down
17 changes: 16 additions & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <linux/tcp.h>
#include <linux/mlx5/fs.h>
#include "en.h"
#include "lib/mpfs.h"

static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
struct mlx5e_l2_rule *ai, int type);
Expand Down Expand Up @@ -65,6 +66,7 @@ struct mlx5e_l2_hash_node {
struct hlist_node hlist;
u8 action;
struct mlx5e_l2_rule ai;
bool mpfs;
};

static inline int mlx5e_hash_l2(u8 *addr)
Expand Down Expand Up @@ -362,17 +364,30 @@ static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv)
static void mlx5e_execute_l2_action(struct mlx5e_priv *priv,
struct mlx5e_l2_hash_node *hn)
{
switch (hn->action) {
u8 action = hn->action;
int l2_err = 0;

switch (action) {
case MLX5E_ACTION_ADD:
mlx5e_add_l2_flow_rule(priv, &hn->ai, MLX5E_FULLMATCH);
if (!is_multicast_ether_addr(hn->ai.addr)) {
l2_err = mlx5_mpfs_add_mac(priv->mdev, hn->ai.addr);
hn->mpfs = !l2_err;
}
hn->action = MLX5E_ACTION_NONE;
break;

case MLX5E_ACTION_DEL:
if (!is_multicast_ether_addr(hn->ai.addr) && hn->mpfs)
l2_err = mlx5_mpfs_del_mac(priv->mdev, hn->ai.addr);
mlx5e_del_l2_flow_rule(priv, &hn->ai);
mlx5e_del_l2_from_hash(hn);
break;
}

if (l2_err)
netdev_warn(priv->netdev, "MPFS, failed to %s mac %pM, err(%d)\n",
action == MLX5E_ACTION_ADD ? "add" : "del", hn->ai.addr, l2_err);
}

static void mlx5e_sync_netdev_addr(struct mlx5e_priv *priv)
Expand Down
190 changes: 38 additions & 152 deletions drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,13 @@ enum {
MLX5_ACTION_DEL = 2,
};

/* E-Switch UC L2 table hash node */
struct esw_uc_addr {
/* Hash linkage; nodes are looked up by MAC via l2addr_hash_find(). */
struct l2addr_node node;
/* L2 table slot filled in by set_l2_table_entry() for this MAC. */
u32 table_index;
/* Vport recorded as the owner of this unicast MAC. */
u32 vport;
};

/* Vport UC/MC hash node */
struct vport_addr {
struct l2addr_node node;
u8 action;
u32 vport;
struct mlx5_flow_handle *flow_rule; /* SRIOV only */
struct mlx5_flow_handle *flow_rule;
bool mpfs; /* UC MAC was added to MPFs */
/* A flag indicating that mac was added due to mc promiscuous vport */
bool mc_promisc;
};
Expand Down Expand Up @@ -154,81 +148,6 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
}

/* HW L2 Table (MPFS) management */
/*
 * Build and execute a SET_L2_TABLE_ENTRY command.
 *
 * @dev:        device the command is issued on
 * @index:      L2 table slot to write
 * @mac:        MAC address to store in the slot
 * @vlan_valid: value for the command's vlan_valid field
 * @vlan:       value for the command's vlan field
 *
 * Returns the result of mlx5_cmd_exec().
 */
static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index,
				  u8 *mac, u8 vlan_valid, u16 vlan)
{
	u32 cmd_out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
	u32 cmd_in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0};
	u8 *mac_field;

	MLX5_SET(set_l2_table_entry_in, cmd_in, opcode,
		 MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
	MLX5_SET(set_l2_table_entry_in, cmd_in, table_index, index);
	MLX5_SET(set_l2_table_entry_in, cmd_in, vlan_valid, vlan_valid);
	MLX5_SET(set_l2_table_entry_in, cmd_in, vlan, vlan);

	/* The MAC is written starting at byte offset 2 of mac_address. */
	mac_field = MLX5_ADDR_OF(set_l2_table_entry_in, cmd_in, mac_address);
	ether_addr_copy(&mac_field[2], mac);

	return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
			     cmd_out, sizeof(cmd_out));
}

/*
 * Build and execute a DELETE_L2_TABLE_ENTRY command for slot @index.
 * Returns the result of mlx5_cmd_exec().
 */
static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
{
	u32 cmd_out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
	u32 cmd_in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0};

	MLX5_SET(delete_l2_table_entry_in, cmd_in, opcode,
		 MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
	MLX5_SET(delete_l2_table_entry_in, cmd_in, table_index, index);

	return mlx5_cmd_exec(dev, cmd_in, sizeof(cmd_in),
			     cmd_out, sizeof(cmd_out));
}

/*
 * Reserve the first free slot in the L2 table bitmap.
 *
 * @l2_table: table whose bitmap is searched
 * @ix:       receives the search result; it is written on failure too
 *
 * Returns 0 after marking the slot as used, or -ENOSPC when no zero bit
 * exists below l2_table->size.
 */
static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix)
{
	*ix = find_first_zero_bit(l2_table->bitmap, l2_table->size);

	/* A result at or past the table size means every slot is taken. */
	if (*ix >= l2_table->size)
		return -ENOSPC;

	__set_bit(*ix, l2_table->bitmap);
	return 0;
}

/* Mark L2 table slot @ix as free by clearing its bit in the table bitmap. */
static void free_l2_table_index(struct mlx5_l2_table *l2_table, u32 ix)
{
__clear_bit(ix, l2_table->bitmap);
}

/*
 * Allocate an L2 table slot and program @mac into it.
 *
 * @dev:        device whose E-Switch L2 table is used
 * @mac:        MAC address to program
 * @vlan_valid: passed through to the SET_L2_TABLE_ENTRY command
 * @vlan:       passed through to the SET_L2_TABLE_ENTRY command
 * @index:      receives the slot chosen by alloc_l2_table_index()
 *
 * Returns 0 on success. If slot allocation fails its error is returned;
 * if the command fails, the slot is released again and the command error
 * is returned.
 */
static int set_l2_table_entry(struct mlx5_core_dev *dev, u8 *mac,
			      u8 vlan_valid, u16 vlan,
			      u32 *index)
{
	struct mlx5_l2_table *table = &dev->priv.eswitch->l2_table;
	int rc = alloc_l2_table_index(table, index);

	if (rc)
		return rc;

	rc = set_l2_table_entry_cmd(dev, *index, mac, vlan_valid, vlan);
	if (!rc)
		return 0;

	/* Programming failed: give the reserved slot back. */
	free_l2_table_index(table, *index);
	return rc;
}

/*
 * Remove the entry in L2 table slot @index and release the slot.
 * The result of the delete command is not checked; the slot is released
 * regardless.
 */
static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
{
	struct mlx5_l2_table *table = &dev->priv.eswitch->l2_table;

	del_l2_table_entry_cmd(dev, index);
	free_l2_table_index(table, index);
}

/* E-Switch FDB */
static struct mlx5_flow_handle *
__esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
Expand Down Expand Up @@ -455,65 +374,60 @@ typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,

static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
struct hlist_head *hash = esw->l2_table.l2_hash;
struct esw_uc_addr *esw_uc;
u8 *mac = vaddr->node.addr;
u32 vport = vaddr->vport;
int err;

esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
if (esw_uc) {
/* Skip mlx5_mpfs_add_mac for PFs,
* it is already done by the PF netdev in mlx5e_execute_l2_action
*/
if (!vport)
goto fdb_add;

err = mlx5_mpfs_add_mac(esw->dev, mac);
if (err) {
esw_warn(esw->dev,
"Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n",
mac, vport, esw_uc->vport);
return -EEXIST;
"Failed to add L2 table mac(%pM) for vport(%d), err(%d)\n",
mac, vport, err);
return err;
}
vaddr->mpfs = true;

esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL);
if (!esw_uc)
return -ENOMEM;
esw_uc->vport = vport;

err = set_l2_table_entry(esw->dev, mac, 0, 0, &esw_uc->table_index);
if (err)
goto abort;

fdb_add:
/* SRIOV is enabled: Forward UC MAC to vport */
if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY)
vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);

esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
vport, mac, esw_uc->table_index, vaddr->flow_rule);
return err;
abort:
l2addr_hash_del(esw_uc);
esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM fr(%p)\n",
vport, mac, vaddr->flow_rule);

return err;
}

static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
struct hlist_head *hash = esw->l2_table.l2_hash;
struct esw_uc_addr *esw_uc;
u8 *mac = vaddr->node.addr;
u32 vport = vaddr->vport;
int err = 0;

esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
if (!esw_uc || esw_uc->vport != vport) {
esw_debug(esw->dev,
"MAC(%pM) doesn't belong to vport (%d)\n",
mac, vport);
return -EINVAL;
}
esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n",
vport, mac, esw_uc->table_index, vaddr->flow_rule);
/* Skip mlx5_mpfs_del_mac for PFs,
* it is already done by the PF netdev in mlx5e_execute_l2_action
*/
if (!vport || !vaddr->mpfs)
goto fdb_del;

del_l2_table_entry(esw->dev, esw_uc->table_index);
err = mlx5_mpfs_del_mac(esw->dev, mac);
if (err)
esw_warn(esw->dev,
"Failed to del L2 table mac(%pM) for vport(%d), err(%d)\n",
mac, vport, err);
vaddr->mpfs = false;

fdb_del:
if (vaddr->flow_rule)
mlx5_del_flow_rules(vaddr->flow_rule);
vaddr->flow_rule = NULL;

l2addr_hash_del(esw_uc);
return 0;
}

Expand Down Expand Up @@ -1635,7 +1549,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)

esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode);
esw->mode = mode;
esw_disable_vport(esw, 0);

if (mode == SRIOV_LEGACY)
err = esw_create_legacy_fdb_table(esw, nvfs + 1);
Expand All @@ -1648,7 +1561,11 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
if (err)
esw_warn(esw->dev, "Failed to create eswitch TSAR");

enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE;
/* Don't enable vport events when in SRIOV_OFFLOADS mode, since:
* 1. L2 table (MPFS) is programmed by PF/VF representors netdevs set_rx_mode
* 2. FDB/Eswitch is programmed by user space tools
*/
enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : 0;
for (i = 0; i <= nvfs; i++)
esw_enable_vport(esw, i, enabled_events);

Expand All @@ -1657,7 +1574,6 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
return 0;

abort:
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
esw->mode = SRIOV_NONE;
return err;
}
Expand Down Expand Up @@ -1691,30 +1607,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
esw_offloads_cleanup(esw, nvports);

esw->mode = SRIOV_NONE;
/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}

/*
 * Enable vport 0 (PF) with unicast address change events, provided
 * ESW_ALLOWED() holds for @esw; otherwise do nothing.
 */
void mlx5_eswitch_attach(struct mlx5_eswitch *esw)
{
	/* Only vport 0 here; VF vports will be enabled when SRIOV is enabled */
	if (ESW_ALLOWED(esw))
		esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}

/*
 * Disable vport 0 (PF), provided ESW_ALLOWED() holds for @esw;
 * otherwise do nothing.
 */
void mlx5_eswitch_detach(struct mlx5_eswitch *esw)
{
	if (ESW_ALLOWED(esw))
		esw_disable_vport(esw, 0);
}

int mlx5_eswitch_init(struct mlx5_core_dev *dev)
{
int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
int total_vports = MLX5_TOTAL_VPORTS(dev);
struct mlx5_eswitch *esw;
int vport_num;
Expand All @@ -1724,8 +1620,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
return 0;

esw_info(dev,
"Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n",
total_vports, l2_table_size,
"Total vports %d, per vport: max uc(%d) max mc(%d)\n",
total_vports,
MLX5_MAX_UC_PER_VPORT(dev),
MLX5_MAX_MC_PER_VPORT(dev));

Expand All @@ -1735,14 +1631,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)

esw->dev = dev;

esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size),
sizeof(uintptr_t), GFP_KERNEL);
if (!esw->l2_table.bitmap) {
err = -ENOMEM;
goto abort;
}
esw->l2_table.size = l2_table_size;

esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
if (!esw->work_queue) {
err = -ENOMEM;
Expand Down Expand Up @@ -1793,7 +1681,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
abort:
if (esw->work_queue)
destroy_workqueue(esw->work_queue);
kfree(esw->l2_table.bitmap);
kfree(esw->vports);
kfree(esw->offloads.vport_reps);
kfree(esw);
Expand All @@ -1809,7 +1696,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)

esw->dev->priv.eswitch = NULL;
destroy_workqueue(esw->work_queue);
kfree(esw->l2_table.bitmap);
kfree(esw->offloads.vport_reps);
kfree(esw->vports);
kfree(esw);
Expand Down
Loading

0 comments on commit eeb66cd

Please sign in to comment.