Merge tag 'mlx5-updates-2023-05-31' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5-updates-2023-05-31

net/mlx5: Support 4 ports VF LAG, part 1/2

This series continues the series [1] "Support 4 ports HCAs LAG mode"
by Mark Bloch, adding support for 4 ports VF LAG (single FDB
E-Switch).

This series of patches focuses on refactoring the parts of the code
that assume VF LAG supports only two ports; for instance, that each
device can have only one peer.

Patches 1-5:
- Refactor ETH handling of TC rules of eswitches with peers.
Patch 6:
- Refactor the peer miss group table.
Patches 7-9:
- Refactor single FDB E-Switch creation.
Patch 10:
- Refactor the DR layer.
Patches 11-14:
- Refactor the devcom layer.

The next series will refactor the LAG layer and enable 4 ports VF LAG.
This series specifically allows HCAs with 4 ports to create a VF LAG
only across all 4 ports; it is not possible to create a VF LAG with 2
or 3 ports using HCAs that have 4 ports.

Currently, the Merged E-Switch feature supports only HCAs with 2 ports;
upcoming patches will introduce support for HCAs with 4 ports.

To activate VF LAG, a user can execute:

devlink dev eswitch set pci/0000:08:00.0 mode switchdev
devlink dev eswitch set pci/0000:08:00.1 mode switchdev
devlink dev eswitch set pci/0000:08:00.2 mode switchdev
devlink dev eswitch set pci/0000:08:00.3 mode switchdev
ip link add name bond0 type bond
ip link set dev bond0 type bond mode 802.3ad
ip link set dev eth2 master bond0
ip link set dev eth3 master bond0
ip link set dev eth4 master bond0
ip link set dev eth5 master bond0

Where eth2, eth3, eth4 and eth5 are the net interfaces of
pci/0000:08:00.0, pci/0000:08:00.1, pci/0000:08:00.2 and
pci/0000:08:00.3, respectively.
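
Before creating the bond, one can confirm that each PF has actually
entered switchdev mode with a standard devlink query (generic devlink
usage, not specific to this series), e.g.:

devlink dev eswitch show pci/0000:08:00.0

which should report "mode switchdev" for each of the four functions.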

The user can verify the LAG state and type via debugfs:
/sys/kernel/debug/mlx5/0000\:08\:00.0/lag/state
/sys/kernel/debug/mlx5/0000\:08\:00.0/lag/type
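
These debugfs entries are plain text files, so reading them with cat is
enough; the bonding side can be cross-checked through the standard
bonding proc interface (generic kernel interfaces, not added by this
series):

cat /sys/kernel/debug/mlx5/0000\:08\:00.0/lag/state
cat /sys/kernel/debug/mlx5/0000\:08\:00.0/lag/type
cat /proc/net/bonding/bond0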

[1] https://lore.kernel.org/netdev/20220510055743.118828-1-saeedm@nvidia.com/

* tag 'mlx5-updates-2023-05-31' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5: Devcom, extend mlx5_devcom_send_event to work with more than two devices
  net/mlx5: Devcom, introduce devcom_for_each_peer_entry
  net/mlx5: E-switch, mark devcom as not ready when all eswitches are unpaired
  net/mlx5: Devcom, Rename paired to ready
  net/mlx5: DR, handle more than one peer domain
  net/mlx5: E-switch, generalize shared FDB creation
  net/mlx5: E-switch, Handle multiple master egress rules
  net/mlx5: E-switch, refactor FDB miss rule add/remove
  net/mlx5: E-switch, enlarge peer miss group table
  net/mlx5e: Handle offloads flows per peer
  net/mlx5e: en_tc, re-factor query route port
  net/mlx5e: rep, store send to vport rules per peer
  net/mlx5e: tc, Refactor peer add/del flow
  net/mlx5e: en_tc, Extend peer flows to a list
====================

Link: https://lore.kernel.org/r/20230602191301.47004-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Jakub Kicinski committed Jun 5, 2023
2 parents c422ac9 + e2a82bf commit 28cfea9
Showing 24 changed files with 603 additions and 271 deletions.
drivers/net/ethernet/mellanox/mlx5/core/en/tc_priv.h (4 changes: 2 additions & 2 deletions)

@@ -94,13 +94,13 @@ struct mlx5e_tc_flow {
 	 * destinations.
 	 */
 	struct encap_flow_item encaps[MLX5_MAX_FLOW_FWD_VPORTS];
-	struct mlx5e_tc_flow *peer_flow;
 	struct mlx5e_hairpin_entry *hpe; /* attached hairpin instance */
 	struct list_head hairpin; /* flows sharing the same hairpin */
-	struct list_head peer; /* flows with peer flow */
+	struct list_head peer[MLX5_MAX_PORTS]; /* flows with peer flow */
 	struct list_head unready; /* flows not ready to be offloaded (e.g
 				   * due to missing route)
 				   */
+	struct list_head peer_flows; /* flows on peer */
 	struct net_device *orig_dev; /* netdev adding flow first */
 	int tmp_entry_index;
 	struct list_head tmp_list; /* temporary flow list used by neigh update */
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c (137 changes: 110 additions & 27 deletions)

@@ -374,39 +374,88 @@ static void mlx5e_sqs2vport_stop(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep)
 {
 	struct mlx5e_rep_sq *rep_sq, *tmp;
+	struct mlx5e_rep_sq_peer *sq_peer;
 	struct mlx5e_rep_priv *rpriv;
+	unsigned long i;
 
 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return;
 
 	rpriv = mlx5e_rep_to_rep_priv(rep);
 	list_for_each_entry_safe(rep_sq, tmp, &rpriv->vport_sqs_list, list) {
 		mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
-		if (rep_sq->send_to_vport_rule_peer)
-			mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
+		xa_for_each(&rep_sq->sq_peer, i, sq_peer) {
+			if (sq_peer->rule)
+				mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule);
+
+			xa_erase(&rep_sq->sq_peer, i);
+			kfree(sq_peer);
+		}
+
+		xa_destroy(&rep_sq->sq_peer);
 		list_del(&rep_sq->list);
 		kfree(rep_sq);
 	}
 }
 
+static int mlx5e_sqs2vport_add_peers_rules(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep,
+					   struct mlx5_devcom *devcom,
+					   struct mlx5e_rep_sq *rep_sq, int i)
+{
+	struct mlx5_eswitch *peer_esw = NULL;
+	struct mlx5_flow_handle *flow_rule;
+	int tmp;
+
+	mlx5_devcom_for_each_peer_entry(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
+					peer_esw, tmp) {
+		int peer_rule_idx = mlx5_get_dev_index(peer_esw->dev);
+		struct mlx5e_rep_sq_peer *sq_peer;
+		int err;
+
+		sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL);
+		if (!sq_peer)
+			return -ENOMEM;
+
+		flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
+								rep, rep_sq->sqn);
+		if (IS_ERR(flow_rule)) {
+			kfree(sq_peer);
+			return PTR_ERR(flow_rule);
+		}
+
+		sq_peer->rule = flow_rule;
+		sq_peer->peer = peer_esw;
+		err = xa_insert(&rep_sq->sq_peer, peer_rule_idx, sq_peer, GFP_KERNEL);
+		if (err) {
+			kfree(sq_peer);
+			mlx5_eswitch_del_send_to_vport_rule(flow_rule);
+			return err;
+		}
+	}
+
+	return 0;
+}
+
 static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep,
 				 u32 *sqns_array, int sqns_num)
 {
-	struct mlx5_eswitch *peer_esw = NULL;
 	struct mlx5_flow_handle *flow_rule;
 	struct mlx5e_rep_priv *rpriv;
 	struct mlx5e_rep_sq *rep_sq;
+	struct mlx5_devcom *devcom;
+	bool devcom_locked = false;
 	int err;
 	int i;
 
 	if (esw->mode != MLX5_ESWITCH_OFFLOADS)
 		return 0;
 
+	devcom = esw->dev->priv.devcom;
 	rpriv = mlx5e_rep_to_rep_priv(rep);
-	if (mlx5_devcom_is_paired(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS))
-		peer_esw = mlx5_devcom_get_peer_data(esw->dev->priv.devcom,
-						     MLX5_DEVCOM_ESW_OFFLOADS);
+	if (mlx5_devcom_comp_is_ready(devcom, MLX5_DEVCOM_ESW_OFFLOADS) &&
+	    mlx5_devcom_for_each_peer_begin(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
+		devcom_locked = true;
 
 	for (i = 0; i < sqns_num; i++) {
 		rep_sq = kzalloc(sizeof(*rep_sq), GFP_KERNEL);

@@ -426,31 +475,30 @@ static int mlx5e_sqs2vport_start(struct mlx5_eswitch *esw,
 		rep_sq->send_to_vport_rule = flow_rule;
 		rep_sq->sqn = sqns_array[i];
 
-		if (peer_esw) {
-			flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw,
-									rep, sqns_array[i]);
-			if (IS_ERR(flow_rule)) {
-				err = PTR_ERR(flow_rule);
+		xa_init(&rep_sq->sq_peer);
+		if (devcom_locked) {
+			err = mlx5e_sqs2vport_add_peers_rules(esw, rep, devcom, rep_sq, i);
+			if (err) {
 				mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule);
+				xa_destroy(&rep_sq->sq_peer);
 				kfree(rep_sq);
 				goto out_err;
 			}
-			rep_sq->send_to_vport_rule_peer = flow_rule;
 		}
 
 		list_add(&rep_sq->list, &rpriv->vport_sqs_list);
 	}
 
-	if (peer_esw)
-		mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (devcom_locked)
+		mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
 	return 0;
 
 out_err:
 	mlx5e_sqs2vport_stop(esw, rep);
 
-	if (peer_esw)
-		mlx5_devcom_release_peer_data(esw->dev->priv.devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+	if (devcom_locked)
+		mlx5_devcom_for_each_peer_end(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 
 	return err;
 }
@@ -1530,42 +1578,77 @@ static void *mlx5e_vport_rep_get_proto_dev(struct mlx5_eswitch_rep *rep)
 	return rpriv->netdev;
 }
 
-static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep)
+static void mlx5e_vport_rep_event_unpair(struct mlx5_eswitch_rep *rep,
+					 struct mlx5_eswitch *peer_esw)
 {
+	int i = mlx5_get_dev_index(peer_esw->dev);
 	struct mlx5e_rep_priv *rpriv;
 	struct mlx5e_rep_sq *rep_sq;
 
+	WARN_ON_ONCE(!peer_esw);
 	rpriv = mlx5e_rep_to_rep_priv(rep);
 	list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
-		if (!rep_sq->send_to_vport_rule_peer)
+		struct mlx5e_rep_sq_peer *sq_peer = xa_load(&rep_sq->sq_peer, i);
+
+		if (!sq_peer || sq_peer->peer != peer_esw)
 			continue;
-		mlx5_eswitch_del_send_to_vport_rule(rep_sq->send_to_vport_rule_peer);
-		rep_sq->send_to_vport_rule_peer = NULL;
+
+		mlx5_eswitch_del_send_to_vport_rule(sq_peer->rule);
+		xa_erase(&rep_sq->sq_peer, i);
+		kfree(sq_peer);
 	}
 }
 
 static int mlx5e_vport_rep_event_pair(struct mlx5_eswitch *esw,
 				      struct mlx5_eswitch_rep *rep,
 				      struct mlx5_eswitch *peer_esw)
 {
+	int i = mlx5_get_dev_index(peer_esw->dev);
 	struct mlx5_flow_handle *flow_rule;
+	struct mlx5e_rep_sq_peer *sq_peer;
 	struct mlx5e_rep_priv *rpriv;
 	struct mlx5e_rep_sq *rep_sq;
+	int err;
 
 	rpriv = mlx5e_rep_to_rep_priv(rep);
 	list_for_each_entry(rep_sq, &rpriv->vport_sqs_list, list) {
-		if (rep_sq->send_to_vport_rule_peer)
+		sq_peer = xa_load(&rep_sq->sq_peer, i);
+
+		if (sq_peer && sq_peer->peer)
 			continue;
-		flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep, rep_sq->sqn);
-		if (IS_ERR(flow_rule))
+
+		flow_rule = mlx5_eswitch_add_send_to_vport_rule(peer_esw, esw, rep,
+								rep_sq->sqn);
+		if (IS_ERR(flow_rule)) {
+			err = PTR_ERR(flow_rule);
 			goto err_out;
-		rep_sq->send_to_vport_rule_peer = flow_rule;
+		}
+
+		if (sq_peer) {
+			sq_peer->rule = flow_rule;
+			sq_peer->peer = peer_esw;
+			continue;
+		}
+		sq_peer = kzalloc(sizeof(*sq_peer), GFP_KERNEL);
+		if (!sq_peer) {
+			err = -ENOMEM;
+			goto err_sq_alloc;
+		}
+		err = xa_insert(&rep_sq->sq_peer, i, sq_peer, GFP_KERNEL);
+		if (err)
+			goto err_xa;
+		sq_peer->rule = flow_rule;
+		sq_peer->peer = peer_esw;
 	}
 
 	return 0;
+err_xa:
+	kfree(sq_peer);
+err_sq_alloc:
+	mlx5_eswitch_del_send_to_vport_rule(flow_rule);
 err_out:
-	mlx5e_vport_rep_event_unpair(rep);
-	return PTR_ERR(flow_rule);
+	mlx5e_vport_rep_event_unpair(rep, peer_esw);
+	return err;
 }
 
 static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,

@@ -1578,7 +1661,7 @@ static int mlx5e_vport_rep_event(struct mlx5_eswitch *esw,
 	if (event == MLX5_SWITCHDEV_EVENT_PAIR)
 		err = mlx5e_vport_rep_event_pair(esw, rep, data);
 	else if (event == MLX5_SWITCHDEV_EVENT_UNPAIR)
-		mlx5e_vport_rep_event_unpair(rep);
+		mlx5e_vport_rep_event_unpair(rep, data);
 
 	return err;
 }
drivers/net/ethernet/mellanox/mlx5/core/en_rep.h (7 changes: 6 additions & 1 deletion)

@@ -225,9 +225,14 @@ struct mlx5e_encap_entry {
 	struct rcu_head rcu;
 };
 
+struct mlx5e_rep_sq_peer {
+	struct mlx5_flow_handle *rule;
+	void *peer;
+};
+
 struct mlx5e_rep_sq {
 	struct mlx5_flow_handle *send_to_vport_rule;
-	struct mlx5_flow_handle *send_to_vport_rule_peer;
+	struct xarray sq_peer;
 	u32 sqn;
 	struct list_head list;
 };