net/mlx5e: CT: Offload connections with hardware steering rules
This is modeled similarly to how software steering works (a condensed sketch follows the list):
- a reference-counted matcher is maintained for each
  combination of nat/no_nat x ipv4/ipv6 x tcp/udp/gre.
- adding a rule involves finding and referencing (or creating) the corresponding
  matcher, then inserting the rule itself.
- updating rules is implemented using the bwc_rule update API, which can
  change a rule's actions without touching the match value.
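
For reference, a condensed sketch of the bookkeeping described above. This is
illustrative only; matcher_slot, bwc_matcher and matcher_idx are stand-ins for
the driver's mlx5_ct_fs_hmfs_matcher, mlx5hws_bwc_matcher and get_matcher_idx,
and the real implementation (with refcount_t and a mutex) is in the diff below.
Two arrays of six slots give the 2 x 2 x 3 = 12 matcher combinations:

    /* Illustrative sketch, not the driver code itself. */
    #include <stdbool.h>

    struct bwc_matcher;                        /* stands in for struct mlx5hws_bwc_matcher */

    struct matcher_slot {
            struct bwc_matcher *matcher;       /* created lazily, on first rule */
            unsigned int refcount;             /* the driver uses refcount_t + a mutex */
    };

    #define NUM_MATCHERS (2 * 3)               /* {ipv4, ipv6} x {tcp, udp, gre} */

    static struct matcher_slot matchers[NUM_MATCHERS];     /* no-NAT rules */
    static struct matcher_slot matchers_nat[NUM_MATCHERS]; /* NAT rules */

    /* Within each IP version: udp = 0, gre = 1, tcp = 2. */
    static unsigned int matcher_idx(bool ipv4, bool tcp, bool gre)
    {
            return ipv4 * 3 + tcp * 2 + gre;
    }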

Using a T-Rex traffic generator to initiate multiple millions of UDP flows
per second, a kernel running with these patches on the RX side was able
to offload ~600K flows per second, roughly 7x more than what software
steering could achieve on the same hardware (256-thread AMD EPYC,
512 GB RAM, ConnectX-7 b2b).

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Jianbo Liu <jianbol@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20250114130646.1937192-5-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Cosmin Ratiu authored and Jakub Kicinski committed Jan 16, 2025
1 parent 554f977 commit 066d49c
Showing 1 changed file with 247 additions and 2 deletions.
drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c
@@ -3,33 +3,276 @@

#include "en_tc.h"
#include "en/tc_ct.h"
#include "en/tc_priv.h"
#include "en/tc/ct_fs.h"
#include "fs_core.h"
#include "steering/hws/fs_hws_pools.h"
#include "steering/hws/mlx5hws.h"
#include "steering/hws/table.h"

struct mlx5_ct_fs_hmfs_matcher {
        struct mlx5hws_bwc_matcher *hws_bwc_matcher;
        refcount_t ref;
};

/* We need {ipv4, ipv6} x {tcp, udp, gre} matchers. */
#define NUM_MATCHERS (2 * 3)

struct mlx5_ct_fs_hmfs {
        struct mlx5hws_table *ct_tbl;
        struct mlx5hws_table *ct_nat_tbl;
        struct mlx5_flow_table *ct_nat;
        struct mlx5hws_action *fwd_action;
        struct mlx5hws_action *last_action;
        struct mlx5hws_context *ctx;
        struct mutex lock; /* Guards matchers */
        struct mlx5_ct_fs_hmfs_matcher matchers[NUM_MATCHERS];
        struct mlx5_ct_fs_hmfs_matcher matchers_nat[NUM_MATCHERS];
};

struct mlx5_ct_fs_hmfs_rule {
        struct mlx5_ct_fs_rule fs_rule;
        struct mlx5hws_bwc_rule *hws_bwc_rule;
        struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher;
        struct mlx5_fc *counter;
};

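/* Map (ipv4, L4 protocol) onto a matcher array index: ipv4 selects the upper or
 * lower half of 0..NUM_MATCHERS - 1, and within each IP version udp = 0,
 * gre = 1, tcp = 2.
 */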
static u32 get_matcher_idx(bool ipv4, bool tcp, bool gre)
{
        return ipv4 * 3 + tcp * 2 + gre;
}

static int mlx5_ct_fs_hmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct,
                                struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct)
{
        u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED;
        struct mlx5hws_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl;
        struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs);

        ct_tbl = ct->fs_hws_table.hws_table;
        ct_nat_tbl = ct_nat->fs_hws_table.hws_table;
        post_ct_tbl = post_ct->fs_hws_table.hws_table;
        fs_hmfs->ct_nat = ct_nat;

        if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) {
                netdev_warn(fs->netdev, "ct_fs_hmfs: failed to init, missing backing hws tables");
                return -EOPNOTSUPP;
        }

        netdev_dbg(fs->netdev, "using hmfs steering");

        fs_hmfs->ct_tbl = ct_tbl;
        fs_hmfs->ct_nat_tbl = ct_nat_tbl;
        fs_hmfs->ctx = ct_tbl->ctx;
        mutex_init(&fs_hmfs->lock);

        fs_hmfs->fwd_action = mlx5hws_action_create_dest_table(ct_tbl->ctx, post_ct_tbl, flags);
        if (!fs_hmfs->fwd_action) {
                netdev_warn(fs->netdev, "ct_fs_hmfs: failed to create fwd action\n");
                return -EINVAL;
        }
        fs_hmfs->last_action = mlx5hws_action_create_last(ct_tbl->ctx, flags);
        if (!fs_hmfs->last_action) {
                netdev_warn(fs->netdev, "ct_fs_hmfs: failed to create last action\n");
                mlx5hws_action_destroy(fs_hmfs->fwd_action);
                return -EINVAL;
        }

        return 0;
}

static void mlx5_ct_fs_hmfs_destroy(struct mlx5_ct_fs *fs)
{
        struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs);

        mlx5hws_action_destroy(fs_hmfs->last_action);
        mlx5hws_action_destroy(fs_hmfs->fwd_action);
}

static struct mlx5hws_bwc_matcher *
mlx5_ct_fs_hmfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5hws_table *tbl,
                               struct mlx5_flow_spec *spec, bool ipv4, bool tcp, bool gre)
{
        u8 match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS;
        struct mlx5hws_match_parameters mask = {
                .match_buf = spec->match_criteria,
                .match_sz = sizeof(spec->match_criteria),
        };
        u32 priority = get_matcher_idx(ipv4, tcp, gre); /* Static priority based on params. */
        struct mlx5hws_bwc_matcher *hws_bwc_matcher;

        hws_bwc_matcher = mlx5hws_bwc_matcher_create(tbl, priority, match_criteria_enable, &mask);
        if (!hws_bwc_matcher)
                return ERR_PTR(-EINVAL);

        return hws_bwc_matcher;
}

static struct mlx5_ct_fs_hmfs_matcher *
mlx5_ct_fs_hmfs_matcher_get(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
                            bool nat, bool ipv4, bool tcp, bool gre)
{
        struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs);
        u32 matcher_idx = get_matcher_idx(ipv4, tcp, gre);
        struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher;
        struct mlx5hws_bwc_matcher *hws_bwc_matcher;
        struct mlx5hws_table *tbl;

        hmfs_matcher = nat ?
                       (fs_hmfs->matchers_nat + matcher_idx) :
                       (fs_hmfs->matchers + matcher_idx);

        if (refcount_inc_not_zero(&hmfs_matcher->ref))
                return hmfs_matcher;

        mutex_lock(&fs_hmfs->lock);

        /* Retry with lock, as the matcher might be already created by another cpu. */
        if (refcount_inc_not_zero(&hmfs_matcher->ref))
                goto out_unlock;

        tbl = nat ? fs_hmfs->ct_nat_tbl : fs_hmfs->ct_tbl;

        hws_bwc_matcher = mlx5_ct_fs_hmfs_matcher_create(fs, tbl, spec, ipv4, tcp, gre);
        if (IS_ERR(hws_bwc_matcher)) {
                netdev_warn(fs->netdev,
                            "ct_fs_hmfs: failed to create bwc matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n",
                            nat, ipv4, tcp, gre, PTR_ERR(hws_bwc_matcher));

                hmfs_matcher = ERR_CAST(hws_bwc_matcher);
                goto out_unlock;
        }

        hmfs_matcher->hws_bwc_matcher = hws_bwc_matcher;
        refcount_set(&hmfs_matcher->ref, 1);

out_unlock:
        mutex_unlock(&fs_hmfs->lock);
        return hmfs_matcher;
}

static void
mlx5_ct_fs_hmfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher)
{
        struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs);

        if (!refcount_dec_and_mutex_lock(&hmfs_matcher->ref, &fs_hmfs->lock))
                return;

        mlx5hws_bwc_matcher_destroy(hmfs_matcher->hws_bwc_matcher);
        mutex_unlock(&fs_hmfs->lock);
}

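/* Each offloaded CT rule carries four actions: a flow counter, a modify-header
 * action, a forward to the post_ct table and the terminating "last" action.
 */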
#define NUM_CT_HMFS_RULES 4

static void mlx5_ct_fs_hmfs_fill_rule_actions(struct mlx5_ct_fs_hmfs *fs_hmfs,
                                              struct mlx5_flow_attr *attr,
                                              struct mlx5hws_rule_action *rule_actions)
{
        struct mlx5_fs_hws_action *mh_action = &attr->modify_hdr->fs_hws_action;

        memset(rule_actions, 0, NUM_CT_HMFS_RULES * sizeof(*rule_actions));
        rule_actions[0].action = mlx5_fc_get_hws_action(fs_hmfs->ctx, attr->counter);
        /* Modify header is special, it may require extra arguments outside the action itself. */
        if (mh_action->mh_data) {
                rule_actions[1].modify_header.offset = mh_action->mh_data->offset;
                rule_actions[1].modify_header.data = mh_action->mh_data->data;
        }
        rule_actions[1].action = mh_action->hws_action;
        rule_actions[2].action = fs_hmfs->fwd_action;
        rule_actions[3].action = fs_hmfs->last_action;
}

static struct mlx5_ct_fs_rule *
mlx5_ct_fs_hmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec,
                            struct mlx5_flow_attr *attr, struct flow_rule *flow_rule)
{
        struct mlx5hws_rule_action rule_actions[NUM_CT_HMFS_RULES];
        struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs);
        struct mlx5hws_match_parameters match_params = {
                .match_buf = spec->match_value,
                .match_sz = ARRAY_SIZE(spec->match_value),
        };
        struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher;
        struct mlx5_ct_fs_hmfs_rule *hmfs_rule;
        bool nat, tcp, ipv4, gre;
        int err;

        if (!mlx5e_tc_ct_is_valid_flow_rule(fs->netdev, flow_rule))
                return ERR_PTR(-EOPNOTSUPP);

        hmfs_rule = kzalloc(sizeof(*hmfs_rule), GFP_KERNEL);
        if (!hmfs_rule)
                return ERR_PTR(-ENOMEM);

        nat = (attr->ft == fs_hmfs->ct_nat);
        ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4;
        tcp = MLX5_GET(fte_match_param, spec->match_value,
                       outer_headers.ip_protocol) == IPPROTO_TCP;
        gre = MLX5_GET(fte_match_param, spec->match_value,
                       outer_headers.ip_protocol) == IPPROTO_GRE;

        hmfs_matcher = mlx5_ct_fs_hmfs_matcher_get(fs, spec, nat, ipv4, tcp, gre);
        if (IS_ERR(hmfs_matcher)) {
                err = PTR_ERR(hmfs_matcher);
                goto err_free_rule;
        }
        hmfs_rule->hmfs_matcher = hmfs_matcher;

        mlx5_ct_fs_hmfs_fill_rule_actions(fs_hmfs, attr, rule_actions);
        hmfs_rule->counter = attr->counter;

        hmfs_rule->hws_bwc_rule =
                mlx5hws_bwc_rule_create(hmfs_matcher->hws_bwc_matcher, &match_params,
                                        spec->flow_context.flow_source, rule_actions);
        if (!hmfs_rule->hws_bwc_rule) {
                err = -EINVAL;
                goto err_put_matcher;
        }

        return &hmfs_rule->fs_rule;

err_put_matcher:
        mlx5_fc_put_hws_action(hmfs_rule->counter);
        mlx5_ct_fs_hmfs_matcher_put(fs, hmfs_matcher);
err_free_rule:
        kfree(hmfs_rule);
        return ERR_PTR(err);
}

static void mlx5_ct_fs_hmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule)
{
        struct mlx5_ct_fs_hmfs_rule *hmfs_rule = container_of(fs_rule,
                                                              struct mlx5_ct_fs_hmfs_rule,
                                                              fs_rule);
        mlx5hws_bwc_rule_destroy(hmfs_rule->hws_bwc_rule);
        mlx5_fc_put_hws_action(hmfs_rule->counter);
        mlx5_ct_fs_hmfs_matcher_put(fs, hmfs_rule->hmfs_matcher);
        kfree(hmfs_rule);
}

static int mlx5_ct_fs_hmfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule,
                                          struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr)
{
        struct mlx5_ct_fs_hmfs_rule *hmfs_rule = container_of(fs_rule,
                                                              struct mlx5_ct_fs_hmfs_rule,
                                                              fs_rule);
        struct mlx5hws_rule_action rule_actions[NUM_CT_HMFS_RULES];
        struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs);
        int err;

        mlx5_ct_fs_hmfs_fill_rule_actions(fs_hmfs, attr, rule_actions);

        err = mlx5hws_bwc_rule_action_update(hmfs_rule->hws_bwc_rule, rule_actions);
        if (err) {
                mlx5_fc_put_hws_action(attr->counter);
                return err;
        }

        mlx5_fc_put_hws_action(hmfs_rule->counter);
        hmfs_rule->counter = attr->counter;

        return 0;
}

static struct mlx5_ct_fs_ops hmfs_ops = {
@@ -39,6 +282,8 @@ static struct mlx5_ct_fs_ops hmfs_ops = {

        .init = mlx5_ct_fs_hmfs_init,
        .destroy = mlx5_ct_fs_hmfs_destroy,

        .priv_size = sizeof(struct mlx5_ct_fs_hmfs),
};

struct mlx5_ct_fs_ops *mlx5_ct_fs_hmfs_ops_get(void)
