From af02dbfe37400bc456a4b25ba47015d13a2a52c8 Mon Sep 17 00:00:00 2001 From: Yevgeny Kliteynik Date: Tue, 14 Jan 2025 15:06:43 +0200 Subject: [PATCH 1/4] net/mlx5: HWS, rework the check if matcher size can be increased When checking if the matcher size can be increased, check both match and action RTCs. Also, consider the increasing step - check that it won't cause the new matcher size to become unsupported. Additionally, since we're using '+ 1' for action RTC size yet again, define it as macro and use in all the required places. Signed-off-by: Yevgeny Kliteynik Reviewed-by: Mark Bloch Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250114130646.1937192-2-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/steering/hws/bwc.c | 18 ++++++++++++++++-- .../mellanox/mlx5/core/steering/hws/matcher.c | 6 ++++-- .../mellanox/mlx5/core/steering/hws/matcher.h | 5 +++++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index a8d886e921448..3dbd4efa21a2a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -468,8 +468,22 @@ hws_bwc_matcher_size_maxed_out(struct mlx5hws_bwc_matcher *bwc_matcher) { struct mlx5hws_cmd_query_caps *caps = bwc_matcher->matcher->tbl->ctx->caps; - return bwc_matcher->size_log + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH >= - caps->ste_alloc_log_max - 1; + /* check the match RTC size */ + if ((bwc_matcher->size_log + + MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH + + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP) > + (caps->ste_alloc_log_max - 1)) + return true; + + /* check the action RTC size */ + if ((bwc_matcher->size_log + + MLX5HWS_BWC_MATCHER_SIZE_LOG_STEP + + ilog2(roundup_pow_of_two(bwc_matcher->matcher->action_ste.max_stes)) + + MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT) > + (caps->ste_alloc_log_max - 1)) + return 
true; + + return false; } static bool diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c index 80157a29a0766..b61864b320536 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.c @@ -289,7 +289,8 @@ static int hws_matcher_create_rtc(struct mlx5hws_matcher *matcher, * (2 to support writing new STEs for update rule)) */ ste->order = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - attr->table.sz_row_log + 1; + attr->table.sz_row_log + + MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT; rtc_attr.log_size = ste->order; rtc_attr.log_depth = 0; rtc_attr.update_index_mode = MLX5_IFC_RTC_STE_UPDATE_MODE_BY_OFFSET; @@ -561,7 +562,8 @@ static int hws_matcher_bind_at(struct mlx5hws_matcher *matcher) pool_attr.flags = MLX5HWS_POOL_FLAGS_FOR_STE_ACTION_POOL; /* Pool size is similar to action RTC size */ pool_attr.alloc_log_sz = ilog2(roundup_pow_of_two(action_ste->max_stes)) + - matcher->attr.table.sz_row_log + 1; + matcher->attr.table.sz_row_log + + MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT; hws_matcher_set_pool_attr(&pool_attr, matcher); action_ste->pool = mlx5hws_pool_create(ctx, &pool_attr); if (!action_ste->pool) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h index cff4ae854a795..020de70270c50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/matcher.h @@ -18,6 +18,11 @@ /* Required depth of the main large table */ #define MLX5HWS_MATCHER_ASSURED_MAIN_TBL_DEPTH 2 +/* Action RTC size multiplier that is required in order + * to support rule update for rules with action STEs. 
+ */ +#define MLX5HWS_MATCHER_ACTION_RTC_UPDATE_MULT 1 + enum mlx5hws_matcher_offset { MLX5HWS_MATCHER_OFFSET_TAG_DW1 = 12, MLX5HWS_MATCHER_OFFSET_TAG_DW0 = 13, From 34eea5b12a109549c5942ba18f076ca5c9c8bc46 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 14 Jan 2025 15:06:44 +0200 Subject: [PATCH 2/4] net/mlx5e: CT: Add initial support for Hardware Steering Connection tracking can offload tuple matches to the NIC either via firmware commands (when the steering mode is dmfs or offload support is disabled due to eswitch being set to legacy) or via software-managed flow steering (smfs). This commit adds stub operations for a third mode, hardware-managed flow steering. This is enabled when both CONFIG_MLX5_TC_CT and CONFIG_MLX5_HW_STEERING are enabled. Signed-off-by: Cosmin Ratiu Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250114130646.1937192-3-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../net/ethernet/mellanox/mlx5/core/Makefile | 1 + .../ethernet/mellanox/mlx5/core/en/tc/ct_fs.h | 10 ++++ .../mellanox/mlx5/core/en/tc/ct_fs_hmfs.c | 47 +++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 17 +++++-- 4 files changed, 71 insertions(+), 4 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index d9a8817bb33c5..568bbe5f83f57 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -60,6 +60,7 @@ mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en/tc/act/act.o en/tc/act/drop.o en/tc/a ifneq ($(CONFIG_MLX5_TC_CT),) mlx5_core-y += en/tc_ct.o en/tc/ct_fs_dmfs.o mlx5_core-$(CONFIG_MLX5_SW_STEERING) += en/tc/ct_fs_smfs.o + mlx5_core-$(CONFIG_MLX5_HW_STEERING) += en/tc/ct_fs_hmfs.o endif mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h index 62b3f7ff55621..e5b30801314b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs.h @@ -48,4 +48,14 @@ mlx5_ct_fs_smfs_ops_get(void) } #endif /* IS_ENABLED(CONFIG_MLX5_SW_STEERING) */ +#if IS_ENABLED(CONFIG_MLX5_HW_STEERING) +struct mlx5_ct_fs_ops *mlx5_ct_fs_hmfs_ops_get(void); +#else +static inline struct mlx5_ct_fs_ops * +mlx5_ct_fs_hmfs_ops_get(void) +{ + return NULL; +} +#endif /* IS_ENABLED(CONFIG_MLX5_HW_STEERING) */ + +#endif /* __MLX5_EN_TC_CT_FS_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c new file mode 100644 index 0000000000000..be1a36d1d778b --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. */ + +#include "en_tc.h" +#include "en/tc_ct.h" +#include "en/tc/ct_fs.h" + +static int mlx5_ct_fs_hmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, + struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) +{ + return 0; +} + +static void mlx5_ct_fs_hmfs_destroy(struct mlx5_ct_fs *fs) +{ +} + +static struct mlx5_ct_fs_rule * +mlx5_ct_fs_hmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static void mlx5_ct_fs_hmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) +{ +} + +static int mlx5_ct_fs_hmfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule, + struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) +{ + return -EOPNOTSUPP; +} + +static struct mlx5_ct_fs_ops hmfs_ops = { + .ct_rule_add = mlx5_ct_fs_hmfs_ct_rule_add, + .ct_rule_del = mlx5_ct_fs_hmfs_ct_rule_del, + .ct_rule_update = 
mlx5_ct_fs_hmfs_ct_rule_update, + + .init = mlx5_ct_fs_hmfs_init, + .destroy = mlx5_ct_fs_hmfs_destroy, +}; + +struct mlx5_ct_fs_ops *mlx5_ct_fs_hmfs_ops_get(void) +{ + return &hmfs_ops; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index a84ebac2f011a..fec008c540f32 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2065,10 +2065,19 @@ mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv) struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get(); int err; - if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB && - ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { - ct_dbg("Using SMFS ct flow steering provider"); - fs_ops = mlx5_ct_fs_smfs_ops_get(); + if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB) { + if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_HMFS) { + ct_dbg("Using HMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_hmfs_ops_get(); + } else if (ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) { + ct_dbg("Using SMFS ct flow steering provider"); + fs_ops = mlx5_ct_fs_smfs_ops_get(); + } + + if (!fs_ops) { + ct_dbg("Requested flow steering mode is not enabled."); + return -EOPNOTSUPP; + } } ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL); From 554f9773fdeef302a7fa92027a8684e53adc4935 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 14 Jan 2025 15:06:45 +0200 Subject: [PATCH 3/4] net/mlx5e: CT: Make mlx5_ct_fs_smfs_ct_validate_flow_rule reusable This function checks whether a flow_rule has the right flow dissector keys and masks used for a connection tracking flow offload. It is currently used locally by the tc_ct smfs module, but is about to be used from another place, so this commit moves it to a better place, renames it to mlx5e_tc_ct_is_valid_flow_rule and drops the unused fs argument. 
Signed-off-by: Cosmin Ratiu Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250114130646.1937192-4-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/tc/ct_fs_smfs.c | 75 +------------------ .../ethernet/mellanox/mlx5/core/en/tc_ct.c | 71 ++++++++++++++++++ .../ethernet/mellanox/mlx5/core/en/tc_ct.h | 10 +++ 3 files changed, 82 insertions(+), 74 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 45737d039252f..0c97c58999049 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -13,7 +13,6 @@ #define INIT_ERR_PREFIX "ct_fs_smfs init failed" #define ct_dbg(fmt, args...)\ netdev_dbg(fs->netdev, "ct_fs_smfs debug: " fmt "\n", ##args) -#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) struct mlx5_ct_fs_smfs_matcher { struct mlx5dr_matcher *dr_matcher; @@ -220,78 +219,6 @@ mlx5_ct_fs_smfs_destroy(struct mlx5_ct_fs *fs) mlx5_smfs_action_destroy(fs_smfs->fwd_action); } -static inline bool -mlx5_tc_ct_valid_used_dissector_keys(const u64 used_keys) -{ -#define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) - const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | - DISS_BIT(META); - const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | - DISS_BIT(PORTS) | DISS_BIT(TCP); - const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | - DISS_BIT(PORTS) | DISS_BIT(TCP); - const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | - DISS_BIT(PORTS); - const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | - DISS_BIT(PORTS); - const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); - const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); - - return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || - used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys 
== ipv6_gre); -} - -static bool -mlx5_ct_fs_smfs_ct_validate_flow_rule(struct mlx5_ct_fs *fs, struct flow_rule *flow_rule) -{ - struct flow_match_ipv4_addrs ipv4_addrs; - struct flow_match_ipv6_addrs ipv6_addrs; - struct flow_match_control control; - struct flow_match_basic basic; - struct flow_match_ports ports; - struct flow_match_tcp tcp; - - if (!mlx5_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { - ct_dbg("rule uses unexpected dissectors (0x%016llx)", - flow_rule->match.dissector->used_keys); - return false; - } - - flow_rule_match_basic(flow_rule, &basic); - flow_rule_match_control(flow_rule, &control); - flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); - flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); - if (basic.key->ip_proto != IPPROTO_GRE) - flow_rule_match_ports(flow_rule, &ports); - if (basic.key->ip_proto == IPPROTO_TCP) - flow_rule_match_tcp(flow_rule, &tcp); - - if (basic.mask->n_proto != htons(0xFFFF) || - (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || - basic.mask->ip_proto != 0xFF || - (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && - basic.key->ip_proto != IPPROTO_GRE)) { - ct_dbg("rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", - ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), - basic.key->ip_proto, basic.mask->ip_proto); - return false; - } - - if (basic.key->ip_proto != IPPROTO_GRE && - (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { - ct_dbg("rule uses ports match (src 0x%04x, dst 0x%04x)", - ports.mask->src, ports.mask->dst); - return false; - } - - if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { - ct_dbg("rule uses unexpected tcp match (flags 0x%02x)", tcp.mask->flags); - return false; - } - - return true; -} - static struct mlx5_ct_fs_rule * mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, struct 
mlx5_flow_attr *attr, struct flow_rule *flow_rule) @@ -304,7 +231,7 @@ mlx5_ct_fs_smfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, int num_actions = 0, err; bool nat, tcp, ipv4, gre; - if (!mlx5_ct_fs_smfs_ct_validate_flow_rule(fs, flow_rule)) + if (!mlx5e_tc_ct_is_valid_flow_rule(fs->netdev, flow_rule)) return ERR_PTR(-EOPNOTSUPP); smfs_rule = kzalloc(sizeof(*smfs_rule), GFP_KERNEL); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index fec008c540f32..a065e8fafb1d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -2430,3 +2430,74 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, atomic_inc(&ct_priv->debugfs.stats.rx_dropped); return false; } + +static bool mlx5e_tc_ct_valid_used_dissector_keys(const u64 used_keys) +{ +#define DISS_BIT(name) BIT_ULL(FLOW_DISSECTOR_KEY_ ## name) + const u64 basic_keys = DISS_BIT(BASIC) | DISS_BIT(CONTROL) | + DISS_BIT(META); + const u64 ipv4_tcp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv6_tcp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS) | DISS_BIT(TCP); + const u64 ipv4_udp = basic_keys | DISS_BIT(IPV4_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv6_udp = basic_keys | DISS_BIT(IPV6_ADDRS) | + DISS_BIT(PORTS); + const u64 ipv4_gre = basic_keys | DISS_BIT(IPV4_ADDRS); + const u64 ipv6_gre = basic_keys | DISS_BIT(IPV6_ADDRS); + + return (used_keys == ipv4_tcp || used_keys == ipv4_udp || used_keys == ipv6_tcp || + used_keys == ipv6_udp || used_keys == ipv4_gre || used_keys == ipv6_gre); +} + +bool mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, struct flow_rule *flow_rule) +{ + struct flow_match_ipv4_addrs ipv4_addrs; + struct flow_match_ipv6_addrs ipv6_addrs; + struct flow_match_control control; + struct flow_match_basic basic; + struct flow_match_ports ports; + struct flow_match_tcp tcp; + + if 
(!mlx5e_tc_ct_valid_used_dissector_keys(flow_rule->match.dissector->used_keys)) { + netdev_dbg(dev, "ct_debug: rule uses unexpected dissectors (0x%016llx)", + flow_rule->match.dissector->used_keys); + return false; + } + + flow_rule_match_basic(flow_rule, &basic); + flow_rule_match_control(flow_rule, &control); + flow_rule_match_ipv4_addrs(flow_rule, &ipv4_addrs); + flow_rule_match_ipv6_addrs(flow_rule, &ipv6_addrs); + if (basic.key->ip_proto != IPPROTO_GRE) + flow_rule_match_ports(flow_rule, &ports); + if (basic.key->ip_proto == IPPROTO_TCP) + flow_rule_match_tcp(flow_rule, &tcp); + + if (basic.mask->n_proto != htons(0xFFFF) || + (basic.key->n_proto != htons(ETH_P_IP) && basic.key->n_proto != htons(ETH_P_IPV6)) || + basic.mask->ip_proto != 0xFF || + (basic.key->ip_proto != IPPROTO_UDP && basic.key->ip_proto != IPPROTO_TCP && + basic.key->ip_proto != IPPROTO_GRE)) { + netdev_dbg(dev, "ct_debug: rule uses unexpected basic match (n_proto 0x%04x/0x%04x, ip_proto 0x%02x/0x%02x)", + ntohs(basic.key->n_proto), ntohs(basic.mask->n_proto), + basic.key->ip_proto, basic.mask->ip_proto); + return false; + } + + if (basic.key->ip_proto != IPPROTO_GRE && + (ports.mask->src != htons(0xFFFF) || ports.mask->dst != htons(0xFFFF))) { + netdev_dbg(dev, "ct_debug: rule uses ports match (src 0x%04x, dst 0x%04x)", + ports.mask->src, ports.mask->dst); + return false; + } + + if (basic.key->ip_proto == IPPROTO_TCP && tcp.mask->flags != MLX5_CT_TCP_FLAGS_MASK) { + netdev_dbg(dev, "ct_debug: rule uses unexpected tcp match (flags 0x%02x)", + tcp.mask->flags); + return false; + } + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index b66c5f98067f7..5e9dbdd4a5e91 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -128,6 +128,9 @@ bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); 
+#define MLX5_CT_TCP_FLAGS_MASK cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16) +bool mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, struct flow_rule *flow_rule); + #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -202,5 +205,12 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, return false; } +static inline bool +mlx5e_tc_ct_is_valid_flow_rule(const struct net_device *dev, + struct flow_rule *flow_rule) +{ + return false; +} + #endif /* !IS_ENABLED(CONFIG_MLX5_TC_CT) */ #endif /* __MLX5_EN_TC_CT_H__ */ From 066d49c199a66f07232fdcbe5b5f5c607e804327 Mon Sep 17 00:00:00 2001 From: Cosmin Ratiu Date: Tue, 14 Jan 2025 15:06:46 +0200 Subject: [PATCH 4/4] net/mlx5e: CT: Offload connections with hardware steering rules This is modeled similar to how software steering works: - a reference-counted matcher is maintained for each combination of nat/no_nat x ipv4/ipv6 x tcp/udp/gre. - adding a rule involves finding+referencing or creating a corresponding matcher, then actually adding a rule. - updating rules is implemented using the bwc_rule update API, which can change a rule's actions without touching the match value. By using a T-Rex traffic generator to initiate multi-million UDP flows per second, a kernel running with these patches on the RX side was able to offload ~600K flows per second, which is about ~7x larger than what software steering could do on the same hardware (256-thread AMD EPYC, 512 GB RAM, ConnectX-7 b2b). 
Signed-off-by: Cosmin Ratiu Reviewed-by: Jianbo Liu Signed-off-by: Tariq Toukan Link: https://patch.msgid.link/20250114130646.1937192-5-tariqt@nvidia.com Signed-off-by: Jakub Kicinski --- .../mellanox/mlx5/core/en/tc/ct_fs_hmfs.c | 249 +++++++++++++++++- 1 file changed, 247 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c index be1a36d1d778b..a4263137fef5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c @@ -3,33 +3,276 @@ #include "en_tc.h" #include "en/tc_ct.h" +#include "en/tc_priv.h" #include "en/tc/ct_fs.h" +#include "fs_core.h" +#include "steering/hws/fs_hws_pools.h" +#include "steering/hws/mlx5hws.h" +#include "steering/hws/table.h" + +struct mlx5_ct_fs_hmfs_matcher { + struct mlx5hws_bwc_matcher *hws_bwc_matcher; + refcount_t ref; +}; + +/* We need {ipv4, ipv6} x {tcp, udp, gre} matchers. 
*/ +#define NUM_MATCHERS (2 * 3) + +struct mlx5_ct_fs_hmfs { + struct mlx5hws_table *ct_tbl; + struct mlx5hws_table *ct_nat_tbl; + struct mlx5_flow_table *ct_nat; + struct mlx5hws_action *fwd_action; + struct mlx5hws_action *last_action; + struct mlx5hws_context *ctx; + struct mutex lock; /* Guards matchers */ + struct mlx5_ct_fs_hmfs_matcher matchers[NUM_MATCHERS]; + struct mlx5_ct_fs_hmfs_matcher matchers_nat[NUM_MATCHERS]; +}; + +struct mlx5_ct_fs_hmfs_rule { + struct mlx5_ct_fs_rule fs_rule; + struct mlx5hws_bwc_rule *hws_bwc_rule; + struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher; + struct mlx5_fc *counter; +}; + +static u32 get_matcher_idx(bool ipv4, bool tcp, bool gre) +{ + return ipv4 * 3 + tcp * 2 + gre; +} static int mlx5_ct_fs_hmfs_init(struct mlx5_ct_fs *fs, struct mlx5_flow_table *ct, struct mlx5_flow_table *ct_nat, struct mlx5_flow_table *post_ct) { + u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; + struct mlx5hws_table *ct_tbl, *ct_nat_tbl, *post_ct_tbl; + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + + ct_tbl = ct->fs_hws_table.hws_table; + ct_nat_tbl = ct_nat->fs_hws_table.hws_table; + post_ct_tbl = post_ct->fs_hws_table.hws_table; + fs_hmfs->ct_nat = ct_nat; + + if (!ct_tbl || !ct_nat_tbl || !post_ct_tbl) { + netdev_warn(fs->netdev, "ct_fs_hmfs: failed to init, missing backing hws tables"); + return -EOPNOTSUPP; + } + + netdev_dbg(fs->netdev, "using hmfs steering"); + + fs_hmfs->ct_tbl = ct_tbl; + fs_hmfs->ct_nat_tbl = ct_nat_tbl; + fs_hmfs->ctx = ct_tbl->ctx; + mutex_init(&fs_hmfs->lock); + + fs_hmfs->fwd_action = mlx5hws_action_create_dest_table(ct_tbl->ctx, post_ct_tbl, flags); + if (!fs_hmfs->fwd_action) { + netdev_warn(fs->netdev, "ct_fs_hmfs: failed to create fwd action\n"); + return -EINVAL; + } + fs_hmfs->last_action = mlx5hws_action_create_last(ct_tbl->ctx, flags); + if (!fs_hmfs->last_action) { + netdev_warn(fs->netdev, "ct_fs_hmfs: failed to create last action\n"); + 
mlx5hws_action_destroy(fs_hmfs->fwd_action); + return -EINVAL; + } + return 0; } static void mlx5_ct_fs_hmfs_destroy(struct mlx5_ct_fs *fs) { + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + + mlx5hws_action_destroy(fs_hmfs->last_action); + mlx5hws_action_destroy(fs_hmfs->fwd_action); +} + +static struct mlx5hws_bwc_matcher * +mlx5_ct_fs_hmfs_matcher_create(struct mlx5_ct_fs *fs, struct mlx5hws_table *tbl, + struct mlx5_flow_spec *spec, bool ipv4, bool tcp, bool gre) +{ + u8 match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2 | MLX5_MATCH_OUTER_HEADERS; + struct mlx5hws_match_parameters mask = { + .match_buf = spec->match_criteria, + .match_sz = sizeof(spec->match_criteria), + }; + u32 priority = get_matcher_idx(ipv4, tcp, gre); /* Static priority based on params. */ + struct mlx5hws_bwc_matcher *hws_bwc_matcher; + + hws_bwc_matcher = mlx5hws_bwc_matcher_create(tbl, priority, match_criteria_enable, &mask); + if (!hws_bwc_matcher) + return ERR_PTR(-EINVAL); + + return hws_bwc_matcher; +} + +static struct mlx5_ct_fs_hmfs_matcher * +mlx5_ct_fs_hmfs_matcher_get(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, + bool nat, bool ipv4, bool tcp, bool gre) +{ + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + u32 matcher_idx = get_matcher_idx(ipv4, tcp, gre); + struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher; + struct mlx5hws_bwc_matcher *hws_bwc_matcher; + struct mlx5hws_table *tbl; + + hmfs_matcher = nat ? + (fs_hmfs->matchers_nat + matcher_idx) : + (fs_hmfs->matchers + matcher_idx); + + if (refcount_inc_not_zero(&hmfs_matcher->ref)) + return hmfs_matcher; + + mutex_lock(&fs_hmfs->lock); + + /* Retry with lock, as the matcher might be already created by another cpu. */ + if (refcount_inc_not_zero(&hmfs_matcher->ref)) + goto out_unlock; + + tbl = nat ? 
fs_hmfs->ct_nat_tbl : fs_hmfs->ct_tbl; + + hws_bwc_matcher = mlx5_ct_fs_hmfs_matcher_create(fs, tbl, spec, ipv4, tcp, gre); + if (IS_ERR(hws_bwc_matcher)) { + netdev_warn(fs->netdev, + "ct_fs_hmfs: failed to create bwc matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", + nat, ipv4, tcp, gre, PTR_ERR(hws_bwc_matcher)); + + hmfs_matcher = ERR_CAST(hws_bwc_matcher); + goto out_unlock; + } + + hmfs_matcher->hws_bwc_matcher = hws_bwc_matcher; + refcount_set(&hmfs_matcher->ref, 1); + +out_unlock: + mutex_unlock(&fs_hmfs->lock); + return hmfs_matcher; +} + +static void +mlx5_ct_fs_hmfs_matcher_put(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher) +{ + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + + if (!refcount_dec_and_mutex_lock(&hmfs_matcher->ref, &fs_hmfs->lock)) + return; + + mlx5hws_bwc_matcher_destroy(hmfs_matcher->hws_bwc_matcher); + mutex_unlock(&fs_hmfs->lock); +} + +#define NUM_CT_HMFS_RULES 4 + +static void mlx5_ct_fs_hmfs_fill_rule_actions(struct mlx5_ct_fs_hmfs *fs_hmfs, + struct mlx5_flow_attr *attr, + struct mlx5hws_rule_action *rule_actions) +{ + struct mlx5_fs_hws_action *mh_action = &attr->modify_hdr->fs_hws_action; + + memset(rule_actions, 0, NUM_CT_HMFS_RULES * sizeof(*rule_actions)); + rule_actions[0].action = mlx5_fc_get_hws_action(fs_hmfs->ctx, attr->counter); + /* Modify header is special, it may require extra arguments outside the action itself. 
*/ + if (mh_action->mh_data) { + rule_actions[1].modify_header.offset = mh_action->mh_data->offset; + rule_actions[1].modify_header.data = mh_action->mh_data->data; + } + rule_actions[1].action = mh_action->hws_action; + rule_actions[2].action = fs_hmfs->fwd_action; + rule_actions[3].action = fs_hmfs->last_action; } static struct mlx5_ct_fs_rule * mlx5_ct_fs_hmfs_ct_rule_add(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr, struct flow_rule *flow_rule) { - return ERR_PTR(-EOPNOTSUPP); + struct mlx5hws_rule_action rule_actions[NUM_CT_HMFS_RULES]; + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + struct mlx5hws_match_parameters match_params = { + .match_buf = spec->match_value, + .match_sz = ARRAY_SIZE(spec->match_value), + }; + struct mlx5_ct_fs_hmfs_matcher *hmfs_matcher; + struct mlx5_ct_fs_hmfs_rule *hmfs_rule; + bool nat, tcp, ipv4, gre; + int err; + + if (!mlx5e_tc_ct_is_valid_flow_rule(fs->netdev, flow_rule)) + return ERR_PTR(-EOPNOTSUPP); + + hmfs_rule = kzalloc(sizeof(*hmfs_rule), GFP_KERNEL); + if (!hmfs_rule) + return ERR_PTR(-ENOMEM); + + nat = (attr->ft == fs_hmfs->ct_nat); + ipv4 = mlx5e_tc_get_ip_version(spec, true) == 4; + tcp = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_TCP; + gre = MLX5_GET(fte_match_param, spec->match_value, + outer_headers.ip_protocol) == IPPROTO_GRE; + + hmfs_matcher = mlx5_ct_fs_hmfs_matcher_get(fs, spec, nat, ipv4, tcp, gre); + if (IS_ERR(hmfs_matcher)) { + err = PTR_ERR(hmfs_matcher); + goto err_free_rule; + } + hmfs_rule->hmfs_matcher = hmfs_matcher; + + mlx5_ct_fs_hmfs_fill_rule_actions(fs_hmfs, attr, rule_actions); + hmfs_rule->counter = attr->counter; + + hmfs_rule->hws_bwc_rule = + mlx5hws_bwc_rule_create(hmfs_matcher->hws_bwc_matcher, &match_params, + spec->flow_context.flow_source, rule_actions); + if (!hmfs_rule->hws_bwc_rule) { + err = -EINVAL; + goto err_put_matcher; + } + + return &hmfs_rule->fs_rule; + +err_put_matcher: + 
mlx5_fc_put_hws_action(hmfs_rule->counter); + mlx5_ct_fs_hmfs_matcher_put(fs, hmfs_matcher); +err_free_rule: + kfree(hmfs_rule); + return ERR_PTR(err); } static void mlx5_ct_fs_hmfs_ct_rule_del(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule) { + struct mlx5_ct_fs_hmfs_rule *hmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_hmfs_rule, + fs_rule); + mlx5hws_bwc_rule_destroy(hmfs_rule->hws_bwc_rule); + mlx5_fc_put_hws_action(hmfs_rule->counter); + mlx5_ct_fs_hmfs_matcher_put(fs, hmfs_rule->hmfs_matcher); + kfree(hmfs_rule); } static int mlx5_ct_fs_hmfs_ct_rule_update(struct mlx5_ct_fs *fs, struct mlx5_ct_fs_rule *fs_rule, struct mlx5_flow_spec *spec, struct mlx5_flow_attr *attr) { - return -EOPNOTSUPP; + struct mlx5_ct_fs_hmfs_rule *hmfs_rule = container_of(fs_rule, + struct mlx5_ct_fs_hmfs_rule, + fs_rule); + struct mlx5hws_rule_action rule_actions[NUM_CT_HMFS_RULES]; + struct mlx5_ct_fs_hmfs *fs_hmfs = mlx5_ct_fs_priv(fs); + int err; + + mlx5_ct_fs_hmfs_fill_rule_actions(fs_hmfs, attr, rule_actions); + + err = mlx5hws_bwc_rule_action_update(hmfs_rule->hws_bwc_rule, rule_actions); + if (err) { + mlx5_fc_put_hws_action(attr->counter); + return err; + } + + mlx5_fc_put_hws_action(hmfs_rule->counter); + hmfs_rule->counter = attr->counter; + + return 0; } static struct mlx5_ct_fs_ops hmfs_ops = { @@ -39,6 +282,8 @@ static struct mlx5_ct_fs_ops hmfs_ops = { .init = mlx5_ct_fs_hmfs_init, .destroy = mlx5_ct_fs_hmfs_destroy, + + .priv_size = sizeof(struct mlx5_ct_fs_hmfs), }; struct mlx5_ct_fs_ops *mlx5_ct_fs_hmfs_ops_get(void)