From 197258f0ef685ddbd534254dc79f49faa47dc93d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:43 +0000 Subject: [PATCH 01/10] net: ethtool: add hds_config member in ethtool_netdev_state When tcp-data-split is in UNKNOWN mode, drivers handle it arbitrarily. For example, the bnxt_en driver automatically enables it if at least one of LRO/GRO/JUMBO is enabled. If tcp-data-split is UNKNOWN and LRO is enabled, a driver reports tcp-data-split as ENABLED, not UNKNOWN. So, `ethtool -g eth0` shows tcp-data-split is enabled. The problem is in the setting situation. In ethnl_set_rings(), it first calls get_ringparam() to get the driver's current config. At that moment, if the driver's tcp-data-split config is UNKNOWN, it returns ENABLED if LRO/GRO/JUMBO is enabled. Then, it sets values from the user and the driver's current config to kernel_ethtool_ringparam. Finally, it calls .set_ringparam(). The driver, especially the bnxt_en driver, receives ETHTOOL_TCP_DATA_SPLIT_ENABLED. But it can't distinguish whether it was set by the user or is just the current config. When the user updates a ring parameter, the new hds_config value is updated and the current hds_config value is stored to old_hdsconfig. The driver's .set_ringparam() callback can then distinguish whether a passed tcp-data-split value came explicitly from the user. If .set_ringparam() fails, hds_config is rolled back immediately. 
Suggested-by: Jakub Kicinski Reviewed-by: Jakub Kicinski Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-2-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/ethtool.h | 2 ++ include/linux/netdevice.h | 1 + net/core/dev.c | 12 ++++++++++++ net/ethtool/rings.c | 12 ++++++++++++ 4 files changed, 27 insertions(+) diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 20a86bd5f4e34..d79bd201c1c89 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1157,12 +1157,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. + * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; + u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bced03fb349e5..3e6336775baf0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4082,6 +4082,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); +u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); u32 dev_get_min_mp_channel_count(const struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index fda4e1039bf01..5ef817d656ef6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -9550,6 +9550,18 @@ u8 dev_xdp_prog_count(struct net_device *dev) } EXPORT_SYMBOL_GPL(dev_xdp_prog_count); +u8 dev_xdp_sb_prog_count(struct net_device *dev) +{ + u8 
count = 0; + int i; + + for (i = 0; i < __MAX_XDP_MODE; i++) + if (dev->xdp_state[i].prog && + !dev->xdp_state[i].prog->aux->xdp_has_frags) + count++; + return count; +} + int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) { if (!dev->netdev_ops->ndo_bpf) diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index b7865a14fdf8f..b2a2586b241f6 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -203,6 +203,7 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) dev->ethtool_ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); + kernel_ringparam.tcp_data_split = dev->ethtool->hds_config; ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, @@ -225,6 +226,14 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) if (!mod) return 0; + if (kernel_ringparam.tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + dev_xdp_sb_prog_count(dev)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], + "tcp-data-split can not be enabled with single buffer XDP"); + return -EINVAL; + } + /* ensure new ring parameters are within limits */ if (ringparam.rx_pending > ringparam.rx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX]; @@ -252,6 +261,9 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); + if (!ret) + dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; + return ret < 0 ? ret : 1; } From eec8359f0797ef87c6ef6cbed6de08b02073b833 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:44 +0000 Subject: [PATCH 02/10] net: ethtool: add support for configuring hds-thresh The hds-thresh option configures the threshold value of the header-data-split. If a received packet size is larger than this threshold value, a packet will be split into header and payload. 
The header indicates TCP and UDP header, but it depends on driver spec. The bnxt_en driver supports HDS(Header-Data-Split) configuration at FW level, affecting TCP and UDP too. So, If hds-thresh is set, it affects UDP and TCP packets. Example: # ethtool -G hds-thresh # ethtool -G enp14s0f0np0 tcp-data-split on hds-thresh 256 # ethtool -g enp14s0f0np0 Ring parameters for enp14s0f0np0: Pre-set maximums: ... HDS thresh: 1023 Current hardware settings: ... TCP data split: on HDS thresh: 256 The default/min/max values are not defined in the ethtool so the drivers should define themself. The 0 value means that all TCP/UDP packets' header and payload will be split. Tested-by: Stanislav Fomichev Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-3-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 8 ++++++ Documentation/networking/ethtool-netlink.rst | 10 +++++++ include/linux/ethtool.h | 9 ++++++ .../uapi/linux/ethtool_netlink_generated.h | 2 ++ net/ethtool/netlink.h | 2 +- net/ethtool/rings.c | 28 +++++++++++++++++-- 6 files changed, 55 insertions(+), 4 deletions(-) diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 60f85fbf41561..66be040130481 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -332,6 +332,12 @@ attribute-sets: - name: tx-push-buf-len-max type: u32 + - + name: hds-thresh + type: u32 + - + name: hds-thresh-max + type: u32 - name: mm-stat @@ -1777,6 +1783,8 @@ operations: - rx-push - tx-push-buf-len - tx-push-buf-len-max + - hds-thresh + - hds-thresh-max dump: *ring-get-op - name: rings-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index da846f1d998ec..f70c0249860cd 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -899,6 +899,10 @@ Kernel response contents: 
``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX`` u32 max size of TX push buffer + ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of + header / data split + ``ETHTOOL_A_RINGS_HDS_THRESH_MAX`` u32 max threshold of + header / data split ======================================= ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with @@ -941,10 +945,12 @@ Request contents: ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer + ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of header / data split ==================================== ====== =========================== Kernel checks that requested ring sizes do not exceed limits reported by @@ -961,6 +967,10 @@ A bigger CQE can have more receive buffer pointers, and in turn the NIC can transfer a bigger frame from wire. Based on the NIC hardware, the overall completion queue size can be adjusted in the driver if CQE size is modified. +``ETHTOOL_A_RINGS_HDS_THRESH`` specifies the threshold value of +header / data split feature. If a received packet size is larger than this +threshold value, header and data will be split. 
+ CHANNELS_GET ============ diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index d79bd201c1c89..e4136b0df8923 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -78,6 +78,9 @@ enum { * @cqe_size: Size of TX/RX completion queue event * @tx_push_buf_len: Size of TX push buffer * @tx_push_buf_max_len: Maximum allowed size of TX push buffer + * @hds_thresh: Packet size threshold for header data split (HDS) + * @hds_thresh_max: Maximum supported setting for @hds_threshold + * */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -87,6 +90,8 @@ struct kernel_ethtool_ringparam { u32 cqe_size; u32 tx_push_buf_len; u32 tx_push_buf_max_len; + u32 hds_thresh; + u32 hds_thresh_max; }; /** @@ -97,6 +102,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split + * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -105,6 +111,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), + ETHTOOL_RING_USE_HDS_THRS = BIT(6), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -1157,6 +1164,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. + * @hds_thresh: HDS Threshold value. * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. 
@@ -1164,6 +1172,7 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; + u32 hds_thresh; u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 43993a2d68e56..2e17ff348f89e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -155,6 +155,8 @@ enum { ETHTOOL_A_RINGS_RX_PUSH, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + ETHTOOL_A_RINGS_HDS_THRESH, + ETHTOOL_A_RINGS_HDS_THRESH_MAX, __ETHTOOL_A_RINGS_CNT, ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 1ce0a3de14304..ff69ca0715dea 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -456,7 +456,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_HDS_THRESH_MAX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index b2a2586b241f6..a381913a19f0b 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -61,7 +61,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, 
nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ - nla_total_size(sizeof(u32)); /* _RINGS_TX_PUSH_BUF_LEN_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_HDS_THRESH */ + nla_total_size(sizeof(u32)); /* _RINGS_HDS_THRESH_MAX*/ } static int rings_fill_reply(struct sk_buff *skb, @@ -108,7 +110,12 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, kr->tx_push_buf_max_len) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, - kr->tx_push_buf_len)))) + kr->tx_push_buf_len))) || + ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) && + (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH, + kr->hds_thresh) || + nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX, + kr->hds_thresh_max)))) return -EMSGSIZE; return 0; @@ -130,6 +137,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_HDS_THRESH] = { .type = NLA_U32 }, }; static int @@ -155,6 +163,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_HDS_THRESH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_HDS_THRS)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_HDS_THRESH], + "setting hds-thresh is not supported"); + return -EOPNOTSUPP; + } + if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { NL_SET_ERR_MSG_ATTR(info->extack, @@ -223,6 +239,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); + ethnl_update_u32(&kernel_ringparam.hds_thresh, 
+ tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod); if (!mod) return 0; @@ -243,6 +261,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; else if (ringparam.tx_pending > ringparam.tx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_TX]; + else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max) + err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH]; else err_attr = NULL; if (err_attr) { @@ -261,8 +281,10 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); - if (!ret) + if (!ret) { dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; + dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh; + } return ret < 0 ? ret : 1; } From a08a5c9484015a88c937aa0f9eaf3efb2123c3b8 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:45 +0000 Subject: [PATCH 03/10] net: devmem: add ring parameter filtering If driver doesn't support ring parameter or tcp-data-split configuration is not sufficient, the devmem should not be set up. Before setup the devmem, tcp-data-split should be ON and hds-thresh value should be 0. 
Tested-by: Stanislav Fomichev Reviewed-by: Mina Almasry Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-4-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/devmem.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/core/devmem.c b/net/core/devmem.c index 0b6ed7525b22a..c971b8aceac86 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -140,6 +141,16 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -ERANGE; } + if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); + return -EINVAL; + } + + if (dev->ethtool->hds_thresh) { + NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); + return -EINVAL; + } + rxq = __netif_get_rx_queue(dev, rxq_idx); if (rxq->mp_params.mp_priv) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); From e61779015c4a4655b31e2e1fc47a7210be9f53f3 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:46 +0000 Subject: [PATCH 04/10] net: ethtool: add ring parameter filtering While the devmem is running, the tcp-data-split and hds-thresh configuration should not be changed. If user tries to change tcp-data-split and threshold value while the devmem is running, it fails and shows extack message. 
Reviewed-by: Jakub Kicinski Tested-by: Stanislav Fomichev Reviewed-by: Mina Almasry Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-5-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/ethtool/rings.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index a381913a19f0b..d8cd4e4d77622 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -252,6 +252,19 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) return -EINVAL; } + if (dev_get_min_mp_channel_count(dev)) { + if (kernel_ringparam.tcp_data_split != + ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(info->extack, + "can't disable tcp-data-split while device has memory provider enabled"); + return -EINVAL; + } else if (kernel_ringparam.hds_thresh) { + NL_SET_ERR_MSG(info->extack, + "can't set non-zero hds_thresh while device is memory provider enabled"); + return -EINVAL; + } + } + /* ensure new ring parameters are within limits */ if (ringparam.rx_pending > ringparam.rx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX]; From 2d46e481a9afc8e6b214f5c78b05374f05b8f62a Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:47 +0000 Subject: [PATCH 05/10] net: disallow setup single buffer XDP when tcp-data-split is enabled. When a single buffer XDP is attached, NIC should guarantee only single page packets will be received. tcp-data-split feature splits packets into header and payload. single buffer XDP can't handle it properly. So attaching single buffer XDP should be disallowed when tcp-data-split is enabled. 
Acked-by: Jakub Kicinski Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-6-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- net/core/dev.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/net/core/dev.c b/net/core/dev.c index 5ef817d656ef6..47e6b0f73cfc7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -9567,6 +9568,14 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; + if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + bpf->command == XDP_SETUP_PROG && + bpf->prog && !bpf->prog->aux->xdp_has_frags) { + NL_SET_ERR_MSG(bpf->extack, + "unable to propagate XDP to device using tcp-data-split"); + return -EBUSY; + } + if (dev_get_min_mp_channel_count(dev)) { NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider"); return -EBUSY; @@ -9604,6 +9613,12 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; + if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + prog && !prog->aux->xdp_has_frags) { + NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); + return -EBUSY; + } + if (dev_get_min_mp_channel_count(dev)) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider"); return -EBUSY; From 152f4da05aeee62cf04d61daf9789575f1df8f4e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:48 +0000 Subject: [PATCH 06/10] bnxt_en: add support for rx-copybreak ethtool command The bnxt_en driver supports rx-copybreak, but it couldn't be set by userspace. Only the default value(256) has worked. This patch makes the bnxt_en driver support following command. `ethtool --set-tunable rx-copybreak ` and `ethtool --get-tunable rx-copybreak`. 
By this patch, hds_threshold is set to the rx-copybreak value. But it will be set by `ethtool -G eth0 hds-thresh N` in the next patch. Reviewed-by: Jakub Kicinski Reviewed-by: Brett Creeley Tested-by: Stanislav Fomichev Tested-by: Andy Gospodarek Signed-off-by: Taehee Yoo Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250114142852.3364986-7-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 28 +++++++----- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 ++- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 44 ++++++++++++++++++- 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 884d42db55545..d19c4fb588e59 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -81,7 +81,6 @@ MODULE_DESCRIPTION("Broadcom NetXtreme network driver"); #define BNXT_RX_OFFSET (NET_SKB_PAD + NET_IP_ALIGN) #define BNXT_RX_DMA_OFFSET NET_SKB_PAD -#define BNXT_RX_COPY_THRESH 256 #define BNXT_TX_PUSH_THRESH 164 @@ -1343,13 +1342,13 @@ static struct sk_buff *bnxt_copy_data(struct bnxt_napi *bnapi, u8 *data, if (!skb) return NULL; - dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copy_thresh, + dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copybreak, bp->rx_dir); memcpy(skb->data - NET_IP_ALIGN, data - NET_IP_ALIGN, len + NET_IP_ALIGN); - dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copy_thresh, + dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copybreak, bp->rx_dir); skb_put(skb, len); @@ -1842,7 +1841,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return NULL; } - if (len <= bp->rx_copy_thresh) { + if (len <= bp->rx_copybreak) { skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); @@ -2176,7 +2175,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (len <= 
bp->rx_copy_thresh) { + if (len <= bp->rx_copybreak) { if (!xdp_active) skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); else @@ -4608,6 +4607,11 @@ void bnxt_set_tpa_flags(struct bnxt *bp) bp->flags |= BNXT_FLAG_GRO; } +static void bnxt_init_ring_params(struct bnxt *bp) +{ + bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; +} + /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must * be set on entry. */ @@ -4622,7 +4626,6 @@ void bnxt_set_ring_params(struct bnxt *bp) rx_space = rx_size + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - bp->rx_copy_thresh = BNXT_RX_COPY_THRESH; ring_size = bp->rx_ring_size; bp->rx_agg_ring_size = 0; bp->rx_agg_nr_pages = 0; @@ -4667,7 +4670,9 @@ void bnxt_set_ring_params(struct bnxt *bp) ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } else { - rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); + rx_size = SKB_DATA_ALIGN(max(BNXT_DEFAULT_RX_COPYBREAK, + bp->rx_copybreak) + + NET_IP_ALIGN); rx_space = rx_size + NET_SKB_PAD + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } @@ -6573,16 +6578,14 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID); + req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); - if (BNXT_RX_PAGE_MODE(bp)) { - req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); - } else { + if (!BNXT_RX_PAGE_MODE(bp) && (bp->flags & BNXT_FLAG_AGG_RINGS)) { req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); req->enables |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); - req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); - req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); + req->hds_threshold = cpu_to_le16(bp->rx_copybreak); } req->vnic_id 
= cpu_to_le32(vnic->fw_vnic_id); return hwrm_req_send(bp, req); @@ -16261,6 +16264,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_init_l2_fltr_tbl(bp); bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); + bnxt_init_ring_params(bp); bnxt_set_ring_params(bp); bnxt_rdma_aux_device_init(bp); rc = bnxt_set_dflt_rings(bp, true); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 094c9e95b4639..7edb92ce59762 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -34,6 +34,9 @@ #include #endif +#define BNXT_DEFAULT_RX_COPYBREAK 256 +#define BNXT_MAX_RX_COPYBREAK 1024 + extern struct list_head bnxt_block_cb_list; struct page_pool; @@ -2347,7 +2350,7 @@ struct bnxt { enum dma_data_direction rx_dir; u32 rx_ring_size; u32 rx_agg_ring_size; - u32 rx_copy_thresh; + u32 rx_copybreak; u32 rx_ring_mask; u32 rx_agg_ring_mask; int rx_nr_pages; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 75a59dd72bcea..e9e63d95df177 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -4328,6 +4328,45 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata) return 0; } +static int bnxt_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct bnxt *bp = netdev_priv(dev); + u32 rx_copybreak; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + rx_copybreak = *(u32 *)data; + if (rx_copybreak > BNXT_MAX_RX_COPYBREAK) + return -ERANGE; + if (rx_copybreak != bp->rx_copybreak) { + if (netif_running(dev)) + return -EBUSY; + bp->rx_copybreak = rx_copybreak; + } + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int bnxt_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, void *data) +{ + struct bnxt *bp = 
netdev_priv(dev); + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = bp->rx_copybreak; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static int bnxt_read_sfp_module_eeprom_info(struct bnxt *bp, u16 i2c_addr, u16 page_number, u8 bank, u16 start_addr, u16 data_length, @@ -4790,7 +4829,8 @@ static int bnxt_run_loopback(struct bnxt *bp) cpr = &rxr->bnapi->cp_ring; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) cpr = rxr->rx_cpr; - pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh); + pkt_size = min(bp->dev->mtu + ETH_HLEN, max(BNXT_DEFAULT_RX_COPYBREAK, + bp->rx_copybreak)); skb = netdev_alloc_skb(bp->dev, pkt_size); if (!skb) return -ENOMEM; @@ -5372,6 +5412,8 @@ const struct ethtool_ops bnxt_ethtool_ops = { .get_link_ext_stats = bnxt_get_link_ext_stats, .get_eee = bnxt_get_eee, .set_eee = bnxt_set_eee, + .get_tunable = bnxt_get_tunable, + .set_tunable = bnxt_set_tunable, .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, .get_module_eeprom_by_page = bnxt_get_module_eeprom_by_page, From 87c8f8496a05de71dc42f5f2ed2b1ea64ea8b77d Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:49 +0000 Subject: [PATCH 07/10] bnxt_en: add support for tcp-data-split ethtool command NICs that uses bnxt_en driver supports tcp-data-split feature by the name of HDS(header-data-split). But there is no implementation for the HDS to enable by ethtool. Only getting the current HDS status is implemented and The HDS is just automatically enabled only when either LRO, HW-GRO, or JUMBO is enabled. The hds_threshold follows rx-copybreak value. and it was unchangeable. This implements `ethtool -G tcp-data-split ` command option. The value can be and . The value is and one of LRO/GRO/JUMBO is enabled, HDS is automatically enabled and all LRO/GRO/JUMBO are disabled, HDS is automatically disabled. HDS feature relies on the aggregation ring. 
So, if HDS is enabled, the bnxt_en driver initializes the aggregation ring. This is the reason why BNXT_FLAG_AGG_RINGS contains HDS condition. Acked-by: Jakub Kicinski Tested-by: Stanislav Fomichev Tested-by: Andy Gospodarek Signed-off-by: Taehee Yoo Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250114142852.3364986-8-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 5 +++-- .../net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 20 +++++++++++++++++++ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 4 ++++ 4 files changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d19c4fb588e59..f029559a581e5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4630,7 +4630,7 @@ void bnxt_set_ring_params(struct bnxt *bp) bp->rx_agg_ring_size = 0; bp->rx_agg_nr_pages = 0; - if (bp->flags & BNXT_FLAG_TPA) + if (bp->flags & BNXT_FLAG_TPA || bp->flags & BNXT_FLAG_HDS) agg_factor = min_t(u32, 4, 65536 / BNXT_RX_PAGE_SIZE); bp->flags &= ~BNXT_FLAG_JUMBO; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7edb92ce59762..7dc06e07bae2a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2244,8 +2244,6 @@ struct bnxt { #define BNXT_FLAG_TPA (BNXT_FLAG_LRO | BNXT_FLAG_GRO) #define BNXT_FLAG_JUMBO 0x10 #define BNXT_FLAG_STRIP_VLAN 0x20 - #define BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \ - BNXT_FLAG_LRO) #define BNXT_FLAG_RFS 0x100 #define BNXT_FLAG_SHARED_RINGS 0x200 #define BNXT_FLAG_PORT_STATS 0x400 @@ -2266,6 +2264,9 @@ struct bnxt { #define BNXT_FLAG_ROCE_MIRROR_CAP 0x4000000 #define BNXT_FLAG_TX_COAL_CMPL 0x8000000 #define BNXT_FLAG_PORT_STATS_EXT 0x10000000 + #define BNXT_FLAG_HDS 0x20000000 + #define 
BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \ + BNXT_FLAG_LRO | BNXT_FLAG_HDS) #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ BNXT_FLAG_RFS | \ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index e9e63d95df177..413007190f509 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -840,16 +840,35 @@ static int bnxt_set_ringparam(struct net_device *dev, struct kernel_ethtool_ringparam *kernel_ering, struct netlink_ext_ack *extack) { + u8 tcp_data_split = kernel_ering->tcp_data_split; struct bnxt *bp = netdev_priv(dev); + u8 hds_config_mod; if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) || (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) || (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; + hds_config_mod = tcp_data_split != dev->ethtool->hds_config; + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod) + return -EINVAL; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + hds_config_mod && BNXT_RX_PAGE_MODE(bp)) { + NL_SET_ERR_MSG_MOD(extack, "tcp-data-split is disallowed when XDP is attached"); + return -EINVAL; + } + if (netif_running(dev)) bnxt_close_nic(bp, false, false); + if (hds_config_mod) { + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + bp->flags |= BNXT_FLAG_HDS; + else if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + bp->flags &= ~BNXT_FLAG_HDS; + } + bp->rx_ring_size = ering->rx_pending; bp->tx_ring_size = ering->tx_pending; bnxt_set_ring_params(bp); @@ -5371,6 +5390,7 @@ const struct ethtool_ops bnxt_ethtool_ops = { ETHTOOL_COALESCE_STATS_BLOCK_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_CQE, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, .get_fec_stats = bnxt_get_fec_stats, diff --git 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index f88b641533fcc..1bfff7f29310e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -395,6 +395,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU); return -EOPNOTSUPP; } + if (prog && bp->flags & BNXT_FLAG_HDS) { + netdev_warn(dev, "XDP is disallowed when HDS is enabled.\n"); + return -EOPNOTSUPP; + } if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) { netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); return -EOPNOTSUPP; From 6b43673a25c3666d42f5524e59aed8a3914924cc Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:50 +0000 Subject: [PATCH 08/10] bnxt_en: add support for hds-thresh ethtool command The bnxt_en driver has configured the hds_threshold value automatically when TPA is enabled based on the rx-copybreak default value. Now the hds-thresh ethtool command is added, so it adds an implementation of hds-thresh option. Configuration of the hds-thresh is applied only when the tcp-data-split is enabled. The default value of hds-thresh is 256, which is the default value of rx-copybreak, which used to be the hds_thresh value. The maximum hds-thresh is 1023. # Example: # ethtool -G enp14s0f0np0 tcp-data-split on hds-thresh 256 # ethtool -g enp14s0f0np0 Ring parameters for enp14s0f0np0: Pre-set maximums: ... HDS thresh: 1023 Current hardware settings: ... 
TCP data split: on HDS thresh: 256 Tested-by: Stanislav Fomichev Tested-by: Andy Gospodarek Signed-off-by: Taehee Yoo Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250114142852.3364986-9-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 +++- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 2 ++ drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 +++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index f029559a581e5..caddb5cbc0246 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4610,6 +4610,7 @@ void bnxt_set_tpa_flags(struct bnxt *bp) static void bnxt_init_ring_params(struct bnxt *bp) { bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; + bp->dev->ethtool->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; } /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must @@ -6569,6 +6570,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { + u16 hds_thresh = (u16)bp->dev->ethtool->hds_thresh; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; @@ -6585,7 +6587,7 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); req->enables |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); - req->hds_threshold = cpu_to_le16(bp->rx_copybreak); + req->hds_threshold = cpu_to_le16(hds_thresh); } req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); return hwrm_req_send(bp, req); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 7dc06e07bae2a..8f481dd9c2245 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2779,6 +2779,8 @@ struct bnxt { #define SFF_MODULE_ID_QSFP28 0x11 #define 
BNXT_MAX_PHY_I2C_RESP_SIZE 64 +#define BNXT_HDS_THRESHOLD_MAX 1023 + static inline u32 bnxt_tx_avail(struct bnxt *bp, const struct bnxt_tx_ring_info *txr) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 413007190f509..540c140d52dcf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -833,6 +833,9 @@ static void bnxt_get_ringparam(struct net_device *dev, ering->rx_pending = bp->rx_ring_size; ering->rx_jumbo_pending = bp->rx_agg_ring_size; ering->tx_pending = bp->tx_ring_size; + + kernel_ering->hds_thresh = dev->ethtool->hds_thresh; + kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX; } static int bnxt_set_ringparam(struct net_device *dev, @@ -5390,7 +5393,8 @@ const struct ethtool_ops bnxt_ethtool_ops = { ETHTOOL_COALESCE_STATS_BLOCK_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_CQE, - .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, .get_fec_stats = bnxt_get_fec_stats, From f394d07b192b67a895dbed76253ce95dcbb5d17c Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:51 +0000 Subject: [PATCH 09/10] netdevsim: add HDS feature HDS options(tcp-data-split, hds-thresh) have dependencies between other features like XDP. Basic dependencies are checked in the core API. netdevsim is very useful to check basic dependencies. The default tcp-data-split mode is UNKNOWN but netdevsim driver returns ENABLED when ethtool dumps tcp-data-split mode. The default value of HDS threshold is 0 and the maximum value is 1024. ethtool shows like this. ethtool -g eni1np1 Ring parameters for eni1np1: Pre-set maximums: ... HDS thresh: 1024 Current hardware settings: ... 
TCP data split: on HDS thresh: 0 ethtool -G eni1np1 tcp-data-split on hds-thresh 1024 ethtool -g eni1np1 Ring parameters for eni1np1: Pre-set maximums: ... HDS thresh: 1024 Current hardware settings: ... TCP data split: on HDS thresh: 1024 Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-10-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/netdevsim/ethtool.c | 12 +++++++++++- drivers/net/netdevsim/netdev.c | 9 +++++++++ drivers/net/netdevsim/netdevsim.h | 3 +++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 5fe1eaef99b5b..9e0df40c71e18 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -2,7 +2,6 @@ // Copyright (c) 2020 Facebook #include -#include #include #include "netdevsim.h" @@ -72,6 +71,12 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); + kernel_ring->tcp_data_split = dev->ethtool->hds_config; + kernel_ring->hds_thresh = dev->ethtool->hds_thresh; + kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; + + if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } static int nsim_set_ringparam(struct net_device *dev, @@ -161,6 +166,8 @@ static int nsim_get_ts_info(struct net_device *dev, static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_ALL_PARAMS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .get_pause_stats = nsim_get_pause_stats, .get_pauseparam = nsim_get_pauseparam, .set_pauseparam = nsim_set_pauseparam, @@ -182,6 +189,9 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; + + 
ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + ns->netdev->ethtool->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index d013b64985390..f92b05ccdca91 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -54,6 +55,7 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct ethtool_netdev_state *ethtool; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; @@ -74,6 +76,13 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; + ethtool = peer_dev->ethtool; + if (skb_is_nonlinear(skb) && + (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + ethtool->hds_thresh > len))) + skb_linearize(skb); + skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) goto out_drop_cnt; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index a70f62af4c886..dcf073bc4802e 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -36,6 +37,8 @@ #define NSIM_IPSEC_VALID BIT(31) #define NSIM_UDP_TUNNEL_N_PORTS 4 +#define NSIM_HDS_THRESHOLD_MAX 1024 + struct nsim_sa { struct xfrm_state *xs; __be32 ipaddr[4]; From cfd70e3eba2b68aa230d431e3c6ca0a1566e8d2e Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 14 Jan 2025 14:28:52 +0000 Subject: [PATCH 10/10] selftest: net-drv: hds: add test for HDS feature HDS/HDS-thresh features were updated/implemented. 
So add some tests for these features. The HDS tests are equivalent to `ethtool -G eth0 tcp-data-split <on | off | auto>`, but the behavior of `auto` is driver-specific, so the `auto` case is not included. The HDS-thresh tests are equivalent to `ethtool -G eth0 hds-thresh <0 - MAX>`. They cover both the 0 and MAX cases, as well as the out-of-range case, MAX + 1. Signed-off-by: Taehee Yoo Link: https://patch.msgid.link/20250114142852.3364986-11-ap420073@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + tools/testing/selftests/drivers/net/hds.py | 120 +++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hds.py diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 469179c18935f..137470bdee0c7 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -12,6 +12,7 @@ TEST_PROGS := \ queues.py \ stats.py \ shaper.py \ + hds.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py new file mode 100755 index 0000000000000..394971b25c0b1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hds.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + +def get_hds(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + +def get_hds_thresh(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if
'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + +def set_hds_enable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("enabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('enabled', rings['tcp-data-split']) + +def set_hds_disable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('disabled', rings['tcp-data-split']) + +def set_hds_thresh_zero(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + + ksft_eq(0,
rings['hds-thresh']) + +def set_hds_thresh_max(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) + +def set_hds_thresh_gt(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + hds_gt = rings['hds-thresh-max'] + 1 + with ksft_raises(NlError) as e: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=3) as cfg: + ksft_run([get_hds, + get_hds_thresh, + set_hds_disable, + set_hds_enable, + set_hds_thresh_zero, + set_hds_thresh_max, + set_hds_thresh_gt], + args=(cfg, EthtoolFamily())) + ksft_exit() + +if __name__ == "__main__": + main()