From 3c836451ca9041cfb32a7d8f59ea15b3b991bbb3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:11 -0800 Subject: [PATCH 1/7] net: move HDS config from ethtool state Separate the HDS config from the ethtool state struct. The HDS config contains just simple parameters, not state. Having it as a separate struct will make it easier to clone / copy and also long term potentially make it per-queue. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++-- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 5 +++-- drivers/net/netdevsim/ethtool.c | 9 +++++---- drivers/net/netdevsim/netdev.c | 10 +++++----- include/linux/ethtool.h | 4 ---- include/linux/netdevice.h | 3 +++ include/net/netdev_queues.h | 10 ++++++++++ net/core/dev.c | 10 ++++++++-- net/core/devmem.c | 4 ++-- net/ethtool/rings.c | 8 +++++--- 10 files changed, 43 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 748c9b1ea7010..0998b20578b4e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4610,7 +4610,7 @@ void bnxt_set_tpa_flags(struct bnxt *bp) static void bnxt_init_ring_params(struct bnxt *bp) { bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; - bp->dev->ethtool->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; + bp->dev->cfg->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; } /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must @@ -6585,7 +6585,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - u16 hds_thresh = (u16)bp->dev->ethtool->hds_thresh; + u16 hds_thresh = (u16)bp->dev->cfg->hds_thresh; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 65a20931c5796..0a6d47d4d66b2 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "bnxt_hsi.h" #include "bnxt.h" @@ -834,7 +835,7 @@ static void bnxt_get_ringparam(struct net_device *dev, ering->rx_jumbo_pending = bp->rx_agg_ring_size; ering->tx_pending = bp->tx_ring_size; - kernel_ering->hds_thresh = dev->ethtool->hds_thresh; + kernel_ering->hds_thresh = dev->cfg->hds_thresh; kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX; } @@ -852,7 +853,7 @@ static int bnxt_set_ringparam(struct net_device *dev, (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; - hds_config_mod = tcp_data_split != dev->ethtool->hds_config; + hds_config_mod = tcp_data_split != dev->cfg->hds_config; if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod) return -EINVAL; diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 12163635b759f..189793debdb7e 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -3,6 +3,7 @@ #include #include +#include #include "netdevsim.h" @@ -71,8 +72,8 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); - kernel_ring->tcp_data_split = dev->ethtool->hds_config; - kernel_ring->hds_thresh = dev->ethtool->hds_thresh; + kernel_ring->tcp_data_split = dev->cfg->hds_config; + kernel_ring->hds_thresh = dev->cfg->hds_thresh; kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) @@ -190,8 +191,8 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; - ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; - ns->netdev->ethtool->hds_thresh = 0; + ns->netdev->cfg->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + ns->netdev->cfg->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index f92b05ccdca91..42f247cbdceec 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -55,10 +55,10 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); - struct ethtool_netdev_state *ethtool; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; + struct netdev_config *cfg; struct nsim_rq *rq; int rxq; @@ -76,11 +76,11 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; - ethtool = peer_dev->ethtool; + cfg = peer_dev->cfg; if (skb_is_nonlinear(skb) && - (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || - (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && - ethtool->hds_thresh > len))) + (cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + cfg->hds_thresh > len))) skb_linearize(skb); skb_tx_timestamp(skb); diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 64301ddf2f598..870994cc3ef7e 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -1171,16 +1171,12 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. - * @hds_thresh: HDS Threshold value. - * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. */ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; - u32 hds_thresh; - u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8308d9c75918f..173a8b3a9eb2e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct dsa_port; struct ip_tunnel_parm_kern; struct macsec_context; struct macsec_ops; +struct netdev_config; struct netdev_name_node; struct sd_flow_limit; struct sfp_bus; @@ -2410,6 +2411,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + /** @cfg: net_device queue-related configuration */ + struct netdev_config *cfg; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h index 5ca019d294ca3..b02bb9f109d5e 100644 --- a/include/net/netdev_queues.h +++ b/include/net/netdev_queues.h @@ -4,6 +4,16 @@ #include +/** + * struct netdev_config - queue-related configuration for a netdev + * @hds_thresh: HDS Threshold value. + * @hds_config: HDS value from userspace. + */ +struct netdev_config { + u32 hds_thresh; + u8 hds_config; +}; + /* See the netdev.yaml spec for definition of each statistic */ struct netdev_queue_stats_rx { u64 bytes; diff --git a/net/core/dev.c b/net/core/dev.c index 3dab6699b1c12..e37d47cf476b8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -106,6 +106,7 @@ #include #include #include +#include #include #include #include @@ -9719,7 +9720,7 @@ int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) if (!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; - if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && bpf->command == XDP_SETUP_PROG && bpf->prog && !bpf->prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(bpf->extack, @@ -9764,7 +9765,7 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; - if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + if (dev->cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && prog && !prog->aux->xdp_has_frags) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); return -EBUSY; @@ -11542,6 +11543,10 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, if (!dev->ethtool) goto free_all; + dev->cfg = kzalloc(sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); + if (!dev->cfg) + goto free_all; + napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); if (!dev->napi_config) @@ -11610,6 +11615,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->cfg); kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); diff --git a/net/core/devmem.c b/net/core/devmem.c index c971b8aceac86..3bba3f018df03 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -141,12 +141,12 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -ERANGE; } - if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; } - if (dev->ethtool->hds_thresh) { + if (dev->cfg->hds_thresh) { NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); return -EINVAL; } diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index d8cd4e4d77622..7a3c2a2dff12c 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" @@ -219,7 +221,7 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) dev->ethtool_ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); - kernel_ringparam.tcp_data_split = dev->ethtool->hds_config; + kernel_ringparam.tcp_data_split = dev->cfg->hds_config; ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, @@ -295,8 +297,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); if (!ret) { - dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; - dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh; + dev->cfg->hds_config = kernel_ringparam.tcp_data_split; + dev->cfg->hds_thresh = kernel_ringparam.hds_thresh; } return ret < 0 ? ret : 1; From 743dea746ed6d581877c114ef1f362bc277fed6a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:12 -0800 Subject: [PATCH 2/7] net: ethtool: store netdev in a temp variable in ethnl_default_set_doit() For ease of review of the next patch store the dev pointer on the stack, instead of referring to req_info.dev every time. No functional changes. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/ethtool/netlink.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 849c98e637c65..c17d8513d4c1f 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -667,6 +667,7 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) const struct ethnl_request_ops *ops; struct ethnl_req_info req_info = {}; const u8 cmd = info->genlhdr->cmd; + struct net_device *dev; int ret; ops = ethnl_default_requests[cmd]; @@ -688,19 +689,21 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) goto out_dev; } + dev = req_info.dev; + rtnl_lock(); - ret = ethnl_ops_begin(req_info.dev); + ret = ethnl_ops_begin(dev); if (ret < 0) goto out_rtnl; ret = ops->set(&req_info, info); if (ret <= 0) goto out_ops; - ethtool_notify(req_info.dev, ops->set_ntf_cmd, NULL); + ethtool_notify(dev, ops->set_ntf_cmd, NULL); ret = 0; out_ops: - ethnl_ops_complete(req_info.dev); + ethnl_ops_complete(dev); out_rtnl: rtnl_unlock(); out_dev: From 32ad1f7a050d0c17e1e52e1dfdd9f6221ae20ef9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:13 -0800 Subject: [PATCH 3/7] net: provide pending ring configuration in net_device Record the pending configuration in net_device struct. ethtool core duplicates the current config and the specific handlers (for now just ringparam) can modify it. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 6 ++++++ net/core/dev.c | 2 ++ net/ethtool/netlink.c | 21 ++++++++++++++++++--- net/ethtool/rings.c | 8 +++----- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 173a8b3a9eb2e..8da4c61f97b97 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2413,6 +2413,12 @@ struct net_device { /** @cfg: net_device queue-related configuration */ struct netdev_config *cfg; + /** + * @cfg_pending: same as @cfg but when device is being actively + * reconfigured includes any changes to the configuration + * requested by the user, but which may or may not be rejected. + */ + struct netdev_config *cfg_pending; struct ethtool_netdev_state *ethtool; /* protected by rtnl_lock */ diff --git a/net/core/dev.c b/net/core/dev.c index e37d47cf476b8..afa2282f26043 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11546,6 +11546,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->cfg = kzalloc(sizeof(*dev->cfg), GFP_KERNEL_ACCOUNT); if (!dev->cfg) goto free_all; + dev->cfg_pending = dev->cfg; napi_config_sz = array_size(maxqs, sizeof(*dev->napi_config)); dev->napi_config = kvzalloc(napi_config_sz, GFP_KERNEL_ACCOUNT); @@ -11615,6 +11616,7 @@ void free_netdev(struct net_device *dev) return; } + WARN_ON(dev->cfg != dev->cfg_pending); kfree(dev->cfg); kfree(dev->ethtool); netif_free_tx_queues(dev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index c17d8513d4c1f..1d2f62ef61306 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include @@ -692,19 +693,33 @@ static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info) dev = req_info.dev; rtnl_lock(); + dev->cfg_pending = kmemdup(dev->cfg, sizeof(*dev->cfg), + GFP_KERNEL_ACCOUNT); + if (!dev->cfg_pending) { + ret = -ENOMEM; + goto out_tie_cfg; + } + ret = ethnl_ops_begin(dev); if (ret < 0) - goto out_rtnl; + goto out_free_cfg; ret = ops->set(&req_info, info); - if (ret <= 0) + if (ret < 0) + goto out_ops; + + swap(dev->cfg, dev->cfg_pending); + if (!ret) goto out_ops; ethtool_notify(dev, ops->set_ntf_cmd, NULL); ret = 0; out_ops: ethnl_ops_complete(dev); -out_rtnl: +out_free_cfg: + kfree(dev->cfg_pending); +out_tie_cfg: + dev->cfg_pending = dev->cfg; rtnl_unlock(); out_dev: ethnl_parse_header_dev_put(&req_info); diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 7a3c2a2dff12c..5e8ba81fbb3e4 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -294,13 +294,11 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) return -EINVAL; } + dev->cfg_pending->hds_config = kernel_ringparam.tcp_data_split; + dev->cfg_pending->hds_thresh = kernel_ringparam.hds_thresh; + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); - if (!ret) { - dev->cfg->hds_config = kernel_ringparam.tcp_data_split; - dev->cfg->hds_thresh = kernel_ringparam.hds_thresh; - } - return ret < 0 ? ret : 1; } From e58263e9111733c4bfbbf07517d1f98f21134c52 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:14 -0800 Subject: [PATCH 4/7] eth: bnxt: apply hds_thrs settings correctly Use the pending config for hds_thrs. Core will only update the "current" one after we return success. Without this change 2 reconfigs would be required for the setting to reach the device. Fixes: 6b43673a25c3 ("bnxt_en: add support for hds-thresh ethtool command") Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 0998b20578b4e..2eeed4c11b64b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -6585,7 +6585,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { - u16 hds_thresh = (u16)bp->dev->cfg->hds_thresh; + u16 hds_thresh = (u16)bp->dev->cfg_pending->hds_thresh; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; From 928459bbda19eb47e09d9bd4386ac46fad088958 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:15 -0800 Subject: [PATCH 5/7] net: ethtool: populate the default HDS params in the core The core has the current HDS config, it can pre-populate the values for the drivers. While at it, remove the zero-setting in netdevsim. Zero are the default values since the config is zalloc'ed. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 1 - drivers/net/netdevsim/ethtool.c | 5 ----- net/ethtool/rings.c | 4 ++++ 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 0a6d47d4d66b2..9c58208395144 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -835,7 +835,6 @@ static void bnxt_get_ringparam(struct net_device *dev, ering->rx_jumbo_pending = bp->rx_agg_ring_size; ering->tx_pending = bp->tx_ring_size; - kernel_ering->hds_thresh = dev->cfg->hds_thresh; kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX; } diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 189793debdb7e..3b23f3d3ca2bd 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -72,8 +72,6 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); - kernel_ring->tcp_data_split = dev->cfg->hds_config; - kernel_ring->hds_thresh = dev->cfg->hds_thresh; kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) @@ -190,9 +188,6 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; - - ns->netdev->cfg->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; - ns->netdev->cfg->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 5e8ba81fbb3e4..7839bfd1ac6a0 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -39,6 +39,10 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base, ret = ethnl_ops_begin(dev); if (ret < 0) return ret; + + data->kernel_ringparam.tcp_data_split = dev->cfg->hds_config; + data->kernel_ringparam.hds_thresh = dev->cfg->hds_thresh; + dev->ethtool_ops->get_ringparam(dev, &data->ringparam, &data->kernel_ringparam, info->extack); ethnl_ops_complete(dev); From bee018052d1bbfa6d33ca016a42e8e2534429492 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:16 -0800 Subject: [PATCH 6/7] eth: bnxt: allocate enough buffer space to meet HDS threshold Now that we can configure HDS threshold separately from the rx_copybreak HDS threshold may be higher than rx_copybreak. We need to make sure that we have enough space for the headers. Fixes: 6b43673a25c3 ("bnxt_en: add support for hds-thresh ethtool command") Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 2eeed4c11b64b..19e723493c4e5 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4671,9 +4671,10 @@ void bnxt_set_ring_params(struct bnxt *bp) ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } else { - rx_size = SKB_DATA_ALIGN(max(BNXT_DEFAULT_RX_COPYBREAK, - bp->rx_copybreak) + - NET_IP_ALIGN); + rx_size = max3(BNXT_DEFAULT_RX_COPYBREAK, + bp->rx_copybreak, + bp->dev->cfg_pending->hds_thresh); + rx_size = SKB_DATA_ALIGN(rx_size + NET_IP_ALIGN); rx_space = rx_size + NET_SKB_PAD + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } From 99d028c63457773152964ba3abc0d8fb8cf220bf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Jan 2025 18:05:17 -0800 Subject: [PATCH 7/7] eth: bnxt: update header sizing defaults 300-400B RPC requests are fairly common. With the current default of 256B HDS threshold bnxt ends up splitting those, lowering PCIe bandwidth efficiency and increasing the number of memory allocation. Increase the HDS threshold to fit 4 buffers in a 4k page. This works out to 640B as the threshold on a typical kernel confing. This change increases the performance for a microbenchmark which receives 400B RPCs and sends empty responses by 4.5%. Admittedly this is just a single benchmark, but 256B works out to just 6 (so 2 more) packets per head page, because shinfo size dominates the headers. Now that we use page pool for the header pages I was also tempted to default rx_copybreak to 0, but in synthetic testing the copybreak size doesn't seem to make much difference. Reviewed-by: Michael Chan Link: https://patch.msgid.link/20250119020518.1962249-8-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 19e723493c4e5..589a1008601c0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4609,8 +4609,13 @@ void bnxt_set_tpa_flags(struct bnxt *bp) static void bnxt_init_ring_params(struct bnxt *bp) { + unsigned int rx_size; + bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; - bp->dev->cfg->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; + /* Try to fit 4 chunks into a 4k page */ + rx_size = SZ_1K - + NET_SKB_PAD - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + bp->dev->cfg->hds_thresh = max(BNXT_DEFAULT_RX_COPYBREAK, rx_size); } /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must