diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 60f85fbf41561..66be040130481 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -332,6 +332,12 @@ attribute-sets: - name: tx-push-buf-len-max type: u32 + - + name: hds-thresh + type: u32 + - + name: hds-thresh-max + type: u32 - name: mm-stat @@ -1777,6 +1783,8 @@ operations: - rx-push - tx-push-buf-len - tx-push-buf-len-max + - hds-thresh + - hds-thresh-max dump: *ring-get-op - name: rings-set diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index da846f1d998ec..f70c0249860cd 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -899,6 +899,10 @@ Kernel response contents: ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX`` u32 max size of TX push buffer + ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of + header / data split + ``ETHTOOL_A_RINGS_HDS_THRESH_MAX`` u32 max threshold of + header / data split ======================================= ====== =========================== ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with @@ -941,10 +945,12 @@ Request contents: ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring ``ETHTOOL_A_RINGS_RX_BUF_LEN`` u32 size of buffers on the ring + ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` u8 TCP header / data split ``ETHTOOL_A_RINGS_CQE_SIZE`` u32 Size of TX/RX CQE ``ETHTOOL_A_RINGS_TX_PUSH`` u8 flag of TX Push mode ``ETHTOOL_A_RINGS_RX_PUSH`` u8 flag of RX Push mode ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` u32 size of TX push buffer + ``ETHTOOL_A_RINGS_HDS_THRESH`` u32 threshold of header / data split ==================================== ====== =========================== Kernel checks that requested ring 
sizes do not exceed limits reported by @@ -961,6 +967,10 @@ A bigger CQE can have more receive buffer pointers, and in turn the NIC can transfer a bigger frame from wire. Based on the NIC hardware, the overall completion queue size can be adjusted in the driver if CQE size is modified. +``ETHTOOL_A_RINGS_HDS_THRESH`` specifies the threshold value of +header / data split feature. If a received packet size is larger than this +threshold value, header and data will be split. + CHANNELS_GET ============ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 884d42db55545..caddb5cbc0246 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -81,7 +81,6 @@ MODULE_DESCRIPTION("Broadcom NetXtreme network driver"); #define BNXT_RX_OFFSET (NET_SKB_PAD + NET_IP_ALIGN) #define BNXT_RX_DMA_OFFSET NET_SKB_PAD -#define BNXT_RX_COPY_THRESH 256 #define BNXT_TX_PUSH_THRESH 164 @@ -1343,13 +1342,13 @@ static struct sk_buff *bnxt_copy_data(struct bnxt_napi *bnapi, u8 *data, if (!skb) return NULL; - dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copy_thresh, + dma_sync_single_for_cpu(&pdev->dev, mapping, bp->rx_copybreak, bp->rx_dir); memcpy(skb->data - NET_IP_ALIGN, data - NET_IP_ALIGN, len + NET_IP_ALIGN); - dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copy_thresh, + dma_sync_single_for_device(&pdev->dev, mapping, bp->rx_copybreak, bp->rx_dir); skb_put(skb, len); @@ -1842,7 +1841,7 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp, return NULL; } - if (len <= bp->rx_copy_thresh) { + if (len <= bp->rx_copybreak) { skb = bnxt_copy_skb(bnapi, data_ptr, len, mapping); if (!skb) { bnxt_abort_tpa(cpr, idx, agg_bufs); @@ -2176,7 +2175,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (len <= bp->rx_copy_thresh) { + if (len <= bp->rx_copybreak) { if (!xdp_active) skb = bnxt_copy_skb(bnapi, data_ptr, len, dma_addr); else @@ -4608,6 
+4607,12 @@ void bnxt_set_tpa_flags(struct bnxt *bp) bp->flags |= BNXT_FLAG_GRO; } +static void bnxt_init_ring_params(struct bnxt *bp) +{ + bp->rx_copybreak = BNXT_DEFAULT_RX_COPYBREAK; + bp->dev->ethtool->hds_thresh = BNXT_DEFAULT_RX_COPYBREAK; +} + /* bp->rx_ring_size, bp->tx_ring_size, dev->mtu, BNXT_FLAG_{G|L}RO flags must * be set on entry. */ @@ -4622,12 +4627,11 @@ void bnxt_set_ring_params(struct bnxt *bp) rx_space = rx_size + ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - bp->rx_copy_thresh = BNXT_RX_COPY_THRESH; ring_size = bp->rx_ring_size; bp->rx_agg_ring_size = 0; bp->rx_agg_nr_pages = 0; - if (bp->flags & BNXT_FLAG_TPA) + if (bp->flags & BNXT_FLAG_TPA || bp->flags & BNXT_FLAG_HDS) agg_factor = min_t(u32, 4, 65536 / BNXT_RX_PAGE_SIZE); bp->flags &= ~BNXT_FLAG_JUMBO; @@ -4667,7 +4671,9 @@ void bnxt_set_ring_params(struct bnxt *bp) ALIGN(max(NET_SKB_PAD, XDP_PACKET_HEADROOM), 8) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } else { - rx_size = SKB_DATA_ALIGN(BNXT_RX_COPY_THRESH + NET_IP_ALIGN); + rx_size = SKB_DATA_ALIGN(max(BNXT_DEFAULT_RX_COPYBREAK, + bp->rx_copybreak) + + NET_IP_ALIGN); rx_space = rx_size + NET_SKB_PAD + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); } @@ -6564,6 +6570,7 @@ static void bnxt_hwrm_update_rss_hash_cfg(struct bnxt *bp) static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) { + u16 hds_thresh = (u16)bp->dev->ethtool->hds_thresh; struct hwrm_vnic_plcmodes_cfg_input *req; int rc; @@ -6573,16 +6580,14 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, struct bnxt_vnic_info *vnic) req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); req->enables = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID); + req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); - if (BNXT_RX_PAGE_MODE(bp)) { - req->jumbo_thresh = cpu_to_le16(bp->rx_buf_use_size); - } else { + if (!BNXT_RX_PAGE_MODE(bp) && (bp->flags & 
BNXT_FLAG_AGG_RINGS)) { req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); req->enables |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID); - req->jumbo_thresh = cpu_to_le16(bp->rx_copy_thresh); - req->hds_threshold = cpu_to_le16(bp->rx_copy_thresh); + req->hds_threshold = cpu_to_le16(hds_thresh); } req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); return hwrm_req_send(bp, req); @@ -16261,6 +16266,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) bnxt_init_l2_fltr_tbl(bp); bnxt_set_rx_skb_mode(bp, false); bnxt_set_tpa_flags(bp); + bnxt_init_ring_params(bp); bnxt_set_ring_params(bp); bnxt_rdma_aux_device_init(bp); rc = bnxt_set_dflt_rings(bp, true); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 094c9e95b4639..8f481dd9c2245 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -34,6 +34,9 @@ #include #endif +#define BNXT_DEFAULT_RX_COPYBREAK 256 +#define BNXT_MAX_RX_COPYBREAK 1024 + extern struct list_head bnxt_block_cb_list; struct page_pool; @@ -2241,8 +2244,6 @@ struct bnxt { #define BNXT_FLAG_TPA (BNXT_FLAG_LRO | BNXT_FLAG_GRO) #define BNXT_FLAG_JUMBO 0x10 #define BNXT_FLAG_STRIP_VLAN 0x20 - #define BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \ - BNXT_FLAG_LRO) #define BNXT_FLAG_RFS 0x100 #define BNXT_FLAG_SHARED_RINGS 0x200 #define BNXT_FLAG_PORT_STATS 0x400 @@ -2263,6 +2264,9 @@ struct bnxt { #define BNXT_FLAG_ROCE_MIRROR_CAP 0x4000000 #define BNXT_FLAG_TX_COAL_CMPL 0x8000000 #define BNXT_FLAG_PORT_STATS_EXT 0x10000000 + #define BNXT_FLAG_HDS 0x20000000 + #define BNXT_FLAG_AGG_RINGS (BNXT_FLAG_JUMBO | BNXT_FLAG_GRO | \ + BNXT_FLAG_LRO | BNXT_FLAG_HDS) #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ BNXT_FLAG_RFS | \ @@ -2347,7 +2351,7 @@ struct bnxt { enum dma_data_direction rx_dir; u32 rx_ring_size; u32 rx_agg_ring_size; - u32 
rx_copy_thresh; + u32 rx_copybreak; u32 rx_ring_mask; u32 rx_agg_ring_mask; int rx_nr_pages; @@ -2775,6 +2779,8 @@ struct bnxt { #define SFF_MODULE_ID_QSFP28 0x11 #define BNXT_MAX_PHY_I2C_RESP_SIZE 64 +#define BNXT_HDS_THRESHOLD_MAX 1023 + static inline u32 bnxt_tx_avail(struct bnxt *bp, const struct bnxt_tx_ring_info *txr) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 75a59dd72bcea..540c140d52dcf 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -833,6 +833,9 @@ static void bnxt_get_ringparam(struct net_device *dev, ering->rx_pending = bp->rx_ring_size; ering->rx_jumbo_pending = bp->rx_agg_ring_size; ering->tx_pending = bp->tx_ring_size; + + kernel_ering->hds_thresh = dev->ethtool->hds_thresh; + kernel_ering->hds_thresh_max = BNXT_HDS_THRESHOLD_MAX; } static int bnxt_set_ringparam(struct net_device *dev, @@ -840,16 +843,35 @@ static int bnxt_set_ringparam(struct net_device *dev, struct kernel_ethtool_ringparam *kernel_ering, struct netlink_ext_ack *extack) { + u8 tcp_data_split = kernel_ering->tcp_data_split; struct bnxt *bp = netdev_priv(dev); + u8 hds_config_mod; if ((ering->rx_pending > BNXT_MAX_RX_DESC_CNT) || (ering->tx_pending > BNXT_MAX_TX_DESC_CNT) || (ering->tx_pending < BNXT_MIN_TX_DESC_CNT)) return -EINVAL; + hds_config_mod = tcp_data_split != dev->ethtool->hds_config; + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED && hds_config_mod) + return -EINVAL; + + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + hds_config_mod && BNXT_RX_PAGE_MODE(bp)) { + NL_SET_ERR_MSG_MOD(extack, "tcp-data-split is disallowed when XDP is attached"); + return -EINVAL; + } + if (netif_running(dev)) bnxt_close_nic(bp, false, false); + if (hds_config_mod) { + if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED) + bp->flags |= BNXT_FLAG_HDS; + else if (tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + bp->flags &= 
~BNXT_FLAG_HDS; + } + bp->rx_ring_size = ering->rx_pending; bp->tx_ring_size = ering->tx_pending; bnxt_set_ring_params(bp); @@ -4328,6 +4350,45 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata) return 0; } +static int bnxt_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct bnxt *bp = netdev_priv(dev); + u32 rx_copybreak; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + rx_copybreak = *(u32 *)data; + if (rx_copybreak > BNXT_MAX_RX_COPYBREAK) + return -ERANGE; + if (rx_copybreak != bp->rx_copybreak) { + if (netif_running(dev)) + return -EBUSY; + bp->rx_copybreak = rx_copybreak; + } + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int bnxt_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, void *data) +{ + struct bnxt *bp = netdev_priv(dev); + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = bp->rx_copybreak; + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + static int bnxt_read_sfp_module_eeprom_info(struct bnxt *bp, u16 i2c_addr, u16 page_number, u8 bank, u16 start_addr, u16 data_length, @@ -4790,7 +4851,8 @@ static int bnxt_run_loopback(struct bnxt *bp) cpr = &rxr->bnapi->cp_ring; if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) cpr = rxr->rx_cpr; - pkt_size = min(bp->dev->mtu + ETH_HLEN, bp->rx_copy_thresh); + pkt_size = min(bp->dev->mtu + ETH_HLEN, max(BNXT_DEFAULT_RX_COPYBREAK, + bp->rx_copybreak)); skb = netdev_alloc_skb(bp->dev, pkt_size); if (!skb) return -ENOMEM; @@ -5331,6 +5393,8 @@ const struct ethtool_ops bnxt_ethtool_ops = { ETHTOOL_COALESCE_STATS_BLOCK_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX | ETHTOOL_COALESCE_USE_CQE, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, .get_fec_stats = bnxt_get_fec_stats, @@ -5372,6 +5436,8 @@ const struct ethtool_ops bnxt_ethtool_ops = { 
.get_link_ext_stats = bnxt_get_link_ext_stats, .get_eee = bnxt_get_eee, .set_eee = bnxt_set_eee, + .get_tunable = bnxt_get_tunable, + .set_tunable = bnxt_set_tunable, .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, .get_module_eeprom_by_page = bnxt_get_module_eeprom_by_page, diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index f88b641533fcc..1bfff7f29310e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -395,6 +395,10 @@ static int bnxt_xdp_set(struct bnxt *bp, struct bpf_prog *prog) bp->dev->mtu, BNXT_MAX_PAGE_MODE_MTU); return -EOPNOTSUPP; } + if (prog && bp->flags & BNXT_FLAG_HDS) { + netdev_warn(dev, "XDP is disallowed when HDS is enabled.\n"); + return -EOPNOTSUPP; + } if (!(bp->flags & BNXT_FLAG_SHARED_RINGS)) { netdev_warn(dev, "ethtool rx/tx channels must be combined to support XDP.\n"); return -EOPNOTSUPP; diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c index 5fe1eaef99b5b..9e0df40c71e18 100644 --- a/drivers/net/netdevsim/ethtool.c +++ b/drivers/net/netdevsim/ethtool.c @@ -2,7 +2,6 @@ // Copyright (c) 2020 Facebook #include -#include #include #include "netdevsim.h" @@ -72,6 +71,12 @@ static void nsim_get_ringparam(struct net_device *dev, struct netdevsim *ns = netdev_priv(dev); memcpy(ring, &ns->ethtool.ring, sizeof(ns->ethtool.ring)); + kernel_ring->tcp_data_split = dev->ethtool->hds_config; + kernel_ring->hds_thresh = dev->ethtool->hds_thresh; + kernel_ring->hds_thresh_max = NSIM_HDS_THRESHOLD_MAX; + + if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_UNKNOWN) + kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; } static int nsim_set_ringparam(struct net_device *dev, @@ -161,6 +166,8 @@ static int nsim_get_ts_info(struct net_device *dev, static const struct ethtool_ops nsim_ethtool_ops = { .supported_coalesce_params = 
ETHTOOL_COALESCE_ALL_PARAMS, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .get_pause_stats = nsim_get_pause_stats, .get_pauseparam = nsim_get_pauseparam, .set_pauseparam = nsim_set_pauseparam, @@ -182,6 +189,9 @@ static void nsim_ethtool_ring_init(struct netdevsim *ns) ns->ethtool.ring.rx_jumbo_max_pending = 4096; ns->ethtool.ring.rx_mini_max_pending = 4096; ns->ethtool.ring.tx_max_pending = 4096; + + ns->netdev->ethtool->hds_config = ETHTOOL_TCP_DATA_SPLIT_UNKNOWN; + ns->netdev->ethtool->hds_thresh = 0; } void nsim_ethtool_init(struct netdevsim *ns) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index d013b64985390..f92b05ccdca91 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -54,6 +55,7 @@ static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb, static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + struct ethtool_netdev_state *ethtool; struct net_device *peer_dev; unsigned int len = skb->len; struct netdevsim *peer_ns; @@ -74,6 +76,13 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) rxq = rxq % peer_dev->num_rx_queues; rq = peer_ns->rq[rxq]; + ethtool = peer_dev->ethtool; + if (skb_is_nonlinear(skb) && + (ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED || + (ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + ethtool->hds_thresh > len))) + skb_linearize(skb); + skb_tx_timestamp(skb); if (unlikely(nsim_forward_skb(peer_dev, skb, rq) == NET_RX_DROP)) goto out_drop_cnt; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index a70f62af4c886..dcf073bc4802e 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include 
@@ -36,6 +37,8 @@ #define NSIM_IPSEC_VALID BIT(31) #define NSIM_UDP_TUNNEL_N_PORTS 4 +#define NSIM_HDS_THRESHOLD_MAX 1024 + struct nsim_sa { struct xfrm_state *xs; __be32 ipaddr[4]; diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 20a86bd5f4e34..e4136b0df8923 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -78,6 +78,9 @@ enum { * @cqe_size: Size of TX/RX completion queue event * @tx_push_buf_len: Size of TX push buffer * @tx_push_buf_max_len: Maximum allowed size of TX push buffer + * @hds_thresh: Packet size threshold for header data split (HDS) + * @hds_thresh_max: Maximum supported setting for @hds_thresh + * */ struct kernel_ethtool_ringparam { u32 rx_buf_len; @@ -87,6 +90,8 @@ struct kernel_ethtool_ringparam { u32 cqe_size; u32 tx_push_buf_len; u32 tx_push_buf_max_len; + u32 hds_thresh; + u32 hds_thresh_max; }; /** @@ -97,6 +102,7 @@ struct kernel_ethtool_ringparam { * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len * @ETHTOOL_RING_USE_TCP_DATA_SPLIT: capture for setting tcp_data_split + * @ETHTOOL_RING_USE_HDS_THRS: capture for setting header-data-split-thresh */ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0), @@ -105,6 +111,7 @@ enum ethtool_supported_ring_param { ETHTOOL_RING_USE_RX_PUSH = BIT(3), ETHTOOL_RING_USE_TX_PUSH_BUF_LEN = BIT(4), ETHTOOL_RING_USE_TCP_DATA_SPLIT = BIT(5), + ETHTOOL_RING_USE_HDS_THRS = BIT(6), }; #define __ETH_RSS_HASH_BIT(bit) ((u32)1 << (bit)) @@ -1157,12 +1164,16 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, * @rss_ctx: XArray of custom RSS contexts * @rss_lock: Protects entries in @rss_ctx. May be taken from * within RTNL. + * @hds_thresh: HDS Threshold value. + * @hds_config: HDS value from userspace. * @wol_enabled: Wake-on-LAN is enabled * @module_fw_flash_in_progress: Module firmware flashing is in progress. 
*/ struct ethtool_netdev_state { struct xarray rss_ctx; struct mutex rss_lock; + u32 hds_thresh; + u8 hds_config; unsigned wol_enabled:1; unsigned module_fw_flash_in_progress:1; }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bced03fb349e5..3e6336775baf0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4082,6 +4082,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog); u8 dev_xdp_prog_count(struct net_device *dev); int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf); +u8 dev_xdp_sb_prog_count(struct net_device *dev); u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode); u32 dev_get_min_mp_channel_count(const struct net_device *dev); diff --git a/include/uapi/linux/ethtool_netlink_generated.h b/include/uapi/linux/ethtool_netlink_generated.h index 43993a2d68e56..2e17ff348f89e 100644 --- a/include/uapi/linux/ethtool_netlink_generated.h +++ b/include/uapi/linux/ethtool_netlink_generated.h @@ -155,6 +155,8 @@ enum { ETHTOOL_A_RINGS_RX_PUSH, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, + ETHTOOL_A_RINGS_HDS_THRESH, + ETHTOOL_A_RINGS_HDS_THRESH_MAX, __ETHTOOL_A_RINGS_CNT, ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) diff --git a/net/core/dev.c b/net/core/dev.c index fda4e1039bf01..47e6b0f73cfc7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include @@ -9550,11 +9551,31 @@ u8 dev_xdp_prog_count(struct net_device *dev) } EXPORT_SYMBOL_GPL(dev_xdp_prog_count); +u8 dev_xdp_sb_prog_count(struct net_device *dev) +{ + u8 count = 0; + int i; + + for (i = 0; i < __MAX_XDP_MODE; i++) + if (dev->xdp_state[i].prog && + !dev->xdp_state[i].prog->aux->xdp_has_frags) + count++; + return count; +} + int dev_xdp_propagate(struct net_device *dev, struct netdev_bpf *bpf) { if 
(!dev->netdev_ops->ndo_bpf) return -EOPNOTSUPP; + if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + bpf->command == XDP_SETUP_PROG && + bpf->prog && !bpf->prog->aux->xdp_has_frags) { + NL_SET_ERR_MSG(bpf->extack, + "unable to propagate XDP to device using tcp-data-split"); + return -EBUSY; + } + if (dev_get_min_mp_channel_count(dev)) { NL_SET_ERR_MSG(bpf->extack, "unable to propagate XDP to device using memory provider"); return -EBUSY; @@ -9592,6 +9613,12 @@ static int dev_xdp_install(struct net_device *dev, enum bpf_xdp_mode mode, struct netdev_bpf xdp; int err; + if (dev->ethtool->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + prog && !prog->aux->xdp_has_frags) { + NL_SET_ERR_MSG(extack, "unable to install XDP to device using tcp-data-split"); + return -EBUSY; + } + if (dev_get_min_mp_channel_count(dev)) { NL_SET_ERR_MSG(extack, "unable to install XDP to device using memory provider"); return -EBUSY; diff --git a/net/core/devmem.c b/net/core/devmem.c index 0b6ed7525b22a..c971b8aceac86 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -140,6 +141,16 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return -ERANGE; } + if (dev->ethtool->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); + return -EINVAL; + } + + if (dev->ethtool->hds_thresh) { + NL_SET_ERR_MSG(extack, "hds-thresh is not zero"); + return -EINVAL; + } + rxq = __netif_get_rx_queue(dev, rxq_idx); if (rxq->mp_params.mp_priv) { NL_SET_ERR_MSG(extack, "designated queue already memory provider bound"); diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 1ce0a3de14304..ff69ca0715dea 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -456,7 +456,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT extern const struct nla_policy 
ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1]; extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1]; extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1]; -extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1]; +extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_HDS_THRESH_MAX + 1]; extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1]; extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1]; extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1]; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index b7865a14fdf8f..d8cd4e4d77622 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -61,7 +61,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */ nla_total_size(sizeof(u8))) + /* _RINGS_RX_PUSH */ nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN */ - nla_total_size(sizeof(u32)); /* _RINGS_TX_PUSH_BUF_LEN_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_PUSH_BUF_LEN_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_HDS_THRESH */ + nla_total_size(sizeof(u32)); /* _RINGS_HDS_THRESH_MAX*/ } static int rings_fill_reply(struct sk_buff *skb, @@ -108,7 +110,12 @@ static int rings_fill_reply(struct sk_buff *skb, (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX, kr->tx_push_buf_max_len) || nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN, - kr->tx_push_buf_len)))) + kr->tx_push_buf_len))) || + ((supported_ring_params & ETHTOOL_RING_USE_HDS_THRS) && + (nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH, + kr->hds_thresh) || + nla_put_u32(skb, ETHTOOL_A_RINGS_HDS_THRESH_MAX, + kr->hds_thresh_max)))) return -EMSGSIZE; return 0; @@ -130,6 +137,7 @@ const struct nla_policy ethnl_rings_set_policy[] = { [ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 
1), [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_HDS_THRESH] = { .type = NLA_U32 }, }; static int @@ -155,6 +163,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info, return -EOPNOTSUPP; } + if (tb[ETHTOOL_A_RINGS_HDS_THRESH] && + !(ops->supported_ring_params & ETHTOOL_RING_USE_HDS_THRS)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_HDS_THRESH], + "setting hds-thresh is not supported"); + return -EOPNOTSUPP; + } + if (tb[ETHTOOL_A_RINGS_CQE_SIZE] && !(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) { NL_SET_ERR_MSG_ATTR(info->extack, @@ -203,6 +219,7 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) dev->ethtool_ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); + kernel_ringparam.tcp_data_split = dev->ethtool->hds_config; ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); ethnl_update_u32(&ringparam.rx_mini_pending, @@ -222,9 +239,32 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) tb[ETHTOOL_A_RINGS_RX_PUSH], &mod); ethnl_update_u32(&kernel_ringparam.tx_push_buf_len, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod); + ethnl_update_u32(&kernel_ringparam.hds_thresh, + tb[ETHTOOL_A_RINGS_HDS_THRESH], &mod); if (!mod) return 0; + if (kernel_ringparam.tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_ENABLED && + dev_xdp_sb_prog_count(dev)) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_RINGS_TCP_DATA_SPLIT], + "tcp-data-split can not be enabled with single buffer XDP"); + return -EINVAL; + } + + if (dev_get_min_mp_channel_count(dev)) { + if (kernel_ringparam.tcp_data_split != + ETHTOOL_TCP_DATA_SPLIT_ENABLED) { + NL_SET_ERR_MSG(info->extack, + "can't disable tcp-data-split while device has memory provider enabled"); + return -EINVAL; + } else if (kernel_ringparam.hds_thresh) { + NL_SET_ERR_MSG(info->extack, + "can't set non-zero hds_thresh while device is memory 
provider enabled"); + return -EINVAL; + } + } + /* ensure new ring parameters are within limits */ if (ringparam.rx_pending > ringparam.rx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_RX]; @@ -234,6 +274,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; else if (ringparam.tx_pending > ringparam.tx_max_pending) err_attr = tb[ETHTOOL_A_RINGS_TX]; + else if (kernel_ringparam.hds_thresh > kernel_ringparam.hds_thresh_max) + err_attr = tb[ETHTOOL_A_RINGS_HDS_THRESH]; else err_attr = NULL; if (err_attr) { @@ -252,6 +294,11 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_ringparam(dev, &ringparam, &kernel_ringparam, info->extack); + if (!ret) { + dev->ethtool->hds_config = kernel_ringparam.tcp_data_split; + dev->ethtool->hds_thresh = kernel_ringparam.hds_thresh; + } + return ret < 0 ? ret : 1; } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 469179c18935f..137470bdee0c7 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -12,6 +12,7 @@ TEST_PROGS := \ queues.py \ stats.py \ shaper.py \ + hds.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py new file mode 100755 index 0000000000000..394971b25c0b1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hds.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import errno +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + +def get_hds(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise 
KsftSkipEx('tcp-data-split not supported by device') + +def get_hds_thresh(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + +def set_hds_enable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('enabled', rings['tcp-data-split']) + +def set_hds_disable(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("disabling of HDS not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'tcp-data-split' not in rings: + raise KsftSkipEx('tcp-data-split not supported by device') + + ksft_eq('disabled', rings['tcp-data-split']) + +def set_hds_thresh_zero(cfg, netnl) -> None: + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + try: + rings = 
netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + + ksft_eq(0, rings['hds-thresh']) + +def set_hds_thresh_max(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(rings['hds-thresh'], rings['hds-thresh-max']) + +def set_hds_thresh_gt(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + hds_gt = rings['hds-thresh-max'] + 1 + with ksft_raises(NlError) as e: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt}) + ksft_eq(e.exception.nl_msg.error, -errno.EINVAL) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=3) as cfg: + ksft_run([get_hds, + get_hds_thresh, + set_hds_disable, + set_hds_enable, + set_hds_thresh_zero, + set_hds_thresh_max, + set_hds_thresh_gt], + args=(cfg, EthtoolFamily())) + ksft_exit() + +if __name__ == "__main__": + main()