From 7ff5c83a1deb09419eaf33608dec120a00fcf923 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:09 +0000 Subject: [PATCH 01/14] nfp: simplify nfp_net_poll() There are a few variables in nfp_net_poll() which are used only once or unused but set. Remove them. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index d365760fa75b7..f6a42c14a1f0f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1525,21 +1525,15 @@ static int nfp_net_poll(struct napi_struct *napi, int budget) { struct nfp_net_r_vector *r_vec = container_of(napi, struct nfp_net_r_vector, napi); - struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring; - struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring; - struct nfp_net *nn = r_vec->nfp_net; - struct netdev_queue *txq; unsigned int pkts_polled; - tx_ring = &nn->tx_rings[rx_ring->idx]; - txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx); - nfp_net_tx_complete(tx_ring); + nfp_net_tx_complete(r_vec->tx_ring); - pkts_polled = nfp_net_rx(rx_ring, budget); + pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); if (pkts_polled < budget) { napi_complete_done(napi, pkts_polled); - nfp_net_irq_unmask(nn, r_vec->irq_idx); + nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_idx); } return pkts_polled; From 416db5c1e448805866a6f23ebf7c78443f3d3bd5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:10 +0000 Subject: [PATCH 02/14] nfp: remove support for nfp3200 Drop all code related to nfp3200. It was never widely deployed as a NIC. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 7 +- .../ethernet/netronome/nfp/nfp_net_common.c | 95 ++----------------- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 13 +-- .../ethernet/netronome/nfp/nfp_netvf_main.c | 45 ++------- 4 files changed, 22 insertions(+), 138 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index ed824e11a1e3b..808c38ae49911 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -421,7 +421,6 @@ struct nfp_stat_pair { * @netdev: Backpointer to net_device structure * @nfp_fallback: Is the driver used in fallback mode? * @is_vf: Is the driver attached to a VF? - * @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @ctrl: Local copy of the control register/word. @@ -487,7 +486,6 @@ struct nfp_net { unsigned nfp_fallback:1; unsigned is_vf:1; - unsigned is_nfp3200:1; unsigned fw_loaded:1; unsigned bpf_offload_skip_sw:1; @@ -593,16 +591,13 @@ static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well */ static inline u16 nn_readw(struct nfp_net *nn, int off) { - WARN_ON_ONCE(nn->is_nfp3200); return readw(nn->ctrl_bar + off); } static inline void nn_writew(struct nfp_net *nn, int off, u16 val) { - WARN_ON_ONCE(nn->is_nfp3200); writew(val, nn->ctrl_bar + off); } @@ -650,7 +645,7 @@ static inline void nn_pci_flush(struct nfp_net *nn) #define NFP_QCP_QUEUE_STS_HI 0x000c #define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask 0x3ffff -/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */ +/* The offset of a QCP queues in the PCIe Target */ #define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff))) /* nfp_qcp_ptr - Read or Write Pointer of a queue */ diff --git 
a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f6a42c14a1f0f..4e4b6e3b30d76 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -248,44 +248,15 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update) /* Interrupt configuration and handling */ -/** - * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking - * @nn: NFP Network structure - * @entry_nr: MSI-X table entry - * - * Clear the MSI-X table mask bit for the given entry bypassing Linux irq - * handling subsystem. Use *only* to reenable automasked vectors. - */ -static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr) -{ - struct list_head *msi_head = &nn->pdev->dev.msi_list; - struct msi_desc *entry; - u32 off; - - /* All MSI-Xs have the same mask_base */ - entry = list_first_entry(msi_head, struct msi_desc, list); - - off = (PCI_MSIX_ENTRY_SIZE * entry_nr) + - PCI_MSIX_ENTRY_VECTOR_CTRL; - writel(0, entry->mask_base + off); - readl(entry->mask_base); -} - /** * nfp_net_irq_unmask() - Unmask automasked interrupt * @nn: NFP Network structure * @entry_nr: MSI-X table entry * - * If MSI-X auto-masking is enabled clear the mask bit, otherwise - * clear the ICR for the entry. + * Clear the ICR for the IRQ entry. */ static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr) { - if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) { - nfp_net_irq_unmask_msix(nn, entry_nr); - return; - } - nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED); nn_pci_flush(nn); } @@ -1368,20 +1339,6 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, * more cleanly separate packet receive code from other bookkeeping * functions performed in the napi poll function. * - * There are differences between the NFP-3200 firmware and the - * NFP-6000 firmware. 
The NFP-3200 firmware uses a dedicated RX queue - * to indicate that new packets have arrived. The NFP-6000 does not - * have this queue and uses the DD bit in the RX descriptor. This - * method cannot be used on the NFP-3200 as it causes a race - * condition: The RX ring write pointer on the NFP-3200 is updated - * after packets (and descriptors) have been DMAed. If the DD bit is - * used and subsequently the read pointer is updated this may lead to - * the RX queue to underflow (if the firmware has not yet update the - * write pointer). Therefore we use slightly ugly conditional code - * below to handle the differences. We may, in the future update the - * NFP-3200 firmware to behave the same as the firmware on the - * NFP-6000. - * * Return: Number of packets received. */ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) @@ -1389,41 +1346,19 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; unsigned int data_len, meta_len; - int avail = 0, pkts_polled = 0; struct sk_buff *skb, *new_skb; struct nfp_net_rx_desc *rxd; dma_addr_t new_dma_addr; - u32 qcp_wr_p; + int pkts_polled = 0; int idx; - if (nn->is_nfp3200) { - /* Work out how many packets arrived */ - qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx); - idx = rx_ring->rd_p % rx_ring->cnt; - - if (qcp_wr_p == idx) - /* No new packets */ - return 0; - - if (qcp_wr_p > idx) - avail = qcp_wr_p - idx; - else - avail = qcp_wr_p + rx_ring->cnt - idx; - } else { - avail = budget + 1; - } - - while (avail > 0 && pkts_polled < budget) { + while (pkts_polled < budget) { idx = rx_ring->rd_p % rx_ring->cnt; rxd = &rx_ring->rxds[idx]; - if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) { - if (nn->is_nfp3200) - nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n", - rx_ring->idx, idx, - rxd->vals[0], rxd->vals[1]); + if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) break; - } + /* Memory 
barrier to ensure that we won't do other reads * before the DD bit. */ @@ -1431,7 +1366,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) rx_ring->rd_p++; pkts_polled++; - avail--; skb = rx_ring->rxbufs[idx].skb; @@ -1508,9 +1442,6 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) napi_gro_receive(&rx_ring->r_vec->napi, skb); } - if (nn->is_nfp3200) - nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled); - return pkts_polled; } @@ -1895,9 +1826,8 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn) { nn_writel(nn, NFP_NET_CFG_MACADDR + 0, get_unaligned_be32(nn->netdev->dev_addr)); - /* We can't do writew for NFP-3200 compatibility */ - nn_writel(nn, NFP_NET_CFG_MACADDR + 4, - get_unaligned_be16(nn->netdev->dev_addr + 4) << 16); + nn_writew(nn, NFP_NET_CFG_MACADDR + 6, + get_unaligned_be16(nn->netdev->dev_addr + 4)); } static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx) @@ -2675,8 +2605,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { */ void nfp_net_info(struct nfp_net *nn) { - nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", - nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx", + nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n", nn->is_vf ? "VF " : "", nn->num_tx_rings, nn->max_tx_rings, nn->num_rx_rings, nn->max_rx_rings); @@ -2891,13 +2820,7 @@ int nfp_net_netdev_init(struct net_device *netdev) nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD; } - /* On NFP-3200 enable MSI-X auto-masking, if supported and the - * interrupts are not shared. 
- */ - if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO) - nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO; - - /* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */ + /* Determine RX packet/metadata boundary offset */ if (nn->fw_ver.major >= 2) nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); else diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 93b10b441acbf..385ba355c965c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -50,7 +50,7 @@ /** * Configuration BAR size. * - * The configuration BAR is 8K in size, but on the NFP6000, due to + * The configuration BAR is 8K in size, but due to * THB-350, 32k needs to be reserved. */ #define NFP_NET_CFG_BAR_SZ (32 * 1024) @@ -186,18 +186,13 @@ #define NFP_NET_CFG_START_RXQ 0x004c /** - * NFP-3200 workaround (0x0050 - 0x0058) - * @NFP_NET_CFG_SPARE_ADDR: DMA address for ME code to use (e.g. 
YDS-155 fix) - */ -#define NFP_NET_CFG_SPARE_ADDR 0x0050 -/** - * NFP6000/NFP4000 - Prepend configuration + * Prepend configuration */ #define NFP_NET_CFG_RX_OFFSET 0x0050 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC 0 /* Prepend mode */ /** - * NFP6000/NFP4000 - VXLAN/UDP encap configuration + * VXLAN/UDP encap configuration * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ @@ -205,7 +200,7 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * NFP6000 - BPF section + * BPF section * @NFP_NET_CFG_BPF_ABI: BPF ABI version * @NFP_NET_CFG_BPF_CAP: BPF capabilities * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index 2800bbf65a891..d065235034d48 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -63,9 +63,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn) u8 mac_addr[ETH_ALEN]; put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]); - /* We can't do readw for NFP-3200 compatibility */ - put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16, - &mac_addr[4]); + put_unaligned_be16(nn_readw(nn, NFP_NET_CFG_MACADDR + 6), &mac_addr[4]); if (!is_valid_ether_addr(mac_addr)) { eth_hw_addr_random(nn->netdev); @@ -86,7 +84,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, int tx_bar_no, rx_bar_no; u8 __iomem *ctrl_bar; struct nfp_net *nn; - int is_nfp3200; u32 startq; int stride; int err; @@ -101,15 +98,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, goto err_pci_disable; } - switch (pdev->device) { - case PCI_DEVICE_NFP6000VF: - is_nfp3200 = 0; - break; - default: - err = -ENODEV; - goto err_pci_regions; - } - pci_set_master(pdev); err = dma_set_mask_and_coherent(&pdev->dev, @@ -149,15 +137,9 @@ static int nfp_netvf_pci_probe(struct pci_dev 
*pdev, } else { switch (fw_ver.major) { case 1 ... 4: - if (is_nfp3200) { - stride = 2; - tx_bar_no = NFP_NET_Q0_BAR; - rx_bar_no = NFP_NET_Q1_BAR; - } else { - stride = 4; - tx_bar_no = NFP_NET_Q0_BAR; - rx_bar_no = tx_bar_no; - } + stride = 4; + tx_bar_no = NFP_NET_Q0_BAR; + rx_bar_no = tx_bar_no; break; default: dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n", @@ -189,20 +171,10 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2; } - /* XXX Implement a workaround for THB-350 here. Ideally, we - * have a different PCI ID for A rev VFs. - */ - switch (pdev->device) { - case PCI_DEVICE_NFP6000VF: - startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); - tx_bar_off = NFP_PCIE_QUEUE(startq); - startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); - rx_bar_off = NFP_PCIE_QUEUE(startq); - break; - default: - err = -ENODEV; - goto err_ctrl_unmap; - } + startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ); + tx_bar_off = NFP_PCIE_QUEUE(startq); + startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ); + rx_bar_off = NFP_PCIE_QUEUE(startq); /* Allocate and initialise the netdev */ nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings); @@ -214,7 +186,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, nn->fw_ver = fw_ver; nn->ctrl_bar = ctrl_bar; nn->is_vf = 1; - nn->is_nfp3200 = is_nfp3200; nn->stride_tx = stride; nn->stride_rx = stride; From 8c8b0e99eb5c008158e095ecc3a42755ba9a7f8d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:11 +0000 Subject: [PATCH 03/14] nfp: remove unnecessary call to ether_setup() ether_setup() will be invoked by alloc_etherdev_mqs(), no need to call it again. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 4e4b6e3b30d76..bd28e42bee2ba 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2839,7 +2839,6 @@ int nfp_net_netdev_init(struct net_device *netdev) return err; /* Finalise the netdev setup */ - ether_setup(netdev); netdev->netdev_ops = &nfp_net_netdev_ops; netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000); From fa95f1d2bf1816eee736192fa1f75ed96c0ab9cf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:12 +0000 Subject: [PATCH 04/14] nfp: remove inline attributes and dead code Don't declare functions as static inline in .c files and remove dead code it was hiding. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- .../net/ethernet/netronome/nfp/nfp_net_common.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index bd28e42bee2ba..025483fb9103e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -576,7 +576,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset, * * Return: True if the ring is full. */ -static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) +static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt) { return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt); } @@ -1031,19 +1031,6 @@ static void nfp_net_tx_timeout(struct net_device *netdev) /* Receive processing */ -/** - * nfp_net_rx_space() - return the number of free slots on the RX ring - * @rx_ring: RX ring structure - * - * Make sure we leave at least one slot free. 
- * - * Return: True if there is space on the RX ring - */ -static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring) -{ - return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p); -} - /** * nfp_net_rx_alloc_one() - Allocate and map skb for RX * @rx_ring: RX ring structure of the skb From bf187ea01b07739167420088adb385f9385a7c7f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:13 +0000 Subject: [PATCH 05/14] nfp: centralize the buffer size calculation Add a helper function to calculate the buffer size at run time. Buffer lengths will now depend on the FW prepend configuration instead of assuming the most space consuming configuration and defaulting to 2k buffers at initialization time. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 1 - .../ethernet/netronome/nfp/nfp_net_common.c | 29 ++++++++++++++----- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 808c38ae49911..9b90fab8a6df0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -75,7 +75,6 @@ /* Default size for MTU and freelist buffer sizes */ #define NFP_NET_DEFAULT_MTU 1500 -#define NFP_NET_DEFAULT_RX_BUFSZ 2048 /* Maximum number of bytes prepended to a packet */ #define NFP_NET_MAX_PREPEND 64 diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 025483fb9103e..77c645abb8272 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1030,6 +1030,19 @@ static void nfp_net_tx_timeout(struct net_device *netdev) /* Receive processing */ +static unsigned int +nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu) +{ + unsigned int fl_bufsz; + + if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) + 
fl_bufsz = NFP_NET_MAX_PREPEND; + else + fl_bufsz = nn->rx_offset; + fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu; + + return fl_bufsz; +} /** * nfp_net_rx_alloc_one() - Allocate and map skb for RX @@ -2191,7 +2204,7 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) old_mtu = netdev->mtu; old_fl_bufsz = nn->fl_bufsz; - new_fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu; + new_fl_bufsz = nfp_net_calc_fl_bufsz(nn, new_mtu); if (!netif_running(netdev)) { netdev->mtu = new_mtu; @@ -2731,12 +2744,18 @@ int nfp_net_netdev_init(struct net_device *netdev) nfp_net_write_mac_addr(nn); + /* Determine RX packet/metadata boundary offset */ + if (nn->fw_ver.major >= 2) + nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); + else + nn->rx_offset = NFP_NET_RX_OFFSET; + /* Set default MTU and Freelist buffer size */ if (nn->max_mtu < NFP_NET_DEFAULT_MTU) netdev->mtu = nn->max_mtu; else netdev->mtu = NFP_NET_DEFAULT_MTU; - nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ; + nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu); /* Advertise/enable offloads based on capabilities * @@ -2807,12 +2826,6 @@ int nfp_net_netdev_init(struct net_device *netdev) nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD; } - /* Determine RX packet/metadata boundary offset */ - if (nn->fw_ver.major >= 2) - nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET); - else - nn->rx_offset = NFP_NET_RX_OFFSET; - /* Stash the re-configuration queue away. First odd queue in TX Bar */ nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ; From e9949aebe04a1d917a15f4dc71e77deaefe9de94 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:14 +0000 Subject: [PATCH 06/14] nfp: add buffer drop/recycle helper for RX nfp_net_rx() is quite long already and about to get longer. Move buffer drop/recycle to a helper. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 27 ++++++++++++------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 77c645abb8272..abd4f17e89c7a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1330,6 +1330,20 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, return data; } +static void +nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring, + struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb) +{ + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + if (rxbuf) + nfp_net_rx_give_one(rx_ring, rxbuf->skb, rxbuf->dma_addr); + if (skb) + dev_kfree_skb_any(skb); +} + /** * nfp_net_rx() - receive up to @budget packets on @rx_ring * @rx_ring: RX ring to receive from @@ -1372,11 +1386,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr, nn->fl_bufsz); if (!new_skb) { - nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb, - rx_ring->rxbufs[idx].dma_addr); - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_drops++; - u64_stats_update_end(&r_vec->rx_sync); + nfp_net_rx_drop(r_vec, rx_ring, &rx_ring->rxbufs[idx], + NULL); continue; } @@ -1420,12 +1431,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) end = nfp_net_parse_meta(nn->netdev, skb, meta_len); if (unlikely(end != skb->data)) { - u64_stats_update_begin(&r_vec->rx_sync); - r_vec->rx_drops++; - u64_stats_update_end(&r_vec->rx_sync); - - dev_kfree_skb_any(skb); nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + nfp_net_rx_drop(r_vec, rx_ring, NULL, skb); continue; } } From c0f031bc88660e8b96060b76aa72fbe8859bdcc8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:15 +0000 Subject: 
[PATCH 07/14] nfp: use alloc_frag() and build_skb() Speed up RX processing by moving to the alloc_frag()/build_skb() paradigm. Since we're no longer mapping the entire buffer for DMA add helpers which take care of calculating offsets and lengths. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 8 +- .../ethernet/netronome/nfp/nfp_net_common.c | 115 +++++++++++------- .../ethernet/netronome/nfp/nfp_net_debugfs.c | 9 +- 3 files changed, 82 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 9b90fab8a6df0..9ca066b29f07e 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -101,6 +101,10 @@ /* Offload definitions */ #define NFP_NET_N_VXLAN_PORTS (NFP_NET_CFG_VXLAN_SZ / sizeof(__be16)) +#define NFP_NET_RX_BUF_HEADROOM (NET_SKB_PAD + NET_IP_ALIGN) +#define NFP_NET_RX_BUF_NON_DATA (NFP_NET_RX_BUF_HEADROOM + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + /* Forward declarations */ struct nfp_net; struct nfp_net_r_vector; @@ -277,11 +281,11 @@ struct nfp_net_rx_hash { /** * struct nfp_net_rx_buf - software RX buffer descriptor - * @skb: sk_buff associated with this buffer + * @frag: page fragment buffer * @dma_addr: DMA mapping address of the buffer */ struct nfp_net_rx_buf { - struct sk_buff *skb; + void *frag; dma_addr_t dma_addr; }; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index abd4f17e89c7a..449ec798a9b71 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -80,6 +81,22 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, put_unaligned_le32(reg, fw_ver); } +static dma_addr_t +nfp_net_dma_map_rx(struct nfp_net 
*nn, void *frag, unsigned int bufsz, + int direction) +{ + return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM, + bufsz - NFP_NET_RX_BUF_NON_DATA, direction); +} + +static void +nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr, + unsigned int bufsz, int direction) +{ + dma_unmap_single(&nn->pdev->dev, dma_addr, + bufsz - NFP_NET_RX_BUF_NON_DATA, direction); +} + /* Firmware reconfig * * Firmware reconfig may take a while so we have two versions of it - @@ -1035,64 +1052,67 @@ nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu) { unsigned int fl_bufsz; + fl_bufsz = NFP_NET_RX_BUF_HEADROOM; if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) - fl_bufsz = NFP_NET_MAX_PREPEND; + fl_bufsz += NFP_NET_MAX_PREPEND; else - fl_bufsz = nn->rx_offset; + fl_bufsz += nn->rx_offset; fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu; + fl_bufsz = SKB_DATA_ALIGN(fl_bufsz); + fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + return fl_bufsz; } /** - * nfp_net_rx_alloc_one() - Allocate and map skb for RX + * nfp_net_rx_alloc_one() - Allocate and map page frag for RX * @rx_ring: RX ring structure of the skb * @dma_addr: Pointer to storage for DMA address (output param) * @fl_bufsz: size of freelist buffers * - * This function will allcate a new skb, map it for DMA. + * This function will allcate a new page frag, map it for DMA. * - * Return: allocated skb or NULL on failure. + * Return: allocated page frag or NULL on failure. 
*/ -static struct sk_buff * +static void * nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr, unsigned int fl_bufsz) { struct nfp_net *nn = rx_ring->r_vec->nfp_net; - struct sk_buff *skb; + void *frag; - skb = netdev_alloc_skb(nn->netdev, fl_bufsz); - if (!skb) { - nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n"); + frag = netdev_alloc_frag(fl_bufsz); + if (!frag) { + nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n"); return NULL; } - *dma_addr = dma_map_single(&nn->pdev->dev, skb->data, - fl_bufsz, DMA_FROM_DEVICE); + *dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, DMA_FROM_DEVICE); if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { - dev_kfree_skb_any(skb); + skb_free_frag(frag); nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); return NULL; } - return skb; + return frag; } /** * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings * @rx_ring: RX ring structure - * @skb: Skb to put on rings + * @frag: page fragment buffer * @dma_addr: DMA address of skb mapping */ static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, - struct sk_buff *skb, dma_addr_t dma_addr) + void *frag, dma_addr_t dma_addr) { unsigned int wr_idx; wr_idx = rx_ring->wr_p % rx_ring->cnt; /* Stash SKB and DMA address away */ - rx_ring->rxbufs[wr_idx].skb = skb; + rx_ring->rxbufs[wr_idx].frag = frag; rx_ring->rxbufs[wr_idx].dma_addr = dma_addr; /* Fill freelist descriptor */ @@ -1127,9 +1147,9 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) wr_idx = rx_ring->wr_p % rx_ring->cnt; last_idx = rx_ring->cnt - 1; rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr; - rx_ring->rxbufs[wr_idx].skb = rx_ring->rxbufs[last_idx].skb; + rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag; rx_ring->rxbufs[last_idx].dma_addr = 0; - rx_ring->rxbufs[last_idx].skb = NULL; + rx_ring->rxbufs[last_idx].frag = NULL; memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt); 
rx_ring->wr_p = 0; @@ -1149,7 +1169,6 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) static void nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) { - struct pci_dev *pdev = nn->pdev; unsigned int i; for (i = 0; i < rx_ring->cnt - 1; i++) { @@ -1157,14 +1176,14 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) * fails to allocate enough buffers and calls here to free * already allocated ones. */ - if (!rx_ring->rxbufs[i].skb) + if (!rx_ring->rxbufs[i].frag) continue; - dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr, - rx_ring->bufsz, DMA_FROM_DEVICE); - dev_kfree_skb_any(rx_ring->rxbufs[i].skb); + nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr, + rx_ring->bufsz, DMA_FROM_DEVICE); + skb_free_frag(rx_ring->rxbufs[i].frag); rx_ring->rxbufs[i].dma_addr = 0; - rx_ring->rxbufs[i].skb = NULL; + rx_ring->rxbufs[i].frag = NULL; } } @@ -1182,10 +1201,10 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring) rxbufs = rx_ring->rxbufs; for (i = 0; i < rx_ring->cnt - 1; i++) { - rxbufs[i].skb = + rxbufs[i].frag = nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr, rx_ring->bufsz); - if (!rxbufs[i].skb) { + if (!rxbufs[i].frag) { nfp_net_rx_ring_bufs_free(nn, rx_ring); return -ENOMEM; } @@ -1203,7 +1222,7 @@ static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring) unsigned int i; for (i = 0; i < rx_ring->cnt - 1; i++) - nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].skb, + nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag, rx_ring->rxbufs[i].dma_addr); } @@ -1338,8 +1357,13 @@ nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring, r_vec->rx_drops++; u64_stats_update_end(&r_vec->rx_sync); + /* skb is build based on the frag, free_skb() would free the frag + * so to be able to reuse it we need an extra ref. 
+ */ + if (skb && rxbuf && skb->head == rxbuf->frag) + page_ref_inc(virt_to_head_page(rxbuf->frag)); if (rxbuf) - nfp_net_rx_give_one(rx_ring, rxbuf->skb, rxbuf->dma_addr); + nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr); if (skb) dev_kfree_skb_any(skb); } @@ -1360,10 +1384,12 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net *nn = r_vec->nfp_net; unsigned int data_len, meta_len; - struct sk_buff *skb, *new_skb; + struct nfp_net_rx_buf *rxbuf; struct nfp_net_rx_desc *rxd; dma_addr_t new_dma_addr; + struct sk_buff *skb; int pkts_polled = 0; + void *new_frag; int idx; while (pkts_polled < budget) { @@ -1381,21 +1407,23 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) rx_ring->rd_p++; pkts_polled++; - skb = rx_ring->rxbufs[idx].skb; - - new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr, - nn->fl_bufsz); - if (!new_skb) { - nfp_net_rx_drop(r_vec, rx_ring, &rx_ring->rxbufs[idx], - NULL); + rxbuf = &rx_ring->rxbufs[idx]; + skb = build_skb(rxbuf->frag, nn->fl_bufsz); + if (unlikely(!skb)) { + nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL); + continue; + } + new_frag = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr, + nn->fl_bufsz); + if (unlikely(!new_frag)) { + nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb); continue; } - dma_unmap_single(&nn->pdev->dev, - rx_ring->rxbufs[idx].dma_addr, - nn->fl_bufsz, DMA_FROM_DEVICE); + nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[idx].dma_addr, + nn->fl_bufsz, DMA_FROM_DEVICE); - nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr); + nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr); /* < meta_len > * <-- [rx_offset] --> @@ -1413,9 +1441,10 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) data_len = le16_to_cpu(rxd->rxd.data_len); if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) - skb_reserve(skb, meta_len); + skb_reserve(skb, NFP_NET_RX_BUF_HEADROOM + meta_len); else - skb_reserve(skb, 
nn->rx_offset); + skb_reserve(skb, + NFP_NET_RX_BUF_HEADROOM + nn->rx_offset); skb_put(skb, data_len - meta_len); /* Stats update */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c index f7c9a5bc4aa33..180cf70f00932 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c @@ -44,8 +44,8 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) struct nfp_net_r_vector *r_vec = file->private; struct nfp_net_rx_ring *rx_ring; struct nfp_net_rx_desc *rxd; - struct sk_buff *skb; struct nfp_net *nn; + void *frag; int i; rtnl_lock(); @@ -73,10 +73,9 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data) seq_printf(file, "%04d: 0x%08x 0x%08x", i, rxd->vals[0], rxd->vals[1]); - skb = READ_ONCE(rx_ring->rxbufs[i].skb); - if (skb) - seq_printf(file, " skb->head=%p skb->data=%p", - skb->head, skb->data); + frag = READ_ONCE(rx_ring->rxbufs[i].frag); + if (frag) + seq_printf(file, " frag=%p", frag); if (rx_ring->rxbufs[i].dma_addr) seq_printf(file, " dma_addr=%pad", From b64b7bb6d492ef1bbe9aa9ed7eeb18f7148ece3d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:16 +0000 Subject: [PATCH 08/14] nfp: add separate buffer allocation function for napi Introduce a separate buffer allocation function to be called from NAPI. We can make assumptions about the context and buffer size. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 23 +++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 449ec798a9b71..b43d2dbae3682 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -1098,6 +1098,26 @@ nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr, return frag; } +static void *nfp_net_napi_alloc_one(struct nfp_net *nn, dma_addr_t *dma_addr) +{ + void *frag; + + frag = napi_alloc_frag(nn->fl_bufsz); + if (!frag) { + nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n"); + return NULL; + } + + *dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, DMA_FROM_DEVICE); + if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) { + skb_free_frag(frag); + nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n"); + return NULL; + } + + return frag; +} + /** * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings * @rx_ring: RX ring structure @@ -1413,8 +1433,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL); continue; } - new_frag = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr, - nn->fl_bufsz); + new_frag = nfp_net_napi_alloc_one(nn, &new_dma_addr); if (unlikely(!new_frag)) { nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb); continue; From 1a1d74d378b13ad3f93e8975a0ade0980a49d28b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:17 +0000 Subject: [PATCH 09/14] nfp: use AND instead of modulo to get ring indexes We already force ring sizes to be power of 2 so replace modulo operations with AND (size - 1) in index calculations. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../net/ethernet/netronome/nfp/nfp_net_common.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index b43d2dbae3682..7225ab61a1202 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -778,7 +778,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_free; - wr_idx = tx_ring->wr_p % tx_ring->cnt; + wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1); /* Stash the soft descriptor of the head then initialize it */ txbuf = &tx_ring->txbufs[wr_idx]; @@ -822,7 +822,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev) if (dma_mapping_error(&nn->pdev->dev, dma_addr)) goto err_unmap; - wr_idx = (wr_idx + 1) % tx_ring->cnt; + wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1); tx_ring->txbufs[wr_idx].skb = skb; tx_ring->txbufs[wr_idx].dma_addr = dma_addr; tx_ring->txbufs[wr_idx].fidx = f; @@ -917,7 +917,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring) todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p; while (todo--) { - idx = tx_ring->rd_p % tx_ring->cnt; + idx = tx_ring->rd_p & (tx_ring->cnt - 1); tx_ring->rd_p++; skb = tx_ring->txbufs[idx].skb; @@ -992,7 +992,7 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring) int nr_frags, fidx, idx; struct sk_buff *skb; - idx = tx_ring->rd_p % tx_ring->cnt; + idx = tx_ring->rd_p & (tx_ring->cnt - 1); skb = tx_ring->txbufs[idx].skb; nr_frags = skb_shinfo(skb)->nr_frags; fidx = tx_ring->txbufs[idx].fidx; @@ -1129,7 +1129,7 @@ static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring, { unsigned int wr_idx; - wr_idx = rx_ring->wr_p % rx_ring->cnt; + wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1); /* Stash SKB and DMA address away */ rx_ring->rxbufs[wr_idx].frag = frag; @@ -1164,7 
+1164,7 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring) unsigned int wr_idx, last_idx; /* Move the empty entry to the end of the list */ - wr_idx = rx_ring->wr_p % rx_ring->cnt; + wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1); last_idx = rx_ring->cnt - 1; rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr; rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag; @@ -1413,7 +1413,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) int idx; while (pkts_polled < budget) { - idx = rx_ring->rd_p % rx_ring->cnt; + idx = rx_ring->rd_p & (rx_ring->cnt - 1); rxd = &rx_ring->rxds[idx]; if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) From 23a4e389bdc71fda949b5a19a28fa99c4d746d43 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:18 +0000 Subject: [PATCH 10/14] nfp: create separate define for max number of vectors We are currently using define for max TX rings to allocate IRQ vectors. It's OK since the max number of rings for TX and RX are currently the same, but let's make the code nicer by taking max of the two. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 9ca066b29f07e..55a497c022376 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -87,6 +87,9 @@ /* Queue/Ring definitions */ #define NFP_NET_MAX_TX_RINGS 64 /* Max. # of Tx rings per device */ #define NFP_NET_MAX_RX_RINGS 64 /* Max. # of Rx rings per device */ +#define NFP_NET_MAX_R_VECS (NFP_NET_MAX_TX_RINGS > NFP_NET_MAX_RX_RINGS ? \ + NFP_NET_MAX_TX_RINGS : NFP_NET_MAX_RX_RINGS) +#define NFP_NET_MAX_IRQS (NFP_NET_NON_Q_VECTORS + NFP_NET_MAX_R_VECS) #define NFP_NET_MIN_TX_DESCS 256 /* Min.
# of Tx descs per ring */ #define NFP_NET_MIN_RX_DESCS 256 /* Min. # of Rx descs per ring */ @@ -539,9 +542,8 @@ struct nfp_net { u8 num_irqs; u8 num_r_vecs; - struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS]; - struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS + - NFP_NET_MAX_TX_RINGS]; + struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS]; + struct msix_entry irq_entries[NFP_NET_MAX_IRQS]; irq_handler_t lsc_handler; char lsc_name[IFNAMSIZ + 8]; From a4b562bb8ebd418dcfeba2840a7ece25a74d8385 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:19 +0000 Subject: [PATCH 11/14] nfp: use unsigned int for vector/ring counts Use unsigned int consistently for vector/ring counts. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 17 +++++++++-------- .../net/ethernet/netronome/nfp/nfp_net_common.c | 5 +++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 55a497c022376..0ea6ebd329cf0 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -528,11 +528,11 @@ struct nfp_net { struct timer_list rx_filter_stats_timer; spinlock_t rx_filter_lock; - int max_tx_rings; - int max_rx_rings; + unsigned int max_tx_rings; + unsigned int max_rx_rings; - int num_tx_rings; - int num_rx_rings; + unsigned int num_tx_rings; + unsigned int num_rx_rings; int stride_tx; int stride_rx; @@ -540,8 +540,8 @@ struct nfp_net { int txd_cnt; int rxd_cnt; - u8 num_irqs; - u8 num_r_vecs; + unsigned int num_irqs; + unsigned int num_r_vecs; struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS]; struct msix_entry irq_entries[NFP_NET_MAX_IRQS]; @@ -757,8 +757,9 @@ extern const char nfp_net_driver_version[]; void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver, void __iomem *ctrl_bar); -struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, - int 
max_tx_rings, int max_rx_rings); +struct nfp_net * +nfp_net_netdev_alloc(struct pci_dev *pdev, + unsigned int max_tx_rings, unsigned int max_rx_rings); void nfp_net_netdev_free(struct nfp_net *nn); int nfp_net_netdev_init(struct net_device *netdev); void nfp_net_netdev_clean(struct net_device *netdev); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 7225ab61a1202..49a1f54e07874 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -318,8 +318,8 @@ static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) */ static int nfp_net_irqs_wanted(struct nfp_net *nn) { + unsigned int vecs; int ncpus; - int vecs; ncpus = num_online_cpus(); @@ -2701,7 +2701,8 @@ void nfp_net_info(struct nfp_net *nn) * Return: NFP Net device structure, or ERR_PTR on error. */ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, - int max_tx_rings, int max_rx_rings) + unsigned int max_tx_rings, + unsigned int max_rx_rings) { struct net_device *netdev; struct nfp_net *nn; From 4b27a1eb7b83da956c3939869ed22f5ca0e1f1e1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:20 +0000 Subject: [PATCH 12/14] nfp: remove nfp_net_irqs_wanted() nfp_net_irqs_wanted() doesn't really encapsulate much logic, remove it and inline the calculations. Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 27 +++---------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 49a1f54e07874..f26c3927746fd 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -307,28 +307,6 @@ static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) return nvecs; } -/** - * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want - * @nn: NFP Network structure - * - * We want a vector per CPU (or ring), whatever is smaller plus - * NFP_NET_NON_Q_VECTORS for LSC etc. - * - * Return: Number of interrupts wanted - */ -static int nfp_net_irqs_wanted(struct nfp_net *nn) -{ - unsigned int vecs; - int ncpus; - - ncpus = num_online_cpus(); - - vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings); - vecs = min_t(int, vecs, ncpus); - - return vecs + NFP_NET_NON_Q_VECTORS; -} - /** * nfp_net_irqs_alloc() - allocates MSI-X irqs * @nn: NFP Network structure @@ -339,7 +317,7 @@ int nfp_net_irqs_alloc(struct nfp_net *nn) { int wanted_irqs; - wanted_irqs = nfp_net_irqs_wanted(nn); + wanted_irqs = nn->num_r_vecs + NFP_NET_NON_Q_VECTORS; nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); if (nn->num_irqs == 0) { @@ -2726,6 +2704,9 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->num_tx_rings = min_t(int, nqs, max_tx_rings); nn->num_rx_rings = min_t(int, nqs, max_rx_rings); + nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings); + nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus()); + nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT; nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; From b33ae997a362d64a1178be17804c2c3899770a4b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:21 +0000 Subject: [PATCH 13/14] nfp: replace num_irqs with max_r_vecs num_irqs is not used anywhere, replace it 
with max_r_vecs which holds number of allocated RX/TX vectors and is going to be useful soon. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 4 ++-- .../net/ethernet/netronome/nfp/nfp_net_common.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 0ea6ebd329cf0..e8713254786b6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -456,7 +456,7 @@ struct nfp_stat_pair { * @rxd_cnt: Size of the RX ring in number of descriptors * @tx_rings: Array of pre-allocated TX ring structures * @rx_rings: Array of pre-allocated RX ring structures - * @num_irqs: Number of allocated interrupt vectors + * @max_r_vecs: Number of allocated interrupt vectors for RX/TX * @num_r_vecs: Number of used ring vectors * @r_vecs: Pre-allocated array of ring vectors * @irq_entries: Pre-allocated array of MSI-X entries @@ -540,7 +540,7 @@ struct nfp_net { int txd_cnt; int rxd_cnt; - unsigned int num_irqs; + unsigned int max_r_vecs; unsigned int num_r_vecs; struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS]; struct msix_entry irq_entries[NFP_NET_MAX_IRQS]; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index f26c3927746fd..4b866fc874b69 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -316,22 +316,24 @@ static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs) int nfp_net_irqs_alloc(struct nfp_net *nn) { int wanted_irqs; + unsigned int n; wanted_irqs = nn->num_r_vecs + NFP_NET_NON_Q_VECTORS; - nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs); - if (nn->num_irqs == 0) { + n = nfp_net_msix_alloc(nn, wanted_irqs); + if (n == 0) { nn_err(nn, "Failed to allocate MSI-X IRQs\n"); return 0; } - 
nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS; + nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS; + nn->num_r_vecs = nn->max_r_vecs; - if (nn->num_irqs < wanted_irqs) + if (n < wanted_irqs) nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n", - wanted_irqs, nn->num_irqs); + wanted_irqs, n); - return nn->num_irqs; + return n; } /** From cbeaf7aa733a134721cdcda44688e53a6380cd31 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 31 Oct 2016 20:43:22 +0000 Subject: [PATCH 14/14] nfp: bring back support for different ring counts We used to always allocate the same number of TX and RX rings so the support for having r_vectors without one of the rings was dropped. That makes us, however, unnecessarily limited to 8 TX rings (8 is the Linux RSS default) most of the time. Also we are about to add channel count configuration via ethtool, so bring that support back. TX rings can now default to num_online_cpus() and RX rings to netif_get_num_default_rss_queues(). Signed-off-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- .../ethernet/netronome/nfp/nfp_net_common.c | 118 +++++++++++------- 1 file changed, 71 insertions(+), 47 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 4b866fc874b69..97e0bbef13d11 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -483,13 +483,13 @@ static void nfp_net_irqs_assign(struct net_device *netdev) struct nfp_net_r_vector *r_vec; int r; - /* Assumes nn->num_tx_rings == nn->num_rx_rings */ - if (nn->num_tx_rings > nn->num_r_vecs) { - nn_warn(nn, "More rings (%d) than vectors (%d).\n", - nn->num_tx_rings, nn->num_r_vecs); - nn->num_tx_rings = nn->num_r_vecs; - nn->num_rx_rings = nn->num_r_vecs; - } + if (nn->num_rx_rings > nn->num_r_vecs || + nn->num_tx_rings > nn->num_r_vecs) + nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n", + nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs); + + nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings); + nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings); nn->lsc_handler = nfp_net_irq_lsc; nn->exn_handler = nfp_net_irq_exn; @@ -1491,11 +1491,12 @@ static int nfp_net_poll(struct napi_struct *napi, int budget) { struct nfp_net_r_vector *r_vec = container_of(napi, struct nfp_net_r_vector, napi); - unsigned int pkts_polled; - - nfp_net_tx_complete(r_vec->tx_ring); + unsigned int pkts_polled = 0; - pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); + if (r_vec->tx_ring) + nfp_net_tx_complete(r_vec->tx_ring); + if (r_vec->rx_ring) + pkts_polled = nfp_net_rx(r_vec->rx_ring, budget); if (pkts_polled < budget) { napi_complete_done(napi, pkts_polled); @@ -1743,7 +1744,7 @@ nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings) if (!rings) return; - for (r = 0; r < nn->num_r_vecs; r++) { + for (r = 0; r < nn->num_rx_rings; r++) { nfp_net_rx_ring_bufs_free(nn, &rings[r]); nfp_net_rx_ring_free(&rings[r]); } @@ -1758,11 
+1759,19 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx]; int err; - r_vec->tx_ring = &nn->tx_rings[idx]; - nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx); + if (idx < nn->num_tx_rings) { + r_vec->tx_ring = &nn->tx_rings[idx]; + nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx); + } else { + r_vec->tx_ring = NULL; + } - r_vec->rx_ring = &nn->rx_rings[idx]; - nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx); + if (idx < nn->num_rx_rings) { + r_vec->rx_ring = &nn->rx_rings[idx]; + nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx); + } else { + r_vec->rx_ring = NULL; + } snprintf(r_vec->name, sizeof(r_vec->name), "%s-rxtx-%d", nn->netdev->name, idx); @@ -1839,13 +1848,13 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn) /* copy RX interrupt coalesce parameters */ value = (nn->rx_coalesce_max_frames << 16) | (factor * nn->rx_coalesce_usecs); - for (i = 0; i < nn->num_r_vecs; i++) + for (i = 0; i < nn->num_rx_rings; i++) nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value); /* copy TX interrupt coalesce parameters */ value = (nn->tx_coalesce_max_frames << 16) | (factor * nn->tx_coalesce_usecs); - for (i = 0; i < nn->num_r_vecs; i++) + for (i = 0; i < nn->num_tx_rings; i++) nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value); } @@ -1903,27 +1912,33 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn) if (err) nn_err(nn, "Could not disable device: %d\n", err); - for (r = 0; r < nn->num_r_vecs; r++) { + for (r = 0; r < nn->num_rx_rings; r++) nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring); + for (r = 0; r < nn->num_tx_rings; r++) nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring); + for (r = 0; r < nn->num_r_vecs; r++) nfp_net_vec_clear_ring_data(nn, r); - } nn->ctrl = new_ctrl; } static void -nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, - unsigned int idx) +nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_rx_ring *rx_ring, 
unsigned int idx) { /* Write the DMA address, size and MSI-X info to the device */ - nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), r_vec->rx_ring->dma); - nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(r_vec->rx_ring->cnt)); - nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), r_vec->irq_idx); + nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_idx); +} - nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), r_vec->tx_ring->dma); - nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(r_vec->tx_ring->cnt)); - nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx); +static void +nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn, + struct nfp_net_tx_ring *tx_ring, unsigned int idx) +{ + nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma); + nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt)); + nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_idx); } static int __nfp_net_set_config_and_enable(struct nfp_net *nn) @@ -1948,8 +1963,10 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) update |= NFP_NET_CFG_UPDATE_IRQMOD; } - for (r = 0; r < nn->num_r_vecs; r++) - nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r); + for (r = 0; r < nn->num_tx_rings; r++) + nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r); + for (r = 0; r < nn->num_rx_rings; r++) + nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r); nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ? 
0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1); @@ -1975,7 +1992,7 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn) nn->ctrl = new_ctrl; - for (r = 0; r < nn->num_r_vecs; r++) + for (r = 0; r < nn->num_rx_rings; r++) nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring); /* Since reconfiguration requests while NFP is down are ignored we @@ -2067,20 +2084,22 @@ static int nfp_net_netdev_open(struct net_device *netdev) for (r = 0; r < nn->num_r_vecs; r++) { err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r); if (err) - goto err_free_prev_vecs; - + goto err_cleanup_vec_p; + } + for (r = 0; r < nn->num_tx_rings; r++) { err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt); if (err) - goto err_cleanup_vec_p; - + goto err_free_tx_ring_p; + } + for (r = 0; r < nn->num_rx_rings; r++) { err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring, nn->fl_bufsz, nn->rxd_cnt); if (err) - goto err_free_tx_ring_p; + goto err_flush_free_rx_ring_p; err = nfp_net_rx_ring_bufs_alloc(nn, nn->r_vecs[r].rx_ring); if (err) - goto err_flush_rx_ring_p; + goto err_free_rx_ring_p; } err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings); @@ -2113,17 +2132,21 @@ static int nfp_net_netdev_open(struct net_device *netdev) return 0; err_free_rings: - r = nn->num_r_vecs; -err_free_prev_vecs: + r = nn->num_rx_rings; +err_flush_free_rx_ring_p: while (r--) { nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring); -err_flush_rx_ring_p: +err_free_rx_ring_p: nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring); + } + r = nn->num_tx_rings; err_free_tx_ring_p: + while (r--) nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring); + r = nn->num_r_vecs; err_cleanup_vec_p: + while (r--) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - } kfree(nn->tx_rings); err_free_rx_rings: kfree(nn->rx_rings); @@ -2162,12 +2185,14 @@ static void nfp_net_close_free_all(struct nfp_net *nn) { unsigned int r; - for (r = 0; r < nn->num_r_vecs; r++) { + for (r = 0; r < nn->num_rx_rings; r++) { 
nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring); nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring); + } + for (r = 0; r < nn->num_tx_rings; r++) nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring); + for (r = 0; r < nn->num_r_vecs; r++) nfp_net_cleanup_vector(nn, &nn->r_vecs[r]); - } kfree(nn->rx_rings); kfree(nn->tx_rings); @@ -2686,7 +2711,6 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, { struct net_device *netdev; struct nfp_net *nn; - int nqs; netdev = alloc_etherdev_mqs(sizeof(struct nfp_net), max_tx_rings, max_rx_rings); @@ -2702,9 +2726,9 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->max_tx_rings = max_tx_rings; nn->max_rx_rings = max_rx_rings; - nqs = netif_get_num_default_rss_queues(); - nn->num_tx_rings = min_t(int, nqs, max_tx_rings); - nn->num_rx_rings = min_t(int, nqs, max_rx_rings); + nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus()); + nn->num_rx_rings = min_t(unsigned int, max_rx_rings, + netif_get_num_default_rss_queues()); nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings); nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus());