From aaebaf50b502648b1d4d8c93b4be133944c2bbd0 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 3 May 2017 10:28:53 -0700 Subject: [PATCH 1/9] ixgbe: fix race condition with PTP_TX_IN_PROGRESS bits Hardware related to the ixgbe driver is limited to handling a single Tx timestamp request at a time. Thus, the driver ignores requests for Tx timestamp while waiting for the current request to finish. It uses a state bit lock which enforces that only one timestamp request is honored at a time. Unfortunately this suffers from a simple race condition. The bit lock is not cleared until after skb_tstamp_tx() is called notifying applications of a new Tx timestamp. Even a well behaved application sending only one packet at a time and waiting for a response can wake up and send a new packet before the bit lock is cleared. This results in needlessly dropping some Tx timestamp requests. We can fix this by unlocking the state bit as soon as we read the Timestamp register, as this is the first point at which it is safe to unlock. To avoid issues with the skb pointer, we'll use a copy of the pointer and set the global variable in the driver structure to NULL first. This ensures that the next timestamp request does not modify our local copy of the skb pointer. This ensures that well behaved applications do not accidentally race with the unlock bit. Obviously an application which sends multiple Tx timestamp requests at once will still only timestamp one packet at a time. Unfortunately there is nothing we can do about this. Reported-by: David Mirabito Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index d44c728fdc0be..4a2000bfd4aec 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -672,17 +672,26 @@ static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter) */ static void ixgbe_ptp_tx_hwtstamp(struct ixgbe_adapter *adapter) { + struct sk_buff *skb = adapter->ptp_tx_skb; struct ixgbe_hw *hw = &adapter->hw; struct skb_shared_hwtstamps shhwtstamps; u64 regval = 0; regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPL); regval |= (u64)IXGBE_READ_REG(hw, IXGBE_TXSTMPH) << 32; - ixgbe_ptp_convert_to_hwtstamp(adapter, &shhwtstamps, regval); - skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); - ixgbe_ptp_clear_tx_timestamp(adapter); + /* Handle cleanup of the ptp_tx_skb ourselves, and unlock the state + * bit prior to notifying the stack via skb_tstamp_tx(). This prevents + * well behaved applications from attempting to timestamp again prior + * to the lock bit being clear. + */ + adapter->ptp_tx_skb = NULL; + clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); + + /* Notify the stack and then free the skb after we've unlocked */ + skb_tstamp_tx(skb, &shhwtstamps); + dev_kfree_skb_any(skb); } /** From 5fef124d9c75942dc5c2445a3faa8ad37cbf4c82 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 3 May 2017 10:28:56 -0700 Subject: [PATCH 2/9] ixgbe: avoid permanent lock of *_PTP_TX_IN_PROGRESS The ixgbe driver uses a state bit lock to avoid handling more than one Tx timestamp request at once. This is required because hardware is limited to a single set of registers for Tx timestamps. The state bit lock is not properly cleaned up during ixgbe_xmit_frame_ring() if the transmit fails such as due to DMA or TSO failure. In some hardware this results in blocking timestamps until the service task times out. In other hardware this results in a permanent lock of the timestamp bit because we never receive an interrupt indicating the timestamp occurred, since indeed the packet was never transmitted. Fix this by checking for DMA and TSO errors in ixgbe_xmit_frame_ring() and properly cleaning up after ourselves when these occur. Reported-by: Reported-by: David Mirabito Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 20 ++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 812319ab77db9..5773df2483601 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7875,9 +7875,9 @@ static inline int ixgbe_maybe_stop_tx(struct ixgbe_ring *tx_ring, u16 size) #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ IXGBE_TXD_CMD_RS) -static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, - struct ixgbe_tx_buffer *first, - const u8 hdr_len) +static int ixgbe_tx_map(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, + const u8 hdr_len) { struct sk_buff *skb = first->skb; struct ixgbe_tx_buffer *tx_buffer; @@ -8004,7 +8004,7 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, mmiowb(); } - return; + return 0; dma_error: dev_err(tx_ring->dev, "TX DMA map failed\n"); tx_buffer = &tx_ring->tx_buffer_info[i]; @@ -8034,6 +8034,8 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, first->skb = NULL; tx_ring->next_to_use = i; + + return -1; } static void ixgbe_atr(struct ixgbe_ring *ring, @@ -8407,13 +8409,21 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, #ifdef IXGBE_FCOE xmit_fcoe: #endif /* IXGBE_FCOE */ - ixgbe_tx_map(tx_ring, first, hdr_len); + if (ixgbe_tx_map(tx_ring, first, hdr_len)) + goto cleanup_tx_timestamp; return NETDEV_TX_OK; out_drop: dev_kfree_skb_any(first->skb); first->skb = NULL; +cleanup_tx_timestamp: + if (unlikely(tx_flags & IXGBE_TX_FLAGS_TSTAMP)) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + cancel_work_sync(&adapter->ptp_tx_work); + clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); + } return NETDEV_TX_OK; } From 4cc74c01ef8bb59fae98aeda359e8bcf6148943a Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 3 May 2017 10:29:00 -0700 Subject: [PATCH 3/9] ixgbe: add statistic indicating number of skipped Tx timestamps The ixgbe driver can only handle one Tx timestamp request at a time. This means it is possible for an application timestamp request to be ignored. There is no easy way for an administrator to determine if this occurred. Add a new statistic which tracks this, tx_hwtstamp_skipped. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 3 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 23 +++++++++++-------- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 76263762bea17..eb36106218ad0 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -733,6 +733,7 @@ struct ixgbe_adapter { struct timecounter hw_tc; u32 base_incval; u32 tx_hwtstamp_timeouts; + u32 tx_hwtstamp_skipped; u32 rx_hwtstamp_cleared; void (*ptp_setup_sdp)(struct ixgbe_adapter *); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 9113e8099b037..2890e926b4983 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -111,6 +111,9 @@ static const struct ixgbe_stats ixgbe_gstrings_stats[] = { {"os2bmc_tx_by_bmc", IXGBE_STAT(stats.b2ospc)}, {"os2bmc_tx_by_host", IXGBE_STAT(stats.o2bspc)}, {"os2bmc_rx_by_host", IXGBE_STAT(stats.b2ogprc)}, + {"tx_hwtstamp_timeouts", IXGBE_STAT(tx_hwtstamp_timeouts)}, + {"tx_hwtstamp_skipped", IXGBE_STAT(tx_hwtstamp_skipped)}, + {"rx_hwtstamp_cleared", IXGBE_STAT(rx_hwtstamp_cleared)}, #ifdef IXGBE_FCOE {"fcoe_bad_fccrc", IXGBE_STAT(stats.fccrc)}, {"rx_fcoe_dropped", IXGBE_STAT(stats.fcoerpdc)}, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5773df2483601..4ea1137ea23f7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8337,16 +8337,19 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, protocol = vlan_get_protocol(skb); if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - adapter->ptp_clock && - !test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, - &adapter->state)) { - skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - tx_flags |= IXGBE_TX_FLAGS_TSTAMP; - - /* schedule check for Tx timestamp */ - adapter->ptp_tx_skb = skb_get(skb); - adapter->ptp_tx_start = jiffies; - schedule_work(&adapter->ptp_tx_work); + adapter->ptp_clock) { + if (!test_and_set_bit_lock(__IXGBE_PTP_TX_IN_PROGRESS, + &adapter->state)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IXGBE_TX_FLAGS_TSTAMP; + + /* schedule check for Tx timestamp */ + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + schedule_work(&adapter->ptp_tx_work); + } else { + adapter->tx_hwtstamp_skipped++; + } } skb_tx_timestamp(skb); From 622a2ef538fb3ca8eccf49716aba8267d6e95a47 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 3 May 2017 10:29:04 -0700 Subject: [PATCH 4/9] ixgbe: check for Tx timestamp timeouts during watchdog The ixgbe driver has logic to handle only one Tx timestamp at a time, using a state bit lock to avoid multiple requests at once. It may be possible, if incredibly unlikely, that a Tx timestamp event is requested but never completes. Since we use an interrupt scheme to determine when the Tx timestamp occurred we would never clear the state bit in this case. Add an ixgbe_ptp_tx_hang() function similar to the already existing ixgbe_ptp_rx_hang() function. This function runs in the watchdog routine and makes sure we eventually recover from this case instead of permanently disabling Tx timestamps. Note: there is no currently known way to cause this without hacking the driver code to force it. Signed-off-by: Jacob Keller Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c | 27 +++++++++++++++++++ 3 files changed, 29 insertions(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index eb36106218ad0..dd5578756ae0f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -961,6 +961,7 @@ void ixgbe_ptp_suspend(struct ixgbe_adapter *adapter); void ixgbe_ptp_stop(struct ixgbe_adapter *adapter); void ixgbe_ptp_overflow_check(struct ixgbe_adapter *adapter); void ixgbe_ptp_rx_hang(struct ixgbe_adapter *adapter); +void ixgbe_ptp_tx_hang(struct ixgbe_adapter *adapter); void ixgbe_ptp_rx_pktstamp(struct ixgbe_q_vector *, struct sk_buff *); void ixgbe_ptp_rx_rgtstamp(struct ixgbe_q_vector *, struct sk_buff *skb); static inline void ixgbe_ptp_rx_hwtstamp(struct ixgbe_ring *rx_ring, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 4ea1137ea23f7..3ed212f5a43e9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7635,6 +7635,7 @@ static void ixgbe_service_task(struct work_struct *work) if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state)) { ixgbe_ptp_overflow_check(adapter); ixgbe_ptp_rx_hang(adapter); + ixgbe_ptp_tx_hang(adapter); } ixgbe_service_event_complete(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index 4a2000bfd4aec..86d6924a2b714 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -662,6 +662,33 @@ static void ixgbe_ptp_clear_tx_timestamp(struct ixgbe_adapter *adapter) clear_bit_unlock(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state); } +/** + * ixgbe_ptp_tx_hang - detect error case where Tx timestamp never finishes + * @adapter: private network adapter structure + */ +void ixgbe_ptp_tx_hang(struct ixgbe_adapter *adapter) +{ + bool timeout = time_is_before_jiffies(adapter->ptp_tx_start + + IXGBE_PTP_TX_TIMEOUT); + + if (!adapter->ptp_tx_skb) + return; + + if (!test_bit(__IXGBE_PTP_TX_IN_PROGRESS, &adapter->state)) + return; + + /* If we haven't received a timestamp within the timeout, it is + * reasonable to assume that it will never occur, so we can unlock the + * timestamp bit when this occurs. + */ + if (timeout) { + cancel_work_sync(&adapter->ptp_tx_work); + ixgbe_ptp_clear_tx_timestamp(adapter); + adapter->tx_hwtstamp_timeouts++; + e_warn(drv, "clearing Tx timestamp hang\n"); + } +} + /** * ixgbe_ptp_tx_hwtstamp - utility function which checks for TX time stamp * @adapter: the private adapter struct From 01ec5525fc2a0fcc8f4b796b9bb4ee1c6a5d9415 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Thu, 18 May 2017 14:55:07 -0700 Subject: [PATCH 5/9] ixgbe: Bump version number Update ixgbe version number. Signed-off-by: Tony Nguyen Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3ed212f5a43e9..c17c8317e0016 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -76,7 +76,7 @@ char ixgbe_default_device_descr[] = static char ixgbe_default_device_descr[] = "Intel(R) 10 Gigabit Network Connection"; #endif -#define DRV_VERSION "5.0.0-k" +#define DRV_VERSION "5.1.0-k" const char ixgbe_driver_version[] = DRV_VERSION; static const char ixgbe_copyright[] = "Copyright (c) 1999-2016 Intel Corporation."; From adc2c83e2b317de39220e0004b6556b5ea2bf412 Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Thu, 18 May 2017 14:55:23 -0700 Subject: [PATCH 6/9] ixgbevf: Bump version number Update ixgbevf version number. Signed-off-by: Tony Nguyen Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index aced91c9c034b..084c535827931 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -57,7 +57,7 @@ const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = "Intel(R) 10 Gigabit PCI Express Virtual Function Network Driver"; -#define DRV_VERSION "3.2.2-k" +#define DRV_VERSION "4.1.0-k" const char ixgbevf_driver_version[] = DRV_VERSION; static char ixgbevf_copyright[] = "Copyright (c) 2009 - 2015 Intel Corporation."; From d28b194955a9b6e6ccf4383f1baba78bb5a528db Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Tue, 23 May 2017 14:02:23 -0700 Subject: [PATCH 7/9] ixgbe: fix writes to PFQDE ixgbe_write_qde() was ignoring the qde parameter which resulted in PFQDE.HIDE_VLAN not being set for X550. Signed-off-by: Emil Tantilov Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index e2766da5fe026..0760bd7eeb01a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -818,7 +818,7 @@ static inline void ixgbe_write_qde(struct ixgbe_adapter *adapter, u32 vf, IXGBE_WRITE_FLUSH(hw); /* indicate to hardware that we want to set drop enable */ - reg = IXGBE_QDE_WRITE | IXGBE_QDE_ENABLE; + reg = IXGBE_QDE_WRITE | qde; reg |= i << IXGBE_QDE_IDX_SHIFT; IXGBE_WRITE_REG(hw, IXGBE_QDE, reg); } From 4ebdf8af3017ce242f37be2ae5e5f655dc9846ef Mon Sep 17 00:00:00 2001 From: Tony Nguyen Date: Thu, 1 Jun 2017 12:06:05 -0700 Subject: [PATCH 8/9] ixgbe: Resolve cppcheck format string warning cppcheck warns that the format string is incorrect in the function ixgbe_get_strings(). Since the value cannot be negative, change the variable to unsigned which matches the format specifier. Signed-off-by: Tony Nguyen Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 2890e926b4983..72c565712a5f8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1277,7 +1277,7 @@ static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { char *p = (char *)data; - int i; + unsigned int i; switch (stringset) { case ETH_SS_TEST: From a09c0fc3f5d775231f1884e0e66c495065a461ee Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Sat, 3 Jun 2017 18:01:17 -0400 Subject: [PATCH 9/9] ixgbe: pci_set_drvdata must be called before register_netdev We call pci_set_drvdata immediately after calling register_netdev, which leaves a window where tasks writing to the sriov_numvfs sysfs attribute can sneak in and crash the kernel. register_netdev cleans up after itself so placing pci_set_drvdata immediately before it should preserve the intent of commit 0fb6a55cc31f ("ixgbe: fix crash on rmmod after probe fail"). Fixes: 0fb6a55cc31f ("ixgbe: fix crash on rmmod after probe fail") Signed-off-by: Jeff Mahoney Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c17c8317e0016..f3dc5dea93005 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -10372,11 +10372,11 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) "hardware.\n"); } strcpy(netdev->name, "eth%d"); + pci_set_drvdata(pdev, adapter); err = register_netdev(netdev); if (err) goto err_register; - pci_set_drvdata(pdev, adapter); /* power down the optics for 82599 SFP+ fiber */ if (hw->mac.ops.disable_tx_laser)