From d5c7d7f6427cd7c39353d09bf47bfbc7800b6a53 Mon Sep 17 00:00:00 2001 From: Asaf Vertz Date: Thu, 8 Jan 2015 06:01:00 +0000 Subject: [PATCH 01/15] e1000: fix time comparison To be future-proof and for better readability the time comparisons are modified to use time_after_eq() instead of plain, error-prone math. Signed-off-by: Asaf Vertz Acked-by: Jacob Keller Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index b691eb4f6376..4270ad2d4ddf 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -24,6 +24,7 @@ /* ethtool support for e1000 */ #include "e1000.h" +#include #include enum {NETDEV_STATS, E1000_STATS}; @@ -1460,7 +1461,7 @@ static int e1000_run_loopback_test(struct e1000_adapter *adapter) ret_val = 13; /* ret_val is the same as mis-compare */ break; } - if (jiffies >= (time + 2)) { + if (time_after_eq(jiffies, time + 2)) { ret_val = 14; /* error code for time out error */ break; } From 6930895df994af212985396f1274712aaaa5bc26 Mon Sep 17 00:00:00 2001 From: Mathias Koehrer Date: Thu, 7 Aug 2014 18:51:53 +0000 Subject: [PATCH 02/15] e1000e: Fix 82572EI that has no hardware timestamp support With the Intel 82527EI (driver: e1000e) there is an issue when running the ptpd2 program, that leads to a kernel oops. The reason is here that in e1000_xmit_frame() a work queue will be scheduled that has not been initialized in this case. The work queue "tx_hwstamp_work" will only be initialized if adapter->flags & FLAG_HAS_HW_TIMESTAMP set. This check is missing in e1000_xmit_frame(). The following patch adds the missing check. Signed-off-by: Mathias Koehrer Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 38cb586b1bf4..aa39a81a6df6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5636,8 +5636,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, count = e1000_tx_map(tx_ring, skb, first, adapter->tx_fifo_limit, nr_frags); if (count) { - if (unlikely((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - !adapter->tx_hwtstamp_skb)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && + (adapter->flags & FLAG_HAS_HW_TIMESTAMP) && + !adapter->tx_hwtstamp_skb) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= E1000_TX_FLAGS_HWTSTAMP; adapter->tx_hwtstamp_skb = skb_get(skb); From 074c3582192b6cb340510b2a6e4579e78f671dcf Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Wed, 25 Jun 2014 02:37:13 +0000 Subject: [PATCH 03/15] virtio_net: add software timestamp support This patch enables the use of software timestamping via the virtio_net driver. Signed-off-by: Jacob Keller Signed-off-by: Jeff Kirsher --- drivers/net/virtio_net.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 11e2e8131359..9bd71d53c5e0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -925,6 +925,9 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Free up any pending old buffers before queueing new ones. */ free_old_xmit_skbs(sq); + /* timestamp packet in software */ + skb_tx_timestamp(skb); + /* Try to transmit */ err = xmit_skb(sq, skb); @@ -1376,6 +1379,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .get_ringparam = virtnet_get_ringparam, .set_channels = virtnet_set_channels, .get_channels = virtnet_get_channels, + .get_ts_info = ethtool_op_get_ts_info, }; #define MIN_MTU 68 From 95dd44b4f31eef3cce0f98967635431e10e31c98 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 14 Nov 2014 00:56:19 +0000 Subject: [PATCH 04/15] igb: Clean-up page reuse code This patch cleans up the page reuse code getting it into a state where all the workarounds needed are in place as well as cleaning up a few minor oversights such as using __free_pages instead of put_page to drop a locally allocated page. It also cleans up how we clear the descriptor status bits. Previously they were zeroed as a part of clearing the hdr_addr. However the hdr_addr is a 64 bit field and 64 bit writes can be a bit more expensive on on 32 bit systems. Since we are no longer using the header split feature the upper 32 bits of the address no longer need to be cleared. As a result we can just clear the status bits and leave the length and VLAN fields as-is which should provide more information in debugging. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 35 +++++++++++------------ 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6c25ec314183..d1480f3054fe 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6527,15 +6527,17 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring, DMA_FROM_DEVICE); } +static inline bool igb_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; +} + static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, struct page *page, unsigned int truesize) { /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_node_id())) - return false; - - if (unlikely(page->pfmemalloc)) + if (unlikely(igb_page_is_reserved(page))) return false; #if (PAGE_SIZE < 8192) @@ -6545,22 +6547,19 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, /* flip page offset to other buffer */ rx_buffer->page_offset ^= IGB_RX_BUFSZ; - - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - atomic_inc(&page->_count); #else /* move offset up to the next cache line */ rx_buffer->page_offset += truesize; if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) return false; - - /* bump ref count on page before it is given to the stack */ - get_page(page); #endif + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. + */ + atomic_inc(&page->_count); + return true; } @@ -6603,13 +6602,12 @@ static bool igb_add_rx_frag(struct igb_ring *rx_ring, memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* we can reuse buffer as-is, just make sure it is local */ - if (likely((page_to_nid(page) == numa_node_id()) && - !page->pfmemalloc)) + /* page is not reserved, we can reuse buffer as-is */ + if (likely(!igb_page_is_reserved(page))) return true; /* this page cannot be reused so discard it */ - put_page(page); + __free_page(page); return false; } @@ -6627,7 +6625,6 @@ static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, struct page *page; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - page = rx_buffer->page; prefetchw(page); @@ -7042,8 +7039,8 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->read.hdr_addr = 0; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.upper.status_error = 0; cleaned_count--; } while (cleaned_count); From ba5b8dcdb865efaa35692516a7153dc631ba6ffa Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 14 Nov 2014 00:56:24 +0000 Subject: [PATCH 05/15] fm10k: Clean-up page reuse code This patch cleans up the page reuse code getting it into a state where all the workarounds needed are in place as well as cleaning up a few minor oversights such as using __free_pages instead of put_page to drop a locally allocated page. It also cleans up how we clear the descriptor status bits. Previously they were zeroed as a part of clearing the hdr_addr. However the hdr_addr is a 64 bit field and 64 bit writes can be a bit more expensive on on 32 bit systems. Since we are no longer using the header split feature the upper 32 bits of the address no longer need to be cleared. As a result we can just clear the status bits and leave the length and VLAN fields as-is which should provide more information in debugging. Signed-off-by: Alexander Duyck Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index caa43f7c2931..c7a19a5e0ec9 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -97,7 +97,6 @@ static bool fm10k_alloc_mapped_page(struct fm10k_ring *rx_ring, */ if (dma_mapping_error(rx_ring->dev, dma)) { __free_page(page); - bi->page = NULL; rx_ring->rx_stats.alloc_failed++; return false; @@ -147,8 +146,8 @@ void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count) i -= rx_ring->count; } - /* clear the hdr_addr for the next_to_use descriptor */ - rx_desc->q.hdr_addr = 0; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->d.staterr = 0; cleaned_count--; } while (cleaned_count); @@ -194,7 +193,7 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; /* transfer page from old buffer to new buffer */ - memcpy(new_buff, old_buff, sizeof(struct fm10k_rx_buffer)); + *new_buff = *old_buff; /* sync the buffer for use by the device */ dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, @@ -203,12 +202,17 @@ static void fm10k_reuse_rx_page(struct fm10k_ring *rx_ring, DMA_FROM_DEVICE); } +static inline bool fm10k_page_is_reserved(struct page *page) +{ + return (page_to_nid(page) != numa_mem_id()) || page->pfmemalloc; +} + static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, struct page *page, unsigned int truesize) { /* avoid re-using remote pages */ - if (unlikely(page_to_nid(page) != numa_mem_id())) + if (unlikely(fm10k_page_is_reserved(page))) return false; #if (PAGE_SIZE < 8192) @@ -218,22 +222,19 @@ static bool fm10k_can_reuse_rx_page(struct fm10k_rx_buffer *rx_buffer, /* flip page offset to other buffer */ rx_buffer->page_offset ^= FM10K_RX_BUFSZ; - - /* Even if we own the page, we are not allowed to use atomic_set() - * This would break get_page_unless_zero() users. - */ - atomic_inc(&page->_count); #else /* move offset up to the next cache line */ rx_buffer->page_offset += truesize; if (rx_buffer->page_offset > (PAGE_SIZE - FM10K_RX_BUFSZ)) return false; - - /* bump ref count on page before it is given to the stack */ - get_page(page); #endif + /* Even if we own the page, we are not allowed to use atomic_set() + * This would break get_page_unless_zero() users. + */ + atomic_inc(&page->_count); + return true; } @@ -270,12 +271,12 @@ static bool fm10k_add_rx_frag(struct fm10k_ring *rx_ring, memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); - /* we can reuse buffer as-is, just make sure it is local */ - if (likely(page_to_nid(page) == numa_mem_id())) + /* page is not reserved, we can reuse buffer as-is */ + if (likely(!fm10k_page_is_reserved(page))) return true; /* this page cannot be reused so discard it */ - put_page(page); + __free_page(page); return false; } @@ -293,7 +294,6 @@ static struct sk_buff *fm10k_fetch_rx_buffer(struct fm10k_ring *rx_ring, struct page *page; rx_buffer = &rx_ring->rx_buffer[rx_ring->next_to_clean]; - page = rx_buffer->page; prefetchw(page); From 61d7f75f45231e4a2f2ab7d975555f55f0019800 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Nov 2014 20:51:10 +0000 Subject: [PATCH 06/15] igb: refactor time sync interrupt handling The code that handles the time sync interrupt is repeated in three different places. This patch refactors the identical code blocks into a single helper function. Signed-off-by: Richard Cochran Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 49 +++++++++-------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index d1480f3054fe..135ac5c45abd 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5384,6 +5384,19 @@ void igb_update_stats(struct igb_adapter *adapter, } } +static void igb_tsync_interrupt(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 tsicr = rd32(E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + wr32(E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } +} + static irqreturn_t igb_msix_other(int irq, void *data) { struct igb_adapter *adapter = data; @@ -5415,16 +5428,8 @@ static irqreturn_t igb_msix_other(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); wr32(E1000_EIMS, adapter->eims_other); @@ -6203,16 +6208,8 @@ static irqreturn_t igb_intr_msi(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); napi_schedule(&q_vector->napi); @@ -6257,16 +6254,8 @@ static irqreturn_t igb_intr(int irq, void *data) mod_timer(&adapter->watchdog_timer, jiffies + 1); } - if (icr & E1000_ICR_TS) { - u32 tsicr = rd32(E1000_TSICR); - - if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); - /* retrieve hardware timestamp */ - schedule_work(&adapter->ptp_tx_work); - } - } + if (icr & E1000_ICR_TS) + igb_tsync_interrupt(adapter); napi_schedule(&q_vector->napi); From 8298c1ecd534e45e52f05a29122b0f6c1a580d25 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Nov 2014 20:51:15 +0000 Subject: [PATCH 07/15] igb: serialize access to the time sync interrupt registers The time sync related interrupt registers may be manipulated from different contexts. This patch protects the registers from being asynchronously changed by the reset function. Also, the patch removes a misleading comment. The reset function is disabling a bunch of functions, not enabling them. Signed-off-by: Richard Cochran Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ptp.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 5e7a4e30a7b6..8389bb40a4bf 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -900,6 +900,7 @@ void igb_ptp_stop(struct igb_adapter *adapter) void igb_ptp_reset(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; + unsigned long flags; if (!(adapter->flags & IGB_FLAG_PTP)) return; @@ -907,6 +908,8 @@ void igb_ptp_reset(struct igb_adapter *adapter) /* reset the tstamp_config */ igb_ptp_set_timestamp_mode(adapter, &adapter->tstamp_config); + spin_lock_irqsave(&adapter->tmreg_lock, flags); + switch (adapter->hw.mac.type) { case e1000_82576: /* Dial the nominal frequency. */ @@ -917,23 +920,24 @@ void igb_ptp_reset(struct igb_adapter *adapter) case e1000_i350: case e1000_i210: case e1000_i211: - /* Enable the timer functions and interrupts. */ wr32(E1000_TSAUXC, 0x0); wr32(E1000_TSIM, TSYNC_INTERRUPTS); wr32(E1000_IMS, E1000_IMS_TS); break; default: /* No work to do. */ - return; + goto out; } /* Re-initialize the timer. */ if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { struct timespec ts = ktime_to_timespec(ktime_get_real()); - igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + igb_ptp_write_i210(adapter, &ts); } else { timecounter_init(&adapter->tc, &adapter->cc, ktime_to_ns(ktime_get_real())); } +out: + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); } From 00c65578b47bec3f92ce259d0c9d959ac54fff28 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Nov 2014 20:51:20 +0000 Subject: [PATCH 08/15] igb: enable internal PPS for the i210 The i210 device can produce an interrupt on the full second. This patch allows using this interrupt to generate an internal PPS event for adjusting the kernel system time. Signed-off-by: Richard Cochran Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 18 ++++++++++--- drivers/net/ethernet/intel/igb/igb_ptp.c | 32 +++++++++++++++++++++-- 2 files changed, 45 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 135ac5c45abd..e84416286dde 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5387,14 +5387,26 @@ void igb_update_stats(struct igb_adapter *adapter, static void igb_tsync_interrupt(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; - u32 tsicr = rd32(E1000_TSICR); + struct ptp_clock_event event; + u32 ack = 0, tsicr = rd32(E1000_TSICR); + + if (tsicr & TSINTR_SYS_WRAP) { + event.type = PTP_CLOCK_PPS; + if (adapter->ptp_caps.pps) + ptp_clock_event(adapter->ptp_clock, &event); + else + dev_err(&adapter->pdev->dev, "unexpected SYS WRAP"); + ack |= TSINTR_SYS_WRAP; + } if (tsicr & E1000_TSICR_TXTS) { - /* acknowledge the interrupt */ - wr32(E1000_TSICR, E1000_TSICR_TXTS); /* retrieve hardware timestamp */ schedule_work(&adapter->ptp_tx_work); + ack |= E1000_TSICR_TXTS; } + + /* acknowledge the interrupts */ + wr32(E1000_TSICR, ack); } static irqreturn_t igb_msix_other(int irq, void *data) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 8389bb40a4bf..98c58d921228 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -355,6 +355,34 @@ static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, return 0; } +static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + struct igb_adapter *igb = + container_of(ptp, struct igb_adapter, ptp_caps); + struct e1000_hw *hw = &igb->hw; + unsigned long flags; + u32 tsim; + + switch (rq->type) { + case PTP_CLK_REQ_PPS: + spin_lock_irqsave(&igb->tmreg_lock, flags); + tsim = rd32(E1000_TSIM); + if (on) + tsim |= TSINTR_SYS_WRAP; + else + tsim &= ~TSINTR_SYS_WRAP; + wr32(E1000_TSIM, tsim); + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + return 0; + + default: + break; + } + + return -EOPNOTSUPP; +} + static int igb_ptp_feature_enable(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { @@ -797,12 +825,12 @@ void igb_ptp_init(struct igb_adapter *adapter) adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 62499999; adapter->ptp_caps.n_ext_ts = 0; - adapter->ptp_caps.pps = 0; + adapter->ptp_caps.pps = 1; adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; adapter->ptp_caps.gettime = igb_ptp_gettime_i210; adapter->ptp_caps.settime = igb_ptp_settime_i210; - adapter->ptp_caps.enable = igb_ptp_feature_enable; + adapter->ptp_caps.enable = igb_ptp_feature_enable_i210; /* Enable the timer functions by clearing bit 31. */ wr32(E1000_TSAUXC, 0x0); break; From 720db4ffd0846570c1ddc82e7bb661ab0a676fad Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Nov 2014 20:51:26 +0000 Subject: [PATCH 09/15] igb: enable auxiliary PHC functions for the i210 The i210 device offers a number of special PTP Hardware Clock features on the Software Defined Pins (SDPs). This patch adds support for two of the possible functions, namely time stamping external events, and periodic output signals. The assignment of PHC functions to the four SDP can be freely chosen by the user. Signed-off-by: Richard Cochran Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 9 + drivers/net/ethernet/intel/igb/igb_main.c | 51 ++++- drivers/net/ethernet/intel/igb/igb_ptp.c | 222 +++++++++++++++++++++- 3 files changed, 276 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index ee22da391474..c2bd4f98a837 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -343,6 +343,9 @@ struct hwmon_buff { }; #endif +#define IGB_N_EXTTS 2 +#define IGB_N_PEROUT 2 +#define IGB_N_SDP 4 #define IGB_RETA_SIZE 128 /* board specific private data structure */ @@ -439,6 +442,12 @@ struct igb_adapter { u32 tx_hwtstamp_timeouts; u32 rx_hwtstamp_cleared; + struct ptp_pin_desc sdp_config[IGB_N_SDP]; + struct { + struct timespec start; + struct timespec period; + } perout[IGB_N_PEROUT]; + char fw_version[32]; #ifdef CONFIG_IGB_HWMON struct hwmon_buff *igb_hwmon_buff; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index e84416286dde..6e6544970b83 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5388,7 +5388,8 @@ static void igb_tsync_interrupt(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct ptp_clock_event event; - u32 ack = 0, tsicr = rd32(E1000_TSICR); + struct timespec ts; + u32 ack = 0, tsauxc, sec, nsec, tsicr = rd32(E1000_TSICR); if (tsicr & TSINTR_SYS_WRAP) { event.type = PTP_CLOCK_PPS; @@ -5405,6 +5406,54 @@ static void igb_tsync_interrupt(struct igb_adapter *adapter) ack |= E1000_TSICR_TXTS; } + if (tsicr & TSINTR_TT0) { + spin_lock(&adapter->tmreg_lock); + ts = timespec_add(adapter->perout[0].start, + adapter->perout[0].period); + wr32(E1000_TRGTTIML0, ts.tv_nsec); + wr32(E1000_TRGTTIMH0, ts.tv_sec); + tsauxc = rd32(E1000_TSAUXC); + tsauxc |= TSAUXC_EN_TT0; + wr32(E1000_TSAUXC, tsauxc); + adapter->perout[0].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= TSINTR_TT0; + } + + if (tsicr & TSINTR_TT1) { + spin_lock(&adapter->tmreg_lock); + ts = timespec_add(adapter->perout[1].start, + adapter->perout[1].period); + wr32(E1000_TRGTTIML1, ts.tv_nsec); + wr32(E1000_TRGTTIMH1, ts.tv_sec); + tsauxc = rd32(E1000_TSAUXC); + tsauxc |= TSAUXC_EN_TT1; + wr32(E1000_TSAUXC, tsauxc); + adapter->perout[1].start = ts; + spin_unlock(&adapter->tmreg_lock); + ack |= TSINTR_TT1; + } + + if (tsicr & TSINTR_AUTT0) { + nsec = rd32(E1000_AUXSTMPL0); + sec = rd32(E1000_AUXSTMPH0); + event.type = PTP_CLOCK_EXTTS; + event.index = 0; + event.timestamp = sec * 1000000000ULL + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= TSINTR_AUTT0; + } + + if (tsicr & TSINTR_AUTT1) { + nsec = rd32(E1000_AUXSTMPL1); + sec = rd32(E1000_AUXSTMPH1); + event.type = PTP_CLOCK_EXTTS; + event.index = 1; + event.timestamp = sec * 1000000000ULL + nsec; + ptp_clock_event(adapter->ptp_clock, &event); + ack |= TSINTR_AUTT1; + } + /* acknowledge the interrupts */ wr32(E1000_TSICR, ack); } diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 98c58d921228..d20fc8ed11f1 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -355,16 +355,204 @@ static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, return 0; } +static void igb_pin_direction(int pin, int input, u32 *ctrl, u32 *ctrl_ext) +{ + u32 *ptr = pin < 2 ? ctrl : ctrl_ext; + u32 mask[IGB_N_SDP] = { + E1000_CTRL_SDP0_DIR, + E1000_CTRL_SDP1_DIR, + E1000_CTRL_EXT_SDP2_DIR, + E1000_CTRL_EXT_SDP3_DIR, + }; + + if (input) + *ptr &= ~mask[pin]; + else + *ptr |= mask[pin]; +} + +static void igb_pin_extts(struct igb_adapter *igb, int chan, int pin) +{ + struct e1000_hw *hw = &igb->hw; + u32 aux0_sel_sdp[IGB_N_SDP] = { + AUX0_SEL_SDP0, AUX0_SEL_SDP1, AUX0_SEL_SDP2, AUX0_SEL_SDP3, + }; + u32 aux1_sel_sdp[IGB_N_SDP] = { + AUX1_SEL_SDP0, AUX1_SEL_SDP1, AUX1_SEL_SDP2, AUX1_SEL_SDP3, + }; + u32 ts_sdp_en[IGB_N_SDP] = { + TS_SDP0_EN, TS_SDP1_EN, TS_SDP2_EN, TS_SDP3_EN, + }; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(E1000_CTRL); + ctrl_ext = rd32(E1000_CTRL_EXT); + tssdp = rd32(E1000_TSSDP); + + igb_pin_direction(pin, 1, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an output. */ + tssdp &= ~ts_sdp_en[pin]; + + if (chan == 1) { + tssdp &= ~AUX1_SEL_SDP3; + tssdp |= aux1_sel_sdp[pin] | AUX1_TS_SDP_EN; + } else { + tssdp &= ~AUX0_SEL_SDP3; + tssdp |= aux0_sel_sdp[pin] | AUX0_TS_SDP_EN; + } + + wr32(E1000_TSSDP, tssdp); + wr32(E1000_CTRL, ctrl); + wr32(E1000_CTRL_EXT, ctrl_ext); +} + +static void igb_pin_perout(struct igb_adapter *igb, int chan, int pin) +{ + struct e1000_hw *hw = &igb->hw; + u32 aux0_sel_sdp[IGB_N_SDP] = { + AUX0_SEL_SDP0, AUX0_SEL_SDP1, AUX0_SEL_SDP2, AUX0_SEL_SDP3, + }; + u32 aux1_sel_sdp[IGB_N_SDP] = { + AUX1_SEL_SDP0, AUX1_SEL_SDP1, AUX1_SEL_SDP2, AUX1_SEL_SDP3, + }; + u32 ts_sdp_en[IGB_N_SDP] = { + TS_SDP0_EN, TS_SDP1_EN, TS_SDP2_EN, TS_SDP3_EN, + }; + u32 ts_sdp_sel_tt0[IGB_N_SDP] = { + TS_SDP0_SEL_TT0, TS_SDP1_SEL_TT0, + TS_SDP2_SEL_TT0, TS_SDP3_SEL_TT0, + }; + u32 ts_sdp_sel_tt1[IGB_N_SDP] = { + TS_SDP0_SEL_TT1, TS_SDP1_SEL_TT1, + TS_SDP2_SEL_TT1, TS_SDP3_SEL_TT1, + }; + u32 ts_sdp_sel_clr[IGB_N_SDP] = { + TS_SDP0_SEL_FC1, TS_SDP1_SEL_FC1, + TS_SDP2_SEL_FC1, TS_SDP3_SEL_FC1, + }; + u32 ctrl, ctrl_ext, tssdp = 0; + + ctrl = rd32(E1000_CTRL); + ctrl_ext = rd32(E1000_CTRL_EXT); + tssdp = rd32(E1000_TSSDP); + + igb_pin_direction(pin, 0, &ctrl, &ctrl_ext); + + /* Make sure this pin is not enabled as an input. */ + if ((tssdp & AUX0_SEL_SDP3) == aux0_sel_sdp[pin]) + tssdp &= ~AUX0_TS_SDP_EN; + + if ((tssdp & AUX1_SEL_SDP3) == aux1_sel_sdp[pin]) + tssdp &= ~AUX1_TS_SDP_EN; + + tssdp &= ~ts_sdp_sel_clr[pin]; + if (chan == 1) + tssdp |= ts_sdp_sel_tt1[pin]; + else + tssdp |= ts_sdp_sel_tt0[pin]; + + tssdp |= ts_sdp_en[pin]; + + wr32(E1000_TSSDP, tssdp); + wr32(E1000_CTRL, ctrl); + wr32(E1000_CTRL_EXT, ctrl_ext); +} + static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, struct ptp_clock_request *rq, int on) { struct igb_adapter *igb = container_of(ptp, struct igb_adapter, ptp_caps); struct e1000_hw *hw = &igb->hw; + u32 tsauxc, tsim, tsauxc_mask, tsim_mask, trgttiml, trgttimh; unsigned long flags; - u32 tsim; + struct timespec ts; + int pin; + s64 ns; switch (rq->type) { + case PTP_CLK_REQ_EXTTS: + if (on) { + pin = ptp_find_pin(igb->ptp_clock, PTP_PF_EXTTS, + rq->extts.index); + if (pin < 0) + return -EBUSY; + } + if (rq->extts.index == 1) { + tsauxc_mask = TSAUXC_EN_TS1; + tsim_mask = TSINTR_AUTT1; + } else { + tsauxc_mask = TSAUXC_EN_TS0; + tsim_mask = TSINTR_AUTT0; + } + spin_lock_irqsave(&igb->tmreg_lock, flags); + tsauxc = rd32(E1000_TSAUXC); + tsim = rd32(E1000_TSIM); + if (on) { + igb_pin_extts(igb, rq->extts.index, pin); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } else { + tsauxc &= ~tsauxc_mask; + tsim &= ~tsim_mask; + } + wr32(E1000_TSAUXC, tsauxc); + wr32(E1000_TSIM, tsim); + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + return 0; + + case PTP_CLK_REQ_PEROUT: + if (on) { + pin = ptp_find_pin(igb->ptp_clock, PTP_PF_PEROUT, + rq->perout.index); + if (pin < 0) + return -EBUSY; + } + ts.tv_sec = rq->perout.period.sec; + ts.tv_nsec = rq->perout.period.nsec; + ns = timespec_to_ns(&ts); + ns = ns >> 1; + if (on && ns < 500000LL) { + /* 2k interrupts per second is an awful lot. */ + return -EINVAL; + } + ts = ns_to_timespec(ns); + if (rq->perout.index == 1) { + tsauxc_mask = TSAUXC_EN_TT1; + tsim_mask = TSINTR_TT1; + trgttiml = E1000_TRGTTIML1; + trgttimh = E1000_TRGTTIMH1; + } else { + tsauxc_mask = TSAUXC_EN_TT0; + tsim_mask = TSINTR_TT0; + trgttiml = E1000_TRGTTIML0; + trgttimh = E1000_TRGTTIMH0; + } + spin_lock_irqsave(&igb->tmreg_lock, flags); + tsauxc = rd32(E1000_TSAUXC); + tsim = rd32(E1000_TSIM); + if (on) { + int i = rq->perout.index; + + igb_pin_perout(igb, i, pin); + igb->perout[i].start.tv_sec = rq->perout.start.sec; + igb->perout[i].start.tv_nsec = rq->perout.start.nsec; + igb->perout[i].period.tv_sec = ts.tv_sec; + igb->perout[i].period.tv_nsec = ts.tv_nsec; + wr32(trgttiml, rq->perout.start.sec); + wr32(trgttimh, rq->perout.start.nsec); + tsauxc |= tsauxc_mask; + tsim |= tsim_mask; + } else { + tsauxc &= ~tsauxc_mask; + tsim &= ~tsim_mask; + } + wr32(E1000_TSAUXC, tsauxc); + wr32(E1000_TSIM, tsim); + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + return 0; + case PTP_CLK_REQ_PPS: spin_lock_irqsave(&igb->tmreg_lock, flags); tsim = rd32(E1000_TSIM); @@ -375,9 +563,6 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, wr32(E1000_TSIM, tsim); spin_unlock_irqrestore(&igb->tmreg_lock, flags); return 0; - - default: - break; } return -EOPNOTSUPP; @@ -389,6 +574,20 @@ static int igb_ptp_feature_enable(struct ptp_clock_info *ptp, return -EOPNOTSUPP; } +static int igb_ptp_verify_pin(struct ptp_clock_info *ptp, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + switch (func) { + case PTP_PF_NONE: + case PTP_PF_EXTTS: + case PTP_PF_PEROUT: + break; + case PTP_PF_PHYSYNC: + return -1; + } + return 0; +} + /** * igb_ptp_tx_work * @work: pointer to work struct @@ -779,6 +978,7 @@ void igb_ptp_init(struct igb_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; + int i; switch (hw->mac.type) { case e1000_82576: @@ -821,16 +1021,27 @@ void igb_ptp_init(struct igb_adapter *adapter) break; case e1000_i210: case e1000_i211: + for (i = 0; i < IGB_N_SDP; i++) { + struct ptp_pin_desc *ppd = &adapter->sdp_config[i]; + + snprintf(ppd->name, sizeof(ppd->name), "SDP%d", i); + ppd->index = i; + ppd->func = PTP_PF_NONE; + } snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); adapter->ptp_caps.owner = THIS_MODULE; adapter->ptp_caps.max_adj = 62499999; - adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.n_ext_ts = IGB_N_EXTTS; + adapter->ptp_caps.n_per_out = IGB_N_PEROUT; + adapter->ptp_caps.n_pins = IGB_N_SDP; adapter->ptp_caps.pps = 1; + adapter->ptp_caps.pin_config = adapter->sdp_config; adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; adapter->ptp_caps.gettime = igb_ptp_gettime_i210; adapter->ptp_caps.settime = igb_ptp_settime_i210; adapter->ptp_caps.enable = igb_ptp_feature_enable_i210; + adapter->ptp_caps.verify = igb_ptp_verify_pin; /* Enable the timer functions by clearing bit 31. */ wr32(E1000_TSAUXC, 0x0); break; @@ -949,6 +1160,7 @@ void igb_ptp_reset(struct igb_adapter *adapter) case e1000_i210: case e1000_i211: wr32(E1000_TSAUXC, 0x0); + wr32(E1000_TSSDP, 0x0); wr32(E1000_TSIM, TSYNC_INTERRUPTS); wr32(E1000_IMS, E1000_IMS_TS); break; From e2929e453a1dddf489cc8bc360eb207df39d196c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 11 Nov 2014 17:19:30 +0000 Subject: [PATCH 10/15] net/fm10k: Avoid double setting of NETIF_F_SG for the HW encapsulation feature mask The networking core does it for the driver during registration time. Signed-off-by: Or Gerlitz Acked-by: Matthew Vick Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 945b35d31c71..cfde8bac1aeb 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -1414,13 +1414,12 @@ struct net_device *fm10k_alloc_netdev(void) dev->vlan_features |= dev->features; /* configure tunnel offloads */ - dev->hw_enc_features = NETIF_F_IP_CSUM | - NETIF_F_TSO | - NETIF_F_TSO6 | - NETIF_F_TSO_ECN | - NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_IPV6_CSUM | - NETIF_F_SG; + dev->hw_enc_features |= NETIF_F_IP_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_TSO_ECN | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_IPV6_CSUM; /* we want to leave these both on as we cannot disable VLAN tag * insertion or stripping on the hardware since it is contained From b66b6d9f6dbe0655a570fb2a2482cceac5f1a9bc Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Fri, 14 Nov 2014 07:47:40 +0000 Subject: [PATCH 11/15] fm10k: Check tunnel header length in encap offload fm10k supports up to 184 bytes of inner+outer headers. Add an initial check to fail encap offload if these are too large. Signed-off-by: Joe Stringer Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index c7a19a5e0ec9..84ab9eea2768 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -727,6 +727,12 @@ static __be16 fm10k_tx_encap_offload(struct sk_buff *skb) struct ethhdr *eth_hdr; u8 l4_hdr = 0; +/* fm10k supports 184 octets of outer+inner headers. Minus 20 for inner L4. */ +#define FM10K_MAX_ENCAP_TRANSPORT_OFFSET 164 + if (skb_inner_transport_header(skb) - skb_mac_header(skb) > + FM10K_MAX_ENCAP_TRANSPORT_OFFSET) + return 0; + switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP): l4_hdr = ip_hdr(skb)->protocol; From b4a9d6f17335ebd4d09f702cfffa059057ec238c Mon Sep 17 00:00:00 2001 From: Matthew Vick Date: Thu, 13 Nov 2014 07:29:18 +0000 Subject: [PATCH 12/15] fm10k: Increase the timeout for the data path reset Based on feedback from the hardware team, 100us is too short of a time to wait for the data path reset to complete and the recommendation is to increase this timeout to 150us. Signed-off-by: Matthew Vick Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/fm10k/fm10k_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index 280296f29154..7c6d9d5a8ae5 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -354,7 +354,7 @@ struct fm10k_hw; /* Define timeouts for resets and disables */ #define FM10K_QUEUE_DISABLE_TIMEOUT 100 -#define FM10K_RESET_TIMEOUT 100 +#define FM10K_RESET_TIMEOUT 150 /* VF registers */ #define FM10K_VFCTRL 0x00000 From 6ddbc4cf1f4d5a3a58b4223c80881f299dae3774 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 9 Oct 2014 05:33:55 +0000 Subject: [PATCH 13/15] igb: Indicate failure on vf reset for empty mac address Commit 5ac6f91d changed the igb driver to expose a zero (empty) mac address to the VF on reset rather than a random one. However, that behavioral change also requires igbvf driver changes which can be hard especially when we want to talk to proprietary guest OSs. Looking at the code previous to the commit in Linux that made igbvf work with empty mac addresses (8d56b6d), we can see that on reset failure the driver will try to generate a new mac address with both the old and the new code. Furthermore, ixgbe does send reset failure when it detects an empty mac address (35055928c). So I think it's safe to make igb behave the same. With this patch I can successfully run a Windows 8.1 guest with an empty mac address and an assigned igbvf device that has no mac address set by the host. If anyone is aware of a guest driver that chokes on NACK returns of VF RESET commands, please speak up. Signed-off-by: Alexander Graf Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 6e6544970b83..f366b3b96d03 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -6077,8 +6077,12 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; /* reply to reset with ack and vf mac address */ - msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; - memcpy(addr, vf_mac, ETH_ALEN); + if (!is_zero_ether_addr(vf_mac)) { + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, ETH_ALEN); + } else { + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_NACK; + } igb_write_mbx(hw, msgbuf, 3, vf); } From 8a4d0b93c142a53c369998303d2114b5beeca7af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Jan 2015 11:40:33 +0000 Subject: [PATCH 14/15] net: e1000: support txtd update delay via xmit_more Don't update Tx tail descriptor if we queue hasn't been stopped and we know at least one more skb will be sent right away. Signed-off-by: Florian Westphal Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 9242982db3e0..7f997d36948f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2977,7 +2977,6 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, int tx_flags, int count) { - struct e1000_hw *hw = &adapter->hw; struct e1000_tx_desc *tx_desc = NULL; struct e1000_tx_buffer *buffer_info; u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS; @@ -3031,11 +3030,6 @@ static void e1000_tx_queue(struct e1000_adapter *adapter, wmb(); tx_ring->next_to_use = i; - writel(i, hw->hw_addr + tx_ring->tdt); - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } /* 82547 workaround to avoid controller hang in half-duplex environment. @@ -3264,6 +3258,15 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, /* Make sure there is space in the ring for the next send. */ e1000_maybe_stop_tx(netdev, tx_ring, MAX_SKB_FRAGS + 2); + if (!skb->xmit_more || + netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { + writel(tx_ring->next_to_use, hw->hw_addr + tx_ring->tdt); + /* we need this if more than one processor can write to + * our tail at a time, it synchronizes IO on IA64/Altix + * systems + */ + mmiowb(); + } } else { dev_kfree_skb_any(skb); tx_ring->buffer_info[first].time_stamp = 0; From 472f31f5726ab2f41f09cb8175610f196fac2d7a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Jan 2015 09:26:14 +0000 Subject: [PATCH 15/15] net: e1000e: support txtd update delay via xmit_more Don't update Tx tail descriptor if queue hasn't been stopped and we know at least one more skb will be sent right away. Signed-off-by: Florian Westphal Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/netdev.c | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index aa39a81a6df6..1e8c40fd5c3d 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5444,16 +5444,6 @@ static void e1000_tx_queue(struct e1000_ring *tx_ring, int tx_flags, int count) wmb(); tx_ring->next_to_use = i; - - if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) - e1000e_update_tdt_wa(tx_ring, i); - else - writel(i, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); } #define MINIMUM_DHCP_PACKET_SIZE 282 @@ -5655,6 +5645,21 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, (MAX_SKB_FRAGS * DIV_ROUND_UP(PAGE_SIZE, adapter->tx_fifo_limit) + 2)); + + if (!skb->xmit_more || + netif_xmit_stopped(netdev_get_tx_queue(netdev, 0))) { + if (adapter->flags2 & FLAG2_PCIM2PCI_ARBITER_WA) + e1000e_update_tdt_wa(tx_ring, + tx_ring->next_to_use); + else + writel(tx_ring->next_to_use, tx_ring->tail); + + /* we need this if more than one processor can write + * to our tail at a time, it synchronizes IO on + *IA64/Altix systems + */ + mmiowb(); + } } else { dev_kfree_skb_any(skb); tx_ring->buffer_info[first].time_stamp = 0;