From 745d0bd3af99ccc8c5f5822f808cd133eadad6ac Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Wed, 31 Jan 2018 16:26:27 +0900
Subject: [PATCH 1/6] e1000e: Remove Other from EIAC

It was reported that emulated e1000e devices in vmware esxi 6.5 Build
7526125 do not link up after commit 4aea7a5c5e94 ("e1000e: Avoid receiver
overrun interrupt bursts", v4.15-rc1). Some tracing shows that after
e1000e_trigger_lsc() is called, ICR reads out as 0x0 in e1000_msix_other()
on emulated e1000e devices. In comparison, on real e1000e 82574 hardware,
icr=0x80000004 (_INT_ASSERTED | _LSC) in the same situation.

Some experimentation showed that this flaw in vmware e1000e emulation can
be worked around by not setting Other in EIAC. This is how it was before
16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1).

Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 1298b69f990b4..153ad406c65e1 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1918,6 +1918,8 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 	bool enable = true;
 
 	icr = er32(ICR);
+	ew32(ICR, E1000_ICR_OTHER);
+
 	if (icr & E1000_ICR_RXO) {
 		ew32(ICR, E1000_ICR_RXO);
 		enable = false;
@@ -2040,7 +2042,6 @@ static void e1000_configure_msix(struct e1000_adapter *adapter)
 		       hw->hw_addr + E1000_EITR_82574(vector));
 	else
 		writel(1, hw->hw_addr + E1000_EITR_82574(vector));
-	adapter->eiac_mask |= E1000_IMS_OTHER;
 
 	/* Cause Tx interrupts on every write back */
 	ivar |= BIT(31);
@@ -2265,7 +2266,7 @@ static void e1000_irq_enable(struct e1000_adapter *adapter)
 
 	if (adapter->msix_entries) {
 		ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-		ew32(IMS, adapter->eiac_mask | E1000_IMS_LSC);
+		ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
 	} else if (hw->mac.type >= e1000_pch_lpt) {
 		ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
 	} else {

From 1f0ea19722ef9dfa229a9540f70b8d1c34a98a6a Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Thu, 8 Feb 2018 15:47:12 +0900
Subject: [PATCH 2/6] Partial revert "e1000e: Avoid receiver overrun interrupt
 bursts"

This partially reverts commit 4aea7a5c5e940c1723add439f4088844cd26196d.

We keep the fix for the first part of the problem (1) described in the log
of that commit, that is to read ICR in the other interrupt handler. We
remove the fix for the second part of the problem (2), Other interrupt
throttling.

Bursts of "Other" interrupts may once again occur during rxo (receive
overflow) traffic conditions. This is deemed acceptable in the interest of
avoiding unforeseen fallout from changes that are not strictly necessary.
As discussed, the e1000e driver should be in "maintenance mode".

Link: https://www.spinics.net/lists/netdev/msg480675.html
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 153ad406c65e1..3b36efa6228d2 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1915,21 +1915,10 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 	u32 icr;
-	bool enable = true;
 
 	icr = er32(ICR);
 	ew32(ICR, E1000_ICR_OTHER);
 
-	if (icr & E1000_ICR_RXO) {
-		ew32(ICR, E1000_ICR_RXO);
-		enable = false;
-		/* napi poll will re-enable Other, make sure it runs */
-		if (napi_schedule_prep(&adapter->napi)) {
-			adapter->total_rx_bytes = 0;
-			adapter->total_rx_packets = 0;
-			__napi_schedule(&adapter->napi);
-		}
-	}
 	if (icr & E1000_ICR_LSC) {
 		ew32(ICR, E1000_ICR_LSC);
 		hw->mac.get_link_status = true;
@@ -1938,7 +1927,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 			mod_timer(&adapter->watchdog_timer, jiffies + 1);
 	}
 
-	if (enable && !test_bit(__E1000_DOWN, &adapter->state))
+	if (!test_bit(__E1000_DOWN, &adapter->state))
 		ew32(IMS, E1000_IMS_OTHER);
 
 	return IRQ_HANDLED;
@@ -2708,8 +2697,7 @@ static int e1000e_poll(struct napi_struct *napi, int weight)
 		napi_complete_done(napi, work_done);
 		if (!test_bit(__E1000_DOWN, &adapter->state)) {
 			if (adapter->msix_entries)
-				ew32(IMS, adapter->rx_ring->ims_val |
-				     E1000_IMS_OTHER);
+				ew32(IMS, adapter->rx_ring->ims_val);
 			else
 				e1000_irq_enable(adapter);
 		}

From 361a954e6a7215de11a6179ad9bdc07d7e394b04 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Thu, 8 Feb 2018 15:47:13 +0900
Subject: [PATCH 3/6] e1000e: Fix queue interrupt re-raising in Other interrupt

Restores the ICS write for Rx/Tx queue interrupts which was present before
commit 16ecba59bc33 ("e1000e: Do not read ICR in Other interrupt", v4.5-rc1)
but was not restored in commit 4aea7a5c5e94
("e1000e: Avoid receiver overrun interrupt bursts", v4.15-rc1).

This re-raises the queue interrupts in case the txq or rxq bits were set in
ICR and the Other interrupt handler read and cleared ICR before the queue
interrupt was raised.

Fixes: 4aea7a5c5e94 ("e1000e: Avoid receiver overrun interrupt bursts")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 3b36efa6228d2..2c9609bee2ae4 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1919,6 +1919,9 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 	icr = er32(ICR);
 	ew32(ICR, E1000_ICR_OTHER);
 
+	if (icr & adapter->eiac_mask)
+		ew32(ICS, (icr & adapter->eiac_mask));
+
 	if (icr & E1000_ICR_LSC) {
 		ew32(ICR, E1000_ICR_LSC);
 		hw->mac.get_link_status = true;

From 116f4a640b3197401bc93b8adc6c35040308ceff Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Thu, 8 Feb 2018 15:47:14 +0900
Subject: [PATCH 4/6] e1000e: Avoid missed interrupts following ICR read

The 82574 specification update errata 12 states that interrupts may be
missed if ICR is read while INT_ASSERTED is not set. Avoid that problem by
setting all bits related to events that can trigger the Other interrupt in
IMS.

The Other interrupt is raised for such events regardless of whether or not
they are set in IMS. However, only when they are set is the INT_ASSERTED
bit also set in ICR.

By doing this, we ensure that INT_ASSERTED is always set when we read ICR
in e1000_msix_other() and steer clear of the errata. This also ensures that
ICR will automatically be cleared on read, therefore we no longer need to
clear bits explicitly.

Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/defines.h | 21 ++++++++++++++++++++-
 drivers/net/ethernet/intel/e1000e/netdev.c  | 11 ++++-------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/defines.h b/drivers/net/ethernet/intel/e1000e/defines.h
index afb7ebe20b243..824fd44e25f0d 100644
--- a/drivers/net/ethernet/intel/e1000e/defines.h
+++ b/drivers/net/ethernet/intel/e1000e/defines.h
@@ -400,6 +400,10 @@
 #define E1000_ICR_RXDMT0        0x00000010 /* Rx desc min. threshold (0) */
 #define E1000_ICR_RXO           0x00000040 /* Receiver Overrun */
 #define E1000_ICR_RXT0          0x00000080 /* Rx timer intr (ring 0) */
+#define E1000_ICR_MDAC          0x00000200 /* MDIO Access Complete */
+#define E1000_ICR_SRPD          0x00010000 /* Small Receive Packet Detected */
+#define E1000_ICR_ACK           0x00020000 /* Receive ACK Frame Detected */
+#define E1000_ICR_MNG           0x00040000 /* Manageability Event Detected */
 #define E1000_ICR_ECCER         0x00400000 /* Uncorrectable ECC Error */
 /* If this bit asserted, the driver should claim the interrupt */
 #define E1000_ICR_INT_ASSERTED	0x80000000
@@ -407,7 +411,7 @@
 #define E1000_ICR_RXQ1          0x00200000 /* Rx Queue 1 Interrupt */
 #define E1000_ICR_TXQ0          0x00400000 /* Tx Queue 0 Interrupt */
 #define E1000_ICR_TXQ1          0x00800000 /* Tx Queue 1 Interrupt */
-#define E1000_ICR_OTHER         0x01000000 /* Other Interrupts */
+#define E1000_ICR_OTHER         0x01000000 /* Other Interrupt */
 
 /* PBA ECC Register */
 #define E1000_PBA_ECC_COUNTER_MASK  0xFFF00000 /* ECC counter mask */
@@ -431,12 +435,27 @@
 	E1000_IMS_RXSEQ  |    \
 	E1000_IMS_LSC)
 
+/* These are all of the events related to the OTHER interrupt.
+ */
+#define IMS_OTHER_MASK ( \
+	E1000_IMS_LSC  | \
+	E1000_IMS_RXO  | \
+	E1000_IMS_MDAC | \
+	E1000_IMS_SRPD | \
+	E1000_IMS_ACK  | \
+	E1000_IMS_MNG)
+
 /* Interrupt Mask Set */
 #define E1000_IMS_TXDW      E1000_ICR_TXDW      /* Transmit desc written back */
 #define E1000_IMS_LSC       E1000_ICR_LSC       /* Link Status Change */
 #define E1000_IMS_RXSEQ     E1000_ICR_RXSEQ     /* Rx sequence error */
 #define E1000_IMS_RXDMT0    E1000_ICR_RXDMT0    /* Rx desc min. threshold */
+#define E1000_IMS_RXO       E1000_ICR_RXO       /* Receiver Overrun */
 #define E1000_IMS_RXT0      E1000_ICR_RXT0      /* Rx timer intr */
+#define E1000_IMS_MDAC      E1000_ICR_MDAC      /* MDIO Access Complete */
+#define E1000_IMS_SRPD      E1000_ICR_SRPD      /* Small Receive Packet */
+#define E1000_IMS_ACK       E1000_ICR_ACK       /* Receive ACK Frame Detected */
+#define E1000_IMS_MNG       E1000_ICR_MNG       /* Manageability Event */
 #define E1000_IMS_ECCER     E1000_ICR_ECCER     /* Uncorrectable ECC Error */
 #define E1000_IMS_RXQ0      E1000_ICR_RXQ0      /* Rx Queue 0 Interrupt */
 #define E1000_IMS_RXQ1      E1000_ICR_RXQ1      /* Rx Queue 1 Interrupt */
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 2c9609bee2ae4..9fd4050a91ca5 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -1914,16 +1914,12 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 	struct net_device *netdev = data;
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	u32 icr;
-
-	icr = er32(ICR);
-	ew32(ICR, E1000_ICR_OTHER);
+	u32 icr = er32(ICR);
 
 	if (icr & adapter->eiac_mask)
 		ew32(ICS, (icr & adapter->eiac_mask));
 
 	if (icr & E1000_ICR_LSC) {
-		ew32(ICR, E1000_ICR_LSC);
 		hw->mac.get_link_status = true;
 		/* guard against interrupt when we're going down */
 		if (!test_bit(__E1000_DOWN, &adapter->state))
@@ -1931,7 +1927,7 @@ static irqreturn_t e1000_msix_other(int __always_unused irq, void *data)
 	}
 
 	if (!test_bit(__E1000_DOWN, &adapter->state))
-		ew32(IMS, E1000_IMS_OTHER);
+		ew32(IMS, E1000_IMS_OTHER | IMS_OTHER_MASK);
 
 	return IRQ_HANDLED;
 }
@@ -2258,7 +2254,8 @@ static void e1000_irq_enable(struct e1000_adapter *adapter)
 
 	if (adapter->msix_entries) {
 		ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
-		ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
+		ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER |
+		     IMS_OTHER_MASK);
 	} else if (hw->mac.type >= e1000_pch_lpt) {
 		ew32(IMS, IMS_ENABLE_MASK | E1000_IMS_ECCER);
 	} else {

From 4e7dc08e57c95673d2edaba8983c3de4dd1f65f5 Mon Sep 17 00:00:00 2001
From: Benjamin Poirier <bpoirier@suse.com>
Date: Tue, 20 Feb 2018 15:12:00 +0900
Subject: [PATCH 5/6] e1000e: Fix check_for_link return value with autoneg off

When autoneg is off, the .check_for_link callback functions clear the
get_link_status flag and systematically return a "pseudo-error". This means
that the link is not detected as up until the next execution of the
e1000_watchdog_task() 2 seconds later.

Fixes: 19110cfbb34d ("e1000e: Separate signaling for link check/link up")
Signed-off-by: Benjamin Poirier <bpoirier@suse.com>
Acked-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +-
 drivers/net/ethernet/intel/e1000e/mac.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 31277d3bb7dc1..ff308b05d68cc 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -1602,7 +1602,7 @@ static s32 e1000_check_for_copper_link_ich8lan(struct e1000_hw *hw)
 	 * we have already determined whether we have link or not.
 	 */
 	if (!mac->autoneg)
-		return -E1000_ERR_CONFIG;
+		return 1;
 
 	/* Auto-Neg is enabled.  Auto Speed Detection takes care
 	 * of MAC speed/duplex configuration.  So we only need to
diff --git a/drivers/net/ethernet/intel/e1000e/mac.c b/drivers/net/ethernet/intel/e1000e/mac.c
index f457c5703d0c4..db735644b3121 100644
--- a/drivers/net/ethernet/intel/e1000e/mac.c
+++ b/drivers/net/ethernet/intel/e1000e/mac.c
@@ -450,7 +450,7 @@ s32 e1000e_check_for_copper_link(struct e1000_hw *hw)
 	 * we have already determined whether we have link or not.
 	 */
 	if (!mac->autoneg)
-		return -E1000_ERR_CONFIG;
+		return 1;
 
 	/* Auto-Neg is enabled.  Auto Speed Detection takes care
 	 * of MAC speed/duplex configuration.  So we only need to

From aea3fca005fb45f80869f2e8d56fd4e64c1d1fdb Mon Sep 17 00:00:00 2001
From: Pierre-Yves Kerbrat <pkerbrat@kalray.eu>
Date: Fri, 26 Jan 2018 11:24:12 +0100
Subject: [PATCH 6/6] e1000e: allocate ring descriptors with
 dma_zalloc_coherent

Descriptor rings were not initialized at zero when allocated
When area contained garbage data, it caused skb_over_panic in
e1000_clean_rx_irq (if data had E1000_RXD_STAT_DD bit set)

This patch makes use of dma_zalloc_coherent to make sure the
ring is memset at 0 to prevent the area from containing garbage.

Following is the signature of the panic:
IODDR0@0.0: skbuff: skb_over_panic: text:80407b20 len:64010 put:64010 head:ab46d800 data:ab46d842 tail:0xab47d24c end:0xab46df40 dev:eth0
IODDR0@0.0: BUG: failure at net/core/skbuff.c:105/skb_panic()!
IODDR0@0.0: Kernel panic - not syncing: BUG!
IODDR0@0.0:
IODDR0@0.0: Process swapper/0 (pid: 0, threadinfo=81728000, task=8173cc00 ,cpu: 0)
IODDR0@0.0: SP = <815a1c0c>
IODDR0@0.0: Stack:      00000001
IODDR0@0.0: b2d89800 815e33ac
IODDR0@0.0: ea73c040 00000001
IODDR0@0.0: 60040003 0000fa0a
IODDR0@0.0: 00000002
IODDR0@0.0:
IODDR0@0.0: 804540c0 815a1c70
IODDR0@0.0: b2744000 602ac070
IODDR0@0.0: 815a1c44 b2d89800
IODDR0@0.0: 8173cc00 815a1c08
IODDR0@0.0:
IODDR0@0.0:     00000006
IODDR0@0.0: 815a1b50 00000000
IODDR0@0.0: 80079434 00000001
IODDR0@0.0: ab46df40 b2744000
IODDR0@0.0: b2d89800
IODDR0@0.0:
IODDR0@0.0: 0000fa0a 8045745c
IODDR0@0.0: 815a1c88 0000fa0a
IODDR0@0.0: 80407b20 b2789f80
IODDR0@0.0: 00000005 80407b20
IODDR0@0.0:
IODDR0@0.0:
IODDR0@0.0: Call Trace:
IODDR0@0.0: [<804540bc>] skb_panic+0xa4/0xa8
IODDR0@0.0: [<80079430>] console_unlock+0x2f8/0x6d0
IODDR0@0.0: [<80457458>] skb_put+0xa0/0xc0
IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8
IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8
IODDR0@0.0: [<804079c8>] e1000_clean_rx_irq+0x188/0x3e8
IODDR0@0.0: [<80407b1c>] e1000_clean_rx_irq+0x2dc/0x3e8
IODDR0@0.0: [<80468b48>] __dev_kfree_skb_any+0x88/0xa8
IODDR0@0.0: [<804101ac>] e1000e_poll+0x94/0x288
IODDR0@0.0: [<8046e9d4>] net_rx_action+0x19c/0x4e8
IODDR0@0.0:   ...
IODDR0@0.0: Maximum depth to print reached. Use kstack=<maximum_depth_to_print> To specify a custom value (where 0 means to display the full backtrace)
IODDR0@0.0: ---[ end Kernel panic - not syncing: BUG!

Signed-off-by: Pierre-Yves Kerbrat <pkerbrat@kalray.eu>
Signed-off-by: Marius Gligor <mgligor@kalray.eu>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 9fd4050a91ca5..c0f23446bf26b 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -2323,8 +2323,8 @@ static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
 {
 	struct pci_dev *pdev = adapter->pdev;
 
-	ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
-					GFP_KERNEL);
+	ring->desc = dma_zalloc_coherent(&pdev->dev, ring->size, &ring->dma,
+					 GFP_KERNEL);
 	if (!ring->desc)
 		return -ENOMEM;