From bbac1c94880cb8c7e093718897f4822f3933dd3c Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 1 Jun 2015 22:48:34 -0700
Subject: [PATCH 01/19] s390/bpf: fix stack allocation

On s390x we have to provide 160 bytes stack space before we can call
the next function. From the 160 bytes that we got from the previous
function we only use 11 * 8 bytes and have 160 - 11 * 8 bytes left.
Currently for BPF we allocate additional 160 - 11 * 8 bytes for the
next function. This is wrong because then the next function only gets:

 (160 - 11 * 8) + (160 - 11 * 8) = 2 * 72 = 144 bytes

Fix this and allocate enough memory for the next function.

Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend")
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/net/bpf_jit.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/net/bpf_jit.h b/arch/s390/net/bpf_jit.h
index ba8593a515baa..de156ba3bd71c 100644
--- a/arch/s390/net/bpf_jit.h
+++ b/arch/s390/net/bpf_jit.h
@@ -48,7 +48,9 @@ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
  * We get 160 bytes stack space from calling function, but only use
  * 11 * 8 byte (old backchain + r15 - r6) for storing registers.
  */
-#define STK_OFF (MAX_BPF_STACK + 8 + 4 + 4 + (160 - 11 * 8))
+#define STK_SPACE	(MAX_BPF_STACK + 8 + 4 + 4 + 160)
+#define STK_160_UNUSED	(160 - 11 * 8)
+#define STK_OFF		(STK_SPACE - STK_160_UNUSED)
 #define STK_OFF_TMP	160	/* Offset of tmp buffer on stack */
 #define STK_OFF_HLEN	168	/* Offset of SKB header length on stack */
 

From 88aeca15d637c279171ba441730ef41e4c4ce0ed Mon Sep 17 00:00:00 2001
From: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Date: Mon, 1 Jun 2015 22:48:35 -0700
Subject: [PATCH 02/19] s390/bpf: fix bpf frame pointer setup

Currently the bpf frame pointer is set to the old r15. This is
wrong because of packed stack. Fix this and adjust the frame pointer
to respect packed stack. This now generates a prolog like the following:

 3ff8001c3fa: eb67f0480024   stmg    %r6,%r7,72(%r15)
 3ff8001c400: ebcff0780024   stmg    %r12,%r15,120(%r15)
 3ff8001c406: b904001f       lgr     %r1,%r15      <- load backchain
 3ff8001c40a: 41d0f048       la      %r13,72(%r15) <- load adjusted bfp
 3ff8001c40e: a7fbfd98       aghi    %r15,-616
 3ff8001c412: e310f0980024   stg     %r1,152(%r15) <- save backchain

Fixes: 054623105728 ("s390/bpf: Add s390x eBPF JIT compiler backend")
Signed-off-by: Michael Holzheu <holzheu@linux.vnet.ibm.com>
Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/net/bpf_jit_comp.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 20c146d1251ae..55423d8be5801 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -384,13 +384,16 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
 	}
 	/* Setup stack and backchain */
 	if (jit->seen & SEEN_STACK) {
-		/* lgr %bfp,%r15 (BPF frame pointer) */
-		EMIT4(0xb9040000, BPF_REG_FP, REG_15);
+		if (jit->seen & SEEN_FUNC)
+			/* lgr %w1,%r15 (backchain) */
+			EMIT4(0xb9040000, REG_W1, REG_15);
+		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
+		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
 		/* aghi %r15,-STK_OFF */
 		EMIT4_IMM(0xa70b0000, REG_15, -STK_OFF);
 		if (jit->seen & SEEN_FUNC)
-			/* stg %bfp,152(%r15) (backchain) */
-			EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_FP, REG_0,
+			/* stg %w1,152(%r15) (backchain) */
+			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
 	}
 	/*

From 640b2b107cec23c754214b62a811465fa8f9257f Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Tue, 2 Jun 2015 14:36:34 +0200
Subject: [PATCH 03/19] openvswitch: disable LRO

Currently, openvswitch tries to disable LRO from the user space. This does
not work correctly when the device added is a vlan interface, though.
Instead of dealing with possibly complex stacked cross name space relations
in the user space, do the same as bridging does and call dev_disable_lro in
the kernel.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Acked-by: Flavio Leitner <fbl@redhat.com>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/vport-netdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index 4776282c64175..33e6d6e2908f5 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -125,6 +125,7 @@ static struct vport *netdev_create(const struct vport_parms *parms)
 	if (err)
 		goto error_master_upper_dev_unlink;
 
+	dev_disable_lro(netdev_vport->dev);
 	dev_set_promiscuity(netdev_vport->dev, 1);
 	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
 	rtnl_unlock();

From 6e540309326188f769e03bb4c6dd8ff6752930c2 Mon Sep 17 00:00:00 2001
From: Shawn Bohrer <sbohrer@rgmadvisors.com>
Date: Wed, 3 Jun 2015 16:27:38 -0500
Subject: [PATCH 04/19] ipv4/udp: Verify multicast group is ours in
 upd_v4_early_demux()

421b3885bf6d56391297844f43fb7154a6396e12 "udp: ipv4: Add udp early
demux" introduced a regression that allowed sockets bound to INADDR_ANY
to receive packets from multicast groups that the socket had not joined.
For example a socket that had joined 224.168.2.9 could also receive
packets from 225.168.2.9 despite not having joined that group if
ip_early_demux is enabled.

Fix this by calling ip_check_mc_rcu() in udp_v4_early_demux() to verify
that the multicast packet is indeed ours.

Signed-off-by: Shawn Bohrer <sbohrer@rgmadvisors.com>
Reported-by: Yurij M. Plotnikov <Yurij.Plotnikov@oktetlabs.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/udp.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1c92ea67baefe..83aa604f9273c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -90,6 +90,7 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
@@ -1960,6 +1961,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	struct sock *sk;
 	struct dst_entry *dst;
 	int dif = skb->dev->ifindex;
+	int ours;
 
 	/* validate the packet */
 	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1969,14 +1971,24 @@ void udp_v4_early_demux(struct sk_buff *skb)
 	uh = udp_hdr(skb);
 
 	if (skb->pkt_type == PACKET_BROADCAST ||
-	    skb->pkt_type == PACKET_MULTICAST)
+	    skb->pkt_type == PACKET_MULTICAST) {
+		struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+		if (!in_dev)
+			return;
+
+		ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+				       iph->protocol);
+		if (!ours)
+			return;
 		sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
 						   uh->source, iph->saddr, dif);
-	else if (skb->pkt_type == PACKET_HOST)
+	} else if (skb->pkt_type == PACKET_HOST) {
 		sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
 					     uh->source, iph->saddr, dif);
-	else
+	} else {
 		return;
+	}
 
 	if (!sk)
 		return;

From 30520831f058cd9d75c0f6b360bc5c5ae49b5f27 Mon Sep 17 00:00:00 2001
From: Anjali Singhai Jain <anjali.singhai@intel.com>
Date: Fri, 8 May 2015 15:35:52 -0700
Subject: [PATCH 05/19] i40e/i40evf: Fix mixed size frags and linearization

This patch fixes a bug where the i40e Tx queue will hang if this
skb is passed to the driver.

With mixed size fragments while using TSO there was a corner case
where we needed to linearize but we were not. This was seen with
iSCSI traffic and could be reproduced with a frag list that looks
like this:

num_frags = 17, gso_segs = 17, hdr_len = 66,
skb_shinfo(skb)->gso_size = 1448
size = 3002, j = 1, frag_size = 2936, num_frags = 17
size = 4268, j = 1, frag_size = 4096, num_frags = 16
size = 5534, j = 1, frag_size = 4096, num_frags = 15
size = 5352, j = 1, frag_size = 4096, num_frags = 14
size = 5170, j = 1, frag_size = 4096, num_frags = 13
size = 3468, j = 1, frag_size = 2576, num_frags = 12
size = 750, j = 1, frag_size = 112, num_frags = 11
size = 862, j = 2, frag_size = 112, num_frags = 10
size = 974, j = 3, frag_size = 112, num_frags = 9
size = 1126, j = 4, frag_size = 152, num_frags = 8
size = 1330, j = 5, frag_size = 204, num_frags = 7
size = 1534, j = 6, frag_size = 204, num_frags = 6
size = 356, j = 1, frag_size = 204, num_frags = 5
size = 560, j = 2, frag_size = 204, num_frags = 4
size = 764, j = 3, frag_size = 204, num_frags = 3
size = 968, j = 4, frag_size = 204, num_frags = 2
size = 1140, j = 5, frag_size = 172, num_frags = 1
result: linearize = 0, j = 6

Change-ID: I79bb1aeab0af255fe2ce28e93672a85d85bf47e8
Signed-off-by: Anjali Singhai Jain <anjali.singhai@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 25 ++++++++-----------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 25 ++++++++-----------
 2 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 4bd3a80aba829..9d95042d5a0f5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2410,14 +2410,12 @@ static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
  * @skb:      send buffer
  * @tx_flags: collected send information
- * @hdr_len:  size of the packet header
  *
  * Note: Our HW can't scatter-gather more than 8 fragments to build
  * a packet on the wire and so we need to figure out the cases where we
  * need to linearize the skb.
  **/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
-			       const u8 hdr_len)
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
 {
 	struct skb_frag_struct *frag;
 	bool linearize = false;
@@ -2429,7 +2427,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 	gso_segs = skb_shinfo(skb)->gso_segs;
 
 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
-		u16 j = 1;
+		u16 j = 0;
 
 		if (num_frags < (I40E_MAX_BUFFER_TXD))
 			goto linearize_chk_done;
@@ -2440,21 +2438,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 			goto linearize_chk_done;
 		}
 		frag = &skb_shinfo(skb)->frags[0];
-		size = hdr_len;
 		/* we might still have more fragments per segment */
 		do {
 			size += skb_frag_size(frag);
 			frag++; j++;
+			if ((size >= skb_shinfo(skb)->gso_size) &&
+			    (j < I40E_MAX_BUFFER_TXD)) {
+				size = (size % skb_shinfo(skb)->gso_size);
+				j = (size) ? 1 : 0;
+			}
 			if (j == I40E_MAX_BUFFER_TXD) {
-				if (size < skb_shinfo(skb)->gso_size) {
-					linearize = true;
-					break;
-				}
-				j = 1;
-				size -= skb_shinfo(skb)->gso_size;
-				if (size)
-					j++;
-				size += hdr_len;
+				linearize = true;
+				break;
 			}
 			num_frags--;
 		} while (num_frags);
@@ -2724,7 +2719,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	if (tsyn)
 		tx_flags |= I40E_TX_FLAGS_TSYN;
 
-	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+	if (i40e_chk_linearize(skb, tx_flags))
 		if (skb_linearize(skb))
 			goto out_drop;
 
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index b077e02a0cc7a..458fbb4210907 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -1619,14 +1619,12 @@ static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
  * i40e_chk_linearize - Check if there are more than 8 fragments per packet
  * @skb:      send buffer
  * @tx_flags: collected send information
- * @hdr_len:  size of the packet header
  *
  * Note: Our HW can't scatter-gather more than 8 fragments to build
  * a packet on the wire and so we need to figure out the cases where we
  * need to linearize the skb.
  **/
-static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
-			       const u8 hdr_len)
+static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags)
 {
 	struct skb_frag_struct *frag;
 	bool linearize = false;
@@ -1638,7 +1636,7 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 	gso_segs = skb_shinfo(skb)->gso_segs;
 
 	if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO)) {
-		u16 j = 1;
+		u16 j = 0;
 
 		if (num_frags < (I40E_MAX_BUFFER_TXD))
 			goto linearize_chk_done;
@@ -1649,21 +1647,18 @@ static bool i40e_chk_linearize(struct sk_buff *skb, u32 tx_flags,
 			goto linearize_chk_done;
 		}
 		frag = &skb_shinfo(skb)->frags[0];
-		size = hdr_len;
 		/* we might still have more fragments per segment */
 		do {
 			size += skb_frag_size(frag);
 			frag++; j++;
+			if ((size >= skb_shinfo(skb)->gso_size) &&
+			    (j < I40E_MAX_BUFFER_TXD)) {
+				size = (size % skb_shinfo(skb)->gso_size);
+				j = (size) ? 1 : 0;
+			}
 			if (j == I40E_MAX_BUFFER_TXD) {
-				if (size < skb_shinfo(skb)->gso_size) {
-					linearize = true;
-					break;
-				}
-				j = 1;
-				size -= skb_shinfo(skb)->gso_size;
-				if (size)
-					j++;
-				size += hdr_len;
+				linearize = true;
+				break;
 			}
 			num_frags--;
 		} while (num_frags);
@@ -1950,7 +1945,7 @@ static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
 	else if (tso)
 		tx_flags |= I40E_TX_FLAGS_TSO;
 
-	if (i40e_chk_linearize(skb, tx_flags, hdr_len))
+	if (i40e_chk_linearize(skb, tx_flags))
 		if (skb_linearize(skb))
 			goto out_drop;
 

From fc60861e9b00388fd11d7995a60bf0b1e61dba93 Mon Sep 17 00:00:00 2001
From: Anjali Singhai Jain <anjali.singhai@intel.com>
Date: Fri, 8 May 2015 15:35:57 -0700
Subject: [PATCH 06/19] i40e: start up in VEPA mode by default

The patch fixes a bug in the default configuration which
prevented a software bridge loaded on the PF interface from
working correctly because broadcast packets are incorrectly
looped back.

Fix the general case, by loading the driver in VEPA mode Until a
VF or VMDq VSI is added. This way loopback on the Main VSI is
turned off until needed and can resolve the issue of unnecessary
reflection for users that do not have VF or VMDq VSIs setup.

The driver must now coordinate the loopback setting for the Flow
Director (FDIR) VSI to make sure it is in sync with the current
VEB or VEPA mode setting.

The user can still switch bridge modes from the bridge commands and
choose to be in VEPA mode with VF VSIs. Because of hardware
requirements, the call to switch to VEB mode when no VF/VMDqs are
present will be rejected.

NOTE: This patch uses BIT_ULL as that is preferred going forward,
a followup patch in the lower priority queue to net-next will fix
up the remaining 1 << usages.

Change-ID: Ib121ddb18fe4b3c4f52e9deda6fcbeb9105683d1
Signed-off-by: Anjali Singhai Jain <anjali.singhai@intel.com>
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Jim Young <james.m.young@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e.h        |  1 +
 .../net/ethernet/intel/i40e/i40e_debugfs.c    |  9 ++++++++
 drivers/net/ethernet/intel/i40e/i40e_main.c   | 21 +++++++++++++++----
 .../ethernet/intel/i40e/i40e_virtchnl_pf.c    | 10 ++++++++-
 4 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 33c35d3b7420f..5d47307121abb 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -317,6 +317,7 @@ struct i40e_pf {
 #endif
 #define I40E_FLAG_PORT_ID_VALID                (u64)(1 << 28)
 #define I40E_FLAG_DCB_CAPABLE                  (u64)(1 << 29)
+#define I40E_FLAG_VEB_MODE_ENABLED		BIT_ULL(40)
 
 	/* tracks features that get auto disabled by errors */
 	u64 auto_disable_flags;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 34170eabca7da..da0faf478af07 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1021,6 +1021,15 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 			goto command_write_done;
 		}
 
+		/* By default we are in VEPA mode, if this is the first VF/VMDq
+		 * VSI to be added switch to VEB mode.
+		 */
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset_safe(pf,
+					   BIT_ULL(__I40E_PF_RESET_REQUESTED));
+		}
+
 		vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
 		if (vsi)
 			dev_info(&pf->pdev->dev, "added VSI %d to relay %d\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index a54c14491e3b6..853eb2f7e558d 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -6097,6 +6097,10 @@ static int i40e_reconstitute_veb(struct i40e_veb *veb)
 	if (ret)
 		goto end_reconstitute;
 
+	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
+		veb->bridge_mode = BRIDGE_MODE_VEB;
+	else
+		veb->bridge_mode = BRIDGE_MODE_VEPA;
 	i40e_config_bridge_mode(veb);
 
 	/* create the remaining VSIs attached to this VEB */
@@ -8031,7 +8035,12 @@ static int i40e_ndo_bridge_setlink(struct net_device *dev,
 		} else if (mode != veb->bridge_mode) {
 			/* Existing HW bridge but different mode needs reset */
 			veb->bridge_mode = mode;
-			i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+			/* TODO: If no VFs or VMDq VSIs, disallow VEB mode */
+			if (mode == BRIDGE_MODE_VEB)
+				pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			else
+				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
 			break;
 		}
 	}
@@ -8343,11 +8352,12 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 		ctxt.uplink_seid = vsi->uplink_seid;
 		ctxt.connection_type = I40E_AQ_VSI_CONN_TYPE_NORMAL;
 		ctxt.flags = I40E_AQ_VSI_TYPE_PF;
-		if (i40e_is_vsi_uplink_mode_veb(vsi)) {
+		if ((pf->flags & I40E_FLAG_VEB_MODE_ENABLED) &&
+		    (i40e_is_vsi_uplink_mode_veb(vsi))) {
 			ctxt.info.valid_sections |=
-				cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+			     cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
 			ctxt.info.switch_id =
-				cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+			   cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
 		}
 		i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
 		break;
@@ -8746,6 +8756,9 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
 					 __func__);
 				return NULL;
 			}
+			/* We come up by default in VEPA mode */
+			veb->bridge_mode = BRIDGE_MODE_VEPA;
+			pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
 			i40e_config_bridge_mode(veb);
 		}
 		for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 78d1c4ff565e8..4e9376da05182 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -1018,11 +1018,19 @@ int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct i40e_pf *pf = pci_get_drvdata(pdev);
 
-	if (num_vfs)
+	if (num_vfs) {
+		if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+			i40e_do_reset_safe(pf,
+					   BIT_ULL(__I40E_PF_RESET_REQUESTED));
+		}
 		return i40e_pci_sriov_enable(pdev, num_vfs);
+	}
 
 	if (!pci_vfs_assigned(pf->pdev)) {
 		i40e_free_vfs(pf);
+		pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+		i40e_do_reset_safe(pf, BIT_ULL(__I40E_PF_RESET_REQUESTED));
 	} else {
 		dev_warn(&pdev->dev, "Unable to free VFs because some are assigned to VMs.\n");
 		return -EINVAL;

From fa11cb3d16a9b9b296a2b811a49faf1356240348 Mon Sep 17 00:00:00 2001
From: Anjali Singhai Jain <anjali.singhai@intel.com>
Date: Wed, 27 May 2015 12:06:14 -0400
Subject: [PATCH 07/19] i40e: Make sure to be in VEB mode if SRIOV is enabled
 at probe

If SRIOV is enabled we need to be in VEB mode not VEPA mode at probe.
This fixes an NPAR bug when SRIOV is enabled in the BIOS.

Change-ID: Ibf006abafd9a0ca3698ec24848cd771cf345cbbc
Signed-off-by: Anjali Singhai Jain <anjali.singhai@intel.com>
Tested-by: Jim Young <james.m.young@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 853eb2f7e558d..5b5bea159bd53 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -8756,9 +8756,14 @@ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
 					 __func__);
 				return NULL;
 			}
-			/* We come up by default in VEPA mode */
-			veb->bridge_mode = BRIDGE_MODE_VEPA;
-			pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			/* We come up by default in VEPA mode if SRIOV is not
+			 * already enabled, in which case we can't force VEPA
+			 * mode.
+			 */
+			if (!(pf->flags & I40E_FLAG_VEB_MODE_ENABLED)) {
+				veb->bridge_mode = BRIDGE_MODE_VEPA;
+				pf->flags &= ~I40E_FLAG_VEB_MODE_ENABLED;
+			}
 			i40e_config_bridge_mode(veb);
 		}
 		for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
@@ -9869,6 +9874,15 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_switch_setup;
 	}
 
+#ifdef CONFIG_PCI_IOV
+	/* prep for VF support */
+	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+	    (pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+	    !test_bit(__I40E_BAD_EEPROM, &pf->state)) {
+		if (pci_num_vf(pdev))
+			pf->flags |= I40E_FLAG_VEB_MODE_ENABLED;
+	}
+#endif
 	err = i40e_setup_pf_switch(pf, false);
 	if (err) {
 		dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);

From 6d7954130c8d7100b7aba3c986fc4eefedf1a1ad Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sat, 6 Jun 2015 22:07:23 +0200
Subject: [PATCH 08/19] rhashtable: add missing import <linux/export.h>

rhashtable uses EXPORT_SYMBOL_GPL() without importing linux/export.h
directly it is only imported indirectly through some other includes.

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/rhashtable.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 4396434e47153..8609378e65051 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -26,6 +26,7 @@
 #include <linux/random.h>
 #include <linux/rhashtable.h>
 #include <linux/err.h>
+#include <linux/export.h>
 
 #define HASH_DEFAULT_SIZE	64UL
 #define HASH_MIN_SIZE		4U

From 078b29d7e92e4254b6de16097d0369dde17efe21 Mon Sep 17 00:00:00 2001
From: "Lendacky, Thomas" <Thomas.Lendacky@amd.com>
Date: Fri, 5 Jun 2015 16:02:26 -0500
Subject: [PATCH 09/19] amd-xgbe: Use disable_irq_nosync from within timer
 function

Since the Tx timer function runs in softirq context the driver needs
to call disable_irq_nosync instead of a disable_irq.

Reported-by: Josh Stone <jistone@redhat.com>
Signed-off-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index db84ddcfec846..9fd6c69a8bac3 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -423,7 +423,7 @@ static void xgbe_tx_timer(unsigned long data)
 	if (napi_schedule_prep(napi)) {
 		/* Disable Tx and Rx interrupts */
 		if (pdata->per_channel_irq)
-			disable_irq(channel->dma_irq);
+			disable_irq_nosync(channel->dma_irq);
 		else
 			xgbe_disable_rx_tx_ints(pdata);
 

From 1d7c49037b12016e7056b9f2c990380e2187e766 Mon Sep 17 00:00:00 2001
From: Wilson Kok <wkok@cumulusnetworks.com>
Date: Fri, 5 Jun 2015 00:52:57 -0700
Subject: [PATCH 10/19] bridge: use _bh spinlock variant for br_fdb_update to
 avoid lockup

br_fdb_update() can be called in process context in the following way:
br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set)
so we need to use spin_lock_bh because there are softirq users of the
hash_lock. One easy way to reproduce this is to modify the bridge utility
to set NTF_USE, enable stp and then set maxageing to a low value so
br_fdb_cleanup() is called frequently and then just add new entries in
a loop. This happens because br_fdb_cleanup() is called from timer/softirq
context. These locks were _bh before commit f8ae737deea1
("[BRIDGE]: forwarding remove unneeded preempt and bh diasables")
and at the time that commit was correct because br_fdb_update() couldn't be
called from process context, but that changed after commit:
292d1398983f ("bridge: add NTF_USE support")

Signed-off-by: Wilson Kok <wkok@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Fixes: 292d1398983f ("bridge: add NTF_USE support")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f97..7eacc8ae97799 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
 	} else {
-		spin_lock(&br->hash_lock);
+		spin_lock_bh(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
 			if (fdb) {
@@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* else  we lose race and someone else inserts
 		 * it first, don't bother updating
 		 */
-		spin_unlock(&br->hash_lock);
+		spin_unlock_bh(&br->hash_lock);
 	}
 }
 

From e51000db4c880165eab06ec0990605f24e75203f Mon Sep 17 00:00:00 2001
From: Sriharsha Basavapatna <sriharsha.basavapatna@avagotech.com>
Date: Fri, 5 Jun 2015 15:33:59 +0530
Subject: [PATCH 11/19] be2net: Replace dma/pci_alloc_coherent() calls with
 dma_zalloc_coherent()

There are several places in the driver (all in control paths) where
coherent dma memory is being allocated using either dma_alloc_coherent()
or the deprecated pci_alloc_consistent(). All these calls should be
changed to use dma_zalloc_coherent() to avoid uninitialized fields in
data structures backed by this memory.

Reported-by: Joerg Roedel <jroedel@suse.de>
Tested-by: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@avagotech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/emulex/benet/be_cmds.c   | 87 +++++++++++--------
 .../net/ethernet/emulex/benet/be_ethtool.c    | 18 ++--
 drivers/net/ethernet/emulex/benet/be_main.c   | 15 ++--
 3 files changed, 67 insertions(+), 53 deletions(-)

diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index fb140faeafb1c..c5e1d0ac75f90 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -1720,9 +1720,9 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf)
 	total_size = buf_len;
 
 	get_fat_cmd.size = sizeof(struct be_cmd_req_get_fat) + 60*1024;
-	get_fat_cmd.va = pci_alloc_consistent(adapter->pdev,
-					      get_fat_cmd.size,
-					      &get_fat_cmd.dma);
+	get_fat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					     get_fat_cmd.size,
+					     &get_fat_cmd.dma, GFP_ATOMIC);
 	if (!get_fat_cmd.va) {
 		dev_err(&adapter->pdev->dev,
 			"Memory allocation failure while reading FAT data\n");
@@ -1767,8 +1767,8 @@ int be_cmd_get_regs(struct be_adapter *adapter, u32 buf_len, void *buf)
 		log_offset += buf_size;
 	}
 err:
-	pci_free_consistent(adapter->pdev, get_fat_cmd.size,
-			    get_fat_cmd.va, get_fat_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size,
+			  get_fat_cmd.va, get_fat_cmd.dma);
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
 }
@@ -2215,12 +2215,12 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
 		return -EINVAL;
 
 	cmd.size = sizeof(struct be_cmd_resp_port_type);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failed\n");
 		return -ENOMEM;
 	}
-	memset(cmd.va, 0, cmd.size);
 
 	spin_lock_bh(&adapter->mcc_lock);
 
@@ -2245,7 +2245,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
 	}
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
-	pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 	return status;
 }
 
@@ -2720,7 +2720,8 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
 		goto err;
 	}
 	cmd.size = sizeof(struct be_cmd_req_get_phy_info);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -2754,7 +2755,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter)
 				BE_SUPPORTED_SPEED_1GBPS;
 		}
 	}
-	pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma);
 err:
 	spin_unlock_bh(&adapter->mcc_lock);
 	return status;
@@ -2805,8 +2806,9 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter)
 
 	memset(&attribs_cmd, 0, sizeof(struct be_dma_mem));
 	attribs_cmd.size = sizeof(struct be_cmd_resp_cntl_attribs);
-	attribs_cmd.va = pci_alloc_consistent(adapter->pdev, attribs_cmd.size,
-					      &attribs_cmd.dma);
+	attribs_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					     attribs_cmd.size,
+					     &attribs_cmd.dma, GFP_ATOMIC);
 	if (!attribs_cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
@@ -2833,8 +2835,8 @@ int be_cmd_get_cntl_attributes(struct be_adapter *adapter)
 err:
 	mutex_unlock(&adapter->mbox_lock);
 	if (attribs_cmd.va)
-		pci_free_consistent(adapter->pdev, attribs_cmd.size,
-				    attribs_cmd.va, attribs_cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, attribs_cmd.size,
+				  attribs_cmd.va, attribs_cmd.dma);
 	return status;
 }
 
@@ -2972,9 +2974,10 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
 
 	memset(&get_mac_list_cmd, 0, sizeof(struct be_dma_mem));
 	get_mac_list_cmd.size = sizeof(struct be_cmd_resp_get_mac_list);
-	get_mac_list_cmd.va = pci_alloc_consistent(adapter->pdev,
-						   get_mac_list_cmd.size,
-						   &get_mac_list_cmd.dma);
+	get_mac_list_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+						  get_mac_list_cmd.size,
+						  &get_mac_list_cmd.dma,
+						  GFP_ATOMIC);
 
 	if (!get_mac_list_cmd.va) {
 		dev_err(&adapter->pdev->dev,
@@ -3047,8 +3050,8 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac,
 
 out:
 	spin_unlock_bh(&adapter->mcc_lock);
-	pci_free_consistent(adapter->pdev, get_mac_list_cmd.size,
-			    get_mac_list_cmd.va, get_mac_list_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size,
+			  get_mac_list_cmd.va, get_mac_list_cmd.dma);
 	return status;
 }
 
@@ -3101,8 +3104,8 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array,
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_mac_list);
-	cmd.va = dma_alloc_coherent(&adapter->pdev->dev, cmd.size,
-				    &cmd.dma, GFP_KERNEL);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_KERNEL);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -3291,7 +3294,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter)
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_acpi_wol_magic_config_v1);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory allocation failure\n");
 		status = -ENOMEM;
@@ -3326,7 +3330,8 @@ int be_cmd_get_acpi_wol_cap(struct be_adapter *adapter)
 err:
 	mutex_unlock(&adapter->mbox_lock);
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 
 }
@@ -3340,8 +3345,9 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level)
 
 	memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
 	extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-	extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size,
-					     &extfat_cmd.dma);
+	extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    extfat_cmd.size, &extfat_cmd.dma,
+					    GFP_ATOMIC);
 	if (!extfat_cmd.va)
 		return -ENOMEM;
 
@@ -3363,8 +3369,8 @@ int be_cmd_set_fw_log_level(struct be_adapter *adapter, u32 level)
 
 	status = be_cmd_set_ext_fat_capabilites(adapter, &extfat_cmd, cfgs);
 err:
-	pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va,
-			    extfat_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va,
+			  extfat_cmd.dma);
 	return status;
 }
 
@@ -3377,8 +3383,9 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter)
 
 	memset(&extfat_cmd, 0, sizeof(struct be_dma_mem));
 	extfat_cmd.size = sizeof(struct be_cmd_resp_get_ext_fat_caps);
-	extfat_cmd.va = pci_alloc_consistent(adapter->pdev, extfat_cmd.size,
-					     &extfat_cmd.dma);
+	extfat_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    extfat_cmd.size, &extfat_cmd.dma,
+					    GFP_ATOMIC);
 
 	if (!extfat_cmd.va) {
 		dev_err(&adapter->pdev->dev, "%s: Memory allocation failure\n",
@@ -3396,8 +3403,8 @@ int be_cmd_get_fw_log_level(struct be_adapter *adapter)
 				level = cfgs->module[0].trace_lvl[j].dbg_lvl;
 		}
 	}
-	pci_free_consistent(adapter->pdev, extfat_cmd.size, extfat_cmd.va,
-			    extfat_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, extfat_cmd.size, extfat_cmd.va,
+			  extfat_cmd.dma);
 err:
 	return level;
 }
@@ -3595,7 +3602,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_func_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va) {
 		dev_err(&adapter->pdev->dev, "Memory alloc failure\n");
 		status = -ENOMEM;
@@ -3635,7 +3643,8 @@ int be_cmd_get_func_config(struct be_adapter *adapter, struct be_resources *res)
 err:
 	mutex_unlock(&adapter->mbox_lock);
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 }
 
@@ -3656,7 +3665,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_resp_get_profile_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -3702,7 +3712,8 @@ int be_cmd_get_profile_config(struct be_adapter *adapter,
 		res->vf_if_cap_flags = vf_res->cap_flags;
 err:
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 }
 
@@ -3717,7 +3728,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc,
 
 	memset(&cmd, 0, sizeof(struct be_dma_mem));
 	cmd.size = sizeof(struct be_cmd_req_set_profile_config);
-	cmd.va = pci_alloc_consistent(adapter->pdev, cmd.size, &cmd.dma);
+	cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, cmd.size, &cmd.dma,
+				     GFP_ATOMIC);
 	if (!cmd.va)
 		return -ENOMEM;
 
@@ -3733,7 +3745,8 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc,
 	status = be_cmd_notify_wait(adapter, &wrb);
 
 	if (cmd.va)
-		pci_free_consistent(adapter->pdev, cmd.size, cmd.va, cmd.dma);
+		dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va,
+				  cmd.dma);
 	return status;
 }
 
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index b765c24625bf5..2835dee5dc393 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -264,8 +264,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
 	int status = 0;
 
 	read_cmd.size = LANCER_READ_FILE_CHUNK;
-	read_cmd.va = pci_alloc_consistent(adapter->pdev, read_cmd.size,
-					   &read_cmd.dma);
+	read_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev, read_cmd.size,
+					  &read_cmd.dma, GFP_ATOMIC);
 
 	if (!read_cmd.va) {
 		dev_err(&adapter->pdev->dev,
@@ -289,8 +289,8 @@ static int lancer_cmd_read_file(struct be_adapter *adapter, u8 *file_name,
 			break;
 		}
 	}
-	pci_free_consistent(adapter->pdev, read_cmd.size, read_cmd.va,
-			    read_cmd.dma);
+	dma_free_coherent(&adapter->pdev->dev, read_cmd.size, read_cmd.va,
+			  read_cmd.dma);
 
 	return status;
 }
@@ -818,8 +818,9 @@ static int be_test_ddr_dma(struct be_adapter *adapter)
 	};
 
 	ddrdma_cmd.size = sizeof(struct be_cmd_req_ddrdma_test);
-	ddrdma_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, ddrdma_cmd.size,
-					   &ddrdma_cmd.dma, GFP_KERNEL);
+	ddrdma_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    ddrdma_cmd.size, &ddrdma_cmd.dma,
+					    GFP_KERNEL);
 	if (!ddrdma_cmd.va)
 		return -ENOMEM;
 
@@ -941,8 +942,9 @@ static int be_read_eeprom(struct net_device *netdev,
 
 	memset(&eeprom_cmd, 0, sizeof(struct be_dma_mem));
 	eeprom_cmd.size = sizeof(struct be_cmd_req_seeprom_read);
-	eeprom_cmd.va = dma_alloc_coherent(&adapter->pdev->dev, eeprom_cmd.size,
-					   &eeprom_cmd.dma, GFP_KERNEL);
+	eeprom_cmd.va = dma_zalloc_coherent(&adapter->pdev->dev,
+					    eeprom_cmd.size, &eeprom_cmd.dma,
+					    GFP_KERNEL);
 
 	if (!eeprom_cmd.va)
 		return -ENOMEM;
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 6f9ffb9026cd5..e43cc8a73ea7e 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -4605,8 +4605,8 @@ static int lancer_fw_download(struct be_adapter *adapter,
 
 	flash_cmd.size = sizeof(struct lancer_cmd_req_write_object)
 				+ LANCER_FW_DOWNLOAD_CHUNK;
-	flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size,
-					  &flash_cmd.dma, GFP_KERNEL);
+	flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size,
+					   &flash_cmd.dma, GFP_KERNEL);
 	if (!flash_cmd.va)
 		return -ENOMEM;
 
@@ -4739,8 +4739,8 @@ static int be_fw_download(struct be_adapter *adapter, const struct firmware* fw)
 	}
 
 	flash_cmd.size = sizeof(struct be_cmd_write_flashrom);
-	flash_cmd.va = dma_alloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
-					  GFP_KERNEL);
+	flash_cmd.va = dma_zalloc_coherent(dev, flash_cmd.size, &flash_cmd.dma,
+					   GFP_KERNEL);
 	if (!flash_cmd.va)
 		return -ENOMEM;
 
@@ -5291,16 +5291,15 @@ static int be_drv_init(struct be_adapter *adapter)
 	int status = 0;
 
 	mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
-	mbox_mem_alloc->va = dma_alloc_coherent(dev, mbox_mem_alloc->size,
-						&mbox_mem_alloc->dma,
-						GFP_KERNEL);
+	mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
+						 &mbox_mem_alloc->dma,
+						 GFP_KERNEL);
 	if (!mbox_mem_alloc->va)
 		return -ENOMEM;
 
 	mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
 	mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
 	mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
-	memset(mbox_mem_align->va, 0, sizeof(struct be_mcc_mailbox));
 
 	rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
 	rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,

From 25cc8f0763c972911b1a65099cd10d9f8a45a7b0 Mon Sep 17 00:00:00 2001
From: Robert Shearman <rshearma@brocade.com>
Date: Fri, 5 Jun 2015 18:54:45 +0100
Subject: [PATCH 12/19] mpls: fix possible use after free of device

The mpls device is used in an RCU read context without a lock being
held. As the memory is freed without waiting for the RCU grace period
to elapse, the freed memory could still be in use.

Address this by using kfree_rcu to free the memory for the mpls device
after the RCU grace period has elapsed.

Fixes: 03c57747a702 ("mpls: Per-device MPLS state")
Signed-off-by: Robert Shearman <rshearma@brocade.com>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c  | 2 +-
 net/mpls/internal.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7b3f732269e43..bff427f319247 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -541,7 +541,7 @@ static void mpls_ifdown(struct net_device *dev)
 
 	RCU_INIT_POINTER(dev->mpls_ptr, NULL);
 
-	kfree(mdev);
+	kfree_rcu(mdev, rcu);
 }
 
 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index b064c345042c1..8cabeb5a1cb92 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -16,6 +16,7 @@ struct mpls_dev {
 	int			input_enabled;
 
 	struct ctl_table_header *sysctl;
+	struct rcu_head		rcu;
 };
 
 struct sk_buff;

From 7ff46e79fb7df5b09c46c36857929fdf039f8b31 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 7 Jun 2015 19:43:47 -0700
Subject: [PATCH 13/19] Revert "bridge: use _bh spinlock variant for
 br_fdb_update to avoid lockup"

This reverts commit 1d7c49037b12016e7056b9f2c990380e2187e766.

Nikolay Aleksandrov has a better version of this fix.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 7eacc8ae97799..e0670d7054f97 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -569,7 +569,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 				fdb_notify(br, fdb, RTM_NEWNEIGH);
 		}
 	} else {
-		spin_lock_bh(&br->hash_lock);
+		spin_lock(&br->hash_lock);
 		if (likely(!fdb_find(head, addr, vid))) {
 			fdb = fdb_create(head, source, addr, vid);
 			if (fdb) {
@@ -581,7 +581,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* else  we lose race and someone else inserts
 		 * it first, don't bother updating
 		 */
-		spin_unlock_bh(&br->hash_lock);
+		spin_unlock(&br->hash_lock);
 	}
 }
 

From c4c832f89dc468cf11dc0dd17206bace44526651 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Sat, 6 Jun 2015 06:49:00 -0700
Subject: [PATCH 14/19] bridge: disable softirqs around br_fdb_update to avoid
 lockup

br_fdb_update() can be called in process context in the following way:
br_fdb_add() -> __br_fdb_add() -> br_fdb_update() (if NTF_USE flag is set)
so we need to disable softirqs because there are softirq users of the
hash_lock. One easy way to reproduce this is to modify the bridge utility
to set NTF_USE, enable stp and then set maxageing to a low value so
br_fdb_cleanup() is called frequently and then just add new entries in
a loop. This happens because br_fdb_cleanup() is called from timer/softirq
context. The spin locks in br_fdb_update were _bh before commit f8ae737deea1
("[BRIDGE]: forwarding remove unneeded preempt and bh diasables")
and at the time that commit was correct because br_fdb_update() couldn't be
called from process context, but that changed after commit:
292d1398983f ("bridge: add NTF_USE support")
Using local_bh_disable/enable around br_fdb_update() allows us to keep
using the spin_lock/unlock in br_fdb_update for the fast-path.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Fixes: 292d1398983f ("bridge: add NTF_USE support")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_fdb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index e0670d7054f97..659fb96672e41 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -796,9 +796,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
 	int err = 0;
 
 	if (ndm->ndm_flags & NTF_USE) {
+		local_bh_disable();
 		rcu_read_lock();
 		br_fdb_update(p->br, p, addr, vid, true);
 		rcu_read_unlock();
+		local_bh_enable();
 	} else {
 		spin_lock_bh(&p->br->hash_lock);
 		err = fdb_add_entry(p, addr, ndm->ndm_state,

From 1489bdeeae1a47171926e255956c9fc251db13a0 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sun, 7 Jun 2015 14:11:48 +0200
Subject: [PATCH 15/19] b44: call netif_napi_del()

When the driver gets unregistered a call to netif_napi_del() was
missing, this all was also missing in the error paths of
b44_init_one().

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/b44.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 77363d6805321..a3b1c07ae0af0 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -2464,6 +2464,7 @@ static int b44_init_one(struct ssb_device *sdev,
 	ssb_bus_may_powerdown(sdev->bus);
 
 err_out_free_dev:
+	netif_napi_del(&bp->napi);
 	free_netdev(dev);
 
 out:
@@ -2480,6 +2481,7 @@ static void b44_remove_one(struct ssb_device *sdev)
 		b44_unregister_phy_one(bp);
 	ssb_device_disable(sdev, 0);
 	ssb_bus_may_powerdown(sdev->bus);
+	netif_napi_del(&bp->napi);
 	free_netdev(dev);
 	ssb_pcihost_set_power_state(sdev, PCI_D3hot);
 	ssb_set_drvdata(sdev, NULL);

From 27e41fcfa6b326ad44eee7e0b1930d080b270895 Mon Sep 17 00:00:00 2001
From: Robert Shearman <rshearma@brocade.com>
Date: Fri, 5 Jun 2015 18:51:54 +0100
Subject: [PATCH 16/19] ipv6: fix possible use after free of dev stats

The memory pointed to by idev->stats.icmpv6msgdev,
idev->stats.icmpv6dev and idev->stats.ipv6 can each be used in an RCU
read context without taking a reference on idev. For example, through
IP6_*_STATS_* calls in ip6_rcv. These memory blocks are freed without
waiting for an RCU grace period to elapse. This could lead to the
memory being written to after it has been freed.

Fix this by using call_rcu to free the memory used for stats, as well
as idev after an RCU grace period has elapsed.

Signed-off-by: Robert Shearman <rshearma@brocade.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf_core.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index d873ceea86e6c..ca09bf49ac680 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev)
 	free_percpu(idev->stats.ipv6);
 }
 
+static void in6_dev_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu);
+
+	snmp6_free_dev(idev);
+	kfree(idev);
+}
+
 /* Nobody refers to this device, we may destroy it. */
 
 void in6_dev_finish_destroy(struct inet6_dev *idev)
@@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 		pr_warn("Freeing alive inet6 device %p\n", idev);
 		return;
 	}
-	snmp6_free_dev(idev);
-	kfree_rcu(idev, rcu);
+	call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu);
 }
 EXPORT_SYMBOL(in6_dev_finish_destroy);

From 0243508edd317ff1fa63b495643a7c192fbfcd92 Mon Sep 17 00:00:00 2001
From: Josh Hunt <johunt@akamai.com>
Date: Mon, 8 Jun 2015 12:00:59 -0400
Subject: [PATCH 17/19] ipv6: Fix protocol resubmission

UDP encapsulation is broken on IPv6. This is because the logic to resubmit
the nexthdr is inverted, checking for a ret value > 0 instead of < 0. Also,
the resubmit label is in the wrong position since we already get the
nexthdr value when performing decapsulation. In addition the skb pull is no
longer necessary either.

This changes the return value check to look for < 0, using it for the
nexthdr on the next iteration, and moves the resubmit label to the proper
location.

With these changes the v6 code now matches what we do in the v4 ip input
code wrt resubmitting when decapsulating.

Signed-off-by: Josh Hunt <johunt@akamai.com>
Acked-by: "Tom Herbert" <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_input.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index f2e464eba5efd..41a73da371a92 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -212,13 +212,13 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
 	 */
 
 	rcu_read_lock();
-resubmit:
 	idev = ip6_dst_idev(skb_dst(skb));
 	if (!pskb_pull(skb, skb_transport_offset(skb)))
 		goto discard;
 	nhoff = IP6CB(skb)->nhoff;
 	nexthdr = skb_network_header(skb)[nhoff];
 
+resubmit:
 	raw = raw6_local_deliver(skb, nexthdr);
 	ipprot = rcu_dereference(inet6_protos[nexthdr]);
 	if (ipprot) {
@@ -246,10 +246,12 @@ static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
 			goto discard;
 
 		ret = ipprot->handler(skb);
-		if (ret > 0)
+		if (ret < 0) {
+			nexthdr = -ret;
 			goto resubmit;
-		else if (ret == 0)
+		} else if (ret == 0) {
 			IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS);
+		}
 	} else {
 		if (!raw) {
 			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {

From afe3f907d20f39c0eaf81f2baec247ba672f34a9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 8 Jun 2015 10:47:57 -0700
Subject: [PATCH 18/19] net: bcmgenet: power on MII block for all MII modes

The RGMII block is currently only powered on when using RGMII or
RGMII_NO_ID, which is not correct when using the GENET interface in MII
or Reverse MII modes. We always need to power on the RGMII interface for
this block to properly work, regardless of the MII mode in which we
operate.

Fixes: aa09677cba423 ("net: bcmgenet: add MDIO routines")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmmii.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index e7651b3c6c576..420949cc55aab 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -299,9 +299,6 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
 			phy_name = "external RGMII (no delay)";
 		else
 			phy_name = "external RGMII (TX delay)";
-		reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
-		reg |= RGMII_MODE_EN | id_mode_dis;
-		bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
 		bcmgenet_sys_writel(priv,
 				    PORT_MODE_EXT_GPHY, SYS_PORT_CTRL);
 		break;
@@ -310,6 +307,15 @@ int bcmgenet_mii_config(struct net_device *dev, bool init)
 		return -EINVAL;
 	}
 
+	/* This is an external PHY (xMII), so we need to enable the RGMII
+	 * block for the interface to work
+	 */
+	if (priv->ext_phy) {
+		reg = bcmgenet_ext_readl(priv, EXT_RGMII_OOB_CTRL);
+		reg |= RGMII_MODE_EN | id_mode_dis;
+		bcmgenet_ext_writel(priv, reg, EXT_RGMII_OOB_CTRL);
+	}
+
 	if (init)
 		dev_info(kdev, "configuring instance for %s\n", phy_name);
 

From bbbf2df0039d31c6a0a9708ce4fe220a54bd5379 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 8 Jun 2015 11:53:08 -0400
Subject: [PATCH 19/19] net: replace last open coded skb_orphan_frags with
 function call

Commit 70008aa50e92 ("skbuff: convert to skb_orphan_frags") replaced
open coded tests of SKBTX_DEV_ZEROCOPY and skb_copy_ubufs with calls
to helper function skb_orphan_frags. Apply that to the last remaining
open coded site.

Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 2c1c67fad64d5..aa82f9ab6a36d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1718,15 +1718,8 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
 
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 {
-	if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) {
-		if (skb_copy_ubufs(skb, GFP_ATOMIC)) {
-			atomic_long_inc(&dev->rx_dropped);
-			kfree_skb(skb);
-			return NET_RX_DROP;
-		}
-	}
-
-	if (unlikely(!is_skb_forwardable(dev, skb))) {
+	if (skb_orphan_frags(skb, GFP_ATOMIC) ||
+	    unlikely(!is_skb_forwardable(dev, skb))) {
 		atomic_long_inc(&dev->rx_dropped);
 		kfree_skb(skb);
 		return NET_RX_DROP;