From 7585db5b763975f7bfb00215fef1663df795c524 Mon Sep 17 00:00:00 2001
From: "H. Nikolaus Schaller" <hns@goldelico.com>
Date: Wed, 28 Oct 2015 19:00:26 +0100
Subject: [PATCH 001/201] ARM: 8449/1: fix bug in vdsomunge swab32 macro

commit 38850d786a799c3ff2de0dc1980902c3263698dc upstream.

Commit 8a603f91cc48 ("ARM: 8445/1: fix vdsomunge not to depend on
glibc specific byteswap.h") unfortunately introduced a bug created but
not found during discussion and patch simplification.

Reported-by: Efraim Yawitz <efraim.yawitz@gmail.com>
Signed-off-by: H. Nikolaus Schaller <hns@goldelico.com>
Fixes: 8a603f91cc48 ("ARM: 8445/1: fix vdsomunge not to depend on glibc specific byteswap.h")
Signed-off-by: Nathan Lynch <nathan_lynch@mentor.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/vdso/vdsomunge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c
index 0cebd98cd88c3..f6455273b2f87 100644
--- a/arch/arm/vdso/vdsomunge.c
+++ b/arch/arm/vdso/vdsomunge.c
@@ -66,7 +66,7 @@
 	((((x) & 0x000000ff) << 24) | \
 	 (((x) & 0x0000ff00) <<  8) | \
 	 (((x) & 0x00ff0000) >>  8) | \
-	 (((x) & 0xff000000) << 24))
+	 (((x) & 0xff000000) >> 24))
 
 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define HOST_ORDER ELFDATA2LSB

From 0bbb0285d4a496282e11b8619ee10f449e90f39b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Thu, 22 Oct 2015 14:24:24 +0200
Subject: [PATCH 002/201] USB: qcserial: add Sierra Wireless MC74xx/EM74xx
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit f504ab1888026d15b5be8f9c262bf4ae9cacd177 upstream.

New device IDs shamelessly lifted from the vendor driver.

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/qcserial.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index ebcec8cda8584..f49d262e926ba 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -153,6 +153,8 @@ static const struct usb_device_id id_table[] = {
 	{DEVICE_SWI(0x1199, 0x9056)},	/* Sierra Wireless Modem */
 	{DEVICE_SWI(0x1199, 0x9060)},	/* Sierra Wireless Modem */
 	{DEVICE_SWI(0x1199, 0x9061)},	/* Sierra Wireless Modem */
+	{DEVICE_SWI(0x1199, 0x9070)},	/* Sierra Wireless MC74xx/EM74xx */
+	{DEVICE_SWI(0x1199, 0x9071)},	/* Sierra Wireless MC74xx/EM74xx */
 	{DEVICE_SWI(0x413c, 0x81a2)},	/* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a3)},	/* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */
 	{DEVICE_SWI(0x413c, 0x81a4)},	/* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */

From 1f9ba7bc8b7ede093b8083cbd4f9bf1ddc36f324 Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Wed, 28 Oct 2015 13:09:53 -0400
Subject: [PATCH 003/201] tipc: linearize arriving NAME_DISTR and LINK_PROTO
 buffers

[ Upstream commit 5cbb28a4bf65c7e4daa6c25b651fed8eb888c620 ]

Testing of the new UDP bearer has revealed that reception of
NAME_DISTRIBUTOR, LINK_PROTOCOL/RESET and LINK_PROTOCOL/ACTIVATE
message buffers is not prepared for the case that those may be
non-linear.

We now linearize all such buffers before they are delivered up to the
generic reception layer.

In order for the commit to apply cleanly to 'net' and 'stable', we do
the change in the function tipc_udp_recv() for now. Later, we will post
a commit to 'net-next' moving the linearization to generic code, in
tipc_named_rcv() and tipc_link_proto_rcv().

Fixes: commit d0f91938bede ("tipc: add ip/udp media type")
Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/tipc/udp_media.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index 6e648d90297a9..cd7c5f131e722 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -48,6 +48,7 @@
 #include <linux/tipc_netlink.h>
 #include "core.h"
 #include "bearer.h"
+#include "msg.h"
 
 /* IANA assigned UDP port */
 #define UDP_PORT_DEFAULT	6118
@@ -222,6 +223,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
 {
 	struct udp_bearer *ub;
 	struct tipc_bearer *b;
+	int usr = msg_user(buf_msg(skb));
+
+	if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR))
+		skb_linearize(skb);
 
 	ub = rcu_dereference_sk_user_data(sk);
 	if (!ub) {

From cfb2f132a8fad8c3b81f023d3259c3964a22c471 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 29 Oct 2015 18:11:35 -0700
Subject: [PATCH 004/201] net: bcmgenet: Software reset EPHY after power on

[ Upstream commit 5dbebbb44a6ad94aab2cd1a46f7676f255403f64 ]

The EPHY on GENET v1->v3 is extremely finicky, and will show occasional
failures based on the timing and reset sequence, ranging from duplicate
packets, to extremely high latencies.

Perform an additional software reset, and re-configuration to make sure it is
in a consistent and working state.

Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c |  4 +++-
 drivers/net/ethernet/broadcom/genet/bcmgenet.h |  1 +
 drivers/net/ethernet/broadcom/genet/bcmmii.c   | 18 ++++++++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 1805541b4240e..5e3cd76cb69bd 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -907,8 +907,10 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv,
 	}
 
 	bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
-	if (mode == GENET_POWER_PASSIVE)
+	if (mode == GENET_POWER_PASSIVE) {
 		bcmgenet_phy_power_set(priv->dev, true);
+		bcmgenet_mii_reset(priv->dev);
+	}
 }
 
 /* ioctl handle special commands that are not present in ethtool. */
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 7299d10754226..c739f7ebc9929 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -674,6 +674,7 @@ int bcmgenet_mii_init(struct net_device *dev);
 int bcmgenet_mii_config(struct net_device *dev);
 int bcmgenet_mii_probe(struct net_device *dev);
 void bcmgenet_mii_exit(struct net_device *dev);
+void bcmgenet_mii_reset(struct net_device *dev);
 void bcmgenet_phy_power_set(struct net_device *dev, bool enable);
 void bcmgenet_mii_setup(struct net_device *dev);
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index c8affad76f368..8bdfe53754ba8 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -163,6 +163,7 @@ void bcmgenet_mii_setup(struct net_device *dev)
 	phy_print_status(phydev);
 }
 
+
 static int bcmgenet_fixed_phy_link_update(struct net_device *dev,
 					  struct fixed_phy_status *status)
 {
@@ -172,6 +173,22 @@ static int bcmgenet_fixed_phy_link_update(struct net_device *dev,
 	return 0;
 }
 
+/* Perform a voluntary PHY software reset, since the EPHY is very finicky about
+ * not doing it and will start corrupting packets
+ */
+void bcmgenet_mii_reset(struct net_device *dev)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+
+	if (GENET_IS_V4(priv))
+		return;
+
+	if (priv->phydev) {
+		phy_init_hw(priv->phydev);
+		phy_start_aneg(priv->phydev);
+	}
+}
+
 void bcmgenet_phy_power_set(struct net_device *dev, bool enable)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -214,6 +231,7 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev)
 	reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
 	reg |= EXT_PWR_DN_EN_LD;
 	bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
+	bcmgenet_mii_reset(dev);
 }
 
 static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv)

From 795d2a6ddbe561cbac89bce6f54be39b72040655 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 30 Oct 2015 10:23:33 +0200
Subject: [PATCH 005/201] ipv4: fix to not remove local route on link down

[ Upstream commit 4f823defdd5b106a5e89745ee8b163c71855de1e ]

When fib_netdev_event calls fib_disable_ip on NETDEV_DOWN event
we should not delete the local routes if the local address
is still present. The confusion comes from the fact that both
fib_netdev_event and fib_inetaddr_event use the NETDEV_DOWN
constant. Fix it by returning back the variable 'force'.

Steps to reproduce:
modprobe dummy
ifconfig dummy0 192.168.168.1 up
ifconfig dummy0 down
ip route list table local | grep dummy | grep host
local 192.168.168.1 dev dummy0  proto kernel  scope host  src 192.168.168.1

Fixes: 8a3d03166f19 ("net: track link-status of ipv4 nexthops")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip_fib.h     |  2 +-
 net/ipv4/fib_frontend.c  | 13 +++++++------
 net/ipv4/fib_semantics.c | 11 ++++++++---
 3 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 727d6e9a96856..965fa5b1a274e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -317,7 +317,7 @@ void fib_flush_external(struct net *net);
 
 /* Exported by fib_semantics.c */
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
-int fib_sync_down_dev(struct net_device *dev, unsigned long event);
+int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force);
 int fib_sync_down_addr(struct net *net, __be32 local);
 int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 void fib_select_multipath(struct fib_result *res);
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 690bcbc59f26d..457b2cd75b85b 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1110,9 +1110,10 @@ static void nl_fib_lookup_exit(struct net *net)
 	net->ipv4.fibnl = NULL;
 }
 
-static void fib_disable_ip(struct net_device *dev, unsigned long event)
+static void fib_disable_ip(struct net_device *dev, unsigned long event,
+			   bool force)
 {
-	if (fib_sync_down_dev(dev, event))
+	if (fib_sync_down_dev(dev, event, force))
 		fib_flush(dev_net(dev));
 	rt_cache_flush(dev_net(dev));
 	arp_ifdown(dev);
@@ -1140,7 +1141,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			 * Disable IP.
 			 */
-			fib_disable_ip(dev, event);
+			fib_disable_ip(dev, event, true);
 		} else {
 			rt_cache_flush(dev_net(dev));
 		}
@@ -1157,7 +1158,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	unsigned int flags;
 
 	if (event == NETDEV_UNREGISTER) {
-		fib_disable_ip(dev, event);
+		fib_disable_ip(dev, event, true);
 		rt_flush_dev(dev);
 		return NOTIFY_DONE;
 	}
@@ -1178,14 +1179,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 		rt_cache_flush(net);
 		break;
 	case NETDEV_DOWN:
-		fib_disable_ip(dev, event);
+		fib_disable_ip(dev, event, false);
 		break;
 	case NETDEV_CHANGE:
 		flags = dev_get_flags(dev);
 		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
 			fib_sync_up(dev, RTNH_F_LINKDOWN);
 		else
-			fib_sync_down_dev(dev, event);
+			fib_sync_down_dev(dev, event, false);
 		/* fall through */
 	case NETDEV_CHANGEMTU:
 		rt_cache_flush(net);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 064bd3caaa4f1..2aa5b5e7da75a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1281,7 +1281,13 @@ int fib_sync_down_addr(struct net *net, __be32 local)
 	return ret;
 }
 
-int fib_sync_down_dev(struct net_device *dev, unsigned long event)
+/* Event              force Flags           Description
+ * NETDEV_CHANGE      0     LINKDOWN        Carrier OFF, not for scope host
+ * NETDEV_DOWN        0     LINKDOWN|DEAD   Link down, not for scope host
+ * NETDEV_DOWN        1     LINKDOWN|DEAD   Last address removed
+ * NETDEV_UNREGISTER  1     LINKDOWN|DEAD   Device removed
+ */
+int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force)
 {
 	int ret = 0;
 	int scope = RT_SCOPE_NOWHERE;
@@ -1290,8 +1296,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event)
 	struct hlist_head *head = &fib_info_devhash[hash];
 	struct fib_nh *nh;
 
-	if (event == NETDEV_UNREGISTER ||
-	    event == NETDEV_DOWN)
+	if (force)
 		scope = -1;
 
 	hlist_for_each_entry(nh, head, nh_hash) {

From 7b4d18bbbd1549def1317b9229e822379f09c921 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 30 Oct 2015 10:23:34 +0200
Subject: [PATCH 006/201] ipv4: update RTNH_F_LINKDOWN flag on UP event

[ Upstream commit c9b3292eeb52c6834e972eb5b8fe38914771ed12 ]

When nexthop is part of multipath route we should clear the
LINKDOWN flag when link goes UP or when first address is added.
This is needed because we always set LINKDOWN flag when DEAD flag
was set but now on UP the nexthop is not dead anymore. Examples when
LINKDOWN bit can be forgotten when no NETDEV_CHANGE is delivered:

- link goes down (LINKDOWN is set), then link goes UP and device
shows carrier OK but LINKDOWN remains set

- last address is deleted (LINKDOWN is set), then address is
added and device shows carrier OK but LINKDOWN remains set

Steps to reproduce:
modprobe dummy
ifconfig dummy0 192.168.168.1 up

here add a multipath route where one nexthop is for dummy0:

ip route add 1.2.3.4 nexthop dummy0 nexthop SOME_OTHER_DEVICE
ifconfig dummy0 down
ifconfig dummy0 up

now ip route shows nexthop that is not dead. Now set the sysctl var:

echo 1 > /proc/sys/net/ipv4/conf/dummy0/ignore_routes_with_linkdown

now ip route will show a dead nexthop because the forgotten
RTNH_F_LINKDOWN is propagated as RTNH_F_DEAD.

Fixes: 8a3d03166f19 ("net: track link-status of ipv4 nexthops")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_semantics.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2aa5b5e7da75a..e966f8599b4a1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1445,6 +1445,13 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
 	if (!(dev->flags & IFF_UP))
 		return 0;
 
+	if (nh_flags & RTNH_F_DEAD) {
+		unsigned int flags = dev_get_flags(dev);
+
+		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+			nh_flags |= RTNH_F_LINKDOWN;
+	}
+
 	prev_fi = NULL;
 	hash = fib_devindex_hashfn(dev->ifindex);
 	head = &fib_info_devhash[hash];

From b8f93a05cbad22868ec327683a915cc9eece584e Mon Sep 17 00:00:00 2001
From: Phil Reid <preid@electromag.com.au>
Date: Fri, 30 Oct 2015 16:43:55 +0800
Subject: [PATCH 007/201] stmmac: Correctly report PTP capabilities.

[ Upstream commit e6dbe1eb2db0d7a14991c06278dd3030c45fb825 ]

priv->hwts_*_en indicate if timestamping is enabled/disabled at run
time. But  priv->dma_cap.time_stamp  and priv->dma_cap.atime_stamp
indicates HW is support for PTPv1/PTPv2.

Signed-off-by: Phil Reid <preid@electromag.com.au>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index 771cda2a48b2a..2e51b816a7e81 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -721,10 +721,13 @@ static int stmmac_get_ts_info(struct net_device *dev,
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 
-	if ((priv->hwts_tx_en) && (priv->hwts_rx_en)) {
+	if ((priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) {
 
-		info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+		info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+					SOF_TIMESTAMPING_TX_HARDWARE |
+					SOF_TIMESTAMPING_RX_SOFTWARE |
 					SOF_TIMESTAMPING_RX_HARDWARE |
+					SOF_TIMESTAMPING_SOFTWARE |
 					SOF_TIMESTAMPING_RAW_HARDWARE;
 
 		if (priv->ptp_clock)

From e02e04ff6036fa7afe7e59cddcc86c598002bc76 Mon Sep 17 00:00:00 2001
From: Ani Sinha <ani@arista.com>
Date: Fri, 30 Oct 2015 16:54:31 -0700
Subject: [PATCH 008/201] ipmr: fix possible race resulting from improper usage
 of IP_INC_STATS_BH() in preemptible context.

[ Upstream commit 44f49dd8b5a606870a1f21101522a0f9c4414784 ]

Fixes the following kernel BUG :

BUG: using __this_cpu_add() in preemptible [00000000] code: bash/2758
caller is __this_cpu_preempt_check+0x13/0x15
CPU: 0 PID: 2758 Comm: bash Tainted: P           O   3.18.19 #2
 ffffffff8170eaca ffff880110d1b788 ffffffff81482b2a 0000000000000000
 0000000000000000 ffff880110d1b7b8 ffffffff812010ae ffff880007cab800
 ffff88001a060800 ffff88013a899108 ffff880108b84240 ffff880110d1b7c8
Call Trace:
[<ffffffff81482b2a>] dump_stack+0x52/0x80
[<ffffffff812010ae>] check_preemption_disabled+0xce/0xe1
[<ffffffff812010d4>] __this_cpu_preempt_check+0x13/0x15
[<ffffffff81419d60>] ipmr_queue_xmit+0x647/0x70c
[<ffffffff8141a154>] ip_mr_forward+0x32f/0x34e
[<ffffffff8141af76>] ip_mroute_setsockopt+0xe03/0x108c
[<ffffffff810553fc>] ? get_parent_ip+0x11/0x42
[<ffffffff810e6974>] ? pollwake+0x4d/0x51
[<ffffffff81058ac0>] ? default_wake_function+0x0/0xf
[<ffffffff810553fc>] ? get_parent_ip+0x11/0x42
[<ffffffff810613d9>] ? __wake_up_common+0x45/0x77
[<ffffffff81486ea9>] ? _raw_spin_unlock_irqrestore+0x1d/0x32
[<ffffffff810618bc>] ? __wake_up_sync_key+0x4a/0x53
[<ffffffff8139a519>] ? sock_def_readable+0x71/0x75
[<ffffffff813dd226>] do_ip_setsockopt+0x9d/0xb55
[<ffffffff81429818>] ? unix_seqpacket_sendmsg+0x3f/0x41
[<ffffffff813963fe>] ? sock_sendmsg+0x6d/0x86
[<ffffffff813959d4>] ? sockfd_lookup_light+0x12/0x5d
[<ffffffff8139650a>] ? SyS_sendto+0xf3/0x11b
[<ffffffff810d5738>] ? new_sync_read+0x82/0xaa
[<ffffffff813ddd19>] compat_ip_setsockopt+0x3b/0x99
[<ffffffff813fb24a>] compat_raw_setsockopt+0x11/0x32
[<ffffffff81399052>] compat_sock_common_setsockopt+0x18/0x1f
[<ffffffff813c4d05>] compat_SyS_setsockopt+0x1a9/0x1cf
[<ffffffff813c4149>] compat_SyS_socketcall+0x180/0x1e3
[<ffffffff81488ea1>] cstar_dispatch+0x7/0x1e

Signed-off-by: Ani Sinha <ani@arista.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ipmr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 866ee89f5254a..8e8203d5c520a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1682,8 +1682,8 @@ static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
-	IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
+	IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_ADD_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
@@ -1745,7 +1745,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 		 * to blackhole.
 		 */
 
-		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+		IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
 		ip_rt_put(rt);
 		goto out_free;
 	}

From 684d640a5b73bf4994bd2100894841abbb924bca Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Nov 2015 17:08:19 -0800
Subject: [PATCH 009/201] sit: fix sit0 percpu double allocations

[ Upstream commit 4ece9009774596ee3df0acba65a324b7ea79387c ]

sit0 device allocates its percpu storage twice :
- One time in ipip6_tunnel_init()
- One time in ipip6_fb_tunnel_init()

Thus we leak 48 bytes per possible cpu per network namespace dismantle.

ipip6_fb_tunnel_init() can be much simpler and does not
return an error, and should be called after register_netdev()

Note that ipip6_tunnel_clone_6rd() also needs to be called
after register_netdev() (calling ipip6_tunnel_init())

Fixes: ebe084aafb7e ("sit: Use ipip6_tunnel_init as the ndo_init function.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/sit.c | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 94428fd85b2f9..dcccae86190f2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	return 0;
 }
 
-static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
+static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	struct iphdr *iph = &tunnel->parms.iph;
 	struct net *net = dev_net(dev);
 	struct sit_net *sitn = net_generic(net, sit_net_id);
 
-	tunnel->dev = dev;
-	tunnel->net = dev_net(dev);
-
 	iph->version		= 4;
 	iph->protocol		= IPPROTO_IPV6;
 	iph->ihl		= 5;
 	iph->ttl		= 64;
 
-	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
-	if (!dev->tstats)
-		return -ENOMEM;
-
-	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
-	if (!tunnel->dst_cache) {
-		free_percpu(dev->tstats);
-		return -ENOMEM;
-	}
-
 	dev_hold(dev);
 	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
-	return 0;
 }
 
 static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net)
 	 */
 	sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
 
-	err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
-	if (err)
-		goto err_dev_free;
-
-	ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
 	err = register_netdev(sitn->fb_tunnel_dev);
 	if (err)
 		goto err_reg_dev;
 
+	ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
+	ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+
 	t = netdev_priv(sitn->fb_tunnel_dev);
 
 	strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
 	return 0;
 
 err_reg_dev:
-	dev_put(sitn->fb_tunnel_dev);
-err_dev_free:
 	ipip6_dev_free(sitn->fb_tunnel_dev);
 err_alloc_dev:
 	return err;

From c555478720e688d9b9fda6afb51253fc9d3f71c6 Mon Sep 17 00:00:00 2001
From: Martin Habets <mhabets@solarflare.com>
Date: Mon, 2 Nov 2015 12:51:31 +0000
Subject: [PATCH 010/201] sfc: push partner queue for skb->xmit_more

[ Upstream commit b2663a4f30e85ec606b806f5135413e6d5c78d1e ]

When the IP stack passes SKBs the sfc driver puts them in 2 different TX
queues (called partners), one for checksummed and one for not checksummed.
If the SKB has xmit_more set the driver will delay pushing the work to the
NIC.

When later it does decide to push the buffers this patch ensures it also
pushes the partner queue, if that also has any delayed work. Before this
fix the work in the partner queue would be left for a long time and cause
a netdev watchdog.

Fixes: 70b33fb ("sfc: add support for skb->xmit_more")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Martin Habets <mhabets@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/sfc/ef10.c       |  4 +++-
 drivers/net/ethernet/sfc/farch.c      |  4 +++-
 drivers/net/ethernet/sfc/net_driver.h |  2 ++
 drivers/net/ethernet/sfc/tx.c         | 30 +++++++++++++++++++++++++--
 4 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index ff649ebef6374..286cc6b69d579 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1849,7 +1849,9 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
 	unsigned int write_ptr;
 	efx_qword_t *txd;
 
-	BUG_ON(tx_queue->write_count == tx_queue->insert_count);
+	tx_queue->xmit_more_available = false;
+	if (unlikely(tx_queue->write_count == tx_queue->insert_count))
+		return;
 
 	do {
 		write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index f08266f0eca23..5a1c5a8f278ad 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -321,7 +321,9 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
 	unsigned write_ptr;
 	unsigned old_write_count = tx_queue->write_count;
 
-	BUG_ON(tx_queue->write_count == tx_queue->insert_count);
+	tx_queue->xmit_more_available = false;
+	if (unlikely(tx_queue->write_count == tx_queue->insert_count))
+		return;
 
 	do {
 		write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index c530e1c4cb4f1..24038ef96d9ff 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -219,6 +219,7 @@ struct efx_tx_buffer {
  * @tso_packets: Number of packets via the TSO xmit path
  * @pushes: Number of times the TX push feature has been used
  * @pio_packets: Number of times the TX PIO feature has been used
+ * @xmit_more_available: Are any packets waiting to be pushed to the NIC
  * @empty_read_count: If the completion path has seen the queue as empty
  *	and the transmission path has not yet checked this, the value of
  *	@read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
@@ -253,6 +254,7 @@ struct efx_tx_queue {
 	unsigned int tso_packets;
 	unsigned int pushes;
 	unsigned int pio_packets;
+	bool xmit_more_available;
 	/* Statistics to supplement MAC stats */
 	unsigned long tx_packets;
 
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 1833a01465711..67f6afaa022f4 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -431,8 +431,20 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 	efx_tx_maybe_stop_queue(tx_queue);
 
 	/* Pass off to hardware */
-	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq))
+	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+		struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
+
+		/* There could be packets left on the partner queue if those
+		 * SKBs had skb->xmit_more set. If we do not push those they
+		 * could be left for a long time and cause a netdev watchdog.
+		 */
+		if (txq2->xmit_more_available)
+			efx_nic_push_buffers(txq2);
+
 		efx_nic_push_buffers(tx_queue);
+	} else {
+		tx_queue->xmit_more_available = skb->xmit_more;
+	}
 
 	tx_queue->tx_packets++;
 
@@ -722,6 +734,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	tx_queue->read_count = 0;
 	tx_queue->old_read_count = 0;
 	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
+	tx_queue->xmit_more_available = false;
 
 	/* Set up TX descriptor ring */
 	efx_nic_init_tx(tx_queue);
@@ -747,6 +760,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
 
 		++tx_queue->read_count;
 	}
+	tx_queue->xmit_more_available = false;
 	netdev_tx_reset_queue(tx_queue->core_txq);
 }
 
@@ -1302,8 +1316,20 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
 	efx_tx_maybe_stop_queue(tx_queue);
 
 	/* Pass off to hardware */
-	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq))
+	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+		struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
+
+		/* There could be packets left on the partner queue if those
+		 * SKBs had skb->xmit_more set. If we do not push those they
+		 * could be left for a long time and cause a netdev watchdog.
+		 */
+		if (txq2->xmit_more_available)
+			efx_nic_push_buffers(txq2);
+
 		efx_nic_push_buffers(tx_queue);
+	} else {
+		tx_queue->xmit_more_available = skb->xmit_more;
+	}
 
 	tx_queue->tso_bursts++;
 	return NETDEV_TX_OK;

From 2e05a9e795039ff20784f0be4a609bfc28ca9d20 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 2 Nov 2015 07:50:07 -0800
Subject: [PATCH 011/201] net: avoid NULL deref in inet_ctl_sock_destroy()

[ Upstream commit 8fa677d2706d325d71dab91bf6e6512c05214e37 ]

Under low memory conditions, tcp_sk_init() and icmp_sk_init()
can both iterate on all possible cpus and call inet_ctl_sock_destroy(),
with eventual NULL pointer.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/inet_common.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index 279f83591971b..109e3ee9108c0 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -41,7 +41,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len,
 
 static inline void inet_ctl_sock_destroy(struct sock *sk)
 {
-	sock_release(sk->sk_socket);
+	if (sk)
+		sock_release(sk->sk_socket);
 }
 
 #endif

From 35b2aa1c3c36ba20d7f58b77a1e590af7b7d6c1f Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Wed, 4 Nov 2015 14:47:53 +0100
Subject: [PATCH 012/201] ipv6: clean up dev_snmp6 proc entry when we fail to
 initialize inet6_dev

[ Upstream commit 2a189f9e57650e9f310ddf4aad75d66c1233a064 ]

In ipv6_add_dev, when addrconf_sysctl_register fails, we do not clean up
the dev_snmp6 entry that we have already registered for this device.
Call snmp6_unregister_dev in this case.

Fixes: a317a2f19da7d ("ipv6: fail early when creating netdev named all or default")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/addrconf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 36b85bd05ac8a..dd00828863a0c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -417,6 +417,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	if (err) {
 		ipv6_mc_destroy_dev(ndev);
 		del_timer(&ndev->regen_timer);
+		snmp6_unregister_dev(ndev);
 		goto err_release;
 	}
 	/* protected by rtnl_lock */

From 10b1965171b100b1348fa03e801a77d0178687bd Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 3 Nov 2015 14:32:57 -0800
Subject: [PATCH 013/201] ipv4: disable BH when changing ip local port range

[ Upstream commit 4ee3bd4a8c7463cdef0b82ebc33fc94a9170a7e0 ]

This fixes the following lockdep warning:

 [ INFO: inconsistent lock state ]
 4.3.0-rc7+ #1197 Not tainted
 ---------------------------------
 inconsistent {IN-SOFTIRQ-R} -> {SOFTIRQ-ON-W} usage.
 sysctl/1019 [HC0[0]:SC0[0]:HE1:SE1] takes:
  (&(&net->ipv4.ip_local_ports.lock)->seqcount){+.+-..}, at: [<ffffffff81921de7>] ipv4_local_port_range+0xb4/0x12a
 {IN-SOFTIRQ-R} state was registered at:
   [<ffffffff810bd682>] __lock_acquire+0x2f6/0xdf0
   [<ffffffff810be6d5>] lock_acquire+0x11c/0x1a4
   [<ffffffff818e599c>] inet_get_local_port_range+0x4e/0xae
   [<ffffffff8166e8e3>] udp_flow_src_port.constprop.40+0x23/0x116
   [<ffffffff81671cb9>] vxlan_xmit_one+0x219/0xa6a
   [<ffffffff81672f75>] vxlan_xmit+0xa6b/0xaa5
   [<ffffffff817f2deb>] dev_hard_start_xmit+0x2ae/0x465
   [<ffffffff817f35ed>] __dev_queue_xmit+0x531/0x633
   [<ffffffff817f3702>] dev_queue_xmit_sk+0x13/0x15
   [<ffffffff818004a5>] neigh_resolve_output+0x12f/0x14d
   [<ffffffff81959cfa>] ip6_finish_output2+0x344/0x39f
   [<ffffffff8195bf58>] ip6_finish_output+0x88/0x8e
   [<ffffffff8195bfef>] ip6_output+0x91/0xe5
   [<ffffffff819792ae>] dst_output_sk+0x47/0x4c
   [<ffffffff81979392>] NF_HOOK_THRESH.constprop.30+0x38/0x82
   [<ffffffff8197981e>] mld_sendpack+0x189/0x266
   [<ffffffff8197b28b>] mld_ifc_timer_expire+0x1ef/0x223
   [<ffffffff810de581>] call_timer_fn+0xfb/0x28c
   [<ffffffff810ded1e>] run_timer_softirq+0x1c7/0x1f1

Fixes: b8f1a55639e6 ("udp: Add function to make source port for UDP tunnels")
Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/sysctl_net_ipv4.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 894da3a70aff9..ade773744b988 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -48,14 +48,14 @@ static void set_local_port_range(struct net *net, int range[2])
 {
 	bool same_parity = !((range[0] ^ range[1]) & 1);
 
-	write_seqlock(&net->ipv4.ip_local_ports.lock);
+	write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
 	if (same_parity && !net->ipv4.ip_local_ports.warned) {
 		net->ipv4.ip_local_ports.warned = true;
 		pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n");
 	}
 	net->ipv4.ip_local_ports.range[0] = range[0];
 	net->ipv4.ip_local_ports.range[1] = range[1];
-	write_sequnlock(&net->ipv4.ip_local_ports.lock);
+	write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
 }
 
 /* Validate changes from /proc interface. */

From 0c04f7b0d0d109016317fddb16dccdbc7ae2c4fa Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 3 Nov 2015 15:59:28 -0800
Subject: [PATCH 014/201] net: Fix prefsrc lookups

[ Upstream commit e1b8d903c6c3862160d2d5036806a94786c8fc4e ]

A bug report (https://bugzilla.kernel.org/show_bug.cgi?id=107071) noted
that the follwoing ip command is failing with v4.3:

    $ ip route add 10.248.5.0/24 dev bond0.250 table vlan_250 src 10.248.5.154
    RTNETLINK answers: Invalid argument

021dd3b8a142d changed the lookup of the given preferred source address to
use the table id passed in, but this assumes the local entries are in the
given table which is not necessarily true for non-VRF use cases. When
validating the preferred source fallback to the local table on failure.

Fixes: 021dd3b8a142d ("net: Add routes to the table associated with the device")
Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/fib_semantics.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index e966f8599b4a1..ef5892f5e046a 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -864,14 +864,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc)
 	if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
 	    fib_prefsrc != cfg->fc_dst) {
 		u32 tb_id = cfg->fc_table;
+		int rc;
 
 		if (tb_id == RT_TABLE_MAIN)
 			tb_id = RT_TABLE_LOCAL;
 
-		if (inet_addr_type_table(cfg->fc_nlinfo.nl_net,
-					 fib_prefsrc, tb_id) != RTN_LOCAL) {
-			return false;
+		rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+					  fib_prefsrc, tb_id);
+
+		if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) {
+			rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net,
+						  fib_prefsrc, RT_TABLE_LOCAL);
 		}
+
+		if (rc != RTN_LOCAL)
+			return false;
 	}
 	return true;
 }

From 46d85c56f04ce31e80b52786c5b739a56f0ef3ae Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 4 Nov 2015 13:49:49 +0100
Subject: [PATCH 015/201] tun_dst: Fix potential NULL dereference

[ Upstream commit f63ce5b6fa5e9a0faf7a0e1ef2993a502878c78a ]

In tun_dst_unclone() the return value of skb_metadata_dst() is checked
for being NULL after it is dereferenced. Fix this by moving the
dereference after the NULL check.

Found by the Coverity scanner (CID 1338068).

Fixes: fc4099f17240 ("openvswitch: Fix egress tunnel info.")
Cc: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/dst_metadata.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index ce009710120ca..6816f0fa5693d 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -63,12 +63,13 @@ static inline struct metadata_dst *tun_rx_dst(int md_size)
 static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
 {
 	struct metadata_dst *md_dst = skb_metadata_dst(skb);
-	int md_size = md_dst->u.tun_info.options_len;
+	int md_size;
 	struct metadata_dst *new_md;
 
 	if (!md_dst)
 		return ERR_PTR(-EINVAL);
 
+	md_size = md_dst->u.tun_info.options_len;
 	new_md = metadata_dst_alloc(md_size, GFP_ATOMIC);
 	if (!new_md)
 		return ERR_PTR(-ENOMEM);

From 1e2d4ae7356d809aa52a6fa406d441b321ac7827 Mon Sep 17 00:00:00 2001
From: Francesco Ruggeri <fruggeri@aristanetworks.com>
Date: Thu, 5 Nov 2015 08:16:14 -0800
Subject: [PATCH 016/201] packet: race condition in packet_bind

[ Upstream commit 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 ]

There is a race conditions between packet_notifier and packet_bind{_spkt}.

It happens if packet_notifier(NETDEV_UNREGISTER) executes between the
time packet_bind{_spkt} takes a reference on the new netdevice and the
time packet_do_bind sets po->ifindex.
In this case the notification can be missed.
If this happens during a dev_change_net_namespace this can result in the
netdevice to be moved to the new namespace while the packet_sock in the
old namespace still holds a reference on it. When the netdevice is later
deleted in the new namespace the deletion hangs since the packet_sock
is not found in the new namespace' &net->packet.sklist.
It can be reproduced with the script below.

This patch makes packet_do_bind check again for the presence of the
netdevice in the packet_sock's namespace after the synchronize_net
in unregister_prot_hook.
More in general it also uses the rcu lock for the duration of the bind
to stop dev_change_net_namespace/rollback_registered_many from
going past the synchronize_net following unlist_netdevice, so that
no NETDEV_UNREGISTER notifications can happen on the new netdevice
while the bind is executing. In order to do this some code from
packet_bind{_spkt} is consolidated into packet_do_dev.

import socket, os, time, sys
proto=7
realDev='em1'
vlanId=400
if len(sys.argv) > 1:
   vlanId=int(sys.argv[1])
dev='vlan%d' % vlanId

os.system('taskset -p 0x10 %d' % os.getpid())

s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto)
os.system('ip link add link %s name %s type vlan id %d' %
          (realDev, dev, vlanId))
os.system('ip netns add dummy')

pid=os.fork()

if pid == 0:
   # dev should be moved while packet_do_bind is in synchronize net
   os.system('taskset -p 0x20000 %d' % os.getpid())
   os.system('ip link set %s netns dummy' % dev)
   os.system('ip netns exec dummy ip link del %s' % dev)
   s.close()
   sys.exit(0)

time.sleep(.004)
try:
   s.bind(('%s' % dev, proto+1))
except:
   print 'Could not bind socket'
   s.close()
   os.system('ip netns del dummy')
   sys.exit(0)

os.waitpid(pid, 0)
s.close()
os.system('ip netns del dummy')
sys.exit(0)

Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 80 ++++++++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 31 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index aa4b15c358844..27b2898f275c7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2903,22 +2903,40 @@ static int packet_release(struct socket *sock)
  *	Attach a packet hook.
  */
 
-static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
+static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
+			  __be16 proto)
 {
 	struct packet_sock *po = pkt_sk(sk);
 	struct net_device *dev_curr;
 	__be16 proto_curr;
 	bool need_rehook;
+	struct net_device *dev = NULL;
+	int ret = 0;
+	bool unlisted = false;
 
-	if (po->fanout) {
-		if (dev)
-			dev_put(dev);
-
+	if (po->fanout)
 		return -EINVAL;
-	}
 
 	lock_sock(sk);
 	spin_lock(&po->bind_lock);
+	rcu_read_lock();
+
+	if (name) {
+		dev = dev_get_by_name_rcu(sock_net(sk), name);
+		if (!dev) {
+			ret = -ENODEV;
+			goto out_unlock;
+		}
+	} else if (ifindex) {
+		dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
+		if (!dev) {
+			ret = -ENODEV;
+			goto out_unlock;
+		}
+	}
+
+	if (dev)
+		dev_hold(dev);
 
 	proto_curr = po->prot_hook.type;
 	dev_curr = po->prot_hook.dev;
@@ -2926,14 +2944,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
 	need_rehook = proto_curr != proto || dev_curr != dev;
 
 	if (need_rehook) {
-		unregister_prot_hook(sk, true);
+		if (po->running) {
+			rcu_read_unlock();
+			__unregister_prot_hook(sk, true);
+			rcu_read_lock();
+			dev_curr = po->prot_hook.dev;
+			if (dev)
+				unlisted = !dev_get_by_index_rcu(sock_net(sk),
+								 dev->ifindex);
+		}
 
 		po->num = proto;
 		po->prot_hook.type = proto;
-		po->prot_hook.dev = dev;
 
-		po->ifindex = dev ? dev->ifindex : 0;
-		packet_cached_dev_assign(po, dev);
+		if (unlikely(unlisted)) {
+			dev_put(dev);
+			po->prot_hook.dev = NULL;
+			po->ifindex = -1;
+			packet_cached_dev_reset(po);
+		} else {
+			po->prot_hook.dev = dev;
+			po->ifindex = dev ? dev->ifindex : 0;
+			packet_cached_dev_assign(po, dev);
+		}
 	}
 	if (dev_curr)
 		dev_put(dev_curr);
@@ -2941,7 +2974,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
 	if (proto == 0 || !need_rehook)
 		goto out_unlock;
 
-	if (!dev || (dev->flags & IFF_UP)) {
+	if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
 		register_prot_hook(sk);
 	} else {
 		sk->sk_err = ENETDOWN;
@@ -2950,9 +2983,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
 	}
 
 out_unlock:
+	rcu_read_unlock();
 	spin_unlock(&po->bind_lock);
 	release_sock(sk);
-	return 0;
+	return ret;
 }
 
 /*
@@ -2964,8 +2998,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
 {
 	struct sock *sk = sock->sk;
 	char name[15];
-	struct net_device *dev;
-	int err = -ENODEV;
 
 	/*
 	 *	Check legality
@@ -2975,19 +3007,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
 		return -EINVAL;
 	strlcpy(name, uaddr->sa_data, sizeof(name));
 
-	dev = dev_get_by_name(sock_net(sk), name);
-	if (dev)
-		err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
-	return err;
+	return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
 }
 
 static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
 	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
 	struct sock *sk = sock->sk;
-	struct net_device *dev = NULL;
-	int err;
-
 
 	/*
 	 *	Check legality
@@ -2998,16 +3024,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
 	if (sll->sll_family != AF_PACKET)
 		return -EINVAL;
 
-	if (sll->sll_ifindex) {
-		err = -ENODEV;
-		dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
-		if (dev == NULL)
-			goto out;
-	}
-	err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
-
-out:
-	return err;
+	return packet_do_bind(sk, NULL, sll->sll_ifindex,
+			      sll->sll_protocol ? : pkt_sk(sk)->num);
 }
 
 static struct proto packet_proto = {

From 1c88ba236f4040530a98ca81d0316bd03fb5a41f Mon Sep 17 00:00:00 2001
From: Jay Vosburgh <jay.vosburgh@canonical.com>
Date: Fri, 6 Nov 2015 17:23:23 -0800
Subject: [PATCH 017/201] bonding: fix panic on non-ARPHRD_ETHER enslave
 failure

[ Upstream commit 40baec225765c54eefa870530dd613bad9829bb7 ]

Since commit 7d5cd2ce529b, when bond_enslave fails on devices that
are not ARPHRD_ETHER, if needed, it resets the bonding device back to
ARPHRD_ETHER by calling ether_setup.

	Unfortunately, ether_setup clobbers dev->flags, clearing IFF_UP
if the bond device is up, leaving it in a quasi-down state without
having actually gone through dev_close.  For bonding, if any periodic
work queue items are active (miimon, arp_interval, etc), those will
remain running, as they are stopped by bond_close.  At this point, if
the bonding module is unloaded or the bond is deleted, the system will
panic when the work function is called.

	This panic is resolved by calling dev_close on the bond itself
prior to calling ether_setup.

Cc: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Fixes: 7d5cd2ce5292 ("bonding: correctly handle bonding type change on enslave failure")
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/bonding/bond_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 771a449d2f563..bcd7bddbe312b 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1749,6 +1749,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 					    slave_dev->dev_addr))
 			eth_hw_addr_random(bond_dev);
 		if (bond_dev->type != ARPHRD_ETHER) {
+			dev_close(bond_dev);
 			ether_setup(bond_dev);
 			bond_dev->flags |= IFF_MASTER;
 			bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING;

From d71ab4e1efcb5cdc25bc5e20b655e16ef0927c01 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 9 Nov 2015 17:51:23 -0800
Subject: [PATCH 018/201] net: fix a race in dst_release()

[ Upstream commit d69bbf88c8d0b367cf3e3a052f6daadf630ee566 ]

Only cpu seeing dst refcount going to 0 can safely
dereference dst->flags.

Otherwise an other cpu might already have freed the dst.

Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst")
Reported-by: Greg Thelen <gthelen@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/dst.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/dst.c b/net/core/dst.c
index 0771c8cb9307c..d6a5a0bc7df5d 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst)
 		if (unlikely(newrefcnt < 0))
 			net_warn_ratelimited("%s: dst:%p refcnt:%d\n",
 					     __func__, dst, newrefcnt);
-		if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+		if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE))
 			call_rcu(&dst->rcu_head, dst_destroy_rcu);
 	}
 }

From 9837c4f9a02a877d094bc0c21d3f3b6d242d7749 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 28 Aug 2015 09:41:39 +0100
Subject: [PATCH 019/201] ARM: 8426/1: dma-mapping: add missing range check in
 dma_mmap()

commit 371f0f085f629fc0f66695f572373ca4445a67ad upstream.

dma_mmap() function in IOMMU-based dma-mapping implementation lacked
a check for valid range of mmap parameters (offset and buffer size), what
might have caused access beyond the allocated buffer. This patch fixes
this issue.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/dma-mapping.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 1a7815e5421b6..c0865867ef0de 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1407,12 +1407,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	unsigned long uaddr = vma->vm_start;
 	unsigned long usize = vma->vm_end - vma->vm_start;
 	struct page **pages = __iommu_get_pages(cpu_addr, attrs);
+	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned long off = vma->vm_pgoff;
 
 	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot);
 
 	if (!pages)
 		return -ENXIO;
 
+	if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off)
+		return -ENXIO;
+
 	do {
 		int ret = vm_insert_page(vma, uaddr, *pages++);
 		if (ret) {

From 53c5c971742f3e4340239a8a47cb0509eb460318 Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 28 Aug 2015 09:42:09 +0100
Subject: [PATCH 020/201] ARM: 8427/1: dma-mapping: add support for offset
 parameter in dma_mmap()

commit 7e31210349e9e03a9a4dff31ab5f2bc83e8e84f5 upstream.

IOMMU-based dma_mmap() implementation lacked proper support for offset
parameter used in mmap call (it always assumed that mapping starts from
offset zero). This patch adds support for offset parameter to IOMMU-based
implementation.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mm/dma-mapping.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index c0865867ef0de..ad4eb2d26e169 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -1418,6 +1418,8 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
 	if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off)
 		return -ENXIO;
 
+	pages += off;
+
 	do {
 		int ret = vm_insert_page(vma, uaddr, *pages++);
 		if (ret) {

From 6b98f5acaab04d568c5bc9d003bb3c90221360c5 Mon Sep 17 00:00:00 2001
From: Peter Ujfalusi <peter.ujfalusi@ti.com>
Date: Wed, 14 Oct 2015 14:42:43 +0300
Subject: [PATCH 021/201] ARM: common: edma: Fix channel parameter for irq
 callbacks

commit 696d8b70c09dd421c4d037fab04341e5b30585cf upstream.

In case when the interrupt happened for the second eDMA the channel
number was incorrectly passed to the client driver.

Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/common/edma.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c
index 873dbfcc7dc9d..56fc339571f92 100644
--- a/arch/arm/common/edma.c
+++ b/arch/arm/common/edma.c
@@ -406,7 +406,8 @@ static irqreturn_t dma_irq_handler(int irq, void *data)
 					BIT(slot));
 			if (edma_cc[ctlr]->intr_data[channel].callback)
 				edma_cc[ctlr]->intr_data[channel].callback(
-					channel, EDMA_DMA_COMPLETE,
+					EDMA_CTLR_CHAN(ctlr, channel),
+					EDMA_DMA_COMPLETE,
 					edma_cc[ctlr]->intr_data[channel].data);
 		}
 	} while (sh_ipr);
@@ -460,7 +461,8 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data)
 					if (edma_cc[ctlr]->intr_data[k].
 								callback) {
 						edma_cc[ctlr]->intr_data[k].
-						callback(k,
+						callback(
+						EDMA_CTLR_CHAN(ctlr, k),
 						EDMA_DMA_CC_ERROR,
 						edma_cc[ctlr]->intr_data
 						[k].data);

From c09ea0445eec7604f4855a1340bd0a831b2b6831 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Wed, 16 Sep 2015 09:35:06 +0800
Subject: [PATCH 022/201] ARM: dts: imx27.dtsi: change the clock information
 for usb

commit facf47ee6b4d07d43c3bfd6f0762f1b28f64703a upstream.

For imx27, it needs three clocks to let the controller work,
the old code is wrong, and usbmisc has not included clock handling
code any more. Without this patch, it will cause below data
abort when accessing usbmisc registers.

usbcore: registered new interface driver usb-storage
Unhandled fault: external abort on non-linefetch (0x008) at 0xf4424600
pgd = c0004000
[f4424600] *pgd=10000452(bad)
Internal error: : 8 [#1] PREEMPT ARM
Modules linked in:
CPU: 0 PID: 1 Comm: swapper Not tainted 4.1.0-next-20150701-dirty #3089
Hardware name: Freescale i.MX27 (Device Tree Support)
task: c7832b60 ti: c783e000 task.ti: c783e000
PC is at usbmisc_imx27_init+0x4c/0xbc
LR is at usbmisc_imx27_init+0x40/0xbc
pc : [<c03cb5c0>]    lr : [<c03cb5b4>]    psr: 60000093
sp : c783fe08  ip : 00000000  fp : 00000000
r10: c0576434  r9 : 0000009c  r8 : c7a773a0
r7 : 01000000  r6 : 60000013  r5 : c7a776f0  r4 : c7a773f0
r3 : f4424600  r2 : 00000000  r1 : 00000001  r0 : 00000001
Flags: nZCv  IRQs off  FIQs on  Mode SVC_32  ISA ARM  Segment kernel
Control: 0005317f  Table: a0004000  DAC: 00000017
Process swapper (pid: 1, stack limit = 0xc783e190)
Stack: (0xc783fe08 to 0xc7840000)

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Reported-by: Fabio Estevam <fabio.estevam@freescale.com>
Tested-by: Fabio Estevam <fabio.estevam@freescale.com>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/imx27.dtsi | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi
index feb9d34b239c8..f818ea483aeb5 100644
--- a/arch/arm/boot/dts/imx27.dtsi
+++ b/arch/arm/boot/dts/imx27.dtsi
@@ -486,7 +486,10 @@
 				compatible = "fsl,imx27-usb";
 				reg = <0x10024000 0x200>;
 				interrupts = <56>;
-				clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+				clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+					<&clks IMX27_CLK_USB_AHB_GATE>,
+					<&clks IMX27_CLK_USB_DIV>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 0>;
 				status = "disabled";
 			};
@@ -495,7 +498,10 @@
 				compatible = "fsl,imx27-usb";
 				reg = <0x10024200 0x200>;
 				interrupts = <54>;
-				clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+				clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+					<&clks IMX27_CLK_USB_AHB_GATE>,
+					<&clks IMX27_CLK_USB_DIV>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 1>;
 				dr_mode = "host";
 				status = "disabled";
@@ -505,7 +511,10 @@
 				compatible = "fsl,imx27-usb";
 				reg = <0x10024400 0x200>;
 				interrupts = <55>;
-				clocks = <&clks IMX27_CLK_USB_IPG_GATE>;
+				clocks = <&clks IMX27_CLK_USB_IPG_GATE>,
+					<&clks IMX27_CLK_USB_AHB_GATE>,
+					<&clks IMX27_CLK_USB_DIV>;
+				clock-names = "ipg", "ahb", "per";
 				fsl,usbmisc = <&usbmisc 2>;
 				dr_mode = "host";
 				status = "disabled";
@@ -515,7 +524,6 @@
 				#index-cells = <1>;
 				compatible = "fsl,imx27-usbmisc";
 				reg = <0x10024600 0x200>;
-				clocks = <&clks IMX27_CLK_USB_AHB_GATE>;
 			};
 
 			sahara2: sahara@10025000 {

From 73982e966793c4c38cc19af4c193fdd88fd90213 Mon Sep 17 00:00:00 2001
From: Dmitry Osipenko <digetx@gmail.com>
Date: Tue, 30 Jun 2015 17:15:50 +0300
Subject: [PATCH 023/201] ARM: tegra: paz00: use con_id's to refer GPIO's in
 gpiod_lookup table

commit e77b675f8786f38d40fc1562e1275875daf67fef upstream.

Commit 72daceb9a10a ("net: rfkill: gpio: Add default GPIO driver mappings
for ACPI") removed possibility to request GPIO by table index for non-ACPI
platforms without changing its users. As result "shutdown" GPIO request
will fail if request for "reset" GPIO succeeded or "reset" will be
requested instead of "shutdown" if "reset" wasn't defined. Fix it by
making gpiod_lookup_table use con_id's instead of indexes.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
Fixes: 72daceb (net: rfkill: gpio: Add default GPIO driver mappings for ACPI)
Acked-by: Alexandre Courbot <acourbot@nvidia.com>
Reviewed-by: Marc Dietrich <marvin24@gmx.de>
Tested-by: Marc Dietrich <marvin24@gmx.de>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-tegra/board-paz00.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c
index fbe74c6806f3b..49d1110cff534 100644
--- a/arch/arm/mach-tegra/board-paz00.c
+++ b/arch/arm/mach-tegra/board-paz00.c
@@ -39,8 +39,8 @@ static struct platform_device wifi_rfkill_device = {
 static struct gpiod_lookup_table wifi_gpio_lookup = {
 	.dev_id = "rfkill_gpio",
 	.table = {
-		GPIO_LOOKUP_IDX("tegra-gpio", 25, NULL, 0, 0),
-		GPIO_LOOKUP_IDX("tegra-gpio", 85, NULL, 1, 0),
+		GPIO_LOOKUP("tegra-gpio", 25, "reset", 0),
+		GPIO_LOOKUP("tegra-gpio", 85, "shutdown", 0),
 		{ },
 	},
 };

From 7fbe6208f715497642c80da028565a9f8c5f117b Mon Sep 17 00:00:00 2001
From: Holger Busse <h.busse@kathrein-sachsen.de>
Date: Wed, 26 Aug 2015 10:45:45 +0200
Subject: [PATCH 024/201] ARM: at91/dt: corrections to i2c1 declaration to
 sama5d4

commit d1a9c24ad16ab2b26f1574bc3f2c165a7beff5df upstream.

Correcting the dma declaration for i2c1 dma.

Signed-off-by: Holger Busse <h.busse@kathrein-sachsen.de>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Fixes: 4cc7cdf35c5f ("ARM: at91/dt: add i2c1 declaration to sama5d4")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/sama5d4.dtsi | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index 8d1de29e8da10..fb92481e60d46 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -939,11 +939,11 @@
 				reg = <0xf8018000 0x4000>;
 				interrupts = <33 IRQ_TYPE_LEVEL_HIGH 6>;
 				dmas = <&dma1
-					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1))
-					AT91_XDMAC_DT_PERID(4)>,
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(4))>,
 				       <&dma1
-					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1))
-					AT91_XDMAC_DT_PERID(5)>;
+					(AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)
+					| AT91_XDMAC_DT_PERID(5))>;
 				dma-names = "tx", "rx";
 				pinctrl-names = "default";
 				pinctrl-0 = <&pinctrl_i2c1>;

From 48d6d4f8e499a546f3bf03b1afefba59a5216e28 Mon Sep 17 00:00:00 2001
From: Patrick Doyle <pdoyle@irobot.com>
Date: Fri, 16 Oct 2015 12:39:05 +0200
Subject: [PATCH 025/201] ARM: at91: pm: at91_pm_suspend_in_sram() must be
 8-byte aligned

commit 5fcf8d1a0e84792b2bc44922c5d833dab96a9c1e upstream.

fncpy() requires that the source and the destination are both 8-byte
aligned.

Signed-off-by: Patrick Doyle <pdoyle@irobot.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
Fixes: d94e688cae56 ("ARM: at91/pm: move the copying the sram function to the sram initialization phase")
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-at91/pm_suspend.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S
index 0d95f488b47a7..a25defda3d226 100644
--- a/arch/arm/mach-at91/pm_suspend.S
+++ b/arch/arm/mach-at91/pm_suspend.S
@@ -80,6 +80,8 @@ tmp2	.req	r5
  *	@r2: base address of second SDRAM Controller or 0 if not present
  *	@r3: pm information
  */
+/* at91_pm_suspend_in_sram must be 8-byte aligned per the requirements of fncpy() */
+	.align 3
 ENTRY(at91_pm_suspend_in_sram)
 	/* Save registers on stack */
 	stmfd	sp!, {r4 - r12, lr}

From f706ac7622cb87d7e3bba4704d896e2e88fd3dad Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Fri, 21 Aug 2015 14:38:38 +0200
Subject: [PATCH 026/201] ARM: dts: Add vbus regulator to USB2 phy nodes on
 exynos3250, exynos4210 and exynos4412 boards

commit 4ae9a4c66cdcb8b5d4e4d904846f1b450dbcabb4 upstream.

Exynos USB2 PHY driver now supports VBUS regulator, so add it to all
boards which have it available. This also fixes commit
7eec1266751b ("ARM: dts: Add Maxim 77693 PMIC to exynos4412-trats2"),
which added new regulators to Trats2 board, but without linking them to
the consumers.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Fixes: 7eec1266751b ("ARM: dts: Add Maxim 77693 PMIC to exynos4412-trats2")
Signed-off-by: Kukjin Kim <kgene@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/exynos3250-monk.dts           | 1 +
 arch/arm/boot/dts/exynos3250-rinato.dts         | 1 +
 arch/arm/boot/dts/exynos4210-trats.dts          | 2 +-
 arch/arm/boot/dts/exynos4210-universal_c210.dts | 2 +-
 arch/arm/boot/dts/exynos4412-trats2.dts         | 1 +
 5 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/arm/boot/dts/exynos3250-monk.dts b/arch/arm/boot/dts/exynos3250-monk.dts
index 540a0adf2be6d..35b39d2255d3f 100644
--- a/arch/arm/boot/dts/exynos3250-monk.dts
+++ b/arch/arm/boot/dts/exynos3250-monk.dts
@@ -161,6 +161,7 @@
 };
 
 &exynos_usbphy {
+	vbus-supply = <&safeout_reg>;
 	status = "okay";
 };
 
diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts
index 41a5fafb9aa93..23623cd3ebd93 100644
--- a/arch/arm/boot/dts/exynos3250-rinato.dts
+++ b/arch/arm/boot/dts/exynos3250-rinato.dts
@@ -153,6 +153,7 @@
 
 &exynos_usbphy {
 	status = "okay";
+	vbus-supply = <&safeout_reg>;
 };
 
 &hsotg {
diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts
index ba34886f8b65b..01d38f2145b97 100644
--- a/arch/arm/boot/dts/exynos4210-trats.dts
+++ b/arch/arm/boot/dts/exynos4210-trats.dts
@@ -251,6 +251,7 @@
 
 &exynos_usbphy {
 	status = "okay";
+	vbus-supply = <&safe1_sreg>;
 };
 
 &fimd {
@@ -448,7 +449,6 @@
 
 			safe1_sreg: ESAFEOUT1 {
 			     regulator-name = "SAFEOUT1";
-			     regulator-always-on;
 			};
 
 			safe2_sreg: ESAFEOUT2 {
diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts
index eb379526e2342..2c04297825fe9 100644
--- a/arch/arm/boot/dts/exynos4210-universal_c210.dts
+++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts
@@ -248,6 +248,7 @@
 
 &exynos_usbphy {
 	status = "okay";
+	vbus-supply = <&safeout1_reg>;
 };
 
 &fimd {
@@ -486,7 +487,6 @@
 
 			safeout1_reg: ESAFEOUT1 {
 				regulator-name = "SAFEOUT1";
-				regulator-always-on;
 			};
 
 			safeout2_reg: ESAFEOUT2 {
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 2a1ebb76ebe00..50a5e8a85283c 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -391,6 +391,7 @@
 };
 
 &exynos_usbphy {
+	vbus-supply = <&esafeout1_reg>;
 	status = "okay";
 };
 

From 5a089fcb4663cffbd0fac3460272c92ccd04ee31 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 16 Oct 2015 12:32:32 -0700
Subject: [PATCH 027/201] ARM: dts: Fix WLAN regression on omap5-uevm

commit 0efc898a9bea7a2e8e583c6efab0e19dc7093078 upstream.

Commit 99f84cae43df ("ARM: dts: add wl12xx/wl18xx bindings") added
device tree bindings for the TI WLAN SDIO on many omap variants.

I recall wondering how come omap5-uevm did not have the WLAN
added and this issue has been bugging me for a while now, and
I finally tracked it down to a bad pinmux regression, and a missing
deferred probe handling for the 32k clock from palmas that's
requested by twl6040.

Basically 392adaf796b9 ("ARM: dts: omap5-evm: Add mcspi data")
added pin muxing for mcspi4 that conflicts with the onboard
WLAN. While some omap5-uevm don't have WLAN populated, the
pins are not reused for other devices. And as the SDIO bus
should be probed, let's try to enable WLAN by default.

Let's fix the regression and add the WLAN configuration as
done for the other boards in 99f84cae43df ("ARM: dts: add
wl12xx/wl18xx bindings"). And let's use the new MMC pwrseq for
the 32k clock as suggested by Javier Martinez Canillas
<javier@dowhile0.org>.

Note that without a related deferred probe fix for twl6040,
the 32k clock is not initialized if palmas-clk is a module
and twl6040 is built-in.

Let's also use the generic "non-removable" instead of the
legacy "ti,non-removable" property while at it.

And finally, note that omap5 seems to require WAKEUP_EN for
the WLAN GPIO interrupt.

Fixes: 392adaf796b9 ("ARM: dts: omap5-evm: Add mcspi data")
Cc: Sourav Poddar <sourav.poddar@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/omap5-uevm.dts | 66 ++++++++++++++++++++++++++------
 1 file changed, 55 insertions(+), 11 deletions(-)

diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts
index 3cb030f9d2c4d..c85761212d90d 100644
--- a/arch/arm/boot/dts/omap5-uevm.dts
+++ b/arch/arm/boot/dts/omap5-uevm.dts
@@ -31,6 +31,24 @@
 		regulator-max-microvolt = <3000000>;
 	};
 
+	mmc3_pwrseq: sdhci0_pwrseq {
+		compatible = "mmc-pwrseq-simple";
+		clocks = <&clk32kgaudio>;
+		clock-names = "ext_clock";
+	};
+
+	vmmcsdio_fixed: fixedregulator-mmcsdio {
+		compatible = "regulator-fixed";
+		regulator-name = "vmmcsdio_fixed";
+		regulator-min-microvolt = <1800000>;
+		regulator-max-microvolt = <1800000>;
+		gpio = <&gpio5 12 GPIO_ACTIVE_HIGH>;	/* gpio140 WLAN_EN */
+		enable-active-high;
+		startup-delay-us = <70000>;
+		pinctrl-names = "default";
+		pinctrl-0 = <&wlan_pins>;
+	};
+
 	/* HS USB Host PHY on PORT 2 */
 	hsusb2_phy: hsusb2_phy {
 		compatible = "usb-nop-xceiv";
@@ -197,12 +215,20 @@
 		>;
 	};
 
-	mcspi4_pins: pinmux_mcspi4_pins {
+	mmc3_pins: pinmux_mmc3_pins {
+		pinctrl-single,pins = <
+			OMAP5_IOPAD(0x01a4, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_clk */
+			OMAP5_IOPAD(0x01a6, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_cmd */
+			OMAP5_IOPAD(0x01a8, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data0 */
+			OMAP5_IOPAD(0x01aa, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data1 */
+			OMAP5_IOPAD(0x01ac, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data2 */
+			OMAP5_IOPAD(0x01ae, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data3 */
+		>;
+	};
+
+	wlan_pins: pinmux_wlan_pins {
 		pinctrl-single,pins = <
-			0x164 (PIN_INPUT | MUX_MODE1)		/*  mcspi4_clk */
-			0x168 (PIN_INPUT | MUX_MODE1)		/*  mcspi4_simo */
-			0x16a (PIN_INPUT | MUX_MODE1)		/*  mcspi4_somi */
-			0x16c (PIN_INPUT | MUX_MODE1)		/*  mcspi4_cs0 */
+			OMAP5_IOPAD(0x1bc, PIN_OUTPUT | MUX_MODE6) /* mcspi1_clk.gpio5_140 */
 		>;
 	};
 
@@ -276,6 +302,12 @@
 			0x1A (PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */
 		>;
 	};
+
+	wlcore_irq_pin: pinmux_wlcore_irq_pin {
+		pinctrl-single,pins = <
+			OMAP5_IOPAD(0x040, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6)	/* llia_wakereqin.gpio1_wk14 */
+		>;
+	};
 };
 
 &mmc1 {
@@ -290,8 +322,25 @@
 };
 
 &mmc3 {
+	vmmc-supply = <&vmmcsdio_fixed>;
+	mmc-pwrseq = <&mmc3_pwrseq>;
 	bus-width = <4>;
-	ti,non-removable;
+	non-removable;
+	cap-power-off-card;
+	pinctrl-names = "default";
+	pinctrl-0 = <&mmc3_pins &wlcore_irq_pin>;
+	interrupts-extended = <&gic GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH
+			       &omap5_pmx_core 0x168>;
+
+	#address-cells = <1>;
+	#size-cells = <0>;
+	wlcore: wlcore@2 {
+		compatible = "ti,wl1271";
+		reg = <2>;
+		interrupt-parent = <&gpio1>;
+		interrupts = <14 IRQ_TYPE_LEVEL_HIGH>;	/* gpio 14 */
+		ref-clock-frequency = <26000000>;
+	};
 };
 
 &mmc4 {
@@ -598,11 +647,6 @@
 	pinctrl-0 = <&mcspi3_pins>;
 };
 
-&mcspi4 {
-	pinctrl-names = "default";
-	pinctrl-0 = <&mcspi4_pins>;
-};
-
 &uart1 {
 	pinctrl-names = "default";
 	pinctrl-0 = <&uart1_pins>;

From 2c45565f18b2b076669d133805ce8ec313208f73 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wens@csie.org>
Date: Sat, 10 Oct 2015 22:48:56 +0800
Subject: [PATCH 028/201] ARM: dts: sun6i: hummingbird: Fix VDD-CPU and VDD-GPU
 regulator names

commit 976d84fce6aa1e5bf92b8d06d69014ac45fd5fad upstream.

The VDD-CPU and VDD-GPU regulators were incorrectly swapped.

Fixes: bab03561224ba ("ARM: dts: sun6i: hummingbird: Add AXP221 regulator
       nodes")

Signed-off-by: Chen-Yu Tsai <wens@csie.org>
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/sun6i-a31-hummingbird.dts | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
index d0cfadac0691d..18f26ca4e3755 100644
--- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
+++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts
@@ -184,18 +184,18 @@
 				regulator-name = "vcc-3v0";
 			};
 
-			vdd_cpu: dcdc2 {
+			vdd_gpu: dcdc2 {
 				regulator-always-on;
 				regulator-min-microvolt = <700000>;
 				regulator-max-microvolt = <1320000>;
-				regulator-name = "vdd-cpu";
+				regulator-name = "vdd-gpu";
 			};
 
-			vdd_gpu: dcdc3 {
+			vdd_cpu: dcdc3 {
 				regulator-always-on;
 				regulator-min-microvolt = <700000>;
 				regulator-max-microvolt = <1320000>;
-				regulator-name = "vdd-gpu";
+				regulator-name = "vdd-cpu";
 			};
 
 			vdd_sys_dll: dcdc4 {

From aaa46adc35b604f6445b795b14ed9dfb79ce9c38 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 12 Oct 2015 15:46:08 +0200
Subject: [PATCH 029/201] ARM: pxa: remove incorrect __init annotation on
 pxa27x_set_pwrmode

commit 54c09889bff6d99c8733eed4a26c9391b177c88b upstream.

The z2 machine calls pxa27x_set_pwrmode() in order to power off
the machine, but this function gets discarded early at boot because
it is marked __init, as pointed out by kbuild:

WARNING: vmlinux.o(.text+0x145c4): Section mismatch in reference from the function z2_power_off() to the function .init.text:pxa27x_set_pwrmode()
The function z2_power_off() references
the function __init pxa27x_set_pwrmode().
This is often because z2_power_off lacks a __init
annotation or the annotation of pxa27x_set_pwrmode is wrong.

This removes the __init section modifier to fix rebooting and the
build error.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: ba4a90a6d86a ("ARM: pxa/z2: fix building error of pxa27x_cpu_suspend() no longer available")
Signed-off-by: Robert Jarzmik <robert.jarzmik@free.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/mach-pxa/include/mach/pxa27x.h | 2 +-
 arch/arm/mach-pxa/pxa27x.c              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/mach-pxa/include/mach/pxa27x.h b/arch/arm/mach-pxa/include/mach/pxa27x.h
index 599b925a657c4..1a4291936c582 100644
--- a/arch/arm/mach-pxa/include/mach/pxa27x.h
+++ b/arch/arm/mach-pxa/include/mach/pxa27x.h
@@ -19,7 +19,7 @@
 #define ARB_CORE_PARK		(1<<24)	   /* Be parked with core when idle */
 #define ARB_LOCK_FLAG		(1<<23)	   /* Only Locking masters gain access to the bus */
 
-extern int __init pxa27x_set_pwrmode(unsigned int mode);
+extern int pxa27x_set_pwrmode(unsigned int mode);
 extern void pxa27x_cpu_pm_enter(suspend_state_t state);
 
 #endif /* __MACH_PXA27x_H */
diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index 221260d5d1092..ffc4240285577 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(pxa27x_configure_ac97reset);
  */
 static unsigned int pwrmode = PWRMODE_SLEEP;
 
-int __init pxa27x_set_pwrmode(unsigned int mode)
+int pxa27x_set_pwrmode(unsigned int mode)
 {
 	switch (mode) {
 	case PWRMODE_SLEEP:

From 9473e288c36140f5b2e0cedc9b8fbcc7443838b9 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Sun, 25 Oct 2015 23:21:42 +0100
Subject: [PATCH 030/201] MIPS: lantiq: add clk_round_rate()

commit 4e7d30dba493b60a80e9b590add1b4402265cc83 upstream.

This adds a basic implementation of clk_round_rate()
The clk_round_rate() function is called by multiple drivers and
subsystems now and the lantiq clk driver is supposed to export this,
but doesn't do so, this causes linking problems like this one:
ERROR: "clk_round_rate" [drivers/media/v4l2-core/videodev.ko] undefined!

Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Acked-by: John Crispin <blogic@openwrt.org>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/11358/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/lantiq/clk.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c
index 3fc2e6d70c779..a0706fd4ce0a0 100644
--- a/arch/mips/lantiq/clk.c
+++ b/arch/mips/lantiq/clk.c
@@ -99,6 +99,23 @@ int clk_set_rate(struct clk *clk, unsigned long rate)
 }
 EXPORT_SYMBOL(clk_set_rate);
 
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	if (unlikely(!clk_good(clk)))
+		return 0;
+	if (clk->rates && *clk->rates) {
+		unsigned long *r = clk->rates;
+
+		while (*r && (*r != rate))
+			r++;
+		if (!*r) {
+			return clk->rate;
+		}
+	}
+	return rate;
+}
+EXPORT_SYMBOL(clk_round_rate);
+
 int clk_enable(struct clk *clk)
 {
 	if (unlikely(!clk_good(clk)))

From c6f3f16cccadfb898160bd9d2cfdb58574db4123 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 6 Oct 2015 15:12:05 +0100
Subject: [PATCH 031/201] MIPS: CDMM: Add builtin_mips_cdmm_driver() macro

commit 1b4a5ddb127caf125e14551ebd334be1acf21805 upstream.

Add helper macro builtin_mips_cdmm_driver() for builtin CDMM drivers
that don't do anything special in init and have no exit. The
module_mips_cdmm_driver() helper isn't really appropriate for drivers
that can't be built as a module.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/11264/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/cdmm.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/mips/include/asm/cdmm.h b/arch/mips/include/asm/cdmm.h
index bece2064cc8cb..c06dbf8ba9370 100644
--- a/arch/mips/include/asm/cdmm.h
+++ b/arch/mips/include/asm/cdmm.h
@@ -84,6 +84,17 @@ void mips_cdmm_driver_unregister(struct mips_cdmm_driver *);
 	module_driver(__mips_cdmm_driver, mips_cdmm_driver_register, \
 			mips_cdmm_driver_unregister)
 
+/*
+ * builtin_mips_cdmm_driver() - Helper macro for drivers that don't do anything
+ * special in init and have no exit. This eliminates some boilerplate. Each
+ * driver may only use this macro once, and calling it replaces device_initcall
+ * (or in some cases, the legacy __initcall). This is meant to be a direct
+ * parallel of module_mips_cdmm_driver() above but without the __exit stuff that
+ * is not used for builtin cases.
+ */
+#define builtin_mips_cdmm_driver(__mips_cdmm_driver) \
+	builtin_driver(__mips_cdmm_driver, mips_cdmm_driver_register)
+
 /* drivers/tty/mips_ejtag_fdc.c */
 
 #ifdef CONFIG_MIPS_EJTAG_FDC_EARLYCON

From e3301b7ef826632aa792add0f6d97ed577bc2ea9 Mon Sep 17 00:00:00 2001
From: Alban Bedel <albeu@free.fr>
Date: Tue, 17 Nov 2015 09:40:07 +0100
Subject: [PATCH 032/201] MIPS: ath79: Fix the DDR control initialization on
 ar71xx and ar934x

commit 5011a7e808c9fec643d752c5a495a48f27268a48 upstream.

The DDR control initialization needs to know the SoC type, however
ath79_detect_sys_type() was called after ath79_ddr_ctrl_init().
Reverse the order to fix the DDR control initialization on ar71xx and
ar934x.

Signed-off-by: Alban Bedel <albeu@free.fr>
Cc: Felix Fietkau <nbd@openwrt.org>
Cc: Qais Yousef <qais.yousef@imgtec.com>
Cc: Andrew Bresticker <abrestic@chromium.org>
Cc: linux-mips@linux-mips.org
Cc: linux-kernel@vger.kernel.org
Patchwork: https://patchwork.linux-mips.org/patch/11500/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/ath79/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c
index 1ba21204ebe02..9a00137035798 100644
--- a/arch/mips/ath79/setup.c
+++ b/arch/mips/ath79/setup.c
@@ -216,9 +216,9 @@ void __init plat_mem_setup(void)
 					   AR71XX_RESET_SIZE);
 	ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE,
 					 AR71XX_PLL_SIZE);
+	ath79_detect_sys_type();
 	ath79_ddr_ctrl_init();
 
-	ath79_detect_sys_type();
 	if (mips_machtype != ATH79_MACH_GENERIC_OF)
 		detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX);
 

From d169209a986eeb1313a5142e716fa0fc5ba37ad3 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 11 Nov 2015 14:21:18 +0000
Subject: [PATCH 033/201] MIPS: KVM: Fix ASID restoration logic

commit 002374f371bd02df864cce1fe85d90dc5b292837 upstream.

ASID restoration on guest resume should determine the guest execution
mode based on the guest Status register rather than bit 30 of the guest
PC.

Fix the two places in locore.S that do this, loading the guest status
from the cop0 area. Note, this assembly is specific to the trap &
emulate implementation of KVM, so it doesn't need to check the
supervisor bit as that mode is not implemented in the guest.

Fixes: b680f70fc111 ("KVM/MIPS32: Entry point for trampolining to...")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kvm/locore.S | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index c567240386a0f..d1ee95a7f7dd8 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -165,9 +165,11 @@ FEXPORT(__kvm_mips_vcpu_run)
 
 FEXPORT(__kvm_mips_load_asid)
 	/* Set the ASID for the Guest Kernel */
-	INT_SLL	t0, t0, 1	/* with kseg0 @ 0x40000000, kernel */
-			        /* addresses shift to 0x80000000 */
-	bltz	t0, 1f		/* If kernel */
+	PTR_L	t0, VCPU_COP0(k1)
+	LONG_L	t0, COP0_STATUS(t0)
+	andi	t0, KSU_USER | ST0_ERL | ST0_EXL
+	xori	t0, KSU_USER
+	bnez	t0, 1f		/* If kernel */
 	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
 	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
 1:
@@ -482,9 +484,11 @@ __kvm_mips_return_to_guest:
 	mtc0	t0, CP0_EPC
 
 	/* Set the ASID for the Guest Kernel */
-	INT_SLL	t0, t0, 1	/* with kseg0 @ 0x40000000, kernel */
-				/* addresses shift to 0x80000000 */
-	bltz	t0, 1f		/* If kernel */
+	PTR_L	t0, VCPU_COP0(k1)
+	LONG_L	t0, COP0_STATUS(t0)
+	andi	t0, KSU_USER | ST0_ERL | ST0_EXL
+	xori	t0, KSU_USER
+	bnez	t0, 1f		/* If kernel */
 	 INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID  /* (BD)  */
 	INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID    /* else user */
 1:

From 58e3361f1425c81b3b6800b190a525d8b876df28 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 11 Nov 2015 14:21:19 +0000
Subject: [PATCH 034/201] MIPS: KVM: Fix CACHE immediate offset sign extension

commit c5c2a3b998f1ff5a586f9d37e154070b8d550d17 upstream.

The immediate field of the CACHE instruction is signed, so ensure that
it gets sign extended by casting it to an int16_t rather than just
masking the low 16 bits.

Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kvm/emulate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index d5fa3eaf39a10..41b1b090f56f6 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1581,7 +1581,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc,
 
 	base = (inst >> 21) & 0x1f;
 	op_inst = (inst >> 16) & 0x1f;
-	offset = inst & 0xffff;
+	offset = (int16_t)inst;
 	cache = (inst >> 16) & 0x3;
 	op = (inst >> 18) & 0x7;
 

From 40b295b30ed21e0e8f5bd3d3208323dd4d53fa8d Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Wed, 11 Nov 2015 14:21:20 +0000
Subject: [PATCH 035/201] MIPS: KVM: Uninit VCPU in vcpu_create error path

commit 585bb8f9a5e592f2ce7abbe5ed3112d5438d2754 upstream.

If either of the memory allocations in kvm_arch_vcpu_create() fail, the
vcpu which has been allocated and kvm_vcpu_init'd doesn't get uninit'd
in the error handling path. Add a call to kvm_vcpu_uninit() to fix this.

Fixes: 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM")
Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: kvm@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/kvm/mips.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 49ff3bfc007e5..b9b803facdbf7 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -279,7 +279,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 
 	if (!gebase) {
 		err = -ENOMEM;
-		goto out_free_cpu;
+		goto out_uninit_cpu;
 	}
 	kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n",
 		  ALIGN(size, PAGE_SIZE), gebase);
@@ -343,6 +343,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 out_free_gebase:
 	kfree(gebase);
 
+out_uninit_cpu:
+	kvm_vcpu_uninit(vcpu);
+
 out_free_cpu:
 	kfree(vcpu);
 

From 2bb00c908676bd240f8a8d0d19e477874c85f124 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Thu, 8 Oct 2015 20:23:33 +0200
Subject: [PATCH 036/201] kvm: x86: set KVM_REQ_EVENT when updating IRR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c77f3fab441c3e466b4c3601a475fc31ce156b06 upstream.

After moving PIR to IRR, the interrupt needs to be delivered manually.

Reported-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/lapic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8d9013c5e1eef..ae4483a3e1a72 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	__kvm_apic_update_irr(pir, apic->regs);
+
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
 

From ff3a8601b8a4be9932c714b59392900ecb10bc84 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 19 Oct 2015 11:30:19 +0200
Subject: [PATCH 037/201] kvm: x86: zero EFER on INIT

commit 5690891bcec5fcfda38da974ffa5488e36a59811 upstream.

Not zeroing EFER means that a 32-bit firmware cannot enter paging mode
without clearing EFER.LME first (which it should not know about).
Yang Zhang from Intel confirmed that the manual is wrong and EFER is
cleared to zero on INIT.

Fixes: d28bc9dd25ce023270d2e039e7c98d38ecbf7758
Cc: Yang Z Zhang <yang.z.zhang@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/svm.c | 11 +++++------
 arch/x86/kvm/vmx.c |  3 +--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2f9ed1ff06326..5b342a031478a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1086,7 +1086,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }
 
-static void init_vmcb(struct vcpu_svm *svm, bool init_event)
+static void init_vmcb(struct vcpu_svm *svm)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
 	struct vmcb_save_area *save = &svm->vmcb->save;
@@ -1157,8 +1157,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 	init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
 	init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
-	if (!init_event)
-		svm_set_efer(&svm->vcpu, 0);
+	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
 	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
@@ -1212,7 +1211,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 		if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
 			svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
 	}
-	init_vmcb(svm, init_event);
+	init_vmcb(svm);
 
 	kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy);
 	kvm_register_write(vcpu, VCPU_REGS_RDX, eax);
@@ -1268,7 +1267,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
 	clear_page(svm->vmcb);
 	svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
 	svm->asid_generation = 0;
-	init_vmcb(svm, false);
+	init_vmcb(svm);
 
 	svm_init_osvw(&svm->vcpu);
 
@@ -1890,7 +1889,7 @@ static int shutdown_interception(struct vcpu_svm *svm)
 	 * so reinitialize it.
 	 */
 	clear_page(svm->vmcb);
-	init_vmcb(svm, false);
+	init_vmcb(svm);
 
 	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
 	return 0;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6a8bc64566abd..ceb051496eed4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4771,8 +4771,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	vmx_set_cr0(vcpu, cr0); /* enter rmode */
 	vmx->vcpu.arch.cr0 = cr0;
 	vmx_set_cr4(vcpu, 0);
-	if (!init_event)
-		vmx_set_efer(vcpu, 0);
+	vmx_set_efer(vcpu, 0);
 	vmx_fpu_activate(vcpu);
 	update_exception_bitmap(vcpu);
 

From bead1229f821de2ec81b5d361e1750a8e564d248 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Fri, 30 Oct 2015 16:36:24 +0100
Subject: [PATCH 038/201] KVM: x86: add read_phys to x86_emulate_ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 7a036a6f670f63b32c5ee126425f9109271ca13f upstream.

We want to read the physical memory when emulating RSM.

X86EMUL_IO_NEEDED is returned on all errors for consistency with other
helpers.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/kvm_emulate.h | 10 ++++++++++
 arch/x86/kvm/x86.c                 | 10 ++++++++++
 2 files changed, 20 insertions(+)

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index e16466ec473cb..e9cd7befcb76a 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -111,6 +111,16 @@ struct x86_emulate_ops {
 			unsigned int bytes,
 			struct x86_exception *fault);
 
+	/*
+	 * read_phys: Read bytes of standard (non-emulated/special) memory.
+	 *            Used for descriptor reading.
+	 *  @addr:  [IN ] Physical address from which to read.
+	 *  @val:   [OUT] Value read from memory.
+	 *  @bytes: [IN ] Number of bytes to read from memory.
+	 */
+	int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+			void *val, unsigned int bytes);
+
 	/*
 	 * write_std: Write bytes of standard (non-emulated/special) memory.
 	 *            Used for descriptor writing.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9a9a198303219..ac10269bc25ed 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4059,6 +4059,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception);
 }
 
+static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
+		unsigned long addr, void *val, unsigned int bytes)
+{
+	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
+	int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
+
+	return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
+}
+
 int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 				       gva_t addr, void *val,
 				       unsigned int bytes,
@@ -4794,6 +4803,7 @@ static const struct x86_emulate_ops emulate_ops = {
 	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
+	.read_phys           = kvm_read_guest_phys_system,
 	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,

From b6b8b23bd73762dbb662b5a5a17d1c86f708fa01 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com>
Date: Fri, 30 Oct 2015 16:36:25 +0100
Subject: [PATCH 039/201] KVM: x86: handle SMBASE as physical address in RSM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit f40606b147dd5b4678cedc877a71deb520ca507e upstream.

GET_SMSTATE depends on real mode to ensure that smbase+offset is treated
as a physical address, which has already caused a bug after shuffling
the code.  Enforce physical addressing.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
Reported-by: Laszlo Ersek <lersek@redhat.com>
Tested-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/emulate.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9da95b9daf8de..b60fed56671b8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
 #define GET_SMSTATE(type, smbase, offset)				  \
 	({								  \
 	 type __val;							  \
-	 int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val,       \
-				     sizeof(__val), NULL);		  \
+	 int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val,      \
+				      sizeof(__val));			  \
 	 if (r != X86EMUL_CONTINUE)					  \
 		 return X86EMUL_UNHANDLEABLE;				  \
 	 __val;								  \
@@ -2484,8 +2484,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 
 	/*
 	 * Get back to real mode, to prepare a safe state in which to load
-	 * CR0/CR3/CR4/EFER.  Also this will ensure that addresses passed
-	 * to read_std/write_std are not virtual.
+	 * CR0/CR3/CR4/EFER.
 	 *
 	 * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
 	 */

From 15978cabaaf026f578661f069e18344dfd487cad Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 3 Nov 2015 13:43:05 +0100
Subject: [PATCH 040/201] KVM: x86: allow RSM from 64-bit mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 89651a3decbe03754f304a0b248f27eeb9a37937 upstream.

The SDM says that exiting system management mode from 64-bit mode
is invalid, but that would be too good to be true.  But actually,
most of the code is already there to support exiting from compat
mode (EFER.LME=1, EFER.LMA=0).  Getting all the way from 64-bit
mode to real mode only requires clearing CS.L and CR4.PCIDE.

Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c
Tested-by: Laszlo Ersek <lersek@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/emulate.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index b60fed56671b8..1505587d06e97 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
 
 	/*
 	 * Get back to real mode, to prepare a safe state in which to load
-	 * CR0/CR3/CR4/EFER.
-	 *
-	 * CR4.PCIDE must be zero, because it is a 64-bit mode only feature.
+	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
+	 * supports long mode.
 	 */
+	cr4 = ctxt->ops->get_cr(ctxt, 4);
+	if (emulator_has_longmode(ctxt)) {
+		struct desc_struct cs_desc;
+
+		/* Zero CR4.PCIDE before CR0.PG.  */
+		if (cr4 & X86_CR4_PCIDE) {
+			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+			cr4 &= ~X86_CR4_PCIDE;
+		}
+
+		/* A 32-bit code segment is required to clear EFER.LMA.  */
+		memset(&cs_desc, 0, sizeof(cs_desc));
+		cs_desc.type = 0xb;
+		cs_desc.s = cs_desc.g = cs_desc.p = 1;
+		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+	}
+
+	/* For the 64-bit case, this will clear EFER.LMA.  */
 	cr0 = ctxt->ops->get_cr(ctxt, 0);
 	if (cr0 & X86_CR0_PE)
 		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-	cr4 = ctxt->ops->get_cr(ctxt, 4);
+
+	/* Now clear CR4.PAE (which must be done before clearing EFER.LME).  */
 	if (cr4 & X86_CR4_PAE)
 		ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+	/* And finally go back to 32-bit mode.  */
 	efer = 0;
 	ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
 
@@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = {
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N,
 	/* 0xA8 - 0xAF */
 	I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg),
-	II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm),
+	II(EmulateOnUD | ImplicitOps, em_rsm, rsm),
 	F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts),
 	F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd),
 	F(DstMem | SrcReg | Src2CL | ModRM, em_shrd),

From 1e143e741de2129c896e49eed917b8ff1d6c65bf Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Wed, 4 Nov 2015 12:54:41 +0100
Subject: [PATCH 041/201] KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in
 kvm_set_cr0()

commit 879ae1880449c88db11c1ebdaedc2da79b2fe73f upstream.

Commit b18d5431acc7 ("KVM: x86: fix CR0.CD virtualization") was
technically correct, but it broke OVMF guests by slowing down various
parts of the firmware.

Commit fb279950ba02 ("KVM: vmx: obey KVM_QUIRK_CD_NW_CLEARED") quirked the
first function modified by b18d5431acc7, vmx_get_mt_mask(), for OVMF's
sake. This restored the speed of the OVMF code that runs before
PlatformPei (including the memory intensive LZMA decompression in SEC).

This patch extends the quirk to the second function modified by
b18d5431acc7, kvm_set_cr0(). It eliminates the intrusive slowdown that
hits the EFI_MP_SERVICES_PROTOCOL implementation of edk2's
UefiCpuPkg/CpuDxe -- which is built into OVMF --, when CpuDxe starts up
all APs at once for initialization, in order to count them.

We also carry over the kvm_arch_has_noncoherent_dma() sub-condition from
the other half of the original commit b18d5431acc7.

Fixes: b18d5431acc7a2fd22767925f3a6f597aa4bd29e
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Tested-by: Janusz Mocek <januszmk6@gmail.com>
Signed-off-by: Laszlo Ersek <lersek@redhat.com>#
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kvm/x86.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ac10269bc25ed..43609af032833 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -622,7 +622,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	if ((cr0 ^ old_cr0) & update_bits)
 		kvm_mmu_reset_context(vcpu);
 
-	if ((cr0 ^ old_cr0) & X86_CR0_CD)
+	if (((cr0 ^ old_cr0) & X86_CR0_CD) &&
+	    kvm_arch_has_noncoherent_dma(vcpu->kvm) &&
+	    !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED))
 		kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL);
 
 	return 0;

From 971cdbf756b5e29427ca8d94e0df3a25a66a7ed8 Mon Sep 17 00:00:00 2001
From: Eric Northup <digitaleric@google.com>
Date: Tue, 3 Nov 2015 18:03:53 +0100
Subject: [PATCH 042/201] KVM: x86: work around infinite loop in microcode when
 #AC is delivered

commit 54a20552e1eae07aa240fa370a0293e006b5faed upstream.

It was found that a guest can DoS a host by triggering an infinite
stream of "alignment check" (#AC) exceptions.  This causes the
microcode to enter an infinite loop where the core never receives
another interrupt.  The host kernel panics pretty quickly due to the
effects (CVE-2015-5307).

Signed-off-by: Eric Northup <digitaleric@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/uapi/asm/svm.h | 1 +
 arch/x86/kvm/svm.c              | 8 ++++++++
 arch/x86/kvm/vmx.c              | 5 ++++-
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h
index b5d7640abc5d6..8a4add8e46393 100644
--- a/arch/x86/include/uapi/asm/svm.h
+++ b/arch/x86/include/uapi/asm/svm.h
@@ -100,6 +100,7 @@
 	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,       "UD excp" }, \
 	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" }, \
 	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,       "NM excp" }, \
+	{ SVM_EXIT_EXCP_BASE + AC_VECTOR,       "AC excp" }, \
 	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,       "MC excp" }, \
 	{ SVM_EXIT_INTR,        "interrupt" }, \
 	{ SVM_EXIT_NMI,         "nmi" }, \
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5b342a031478a..d7f89387ba0c9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1107,6 +1107,7 @@ static void init_vmcb(struct vcpu_svm *svm)
 	set_exception_intercept(svm, PF_VECTOR);
 	set_exception_intercept(svm, UD_VECTOR);
 	set_exception_intercept(svm, MC_VECTOR);
+	set_exception_intercept(svm, AC_VECTOR);
 
 	set_intercept(svm, INTERCEPT_INTR);
 	set_intercept(svm, INTERCEPT_NMI);
@@ -1795,6 +1796,12 @@ static int ud_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static int ac_interception(struct vcpu_svm *svm)
+{
+	kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
+	return 1;
+}
+
 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3370,6 +3377,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_EXCP_BASE + PF_VECTOR]	= pf_interception,
 	[SVM_EXIT_EXCP_BASE + NM_VECTOR]	= nm_interception,
 	[SVM_EXIT_EXCP_BASE + MC_VECTOR]	= mc_interception,
+	[SVM_EXIT_EXCP_BASE + AC_VECTOR]	= ac_interception,
 	[SVM_EXIT_INTR]				= intr_interception,
 	[SVM_EXIT_NMI]				= nmi_interception,
 	[SVM_EXIT_SMI]				= nop_on_interception,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index ceb051496eed4..343d3692dd65d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	u32 eb;
 
 	eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
-	     (1u << NM_VECTOR) | (1u << DB_VECTOR);
+	     (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
 	if ((vcpu->guest_debug &
 	     (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
 	    (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
@@ -5103,6 +5103,9 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		return handle_rmode_exception(vcpu, ex_no, error_code);
 
 	switch (ex_no) {
+	case AC_VECTOR:
+		kvm_queue_exception_e(vcpu, AC_VECTOR, error_code);
+		return 1;
 	case DB_VECTOR:
 		dr6 = vmcs_readl(EXIT_QUALIFICATION);
 		if (!(vcpu->guest_debug &

From 268ad525ea7faf13437b29f564e681e885d07b60 Mon Sep 17 00:00:00 2001
From: Krzysztof Mazur <krzysiek@podlesie.net>
Date: Fri, 6 Nov 2015 14:18:36 +0100
Subject: [PATCH 043/201] x86/setup: Fix low identity map for >= 2GB kernel
 range

commit 68accac392d859d24adcf1be3a90e41f978bd54c upstream.

The commit f5f3497cad8c extended the low identity mapping. However, if
the kernel uses more than 2 GB (VMSPLIT_2G_OPT or VMSPLIT_1G memory
split), the normal memory mapping is overwritten by the low identity
mapping causing a crash. To avoid overwritting, limit the low identity
map to cover only memory before kernel range (PAGE_OFFSET).

Fixes: f5f3497cad8c "x86/setup: Extend low identity map to cover whole kernel range
Signed-off-by: Krzysztof Mazur <krzysiek@podlesie.net>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Laszlo Ersek <lersek@redhat.com>
Cc: Matt Fleming <matt.fleming@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Link: http://lkml.kernel.org/r/1446815916-22105-1-git-send-email-krzysiek@podlesie.net
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a3cccbfc5f777..37c8ea8e75ffd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1180,7 +1180,7 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	clone_pgd_range(initial_page_table,
 			swapper_pg_dir     + KERNEL_PGD_BOUNDARY,
-			KERNEL_PGD_PTRS);
+			min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY));
 #endif
 
 	tboot_probe();

From abb92fbf9750341af0425f638d361d7c06b36c49 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 3 Nov 2015 10:40:14 +0100
Subject: [PATCH 044/201] x86/irq: Probe for PIC presence before allocating
 descs for legacy IRQs

commit 8c058b0b9c34d8c8d7912880956543769323e2d8 upstream.

Commit d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain
interfaces") brought a regression for Hyper-V Gen2 instances. These
instances don't have i8259 legacy PIC but they use legacy IRQs for serial
port, rtc, and acpi. With this commit included we end up with these IRQs
not initialized. Earlier, there was a special workaround for legacy IRQs
in mp_map_pin_to_irq() doing mp_irqdomain_map() without looking at
nr_legacy_irqs() and now we fail in __irq_domain_alloc_irqs() when
irq_domain_alloc_descs() returns -EEXIST.

The essence of the issue seems to be that early_irq_init() calls
arch_probe_nr_irqs() to figure out the number of legacy IRQs before
we probe for i8259 and gets 16. Later when init_8259A() is called we switch
to NULL legacy PIC and nr_legacy_irqs() starts to return 0 but we already
have 16 descs allocated.

Solve the issue by separating i8259 probe from init and calling it in
arch_probe_nr_irqs() before we actually use nr_legacy_irqs() information.

Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces")
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Cc: Jiang Liu <jiang.liu@linux.intel.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Link: http://lkml.kernel.org/r/1446543614-3621-1-git-send-email-vkuznets@redhat.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/include/asm/i8259.h  |  1 +
 arch/x86/kernel/apic/vector.c |  6 +++++-
 arch/x86/kernel/i8259.c       | 29 +++++++++++++++++++++--------
 3 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index ccffa53750a89..39bcefc20de73 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -60,6 +60,7 @@ struct legacy_pic {
 	void (*mask_all)(void);
 	void (*restore_mask)(void);
 	void (*init)(int auto_eoi);
+	int (*probe)(void);
 	int (*irq_pending)(unsigned int irq);
 	void (*make_irq)(unsigned int irq);
 };
diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 836d11b92811c..861bc59c8f256 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -361,7 +361,11 @@ int __init arch_probe_nr_irqs(void)
 	if (nr < nr_irqs)
 		nr_irqs = nr;
 
-	return nr_legacy_irqs();
+	/*
+	 * We don't know if PIC is present at this point so we need to do
+	 * probe() to get the right number of legacy IRQs.
+	 */
+	return legacy_pic->probe();
 }
 
 #ifdef	CONFIG_X86_IO_APIC
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 16cb827a5b277..be22f5a2192e3 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -295,16 +295,11 @@ static void unmask_8259A(void)
 	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
 }
 
-static void init_8259A(int auto_eoi)
+static int probe_8259A(void)
 {
 	unsigned long flags;
 	unsigned char probe_val = ~(1 << PIC_CASCADE_IR);
 	unsigned char new_val;
-
-	i8259A_auto_eoi = auto_eoi;
-
-	raw_spin_lock_irqsave(&i8259A_lock, flags);
-
 	/*
 	 * Check to see if we have a PIC.
 	 * Mask all except the cascade and read
@@ -312,16 +307,28 @@ static void init_8259A(int auto_eoi)
 	 * have a PIC, we will read 0xff as opposed to the
 	 * value we wrote.
 	 */
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
 	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 	outb(probe_val, PIC_MASTER_IMR);
 	new_val = inb(PIC_MASTER_IMR);
 	if (new_val != probe_val) {
 		printk(KERN_INFO "Using NULL legacy PIC\n");
 		legacy_pic = &null_legacy_pic;
-		raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-		return;
 	}
 
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+	return nr_legacy_irqs();
+}
+
+static void init_8259A(int auto_eoi)
+{
+	unsigned long flags;
+
+	i8259A_auto_eoi = auto_eoi;
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
 
 	/*
@@ -379,6 +386,10 @@ static int legacy_pic_irq_pending_noop(unsigned int irq)
 {
 	return 0;
 }
+static int legacy_pic_probe(void)
+{
+	return 0;
+}
 
 struct legacy_pic null_legacy_pic = {
 	.nr_legacy_irqs = 0,
@@ -388,6 +399,7 @@ struct legacy_pic null_legacy_pic = {
 	.mask_all = legacy_pic_noop,
 	.restore_mask = legacy_pic_noop,
 	.init = legacy_pic_int_noop,
+	.probe = legacy_pic_probe,
 	.irq_pending = legacy_pic_irq_pending_noop,
 	.make_irq = legacy_pic_uint_noop,
 };
@@ -400,6 +412,7 @@ struct legacy_pic default_legacy_pic = {
 	.mask_all = mask_8259A,
 	.restore_mask = unmask_8259A,
 	.init = init_8259A,
+	.probe = probe_8259A,
 	.irq_pending = i8259A_irq_pending,
 	.make_irq = make_8259A_irq,
 };

From 58c9f67c37c1591544c06581d820f690d740ef45 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@suse.de>
Date: Thu, 5 Nov 2015 16:57:56 +0100
Subject: [PATCH 045/201] x86/cpu: Call verify_cpu() after having entered long
 mode too

commit 04633df0c43d710e5f696b06539c100898678235 upstream.

When we get loaded by a 64-bit bootloader, kernel entry point is
startup_64 in head_64.S. We don't trust any and all bootloaders because
some will fiddle with CPU configuration so we go ahead and massage each
CPU into sanity again.

For example, some dell BIOSes have this XD disable feature which set
IA32_MISC_ENABLE[34] and disable NX. This might be some dumb workaround
for other OSes but Linux sure doesn't need it.

A similar thing is present in the Surface 3 firmware - see
https://bugzilla.kernel.org/show_bug.cgi?id=106051 - which sets this bit
only on the BSP:

  # rdmsr -a 0x1a0
  400850089
  850089
  850089
  850089

I know, right?!

There's not even an off switch in there.

So fix all those cases by sanitizing the 64-bit entry point too. For
that, make verify_cpu() callable in 64-bit mode also.

Requested-and-debugged-by: "H. Peter Anvin" <hpa@zytor.com>
Reported-and-tested-by: Bastien Nocera <bugzilla@hadess.net>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Matt Fleming <matt@codeblueprint.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1446739076-21303-1-git-send-email-bp@alien8.de
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/head_64.S    |  8 ++++++++
 arch/x86/kernel/verify_cpu.S | 12 +++++++-----
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 1d40ca8a73f27..ffdc0e8603902 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -65,6 +65,9 @@ startup_64:
 	 * tables and then reload them.
 	 */
 
+	/* Sanitize CPU configuration */
+	call verify_cpu
+
 	/*
 	 * Compute the delta between the address I am compiled to run at and the
 	 * address I am actually running at.
@@ -174,6 +177,9 @@ ENTRY(secondary_startup_64)
 	 * after the boot processor executes this code.
 	 */
 
+	/* Sanitize CPU configuration */
+	call verify_cpu
+
 	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
 1:
 
@@ -288,6 +294,8 @@ ENTRY(secondary_startup_64)
 	pushq	%rax		# target address in negative space
 	lretq
 
+#include "verify_cpu.S"
+
 #ifdef CONFIG_HOTPLUG_CPU
 /*
  * Boot CPU0 entry point. It's called from play_dead(). Everything has been set
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index b9242bacbe59a..4cf401f581e7e 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -34,10 +34,11 @@
 #include <asm/msr-index.h>
 
 verify_cpu:
-	pushfl				# Save caller passed flags
-	pushl	$0			# Kill any dangerous flags
-	popfl
+	pushf				# Save caller passed flags
+	push	$0			# Kill any dangerous flags
+	popf
 
+#ifndef __x86_64__
 	pushfl				# standard way to check for cpuid
 	popl	%eax
 	movl	%eax,%ebx
@@ -48,6 +49,7 @@ verify_cpu:
 	popl	%eax
 	cmpl	%eax,%ebx
 	jz	verify_cpu_no_longmode	# cpu has no cpuid
+#endif
 
 	movl	$0x0,%eax		# See if cpuid 1 is implemented
 	cpuid
@@ -130,10 +132,10 @@ verify_cpu_sse_test:
 	jmp	verify_cpu_sse_test	# try again
 
 verify_cpu_no_longmode:
-	popfl				# Restore caller passed flags
+	popf				# Restore caller passed flags
 	movl $1,%eax
 	ret
 verify_cpu_sse_ok:
-	popfl				# Restore caller passed flags
+	popf				# Restore caller passed flags
 	xorl %eax, %eax
 	ret

From 905095d7916c3fab9ea86f0eb55e4a6b65713f5e Mon Sep 17 00:00:00 2001
From: Andrew Cooper <andrew.cooper3@citrix.com>
Date: Wed, 3 Jun 2015 10:31:14 +0100
Subject: [PATCH 046/201] x86/cpu: Fix SMAP check in PVOPS environments

commit 581b7f158fe0383b492acd1ce3fb4e99d4e57808 upstream.

There appears to be no formal statement of what pv_irq_ops.save_fl() is
supposed to return precisely.  Native returns the full flags, while lguest and
Xen only return the Interrupt Flag, and both have comments by the
implementations stating that only the Interrupt Flag is looked at.  This may
have been true when initially implemented, but no longer is.

To make matters worse, the Xen PVOP leaves the upper bits undefined, making
the BUG_ON() undefined behaviour.  Experimentally, this now trips for 32bit PV
guests on Broadwell hardware.  The BUG_ON() is consistent for an individual
build, but not consistent for all builds.  It has also been a sitting timebomb
since SMAP support was introduced.

Use native_save_fl() instead, which will obtain an accurate view of the AC
flag.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Tested-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: <lguest@lists.ozlabs.org>
Cc: Xen-devel <xen-devel@lists.xen.org>
Link: http://lkml.kernel.org/r/1433323874-6927-1-git-send-email-andrew.cooper3@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index de22ea7ff82f9..1a292573ddf79 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -273,10 +273,9 @@ __setup("nosmap", setup_disable_smap);
 
 static __always_inline void setup_smap(struct cpuinfo_x86 *c)
 {
-	unsigned long eflags;
+	unsigned long eflags = native_save_fl();
 
 	/* This should have been cleared long ago */
-	raw_local_save_flags(eflags);
 	BUG_ON(eflags & X86_EFLAGS_AC);
 
 	if (cpu_has(c, X86_FEATURE_SMAP)) {

From 2062c50207c0a0624d459d56356f5aaf51323eb2 Mon Sep 17 00:00:00 2001
From: Huaitong Han <huaitong.han@intel.com>
Date: Fri, 6 Nov 2015 17:00:23 +0800
Subject: [PATCH 047/201] x86/fpu: Fix get_xsave_addr() behavior under
 virtualization

commit a05917b6ba9dc9a95fc42bdcbe3a875e8ad83935 upstream.

KVM uses the get_xsave_addr() function in a different fashion from
the native kernel, in that the 'xsave' parameter belongs to guest vcpu,
not the currently running task.

But 'xsave' is replaced with current task's (host) xsave structure, so
get_xsave_addr() will incorrectly return the bad xsave address to KVM.

Fix it so that the passed in 'xsave' address is used - as intended
originally.

Signed-off-by: Huaitong Han <huaitong.han@intel.com>
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave.hansen@intel.com
Link: http://lkml.kernel.org/r/1446800423-21622-1-git-send-email-huaitong.han@intel.com
[ Tidied up the changelog. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/fpu/xstate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 62fc001c7846d..2c4ac072a7023 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -402,7 +402,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
 	if (!boot_cpu_has(X86_FEATURE_XSAVE))
 		return NULL;
 
-	xsave = &current->thread.fpu.state.xsave;
 	/*
 	 * We should not ever be requesting features that we
 	 * have not enabled.  Remember that pcntxt_mask is

From 44a06ce07b02abb8a0b5c03e055dae3613fce390 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Tue, 10 Nov 2015 16:23:54 -0800
Subject: [PATCH 048/201] x86/fpu: Fix 32-bit signal frame handling

commit ab6b52947545a5355154f64f449f97af9d05845f upstream.

(This should have gone to LKML originally. Sorry for the extra
 noise, folks on the cc.)

Background:

Signal frames on x86 have two formats:

  1. For 32-bit executables (whether on a real 32-bit kernel or
     under 32-bit emulation on a 64-bit kernel) we have a
    'fpregset_t' that includes the "FSAVE" registers.

  2. For 64-bit executables (on 64-bit kernels obviously), the
     'fpregset_t' is smaller and does not contain the "FSAVE"
     state.

When creating the signal frame, we have to be aware of whether
we are running a 32 or 64-bit executable so we create the
correct format signal frame.

Problem:

save_xstate_epilog() uses 'fx_sw_reserved_ia32' whenever it is
called for a 32-bit executable.  This is for real 32-bit and
ia32 emulation.

But, fpu__init_prepare_fx_sw_frame() only initializes
'fx_sw_reserved_ia32' when emulation is enabled, *NOT* for real
32-bit kernels.

This leads to really wierd situations where 32-bit programs
lose their extended state when returning from a signal handler.
The kernel copies the uninitialized (zero) 'fx_sw_reserved_ia32'
out to userspace in save_xstate_epilog().  But when returning
from the signal, the kernel errors out in check_for_xstate()
when it does not see FP_XSTATE_MAGIC1 present (because it was
zeroed).  This leads to the FPU/XSAVE state being initialized.

For MPX, this leads to the most permissive state and means we
silently lose bounds violations.  I think this would also mean
that we could lose *ANY* FPU/SSE/AVX state.  I'm not sure why
no one has spotted this bug.

I believe this was broken by:

	72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels")

way back in 2012.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: dave@sr71.net
Cc: fenghua.yu@intel.com
Cc: yu-cheng.yu@intel.com
Link: http://lkml.kernel.org/r/20151111002354.A0799571@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/fpu/signal.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index 50ec9af1bd518..6545e6ddbfb1b 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -385,20 +385,19 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
  */
 void fpu__init_prepare_fx_sw_frame(void)
 {
-	int fsave_header_size = sizeof(struct fregs_state);
 	int size = xstate_size + FP_XSTATE_MAGIC2_SIZE;
 
-	if (config_enabled(CONFIG_X86_32))
-		size += fsave_header_size;
-
 	fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
 	fx_sw_reserved.extended_size = size;
 	fx_sw_reserved.xfeatures = xfeatures_mask;
 	fx_sw_reserved.xstate_size = xstate_size;
 
-	if (config_enabled(CONFIG_IA32_EMULATION)) {
+	if (config_enabled(CONFIG_IA32_EMULATION) ||
+	    config_enabled(CONFIG_X86_32)) {
+		int fsave_header_size = sizeof(struct fregs_state);
+
 		fx_sw_reserved_ia32 = fx_sw_reserved;
-		fx_sw_reserved_ia32.extended_size += fsave_header_size;
+		fx_sw_reserved_ia32.extended_size = size + fsave_header_size;
 	}
 }
 

From 10ec42fc4ed17919a154c3f6448cb3db4bbeed6d Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 11 Nov 2015 10:19:31 -0800
Subject: [PATCH 049/201] x86/mpx: Do proper get_user() when running 32-bit
 binaries on 64-bit kernels

commit 46561c3959d6307d22139c24cd0bf196162e5681 upstream.

When you call get_user(foo, bar), you effectively do a

	copy_from_user(&foo, bar, sizeof(*bar));

Note that the sizeof() is implicit.

When we reach out to userspace to try to zap an entire "bounds
table" we need to go read a "bounds directory entry" in order to
locate the table's address.  The size of a "directory entry"
depends on the binary being run and is always the size of a
pointer.

But, when we have a 64-bit kernel and a 32-bit application, the
directory entry is still only 32-bits long, but we fetch it with
a 64-bit pointer which makes get_user() does a 64-bit fetch.
Reading 4 extra bytes isn't harmful, unless we are at the end of
and run off the table.  It might also cause the zero page to get
faulted in unnecessarily even if you are not at the end.

Fix it up by doing a special 32-bit get_user() via a cast when
we have 32-bit userspace.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20151111181931.3ACF6822@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/mpx.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 134948b0926f5..e9d04ff749d15 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -584,6 +584,29 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
 	return bt_addr;
 }
 
+/*
+ * We only want to do a 4-byte get_user() on 32-bit.  Otherwise,
+ * we might run off the end of the bounds table if we are on
+ * a 64-bit kernel and try to get 8 bytes.
+ */
+int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret,
+		long __user *bd_entry_ptr)
+{
+	u32 bd_entry_32;
+	int ret;
+
+	if (is_64bit_mm(mm))
+		return get_user(*bd_entry_ret, bd_entry_ptr);
+
+	/*
+	 * Note that get_user() uses the type of the *pointer* to
+	 * establish the size of the get, not the destination.
+	 */
+	ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
+	*bd_entry_ret = bd_entry_32;
+	return ret;
+}
+
 /*
  * Get the base of bounds tables pointed by specific bounds
  * directory entry.
@@ -604,7 +627,7 @@ static int get_bt_addr(struct mm_struct *mm,
 		int need_write = 0;
 
 		pagefault_disable();
-		ret = get_user(bd_entry, bd_entry_ptr);
+		ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
 		pagefault_enable();
 		if (!ret)
 			break;

From 7631e903a6bd10e1eba646393991d36501382511 Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Wed, 11 Nov 2015 10:19:34 -0800
Subject: [PATCH 050/201] x86/mpx: Fix 32-bit address space calculation

commit f3119b830264d89d216bfb378ab65065dffa02d9 upstream.

I received a bug report that running 32-bit MPX binaries on
64-bit kernels was broken.  I traced it down to this little code
snippet.  We were switching our "number of bounds directory
entries" calculation correctly.  But, we didn't switch the other
side of the calculation: the virtual space size.

This meant that we were calculating an absurd size for
bd_entry_virt_space() on 32-bit because we used the 64-bit
virt_space.

This was _also_ broken for 32-bit kernels running on 64-bit
hardware since boot_cpu_data.x86_virt_bits=48 even when running
in 32-bit mode.

Correct that and properly handle all 3 possible cases:

 1. 32-bit binary on 64-bit kernel
 2. 64-bit binary on 64-bit kernel
 3. 32-bit binary on 32-bit kernel

This manifested in having bounds tables not properly unmapped.
It "leaked" memory but had no functional impact otherwise.

Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave@sr71.net>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20151111181934.FA7FAC34@viggo.jf.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/mm/mpx.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index e9d04ff749d15..71fc79a58a15a 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -722,11 +722,23 @@ static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
  */
 static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
 {
-	unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
-	if (is_64bit_mm(mm))
-		return virt_space / MPX_BD_NR_ENTRIES_64;
-	else
-		return virt_space / MPX_BD_NR_ENTRIES_32;
+	unsigned long long virt_space;
+	unsigned long long GB = (1ULL << 30);
+
+	/*
+	 * This covers 32-bit emulation as well as 32-bit kernels
+	 * running on 64-bit harware.
+	 */
+	if (!is_64bit_mm(mm))
+		return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
+
+	/*
+	 * 'x86_virt_bits' returns what the hardware is capable
+	 * of, and returns the full >32-bit adddress space when
+	 * running 32-bit kernels on 64-bit hardware.
+	 */
+	virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
+	return virt_space / MPX_BD_NR_ENTRIES_64;
 }
 
 /*

From 328a2865479c18995902c9be3ff754cd4a91845d Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Sun, 25 Oct 2015 10:59:38 +0200
Subject: [PATCH 051/201] mac80211: Fix local deauth while associating

commit a64cba3c5330704a034bd3179270b8d04daf6987 upstream.

Local request to deauthenticate wasn't handled while associating, thus
the association could continue even when the user space required to
disconnect.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/mlme.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index cd7e55e08a238..af86975b35daa 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5028,6 +5028,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 		return 0;
 	}
 
+	if (ifmgd->assoc_data &&
+	    ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) {
+		sdata_info(sdata,
+			   "aborting association with %pM by local choice (Reason: %u=%s)\n",
+			   req->bssid, req->reason_code,
+			   ieee80211_get_reason_code_string(req->reason_code));
+
+		drv_mgd_prepare_tx(sdata->local, sdata);
+		ieee80211_send_deauth_disassoc(sdata, req->bssid,
+					       IEEE80211_STYPE_DEAUTH,
+					       req->reason_code, tx,
+					       frame_buf);
+		ieee80211_destroy_assoc_data(sdata, false);
+		ieee80211_report_disconnect(sdata, frame_buf,
+					    sizeof(frame_buf), true,
+					    req->reason_code);
+		return 0;
+	}
+
 	if (ifmgd->associated &&
 	    ether_addr_equal(ifmgd->associated->bssid, req->bssid)) {
 		sdata_info(sdata,

From 85eeb5c5ad3d97ea3e59221b4237881b3a5d5731 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 28 Aug 2015 10:52:53 +0200
Subject: [PATCH 052/201] mac80211: fix driver RSSI event calculations

commit 8ec6d97871f37e4743678ea4a455bd59580aa0f4 upstream.

The ifmgd->ave_beacon_signal value cannot be taken as is for
comparisons, it must be divided by since it's represented
like that for better accuracy of the EWMA calculations. This
would lead to invalid driver RSSI events. Fix the used value.

Fixes: 615f7b9bb1f8 ("mac80211: add driver RSSI threshold events")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/mlme.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index af86975b35daa..d011bc5391975 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3391,7 +3391,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold &&
 	    ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) {
-		int sig = ifmgd->ave_beacon_signal;
+		int sig = ifmgd->ave_beacon_signal / 16;
 		int last_sig = ifmgd->last_ave_beacon_signal;
 		struct ieee80211_event event = {
 			.type = RSSI_EVENT,

From db96a3cfca19fd71caab2e88bc2e455da52d654e Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Sun, 25 Oct 2015 10:59:41 +0200
Subject: [PATCH 053/201] mac80211: allow null chandef in tracing

commit 254d3dfe445f94a764e399ca12e04365ac9413ed upstream.

In TDLS channel-switch operations the chandef can sometimes be NULL.
Avoid an oops in the trace code for these cases and just print a
chandef full of zeros.

Fixes: a7a6bdd0670fe ("mac80211: introduce TDLS channel switch ops")
Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/trace.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 6f14591d8ca9e..0b13bfa6f32fe 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -33,11 +33,11 @@
 			__field(u32, chan_width)					\
 			__field(u32, center_freq1)					\
 			__field(u32, center_freq2)
-#define CHANDEF_ASSIGN(c)								\
-			__entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0;	\
-			__entry->chan_width = (c)->width;				\
-			__entry->center_freq1 = (c)->center_freq1;			\
-			__entry->center_freq2 = (c)->center_freq2;
+#define CHANDEF_ASSIGN(c)							\
+			__entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0;	\
+			__entry->chan_width = (c) ? (c)->width : 0;			\
+			__entry->center_freq1 = (c) ? (c)->center_freq1 : 0;		\
+			__entry->center_freq2 = (c) ? (c)->center_freq2 : 0;
 #define CHANDEF_PR_FMT	" control:%d MHz width:%d center: %d/%d MHz"
 #define CHANDEF_PR_ARG	__entry->control_freq, __entry->chan_width,			\
 			__entry->center_freq1, __entry->center_freq2

From 901b9f45117c6de411ed6ed8a1e08304a1bfe869 Mon Sep 17 00:00:00 2001
From: "Janusz.Dziedzic@tieto.com" <Janusz.Dziedzic@tieto.com>
Date: Tue, 27 Oct 2015 08:35:11 +0100
Subject: [PATCH 054/201] mac80211: fix divide by zero when NOA update

commit 519ee6918b91abdc4bc9720deae17599a109eb40 upstream.

In case of one shot NOA the interval can be 0, catch that
instead of potentially (depending on the driver) crashing
like this:

divide error: 0000 [#1] SMP
[...]
Call Trace:
<IRQ>
[<ffffffffc08e891c>] ieee80211_extend_absent_time+0x6c/0xb0 [mac80211]
[<ffffffffc08e8a17>] ieee80211_update_p2p_noa+0xb7/0xe0 [mac80211]
[<ffffffffc069cc30>] ath9k_p2p_ps_timer+0x170/0x190 [ath9k]
[<ffffffffc070adf8>] ath_gen_timer_isr+0xc8/0xf0 [ath9k_hw]
[<ffffffffc0691156>] ath9k_tasklet+0x296/0x2f0 [ath9k]
[<ffffffff8107ad65>] tasklet_action+0xe5/0xf0
[...]

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/mac80211/util.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 1104421bc5255..cd90ece807721 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2951,6 +2951,13 @@ ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i)
 	if (end > 0)
 		return false;
 
+	/* One shot NOA  */
+	if (data->count[i] == 1)
+		return false;
+
+	if (data->desc[i].interval == 0)
+		return false;
+
 	/* End time is in the past, check for repetitions */
 	skip = DIV_ROUND_UP(-end, data->desc[i].interval);
 	if (data->count[i] < 255) {

From 526b4f4d1e1f3e16e0bd185181c23e5ff841a20e Mon Sep 17 00:00:00 2001
From: Ola Olsson <ola1olsson@gmail.com>
Date: Thu, 29 Oct 2015 07:04:58 +0100
Subject: [PATCH 055/201] nl80211: Fix potential memory leak from
 parse_acl_data

commit 4baf6bea37247e59f1971e8009d13aeda95edba2 upstream.

If parse_acl_data succeeds but the subsequent parsing of smps
attributes fails, there will be a memory leak due to early returns.
Fix that by moving the ACL parsing later.

Fixes: 18998c381b19b ("cfg80211: allow requesting SMPS mode on ap start")
Signed-off-by: Ola Olsson <ola.olsson@sonymobile.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/wireless/nl80211.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5d8748b4c8a2d..6a1040daa9b9e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3409,12 +3409,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 					   wdev->iftype))
 		return -EINVAL;
 
-	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
-		params.acl = parse_acl_data(&rdev->wiphy, info);
-		if (IS_ERR(params.acl))
-			return PTR_ERR(params.acl);
-	}
-
 	if (info->attrs[NL80211_ATTR_SMPS_MODE]) {
 		params.smps_mode =
 			nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]);
@@ -3438,6 +3432,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 		params.smps_mode = NL80211_SMPS_OFF;
 	}
 
+	if (info->attrs[NL80211_ATTR_ACL_POLICY]) {
+		params.acl = parse_acl_data(&rdev->wiphy, info);
+		if (IS_ERR(params.acl))
+			return PTR_ERR(params.acl);
+	}
+
 	wdev_lock(wdev);
 	err = rdev_start_ap(rdev, dev, &params);
 	if (!err) {

From 306105dcdc8f3abb1f19fbed6643bf67e9611f77 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Sun, 25 Oct 2015 22:54:19 +0100
Subject: [PATCH 056/201] NFC: st-nci: Fix incorrect spi buffer size

commit a1269dd116319335db6d73013a31c038486c813e upstream.

When sending data over SPI, the maximum expected length is the maximum
nci packet payload + data header size + the frame head room (1 for the
ndlc header) + the frame trail room (0).

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nfc/st-nci/spi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c
index 598a58c4d6d13..887d308c2ec80 100644
--- a/drivers/nfc/st-nci/spi.c
+++ b/drivers/nfc/st-nci/spi.c
@@ -25,6 +25,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/nfc.h>
+#include <net/nfc/nci.h>
 #include <linux/platform_data/st-nci.h>
 
 #include "ndlc.h"
@@ -94,7 +95,8 @@ static int st_nci_spi_write(void *phy_id, struct sk_buff *skb)
 	struct st_nci_spi_phy *phy = phy_id;
 	struct spi_device *dev = phy->spi_dev;
 	struct sk_buff *skb_rx;
-	u8 buf[ST_NCI_SPI_MAX_SIZE];
+	u8 buf[ST_NCI_SPI_MAX_SIZE + NCI_DATA_HDR_SIZE +
+	       ST_NCI_FRAME_HEADROOM + ST_NCI_FRAME_TAILROOM];
 	struct spi_transfer spi_xfer = {
 		.tx_buf = skb->data,
 		.rx_buf = buf,

From fa885c63ce9659910f55b9335d61752e723156e6 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Sun, 25 Oct 2015 22:54:20 +0100
Subject: [PATCH 057/201] NFC: nci: Fix incorrect data chaining when sending
 data

commit 500c4ef02277eaadbfe20537f963b6221f6ac007 upstream.

When sending HCI data over NCI, cmd information should be
present only on the first packet.
Each packet shall be specifically allocated and sent to the
NCI layer.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/nfc/nci/hci.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 609f92283d1b7..321889ee21078 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -146,18 +146,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 	if (!conn_info)
 		return -EPROTO;
 
-	skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len +
+	i = 0;
+	skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len +
 			    NCI_DATA_HDR_SIZE, GFP_KERNEL);
 	if (!skb)
 		return -ENOMEM;
 
-	skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE);
+	skb_reserve(skb, NCI_DATA_HDR_SIZE + 2);
 	*skb_push(skb, 1) = data_type;
 
-	i = 0;
-	len = conn_info->max_pkt_payload_len;
-
 	do {
+		len = conn_info->max_pkt_payload_len;
+
 		/* If last packet add NCI_HFP_NO_CHAINING */
 		if (i + conn_info->max_pkt_payload_len -
 		    (skb->len + 1) >= data_len) {
@@ -177,9 +177,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe,
 			return r;
 
 		i += len;
+
 		if (i < data_len) {
-			skb_trim(skb, 0);
-			skb_pull(skb, len);
+			skb = nci_skb_alloc(ndev,
+					    conn_info->max_pkt_payload_len +
+					    NCI_DATA_HDR_SIZE, GFP_KERNEL);
+			if (!skb)
+				return -ENOMEM;
+
+			skb_reserve(skb, NCI_DATA_HDR_SIZE + 1);
 		}
 	} while (i < data_len);
 

From 37e8081d25e7b4ea1dde6ff19e847ca880ae8442 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Sun, 25 Oct 2015 22:54:21 +0100
Subject: [PATCH 058/201] NFC: nci: Fix improper management of HCI return code

commit d8cd37ed2fc871c66b4c79c59f651dc2cdf7091c upstream.

When sending HCI data over NCI, HCI return code is part
of the NCI data. In order to get correctly the HCI return
code, we assume the NCI communication is successful and
extract the return code for the nci_hci functions return code.

This is done because nci_to_errno does not match hci return
code value.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/nfc/nci/hci.c | 66 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 17 deletions(-)

diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index 321889ee21078..b07092f4111bd 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -101,6 +101,20 @@ struct nci_hcp_packet {
 #define NCI_HCP_MSG_GET_CMD(header)  (header & 0x3f)
 #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f)
 
+static int nci_hci_result_to_errno(u8 result)
+{
+	switch (result) {
+	case NCI_HCI_ANY_OK:
+		return 0;
+	case NCI_HCI_ANY_E_REG_PAR_UNKNOWN:
+		return -EOPNOTSUPP;
+	case NCI_HCI_ANY_E_TIMEOUT:
+		return -ETIME;
+	default:
+		return -1;
+	}
+}
+
 /* HCI core */
 static void nci_hci_reset_pipes(struct nci_hci_dev *hdev)
 {
@@ -218,7 +232,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
 		     const u8 *param, size_t param_len,
 		     struct sk_buff **skb)
 {
-	struct nci_conn_info    *conn_info;
+	struct nci_hcp_message *message;
+	struct nci_conn_info   *conn_info;
 	struct nci_data data;
 	int r;
 	u8 pipe = ndev->hci_dev->gate2pipe[gate];
@@ -238,9 +253,15 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd,
 
 	r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
 			msecs_to_jiffies(NCI_DATA_TIMEOUT));
-
-	if (r == NCI_STATUS_OK && skb)
-		*skb = conn_info->rx_skb;
+	if (r == NCI_STATUS_OK) {
+		message = (struct nci_hcp_message *)conn_info->rx_skb->data;
+		r = nci_hci_result_to_errno(
+			NCI_HCP_MSG_GET_CMD(message->header));
+		skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
+
+		if (!r && skb)
+			*skb = conn_info->rx_skb;
+	}
 
 	return r;
 }
@@ -334,9 +355,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
 	struct nci_conn_info    *conn_info;
 	u8 status = result;
 
-	if (result != NCI_HCI_ANY_OK)
-		goto exit;
-
 	conn_info = ndev->hci_dev->conn_info;
 	if (!conn_info) {
 		status = NCI_STATUS_REJECTED;
@@ -346,7 +364,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe,
 	conn_info->rx_skb = skb;
 
 exit:
-	nci_req_complete(ndev, status);
+	nci_req_complete(ndev, NCI_STATUS_OK);
 }
 
 /* Receive hcp message for pipe, with type and cmd.
@@ -401,7 +419,7 @@ void nci_hci_data_received_cb(void *context,
 {
 	struct nci_dev *ndev = (struct nci_dev *)context;
 	struct nci_hcp_packet *packet;
-	u8 pipe, type, instruction;
+	u8 pipe, type;
 	struct sk_buff *hcp_skb;
 	struct sk_buff *frag_skb;
 	int msg_len;
@@ -440,7 +458,7 @@ void nci_hci_data_received_cb(void *context,
 		*skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe;
 
 		skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) {
-		       msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
+			msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN;
 			memcpy(skb_put(hcp_skb, msg_len), frag_skb->data +
 			       NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len);
 		}
@@ -458,11 +476,10 @@ void nci_hci_data_received_cb(void *context,
 	packet = (struct nci_hcp_packet *)hcp_skb->data;
 	type = NCI_HCP_MSG_GET_TYPE(packet->message.header);
 	if (type == NCI_HCI_HCP_RESPONSE) {
-		pipe = packet->header;
-		instruction = NCI_HCP_MSG_GET_CMD(packet->message.header);
-		skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN +
-			 NCI_HCI_HCP_MESSAGE_HEADER_LEN);
-		nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb);
+		pipe = NCI_HCP_MSG_GET_PIPE(packet->header);
+		skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN);
+		nci_hci_hcp_message_rx(ndev, pipe, type,
+				       NCI_STATUS_OK, hcp_skb);
 	} else {
 		skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb);
 		schedule_work(&ndev->hci_dev->msg_rx_work);
@@ -494,6 +511,7 @@ EXPORT_SYMBOL(nci_hci_open_pipe);
 int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
 		      const u8 *param, size_t param_len)
 {
+	struct nci_hcp_message *message;
 	struct nci_conn_info *conn_info;
 	struct nci_data data;
 	int r;
@@ -526,6 +544,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx,
 	r = nci_request(ndev, nci_hci_send_data_req,
 			(unsigned long)&data,
 			msecs_to_jiffies(NCI_DATA_TIMEOUT));
+	if (r == NCI_STATUS_OK) {
+		message = (struct nci_hcp_message *)conn_info->rx_skb->data;
+		r = nci_hci_result_to_errno(
+			NCI_HCP_MSG_GET_CMD(message->header));
+		skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
+	}
 
 	kfree(tmp);
 	return r;
@@ -535,6 +559,7 @@ EXPORT_SYMBOL(nci_hci_set_param);
 int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
 		      struct sk_buff **skb)
 {
+	struct nci_hcp_message *message;
 	struct nci_conn_info    *conn_info;
 	struct nci_data data;
 	int r;
@@ -559,8 +584,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx,
 	r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data,
 			msecs_to_jiffies(NCI_DATA_TIMEOUT));
 
-	if (r == NCI_STATUS_OK)
-		*skb = conn_info->rx_skb;
+	if (r == NCI_STATUS_OK) {
+		message = (struct nci_hcp_message *)conn_info->rx_skb->data;
+		r = nci_hci_result_to_errno(
+			NCI_HCP_MSG_GET_CMD(message->header));
+		skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN);
+
+		if (!r && skb)
+			*skb = conn_info->rx_skb;
+	}
 
 	return r;
 }

From 7fc3efcbf181df8ba61753887654633389a1fb37 Mon Sep 17 00:00:00 2001
From: Christophe Ricard <christophe.ricard@gmail.com>
Date: Sun, 25 Oct 2015 22:54:22 +0100
Subject: [PATCH 059/201] NFC: nci: extract pipe value using
 NCI_HCP_MSG_GET_PIPE

commit e65917b6d54f8b47d8293ea96adfa604fd46cf0d upstream.

When receiving data in nci_hci_msg_rx_work, extract pipe
value using NCI_HCP_MSG_GET_PIPE macro.

Signed-off-by: Christophe Ricard <christophe-h.ricard@st.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/nfc/nci/hci.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c
index b07092f4111bd..30b09f04c142d 100644
--- a/net/nfc/nci/hci.c
+++ b/net/nfc/nci/hci.c
@@ -402,7 +402,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work)
 	u8 pipe, type, instruction;
 
 	while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) {
-		pipe = skb->data[0];
+		pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]);
 		skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN);
 		message = (struct nci_hcp_message *)skb->data;
 		type = NCI_HCP_MSG_GET_TYPE(message->header);
@@ -439,7 +439,7 @@ void nci_hci_data_received_cb(void *context,
 
 	/* it's the last fragment. Does it need re-aggregation? */
 	if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) {
-		pipe = packet->header & NCI_HCI_FRAGMENT;
+		pipe = NCI_HCP_MSG_GET_PIPE(packet->header);
 		skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb);
 
 		msg_len = 0;

From 66c4d636733dbc7d9aa3ed0191e9d02ba836d676 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Wed, 21 Oct 2015 19:55:32 +0300
Subject: [PATCH 060/201] iwlwifi: pcie: fix (again) prepare card flow

commit 03a19cbb91994212be72ce15ac3406fa9f8ba079 upstream.

The hardware bug in the commit mentioned below forces us
not to re-enable the clock gating in the Host Cluster.
The impact on the power consumption is minimal and it allows
the WAKE_ME interrupt to propagate.

Fixes: c9fdec9f3970 ("iwlwifi: pcie: fix prepare card flow")
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/pcie/trans.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c
index 6ba7d300b08f3..90283453073c1 100644
--- a/drivers/net/wireless/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/iwlwifi/pcie/trans.c
@@ -592,10 +592,8 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans)
 
 		do {
 			ret = iwl_pcie_set_hw_ready(trans);
-			if (ret >= 0) {
-				ret = 0;
-				goto out;
-			}
+			if (ret >= 0)
+				return 0;
 
 			usleep_range(200, 1000);
 			t += 200;
@@ -605,10 +603,6 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans)
 
 	IWL_ERR(trans, "Couldn't prepare the card\n");
 
-out:
-	iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG,
-		      CSR_RESET_LINK_PWR_MGMT_DISABLED);
-
 	return ret;
 }
 

From c592b07ed26abb4235c29620495a006acdc8bac6 Mon Sep 17 00:00:00 2001
From: Oren Givon <oren.givon@intel.com>
Date: Wed, 28 Oct 2015 12:32:20 +0200
Subject: [PATCH 061/201] iwlwifi: Add new PCI IDs for the 8260 series

commit 4ab75944c4b324c1f5f01dbd4c4d122d2b9da187 upstream.

Add some new PCI IDs for the 8260 series which were missing.
The following sub-system IDs were added:
0x0130, 0x1130, 0x0132, 0x1132, 0x1150, 0x8110, 0x9110, 0x8130,
0x9130, 0x8132, 0x9132, 0x8150, 0x9150, 0x0044, 0x0930

Signed-off-by: Oren Givon <oren.givon@intel.com>
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/iwlwifi/pcie/drv.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c
index 644b58bc5226c..639761fb2bfb2 100644
--- a/drivers/net/wireless/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/iwlwifi/pcie/drv.c
@@ -423,14 +423,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 /* 8000 Series */
 	{IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)},
-	{IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)},
@@ -438,18 +445,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)},
 	{IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)},
 	{IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)},
+	{IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)},
 #endif /* CONFIG_IWLMVM */
 
 	{0}

From bf14478a37d1f22b04025714f88b5e18edd57b31 Mon Sep 17 00:00:00 2001
From: Maxime Ripard <maxime.ripard@free-electrons.com>
Date: Fri, 25 Sep 2015 18:09:35 +0200
Subject: [PATCH 062/201] net: mvneta: Fix CPU_MAP registers initialisation

commit 2502d0ef272da7058ef303b849a2c8dc324c2e2e upstream.

The CPU_MAP register is duplicated for each CPUs at different addresses,
each instance being at a different address.

However, the code so far was using CONFIG_NR_CPUS to initialise the CPU_MAP
registers for each registers, while the SoCs embed at most 4 CPUs.

This is especially an issue with multi_v7_defconfig, where CONFIG_NR_CPUS
is currently set to 16, resulting in writes to registers that are not
CPU_MAP.

Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit")
Signed-off-by: Maxime Ripard <maxime.ripard@free-electrons.com>
Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 514df76fc70f3..8c6f307457d3b 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -949,7 +949,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	/* Set CPU queue access map - all CPUs have access to all RX
 	 * queues and to all TX queues
 	 */
-	for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++)
+	for_each_present_cpu(cpu)
 		mvreg_write(pp, MVNETA_CPU_MAP(cpu),
 			    (MVNETA_CPU_RXQ_ACCESS_ALL_MASK |
 			     MVNETA_CPU_TXQ_ACCESS_ALL_MASK));

From ed42098a90ca511bd74a55a29db6f0f8bd193d2c Mon Sep 17 00:00:00 2001
From: Marcin Wojtas <mw@semihalf.com>
Date: Mon, 30 Nov 2015 13:27:44 +0100
Subject: [PATCH 063/201] net: mvneta: fix error path for building skb

commit 26c17a179f3f64f92de6e837c14279a6431a7ab6 upstream.

In the actual RX processing, there is same error path for both descriptor
ring refilling and building skb fails. This is not correct, because after
successful refill, the ring is already updated with newly allocated
buffer. Then, in case of build_skb() fail, hitherto code left the original
buffer unmapped.

This patch fixes above situation by swapping error check of skb build with
DMA-unmap of original buffer.

Signed-off-by: Marcin Wojtas <mw@semihalf.com>
Acked-by: Simon Guinot <simon.guinot@sequanux.org>
Fixes a84e32894191 ("net: mvneta: fix refilling for Rx DMA buffers")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/marvell/mvneta.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 8c6f307457d3b..0e49244981d6e 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -1533,12 +1533,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo,
 		}
 
 		skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size);
-		if (!skb)
-			goto err_drop_frame;
 
+		/* After refill old buffer has to be unmapped regardless
+		 * the skb is successfully built or not.
+		 */
 		dma_unmap_single(dev->dev.parent, phys_addr,
 				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
 
+		if (!skb)
+			goto err_drop_frame;
+
 		rcvd_pkts++;
 		rcvd_bytes += rx_bytes;
 

From 40a76e5410358a5193bd6643032b6046e27807be Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 30 Sep 2015 15:59:17 +0200
Subject: [PATCH 064/201] fs/proc, core/debug: Don't expose absolute kernel
 addresses via wchan

commit b2f73922d119686323f14fbbe46587f863852328 upstream.

So the /proc/PID/stat 'wchan' field (the 30th field, which contains
the absolute kernel address of the kernel function a task is blocked in)
leaks absolute kernel addresses to unprivileged user-space:

        seq_put_decimal_ull(m, ' ', wchan);

The absolute address might also leak via /proc/PID/wchan as well, if
KALLSYMS is turned off or if the symbol lookup fails for some reason:

static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
                          struct pid *pid, struct task_struct *task)
{
        unsigned long wchan;
        char symname[KSYM_NAME_LEN];

        wchan = get_wchan(task);

        if (lookup_symbol_name(wchan, symname) < 0) {
                if (!ptrace_may_access(task, PTRACE_MODE_READ))
                        return 0;
                seq_printf(m, "%lu", wchan);
        } else {
                seq_printf(m, "%s", symname);
        }

        return 0;
}

This isn't ideal, because for example it trivially leaks the KASLR offset
to any local attacker:

  fomalhaut:~> printf "%016lx\n" $(cat /proc/$$/stat | cut -d' ' -f35)
  ffffffff8123b380

Most real-life uses of wchan are symbolic:

  ps -eo pid:10,tid:10,wchan:30,comm

and procps uses /proc/PID/wchan, not the absolute address in /proc/PID/stat:

  triton:~/tip> strace -f ps -eo pid:10,tid:10,wchan:30,comm 2>&1 | grep wchan | tail -1
  open("/proc/30833/wchan", O_RDONLY)     = 6

There's one compatibility quirk here: procps relies on whether the
absolute value is non-zero - and we can provide that functionality
by outputing "0" or "1" depending on whether the task is blocked
(whether there's a wchan address).

These days there appears to be very little legitimate reason
user-space would be interested in  the absolute address. The
absolute address is mostly historic: from the days when we
didn't have kallsyms and user-space procps had to do the
decoding itself via the System.map.

So this patch sets all numeric output to "0" or "1" and keeps only
symbolic output, in /proc/PID/wchan.

( The absolute sleep address can generally still be profiled via
  perf, by tasks with sufficient privileges. )

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Kees Cook <keescook@chromium.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Kostya Serebryany <kcc@google.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sasha Levin <sasha.levin@oracle.com>
Cc: kasan-dev <kasan-dev@googlegroups.com>
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/20150930135917.GA3285@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/filesystems/proc.txt |  5 +++--
 fs/proc/array.c                    | 16 ++++++++++++++--
 fs/proc/base.c                     |  9 +++------
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index d411ca63c8b6c..3a9d65c912e78 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -140,7 +140,8 @@ Table 1-1: Process specific entries in /proc
  stat		Process status
  statm		Process memory status information
  status		Process status in human readable form
- wchan		If CONFIG_KALLSYMS is set, a pre-decoded wchan
+ wchan		Present with CONFIG_KALLSYMS=y: it shows the kernel function
+		symbol the task is blocked in - or "0" if not blocked.
  pagemap	Page table
  stack		Report full stack trace, enable via CONFIG_STACKTRACE
  smaps		a extension based on maps, showing the memory consumption of
@@ -310,7 +311,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7)
   blocked       bitmap of blocked signals
   sigign        bitmap of ignored signals
   sigcatch      bitmap of caught signals
-  wchan         address where process went to sleep
+  0		(place holder, used to be the wchan address, use /proc/PID/wchan instead)
   0             (place holder)
   0             (place holder)
   exit_signal   signal to send to parent thread on exit
diff --git a/fs/proc/array.c b/fs/proc/array.c
index f60f0121e3319..eed2050db9be9 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -375,7 +375,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
 static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 			struct pid *pid, struct task_struct *task, int whole)
 {
-	unsigned long vsize, eip, esp, wchan = ~0UL;
+	unsigned long vsize, eip, esp, wchan = 0;
 	int priority, nice;
 	int tty_pgrp = -1, tty_nr = 0;
 	sigset_t sigign, sigcatch;
@@ -507,7 +507,19 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 	seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL);
 	seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL);
 	seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL);
-	seq_put_decimal_ull(m, ' ', wchan);
+
+	/*
+	 * We used to output the absolute kernel address, but that's an
+	 * information leak - so instead we show a 0/1 flag here, to signal
+	 * to user-space whether there's a wchan field in /proc/PID/wchan.
+	 *
+	 * This works with older implementations of procps as well.
+	 */
+	if (wchan)
+		seq_puts(m, " 1");
+	else
+		seq_puts(m, " 0");
+
 	seq_put_decimal_ull(m, ' ', 0);
 	seq_put_decimal_ull(m, ' ', 0);
 	seq_put_decimal_ll(m, ' ', task->exit_signal);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b25eee4cead53..29595af328669 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -430,13 +430,10 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
 
 	wchan = get_wchan(task);
 
-	if (lookup_symbol_name(wchan, symname) < 0) {
-		if (!ptrace_may_access(task, PTRACE_MODE_READ))
-			return 0;
-		seq_printf(m, "%lu", wchan);
-	} else {
+	if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname))
 		seq_printf(m, "%s", symname);
-	}
+	else
+		seq_putc(m, '0');
 
 	return 0;
 }

From e6a981502ce51fed9f4aa123bfec8adb4de09c0a Mon Sep 17 00:00:00 2001
From: Simran Rai <ssimran@broadcom.com>
Date: Mon, 19 Oct 2015 15:27:19 -0700
Subject: [PATCH 065/201] clk: iproc: Fix PLL output frequency calculation

commit 63243a4da7d0dfa19dcacd0a529782eeb2f86f92 upstream.

This patch affects the clocks that use fractional ndivider in their
PLL output frequency calculation. Instead of 2^20 divide factor, the
clock's ndiv integer shift was used. Fixed the bug by replacing ndiv
integer shift with 2^20 factor.

Signed-off-by: Simran Rai <ssimran@broadcom.com>
Signed-off-by: Ray Jui <rjui@broadcom.com>
Reviewed-by: Scott Branden <sbranden@broadcom.com>
Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support")
Signed-off-by: Michael Turquette <mturquette@baylibre.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/bcm/clk-iproc-pll.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c
index 2dda4e8295a91..d679ab8696532 100644
--- a/drivers/clk/bcm/clk-iproc-pll.c
+++ b/drivers/clk/bcm/clk-iproc-pll.c
@@ -345,8 +345,8 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw,
 	struct iproc_pll *pll = clk->pll;
 	const struct iproc_pll_ctrl *ctrl = pll->ctrl;
 	u32 val;
-	u64 ndiv;
-	unsigned int ndiv_int, ndiv_frac, pdiv;
+	u64 ndiv, ndiv_int, ndiv_frac;
+	unsigned int pdiv;
 
 	if (parent_rate == 0)
 		return 0;
@@ -366,22 +366,19 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw,
 	val = readl(pll->pll_base + ctrl->ndiv_int.offset);
 	ndiv_int = (val >> ctrl->ndiv_int.shift) &
 		bit_mask(ctrl->ndiv_int.width);
-	ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift;
+	ndiv = ndiv_int << 20;
 
 	if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) {
 		val = readl(pll->pll_base + ctrl->ndiv_frac.offset);
 		ndiv_frac = (val >> ctrl->ndiv_frac.shift) &
 			bit_mask(ctrl->ndiv_frac.width);
-
-		if (ndiv_frac != 0)
-			ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) |
-				ndiv_frac;
+		ndiv += ndiv_frac;
 	}
 
 	val = readl(pll->pll_base + ctrl->pdiv.offset);
 	pdiv = (val >> ctrl->pdiv.shift) & bit_mask(ctrl->pdiv.width);
 
-	clk->rate = (ndiv * parent_rate) >> ctrl->ndiv_int.shift;
+	clk->rate = (ndiv * parent_rate) >> 20;
 
 	if (pdiv == 0)
 		clk->rate *= 2;

From b14a6617539431dadedec6d7dc023a87490dbb62 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 23 Oct 2015 11:36:01 +0200
Subject: [PATCH 066/201] clk: versatile-icst: fix memory leak

commit 7bdccef34fc67d3fce6778a018601dd41e43c5ce upstream.

A static code checker found a memory leak in the Versatile
ICST code. Fix it.

Fixes: a183da637c52 "clk: versatile: respect parent rate in ICST clock"
Reported-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/clk/versatile/clk-icst.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c
index a3893ea2199da..08c5ee9768799 100644
--- a/drivers/clk/versatile/clk-icst.c
+++ b/drivers/clk/versatile/clk-icst.c
@@ -157,8 +157,10 @@ struct clk *icst_clk_register(struct device *dev,
 	icst->lockreg = base + desc->lock_offset;
 
 	clk = clk_register(dev, &icst->hw);
-	if (IS_ERR(clk))
+	if (IS_ERR(clk)) {
+		kfree(pclone);
 		kfree(icst);
+	}
 
 	return clk;
 }

From 8cb5ad246f79e4703e010e0d0e0211fb95576401 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 18 Sep 2015 09:29:04 -0700
Subject: [PATCH 067/201] mfd: twl6040: Fix deferred probe handling for clk32k

commit 75c08f17ec87c2d742487bb87408d6feebc526bd upstream.

Commit 68bab8662f49 ("mfd: twl6040: Optional clk32k clock handling")
added clock handling for the 32k clock from palmas-clk. However, that
patch did not consider a typical situation where twl6040 is built-in,
and palmas-clk is a loadable module like we have in omap2plus_defconfig.

If palmas-clk is not loaded before twl6040 probes, we will get a
"clk32k is not handled" warning during booting. This means that any
drivers relying on this clock will mysteriously fail, including
omap5-uevm WLAN and audio.

Note that for WLAN, we probably should also eventually get
the clk32kgaudio for MMC3 directly as that's shared between
audio and WLAN SDIO at least for omap5-uevm. It seems the
WLAN chip cannot get it as otherwise MMC3 won't get properly
probed.

Fixes: 68bab8662f49 ("mfd: twl6040: Optional clk32k clock handling")
Signed-off-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Lee Jones <lee.jones@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/twl6040.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c
index a151ee2eed2a4..08a693cd38cc4 100644
--- a/drivers/mfd/twl6040.c
+++ b/drivers/mfd/twl6040.c
@@ -647,6 +647,8 @@ static int twl6040_probe(struct i2c_client *client,
 
 	twl6040->clk32k = devm_clk_get(&client->dev, "clk32k");
 	if (IS_ERR(twl6040->clk32k)) {
+		if (PTR_ERR(twl6040->clk32k) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
 		dev_info(&client->dev, "clk32k is not handled\n");
 		twl6040->clk32k = NULL;
 	}

From a251acf26f718499da7db0aa8b73ee671c749028 Mon Sep 17 00:00:00 2001
From: Aniket Nagarnaik <aniketn@marvell.com>
Date: Fri, 18 Sep 2015 06:32:09 -0700
Subject: [PATCH 068/201] mwifiex: fix NULL pointer dereference during hidden
 SSID scan

commit 17e524b1b60f4390d24a51d9524d1648cf5d1447 upstream.

This NULL pointer dereference is observed during suspend resume
stress test. All pending commands are cancelled when system goes
into suspend state. There a corner case in which host may receive
response for last scan command after this and try to trigger extra
active scan for hidden SSIDs.

The issue is fixed by adding a NULL check to skip that extra scan.

Fixes: 2375fa2b36feaf34 (mwifiex: fix unable to connect hidden SSID..)
Signed-off-by: Aniket Nagarnaik <aniketn@marvell.com>
Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c
index 5847863a2d6be..278ec477fd90b 100644
--- a/drivers/net/wireless/mwifiex/scan.c
+++ b/drivers/net/wireless/mwifiex/scan.c
@@ -1889,7 +1889,7 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv)
 	u8 id = 0;
 	struct mwifiex_user_scan_cfg  *user_scan_cfg;
 
-	if (adapter->active_scan_triggered) {
+	if (adapter->active_scan_triggered || !priv->scan_request) {
 		adapter->active_scan_triggered = false;
 		return 0;
 	}

From 0c7b4e0d36288a4695a56fd89dbf4c9e00ca9a0d Mon Sep 17 00:00:00 2001
From: Amitkumar Karwar <akarwar@marvell.com>
Date: Fri, 18 Sep 2015 06:32:10 -0700
Subject: [PATCH 069/201] mwifiex: avoid memsetting PCIe event buffer

commit 14d9c11c91a606fed65eaae2455423a23bb4ae59 upstream.

Preallocated PCIe buffer is being reused for all PCIe interface
events. Physical address of the buffer is shared with firmware
so that it can perform DMA on it. As event length is specified
in the header, there should not be a problem if the buffer gets
overwritten.
We will save some cycles by avoiding memset everytime while
submitting the buffer to firmware.

Fixes: 2728cecdc7d6bf3d21(mwifiex: corrections in PCIe event skb)
Signed-off-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/pcie.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c
index 408b684607165..21192b6f9c64f 100644
--- a/drivers/net/wireless/mwifiex/pcie.c
+++ b/drivers/net/wireless/mwifiex/pcie.c
@@ -1815,7 +1815,6 @@ static int mwifiex_pcie_event_complete(struct mwifiex_adapter *adapter,
 	if (!card->evt_buf_list[rdptr]) {
 		skb_push(skb, INTF_HEADER_LEN);
 		skb_put(skb, MAX_EVENT_SIZE - skb->len);
-		memset(skb->data, 0, MAX_EVENT_SIZE);
 		if (mwifiex_map_pci_memory(adapter, skb,
 					   MAX_EVENT_SIZE,
 					   PCI_DMA_FROMDEVICE))

From c1c2cc1b5f3e83fed0049c2d59f9ef38787763fc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 21 Sep 2015 19:19:53 +0300
Subject: [PATCH 070/201] mwifiex: fix mwifiex_rdeeprom_read()

commit 1f9c6e1bc1ba5f8a10fcd6e99d170954d7c6d382 upstream.

There were several bugs here.

1)  The done label was in the wrong place so we didn't copy any
    information out when there was no command given.

2)  We were using PAGE_SIZE as the size of the buffer instead of
    "PAGE_SIZE - pos".

3)  snprintf() returns the number of characters that would have been
    printed if there were enough space.  If there was not enough space
    (and we had fixed the memory corruption bug #2) then it would result
    in an information leak when we do simple_read_from_buffer().  I've
    changed it to use scnprintf() instead.

I also removed the initialization at the start of the function, because
I thought it made the code a little more clear.

Fixes: 5e6e3a92b9a4 ('wireless: mwifiex: initial commit for Marvell mwifiex driver')
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Amitkumar Karwar <akarwar@marvell.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/mwifiex/debugfs.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mwifiex/debugfs.c b/drivers/net/wireless/mwifiex/debugfs.c
index 5a0636d43a1b9..5583856fc5c41 100644
--- a/drivers/net/wireless/mwifiex/debugfs.c
+++ b/drivers/net/wireless/mwifiex/debugfs.c
@@ -731,7 +731,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf,
 		(struct mwifiex_private *) file->private_data;
 	unsigned long addr = get_zeroed_page(GFP_KERNEL);
 	char *buf = (char *) addr;
-	int pos = 0, ret = 0, i;
+	int pos, ret, i;
 	u8 value[MAX_EEPROM_DATA];
 
 	if (!buf)
@@ -739,7 +739,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf,
 
 	if (saved_offset == -1) {
 		/* No command has been given */
-		pos += snprintf(buf, PAGE_SIZE, "0");
+		pos = snprintf(buf, PAGE_SIZE, "0");
 		goto done;
 	}
 
@@ -748,17 +748,17 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf,
 				  (u16) saved_bytes, value);
 	if (ret) {
 		ret = -EINVAL;
-		goto done;
+		goto out_free;
 	}
 
-	pos += snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes);
+	pos = snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes);
 
 	for (i = 0; i < saved_bytes; i++)
-		pos += snprintf(buf + strlen(buf), PAGE_SIZE, "%d ", value[i]);
-
-	ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos);
+		pos += scnprintf(buf + pos, PAGE_SIZE - pos, "%d ", value[i]);
 
 done:
+	ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos);
+out_free:
 	free_page(addr);
 	return ret;
 }

From 17b9d97eec72418d235453aef32a87e2b39f29ee Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Sun, 18 Oct 2015 22:14:48 -0500
Subject: [PATCH 071/201] staging: rtl8712: Add device ID for Sitecom WLA2100

commit 1e6e63283691a2a9048a35d9c6c59cf0abd342e4 upstream.

This adds the USB ID for the Sitecom WLA2100. The Windows 10 inf file
was checked to verify that the addition is correct.

Reported-by: Frans van de Wiel <fvdw@fvdw.eu>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Frans van de Wiel <fvdw@fvdw.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/usb_intf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c
index f8b5b332e7c3d..943a0e2045329 100644
--- a/drivers/staging/rtl8712/usb_intf.c
+++ b/drivers/staging/rtl8712/usb_intf.c
@@ -144,6 +144,7 @@ static struct usb_device_id rtl871x_usb_id_tbl[] = {
 	{USB_DEVICE(0x0DF6, 0x0058)},
 	{USB_DEVICE(0x0DF6, 0x0049)},
 	{USB_DEVICE(0x0DF6, 0x004C)},
+	{USB_DEVICE(0x0DF6, 0x006C)},
 	{USB_DEVICE(0x0DF6, 0x0064)},
 	/* Skyworth */
 	{USB_DEVICE(0x14b2, 0x3300)},

From de2ebbd167835c1c627bd24a1b6d846e0dead8f5 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Mon, 7 Sep 2015 12:05:41 +0200
Subject: [PATCH 072/201] Bluetooth: hidp: fix device disconnect on idle
 timeout

commit 660f0fc07d21114549c1862e67e78b1cf0c90c29 upstream.

The HIDP specs define an idle-timeout which automatically disconnects a
device. This has always been implemented in the HIDP layer and forced a
synchronous shutdown of the hidp-scheduler. This works just fine, but
lacks a forced disconnect on the underlying l2cap channels. This has been
broken since:

    commit 5205185d461d5902325e457ca80bd421127b7308
    Author: David Herrmann <dh.herrmann@gmail.com>
    Date:   Sat Apr 6 20:28:47 2013 +0200

        Bluetooth: hidp: remove old session-management

The old session-management always forced an l2cap error on the ctrl/intr
channels when shutting down. The new session-management skips this, as we
don't want to enforce channel policy on the caller. In other words, if
user-space removes an HIDP device, the underlying channels (which are
*owned* and *referenced* by user-space) are still left active. User-space
needs to call shutdown(2) or close(2) to release them.

Unfortunately, this does not work with idle-timeouts. There is no way to
signal user-space that the HIDP layer has been stopped. The API simply
does not support any event-passing except for poll(2). Hence, we restore
old behavior and force EUNATCH on the sockets if the HIDP layer is
disconnected due to idle-timeouts (behavior of explicit disconnects
remains unmodified). User-space can still call

    getsockopt(..., SO_ERROR, ...)

..to retrieve the EUNATCH error and clear sk_err. Hence, the channels can
still be re-used (which nobody does so far, though). Therefore, the API
still supports the new behavior, but with this patch it's also compatible
to the old implicit channel shutdown.

Reported-by: Mark Haun <haunma@keteu.org>
Reported-by: Luiz Augusto von Dentz <luiz.dentz@gmail.com>
Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/hidp/core.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index f1a117f8cad22..0bec4588c3c8c 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg)
 {
 	struct hidp_session *session = (struct hidp_session *) arg;
 
+	/* The HIDP user-space API only contains calls to add and remove
+	 * devices. There is no way to forward events of any kind. Therefore,
+	 * we have to forcefully disconnect a device on idle-timeouts. This is
+	 * unfortunate and weird API design, but it is spec-compliant and
+	 * required for backwards-compatibility. Hence, on idle-timeout, we
+	 * signal driver-detach events, so poll() will be woken up with an
+	 * error-condition on both sockets.
+	 */
+
+	session->intr_sock->sk->sk_err = EUNATCH;
+	session->ctrl_sock->sk->sk_err = EUNATCH;
+	wake_up_interruptible(sk_sleep(session->intr_sock->sk));
+	wake_up_interruptible(sk_sleep(session->ctrl_sock->sk));
+
 	hidp_session_terminate(session);
 }
 

From f499c55b9eb89486f0b62fc24824bbe5415303de Mon Sep 17 00:00:00 2001
From: Dmitry Tunin <hanipouspilot@gmail.com>
Date: Mon, 5 Oct 2015 19:29:33 +0300
Subject: [PATCH 073/201] Bluetooth: ath3k: Add new AR3012 0930:021c id

commit cd355ff071cd37e7197eccf9216770b2b29369f7 upstream.

This adapter works with the existing linux-firmware.

T:  Bus=01 Lev=01 Prnt=01 Port=03 Cnt=02 Dev#=  3 Spd=12  MxCh= 0
D:  Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=0930 ProdID=021c Rev=00.01
C:  #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I:  If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
I:  If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb

BugLink: https://bugs.launchpad.net/bugs/1502781

Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/ath3k.c | 2 ++
 drivers/bluetooth/btusb.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index e527a3e13939c..60c15f8bc09d0 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -93,6 +93,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x04CA, 0x300f) },
 	{ USB_DEVICE(0x04CA, 0x3010) },
 	{ USB_DEVICE(0x0930, 0x0219) },
+	{ USB_DEVICE(0x0930, 0x021c) },
 	{ USB_DEVICE(0x0930, 0x0220) },
 	{ USB_DEVICE(0x0930, 0x0227) },
 	{ USB_DEVICE(0x0b05, 0x17d0) },
@@ -153,6 +154,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index b6aceaf82aa88..6291b5d7f4318 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -195,6 +195,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 },

From 65fe0308fde6307efe411850b3907930792a9492 Mon Sep 17 00:00:00 2001
From: Dmitry Tunin <hanipouspilot@gmail.com>
Date: Fri, 16 Oct 2015 11:45:26 +0300
Subject: [PATCH 074/201] Bluetooth: ath3k: Add support of AR3012 0cf3:817b
 device

commit 18e0afab8ce3f1230ce3fef52b2e73374fd9c0e7 upstream.

T: Bus=04 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 3 Spd=12 MxCh= 0
D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1
P: Vendor=0cf3 ProdID=817b Rev=00.02
C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb

BugLink: https://bugs.launchpad.net/bugs/1506615

Signed-off-by: Dmitry Tunin <hanipouspilot@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bluetooth/ath3k.c | 2 ++
 drivers/bluetooth/btusb.c | 1 +
 2 files changed, 3 insertions(+)

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index 60c15f8bc09d0..fa893c3ec4087 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c
@@ -105,6 +105,7 @@ static const struct usb_device_id ath3k_table[] = {
 	{ USB_DEVICE(0x0CF3, 0x311F) },
 	{ USB_DEVICE(0x0cf3, 0x3121) },
 	{ USB_DEVICE(0x0CF3, 0x817a) },
+	{ USB_DEVICE(0x0CF3, 0x817b) },
 	{ USB_DEVICE(0x0cf3, 0xe003) },
 	{ USB_DEVICE(0x0CF3, 0xE004) },
 	{ USB_DEVICE(0x0CF3, 0xE005) },
@@ -166,6 +167,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = {
 	{ USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0CF3, 0x817b), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 },
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 6291b5d7f4318..3d0d643c8bff4 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -207,6 +207,7 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x0cf3, 0x817b), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 },

From 099208a1cb10bb4eee2622dc3ffe5497f4d6cf9f Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Mon, 19 Oct 2015 10:51:47 +0300
Subject: [PATCH 075/201] Bluetooth: Fix removing connection parameters when
 unpairing

commit a6ad2a6b9cc1d9d791aee5462cfb8528f366f1d4 upstream.

The commit 89cbb0638e9b7 introduced support for deferred connection
parameter removal when unpairing by removing them only once an
existing connection gets disconnected. However, it failed to address
the scenario when we're *not* connected and do an unpair operation.

What makes things worse is that most user space BlueZ versions will
first issue a disconnect request and only then unpair, meaning the
buggy code will be triggered every time. This effectively causes the
kernel to resume scanning and reconnect to a device for which we've
removed all keys and GATT database information.

This patch fixes the issue by adding the missing call to the
hci_conn_params_del() function to a branch which handles the case of
no existing connection.

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bluetooth/mgmt.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index c4fe2fee753fc..72c9376ec03cf 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3090,6 +3090,11 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	} else {
 		u8 addr_type;
 
+		if (cp->addr.type == BDADDR_LE_PUBLIC)
+			addr_type = ADDR_LE_DEV_PUBLIC;
+		else
+			addr_type = ADDR_LE_DEV_RANDOM;
+
 		conn = hci_conn_hash_lookup_ba(hdev, LE_LINK,
 					       &cp->addr.bdaddr);
 		if (conn) {
@@ -3105,13 +3110,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 			 */
 			if (!cp->disconnect)
 				conn = NULL;
+		} else {
+			hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type);
 		}
 
-		if (cp->addr.type == BDADDR_LE_PUBLIC)
-			addr_type = ADDR_LE_DEV_PUBLIC;
-		else
-			addr_type = ADDR_LE_DEV_RANDOM;
-
 		hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type);
 
 		err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type);

From 25f47b043dd719e7637063f74569cd94796ccdf1 Mon Sep 17 00:00:00 2001
From: Johan Hedberg <johan.hedberg@intel.com>
Date: Wed, 21 Oct 2015 15:21:31 +0300
Subject: [PATCH 076/201] Bluetooth: Fix missing hdev locking for LE scan
 cleanup

commit 8ce783dc5ea3af3a213ac9b4d9d2ccfeeb9c9058 upstream.

The hci_conn objects don't have a dedicated lock themselves but rely
on the caller to hold the hci_dev lock for most types of access. The
hci_conn_timeout() function has so far sent certain HCI commands based
on the hci_conn state which has been possible without holding the
hci_dev lock.

The recent changes to do LE scanning before connect attempts added
even more operations to hci_conn and hci_dev from hci_conn_timeout,
thereby exposing potential race conditions with the hci_dev and
hci_conn states.

As an example of such a race, here there's a timeout but an
l2cap_sock_connect() call manages to race with the cleanup routine:

[Oct21 08:14] l2cap_chan_timeout: chan ee4b12c0 state BT_CONNECT
[  +0.000004] l2cap_chan_close: chan ee4b12c0 state BT_CONNECT
[  +0.000002] l2cap_chan_del: chan ee4b12c0, conn f3141580, err 111, state BT_CONNECT
[  +0.000002] l2cap_sock_teardown_cb: chan ee4b12c0 state BT_CONNECT
[  +0.000005] l2cap_chan_put: chan ee4b12c0 orig refcnt 4
[  +0.000010] hci_conn_drop: hcon f53d56e0 orig refcnt 1
[  +0.000013] l2cap_chan_put: chan ee4b12c0 orig refcnt 3
[  +0.000063] hci_conn_timeout: hcon f53d56e0 state BT_CONNECT
[  +0.000049] hci_conn_params_del: addr ee:0d:30:09:53:1f (type 1)
[  +0.000002] hci_chan_list_flush: hcon f53d56e0
[  +0.000001] hci_chan_del: hci0 hcon f53d56e0 chan f4e7ccc0
[  +0.004528] l2cap_sock_create: sock e708fc00
[  +0.000023] l2cap_chan_create: chan ee4b1770
[  +0.000001] l2cap_chan_hold: chan ee4b1770 orig refcnt 1
[  +0.000002] l2cap_sock_init: sk ee4b3390
[  +0.000029] l2cap_sock_bind: sk ee4b3390
[  +0.000010] l2cap_sock_setsockopt: sk ee4b3390
[  +0.000037] l2cap_sock_connect: sk ee4b3390
[  +0.000002] l2cap_chan_connect: 00:02:72:d9:e5:8b -> ee:0d:30:09:53:1f (type 2) psm 0x00
[  +0.000002] hci_get_route: 00:02:72:d9:e5:8b -> ee:0d:30:09:53:1f
[  +0.000001] hci_dev_hold: hci0 orig refcnt 8
[  +0.000003] hci_conn_hold: hcon f53d56e0 orig refcnt 0

Above the l2cap_chan_connect() shouldn't have been able to reach the
hci_conn f53d56e0 anymore but since hci_conn_timeout didn't do proper
locking that's not the case. The end result is a reference to hci_conn
that's not in the conn_hash list, resulting in list corruption when
trying to remove it later:

[Oct21 08:15] l2cap_chan_timeout: chan ee4b1770 state BT_CONNECT
[  +0.000004] l2cap_chan_close: chan ee4b1770 state BT_CONNECT
[  +0.000003] l2cap_chan_del: chan ee4b1770, conn f3141580, err 111, state BT_CONNECT
[  +0.000001] l2cap_sock_teardown_cb: chan ee4b1770 state BT_CONNECT
[  +0.000005] l2cap_chan_put: chan ee4b1770 orig refcnt 4
[  +0.000002] hci_conn_drop: hcon f53d56e0 orig refcnt 1
[  +0.000015] l2cap_chan_put: chan ee4b1770 orig refcnt 3
[  +0.000038] hci_conn_timeout: hcon f53d56e0 state BT_CONNECT
[  +0.000003] hci_chan_list_flush: hcon f53d56e0
[  +0.000002] hci_conn_hash_del: hci0 hcon f53d56e0
[  +0.000001] ------------[ cut here ]------------
[  +0.000461] WARNING: CPU: 0 PID: 1782 at lib/list_debug.c:56 __list_del_entry+0x3f/0x71()
[  +0.000839] list_del corruption, f53d56e0->prev is LIST_POISON2 (00000200)

The necessary fix is unfortunately more complicated than just adding
hci_dev_lock/unlock calls to the hci_conn_timeout() call path.
Particularly, the hci_conn_del() API, which expects the hci_dev lock to
be held, performs a cancel_delayed_work_sync(&hcon->disc_work) which
would lead to a deadlock if the hci_conn_timeout() call path tries to
acquire the same lock.

This patch solves the problem by deferring the cleanup work to a
separate work callback. To protect against the hci_dev or hci_conn
going away meanwhile temporary references are taken with the help of
hci_dev_hold() and hci_conn_get().

Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_conn.c         | 52 ++++++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 9e1a59e01fa2f..544a0201abdb4 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -469,6 +469,7 @@ struct hci_conn {
 	struct delayed_work auto_accept_work;
 	struct delayed_work idle_work;
 	struct delayed_work le_conn_timeout;
+	struct work_struct  le_scan_cleanup;
 
 	struct device	dev;
 	struct dentry	*debugfs;
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 2dda439c8cb83..ec4836f243bc1 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -137,18 +137,51 @@ static void hci_conn_cleanup(struct hci_conn *conn)
 	hci_conn_put(conn);
 }
 
-/* This function requires the caller holds hdev->lock */
-static void hci_connect_le_scan_remove(struct hci_conn *conn)
+static void le_scan_cleanup(struct work_struct *work)
 {
-	hci_connect_le_scan_cleanup(conn);
+	struct hci_conn *conn = container_of(work, struct hci_conn,
+					     le_scan_cleanup);
+	struct hci_dev *hdev = conn->hdev;
+	struct hci_conn *c = NULL;
 
-	/* We can't call hci_conn_del here since that would deadlock
-	 * with trying to call cancel_delayed_work_sync(&conn->disc_work).
-	 * Instead, call just hci_conn_cleanup() which contains the bare
-	 * minimum cleanup operations needed for a connection in this
-	 * state.
+	BT_DBG("%s hcon %p", hdev->name, conn);
+
+	hci_dev_lock(hdev);
+
+	/* Check that the hci_conn is still around */
+	rcu_read_lock();
+	list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) {
+		if (c == conn)
+			break;
+	}
+	rcu_read_unlock();
+
+	if (c == conn) {
+		hci_connect_le_scan_cleanup(conn);
+		hci_conn_cleanup(conn);
+	}
+
+	hci_dev_unlock(hdev);
+	hci_dev_put(hdev);
+	hci_conn_put(conn);
+}
+
+static void hci_connect_le_scan_remove(struct hci_conn *conn)
+{
+	BT_DBG("%s hcon %p", conn->hdev->name, conn);
+
+	/* We can't call hci_conn_del/hci_conn_cleanup here since that
+	 * could deadlock with another hci_conn_del() call that's holding
+	 * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work).
+	 * Instead, grab temporary extra references to the hci_dev and
+	 * hci_conn and perform the necessary cleanup in a separate work
+	 * callback.
 	 */
-	hci_conn_cleanup(conn);
+
+	hci_dev_hold(conn->hdev);
+	hci_conn_get(conn);
+
+	schedule_work(&conn->le_scan_cleanup);
 }
 
 static void hci_acl_create_connection(struct hci_conn *conn)
@@ -580,6 +613,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 	INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept);
 	INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle);
 	INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout);
+	INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup);
 
 	atomic_set(&conn->refcnt, 0);
 

From 8d66c7371044032029efd1aecfc9a3b3e2125a9d Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 30 Oct 2015 13:48:19 +0100
Subject: [PATCH 077/201] can: Use correct type in sizeof() in nla_put()

commit 562b103a21974c2f9cd67514d110f918bb3e1796 upstream.

The sizeof() is invoked on an incorrect variable, likely due to some
copy-paste error, and this might result in memory corruption. Fix this.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: Wolfgang Grandegger <wg@grandegger.com>
Cc: netdev@vger.kernel.org
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index aede704605c66..141c2a42d7eda 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -915,7 +915,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	     nla_put(skb, IFLA_CAN_BITTIMING_CONST,
 		     sizeof(*priv->bittiming_const), priv->bittiming_const)) ||
 
-	    nla_put(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock) ||
+	    nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) ||
 	    nla_put_u32(skb, IFLA_CAN_STATE, state) ||
 	    nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) ||
 	    nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) ||

From bf804ca6bcbb73b7c6ab2328f4baadd0e1f1f2d8 Mon Sep 17 00:00:00 2001
From: Mirza Krak <mirza.krak@hostmobility.com>
Date: Tue, 10 Nov 2015 14:59:34 +0100
Subject: [PATCH 078/201] can: sja1000: clear interrupts on start

commit 7cecd9ab80f43972c056dc068338f7bcc407b71c upstream.

According to SJA1000 data sheet error-warning (EI) interrupt is not
cleared by setting the controller in to reset-mode.

Then if we have the following case:
- system is suspended (echo mem > /sys/power/state) and SJA1000 is left
  in operating state
- A bus error condition occurs which activates EI interrupt, system is
  still suspended which means EI interrupt will be not be handled nor
  cleared.

If the above two events occur, on resume there is no way to return the
SJA1000 to operating state, except to cycle power to it.

By simply reading the IR register on start we will clear any previous
conditions that could be present.

Signed-off-by: Mirza Krak <mirza.krak@hostmobility.com>
Reported-by: Christian Magnusson <Christian.Magnusson@semcon.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/can/sja1000/sja1000.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 7b92e911a6168..f10834be48a5c 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev)
 	priv->write_reg(priv, SJA1000_RXERR, 0x0);
 	priv->read_reg(priv, SJA1000_ECC);
 
+	/* clear interrupt flags */
+	priv->read_reg(priv, SJA1000_IR);
+
 	/* leave reset mode */
 	set_normal_mode(dev);
 }

From 000fb8a2db323fa064992bde94c5df46ab661895 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Thu, 22 Oct 2015 15:41:52 +0100
Subject: [PATCH 079/201] arm64: Fix compat register mappings

commit 5accd17d0eb523350c9ef754d655e379c9bb93b3 upstream.

For reasons not entirely apparent, but now enshrined in history, the
architectural mapping of AArch32 banked registers to AArch64 registers
actually orders SP_<mode> and LR_<mode> backwards compared to the
intuitive r13/r14 order, for all modes except FIQ.

Fix the compat_<reg>_<mode> macros accordingly, in the hope of avoiding
subtle bugs with KVM and AArch32 guests.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/include/asm/ptrace.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index 536274ed292ea..e9e5467e0bf45 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -83,14 +83,14 @@
 #define compat_sp	regs[13]
 #define compat_lr	regs[14]
 #define compat_sp_hyp	regs[15]
-#define compat_sp_irq	regs[16]
-#define compat_lr_irq	regs[17]
-#define compat_sp_svc	regs[18]
-#define compat_lr_svc	regs[19]
-#define compat_sp_abt	regs[20]
-#define compat_lr_abt	regs[21]
-#define compat_sp_und	regs[22]
-#define compat_lr_und	regs[23]
+#define compat_lr_irq	regs[16]
+#define compat_sp_irq	regs[17]
+#define compat_lr_svc	regs[18]
+#define compat_sp_svc	regs[19]
+#define compat_lr_abt	regs[20]
+#define compat_sp_abt	regs[21]
+#define compat_lr_und	regs[22]
+#define compat_sp_und	regs[23]
 #define compat_r8_fiq	regs[24]
 #define compat_r9_fiq	regs[25]
 #define compat_r10_fiq	regs[26]

From ea9130ac6927fc4fe26cab6aa25d9e4172c38cd5 Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Mon, 26 Oct 2015 21:42:33 +0000
Subject: [PATCH 080/201] arm64: page-align sections for DEBUG_RODATA

commit cb083816ab5ac3d10a9417527f07fc5962cc3808 upstream.

A kernel built with DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA doesn't
have .text aligned to a page boundary, though fixup_executable works at
page-granularity thanks to its use of create_mapping. If .text is not
page-aligned, the first page it exists in may be marked non-executable,
leading to failures when an attempt is made to execute code in said
page.

This patch upgrades ALIGN_DEBUG_RO and ALIGN_DEBUG_RO_MIN to force page
alignment for DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA kernels,
ensuring that all sections with specific RWX permission requirements are
mapped with the correct permissions.

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Jeremy Linton <jeremy.linton@arm.com>
Reviewed-by: Laura Abbott <laura@labbott.name>
Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Suzuki Poulose <suzuki.poulose@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Fixes: da141706aea52c1a ("arm64: add better page protections to arm64")
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm64/kernel/vmlinux.lds.S | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 98073332e2d05..4d77757b5894a 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -60,9 +60,12 @@ PECOFF_FILE_ALIGNMENT = 0x200;
 #define PECOFF_EDATA_PADDING
 #endif
 
-#ifdef CONFIG_DEBUG_ALIGN_RODATA
+#if defined(CONFIG_DEBUG_ALIGN_RODATA)
 #define ALIGN_DEBUG_RO			. = ALIGN(1<<SECTION_SHIFT);
 #define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
+#elif defined(CONFIG_DEBUG_RODATA)
+#define ALIGN_DEBUG_RO			. = ALIGN(1<<PAGE_SHIFT);
+#define ALIGN_DEBUG_RO_MIN(min)		ALIGN_DEBUG_RO
 #else
 #define ALIGN_DEBUG_RO
 #define ALIGN_DEBUG_RO_MIN(min)		. = ALIGN(min);

From 717991ddefe125a0f68f10b44bd7178e2208fdfd Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Tue, 20 Oct 2015 17:25:09 +0900
Subject: [PATCH 081/201] pinctrl: uniphier: set input-enable before pin-muxing

commit bac7f4c1bf5e7c6ccd5bb71edc015b26c77f7460 upstream.

While IECTRL is disabled, input signals are pulled-down internally.
If pin-muxing is set up first, glitch signals (Low to High transition)
might be input to hardware blocks.

Bad case scenario:
[1] The hardware block is already running before pinctrl is handled.
   (the reset is de-asserted by default or by a firmware, for example)
[2] The pin-muxing is set up.  The input signals to hardware block
    are pulled-down by the chip-internal biasing.
[3] The pins are input-enabled.  The signals from the board reach the
    hardware block.

Actually, one invalid character is input to the UART blocks for such
SoCs as PH1-LD4, PH1-sLD8, where UART devices start to run at the
power on reset.

To avoid such problems, pins should be input-enabled before muxing.

Fixes: 6e9088920258 ("pinctrl: UniPhier: add UniPhier pinctrl core support")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reported-by: Dai Okamura <okamura.dai@socionext.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/uniphier/pinctrl-uniphier-core.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
index 918f3b643f1b0..589872cc8adbb 100644
--- a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
+++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c
@@ -539,6 +539,12 @@ static int uniphier_pmx_set_one_mux(struct pinctrl_dev *pctldev, unsigned pin,
 	unsigned reg, reg_end, shift, mask;
 	int ret;
 
+	/* some pins need input-enabling */
+	ret = uniphier_conf_pin_input_enable(pctldev,
+					     &pctldev->desc->pins[pin], 1);
+	if (ret)
+		return ret;
+
 	reg = UNIPHIER_PINCTRL_PINMUX_BASE + pin * mux_bits / 32 * reg_stride;
 	reg_end = reg + reg_stride;
 	shift = pin * mux_bits % 32;
@@ -563,9 +569,7 @@ static int uniphier_pmx_set_one_mux(struct pinctrl_dev *pctldev, unsigned pin,
 			return ret;
 	}
 
-	/* some pins need input-enabling */
-	return uniphier_conf_pin_input_enable(pctldev,
-					      &pctldev->desc->pins[pin], 1);
+	return 0;
 }
 
 static int uniphier_pmx_set_mux(struct pinctrl_dev *pctldev,

From 0e9637a2b1d42761dddeaad4dcf3c09cb34e5db5 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jogo@openwrt.org>
Date: Sun, 11 Oct 2015 17:39:31 +0200
Subject: [PATCH 082/201] pinctrl: qcom: ssbi: fix compilation with DEBUG_FS=n
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 11091fb0a1227d569d09353e1ce1f88694a033dc upstream.

The DEBUG_FS=n #defines for the dbg_show functions were missed when
renaming the driver from msm_ to pm8xxx_, causing it to break the build
when DEBUG_FS isn't enabled:

  CC [M]  drivers/pinctrl/qcom/pinctrl-ssbi-gpio.o
drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c:597:14: error: â€˜pm8xxx_gpio_dbg_showâ€™ undeclared here (not in a function)
  .dbg_show = pm8xxx_gpio_dbg_show,

Fix this by renaming them correctly.

Fixes: b4c45fe974bc ("pinctrl: qcom: ssbi: Family A gpio & mpp drivers")
Signed-off-by: Jonas Gorski <jogo@openwrt.org>
Reviewed-by: Bjorn Andersson <bjorn.andersson@sonymobile.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 2 +-
 drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
index e1a3721bc8e58..d809c9eaa3231 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c
@@ -584,7 +584,7 @@ static void pm8xxx_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 }
 
 #else
-#define msm_gpio_dbg_show NULL
+#define pm8xxx_gpio_dbg_show NULL
 #endif
 
 static struct gpio_chip pm8xxx_gpio_template = {
diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
index 6652b8d7f707a..8982027de8e8b 100644
--- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
+++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c
@@ -639,7 +639,7 @@ static void pm8xxx_mpp_dbg_show(struct seq_file *s, struct gpio_chip *chip)
 }
 
 #else
-#define msm_mpp_dbg_show NULL
+#define pm8xxx_mpp_dbg_show NULL
 #endif
 
 static struct gpio_chip pm8xxx_mpp_template = {

From 92f503d0b0491f90c196a1ed3dda601c4016b1a3 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@qca.qualcomm.com>
Date: Wed, 9 Sep 2015 11:34:37 +0300
Subject: [PATCH 083/201] ath10k: add ATH10K_FW_FEATURE_RAW_MODE_SUPPORT to
 ath10k_core_fw_feature_str[]

commit 5af82fa66a7ee8dfc29fadb487a02e2ef14ea965 upstream.

This was missed in the original commit adding the flag and ath10k only printed "bit10":

ath10k_pci 0000:02:00.0: qca988x hw2.0 (0x4100016c, 0x043202ff) fw 10.2.4.70.6-2 api 3
htt-ver 2.1 wmi-op 5 htt-op 2 cal otp max-sta 128 raw 0 hwcrypto 1 features no-p2p,bit10

Also add a build test to avoid this happening again.

Fixes: ccec9038c721 ("ath10k: enable raw encap mode and software crypto engine")
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath10k/core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index b87b98617073b..9e3d89d064453 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -142,12 +142,17 @@ static const char *const ath10k_core_fw_feature_str[] = {
 	[ATH10K_FW_FEATURE_IGNORE_OTP_RESULT] = "ignore-otp",
 	[ATH10K_FW_FEATURE_NO_NWIFI_DECAP_4ADDR_PADDING] = "no-4addr-pad",
 	[ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT] = "skip-clock-init",
+	[ATH10K_FW_FEATURE_RAW_MODE_SUPPORT] = "raw-mode",
 };
 
 static unsigned int ath10k_core_get_fw_feature_str(char *buf,
 						   size_t buf_len,
 						   enum ath10k_fw_features feat)
 {
+	/* make sure that ath10k_core_fw_feature_str[] gets updated */
+	BUILD_BUG_ON(ARRAY_SIZE(ath10k_core_fw_feature_str) !=
+		     ATH10K_FW_FEATURE_COUNT);
+
 	if (feat >= ARRAY_SIZE(ath10k_core_fw_feature_str) ||
 	    WARN_ON(!ath10k_core_fw_feature_str[feat])) {
 		return scnprintf(buf, buf_len, "bit%d", feat);

From 3a0ff96194c14f7915e571553a2328e477da8c97 Mon Sep 17 00:00:00 2001
From: Vivek Natarajan <nataraja@qti.qualcomm.com>
Date: Tue, 6 Oct 2015 15:19:34 +0300
Subject: [PATCH 084/201] ath10k: use station's current operating mode from
 assoc request

commit 72f8cef5d1155209561b01e092ce1a04ad50c4cb upstream.

The current number of spatial streams used by the client is advertised
as a separate IE in assoc request. Use this information to set
the NSS operating mode.

Fixes: 45c9abc059fa ("ath10k: implement more versatile set_bitrate_mask").
Signed-off-by: Vivek Natarajan <nataraja@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 64674c955d447..ae90a2cdab9b2 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2083,7 +2083,8 @@ static void ath10k_peer_assoc_h_ht(struct ath10k *ar,
 	enum ieee80211_band band;
 	const u8 *ht_mcs_mask;
 	const u16 *vht_mcs_mask;
-	int i, n, max_nss;
+	int i, n;
+	u8 max_nss;
 	u32 stbc;
 
 	lockdep_assert_held(&ar->conf_mutex);
@@ -2168,7 +2169,7 @@ static void ath10k_peer_assoc_h_ht(struct ath10k *ar,
 			arg->peer_ht_rates.rates[i] = i;
 	} else {
 		arg->peer_ht_rates.num_rates = n;
-		arg->peer_num_spatial_streams = max_nss;
+		arg->peer_num_spatial_streams = min(sta->rx_nss, max_nss);
 	}
 
 	ath10k_dbg(ar, ATH10K_DBG_MAC, "mac ht peer %pM mcs cnt %d nss %d\n",

From 97f367a3193fa5d4e85a469a3d862396c5484c3e Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Date: Tue, 3 Nov 2015 11:51:33 +0530
Subject: [PATCH 085/201] ath10k: fix invalid NSS for 4x4 devices

commit f680f70adbeab28b35f849016b964dd645db6237 upstream.

The number of spatial streams that are derived from chain mask
for 4x4 devices is using wrong bitmask and conditional check.
This is affecting downlink throughput for QCA99x0 devices. Earlier
cfg_tx_chainmask is not filled by default until user configured it
and so get_nss_from_chainmask never be called. This issue is exposed
by recent commit 166de3f1895d ("ath10k: remove supported chain mask").
By default maximum supported chain mask is filled in cfg_tx_chainmask.

Fixes: 5572a95b4b ("ath10k: apply chainmask settings to vdev on creation")
Signed-off-by: Rajkumar Manoharan <rmanohar@qti.qualcomm.com>
Signed-off-by: Kalle Valo <kvalo@qca.qualcomm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/ath/ath10k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index ae90a2cdab9b2..e1d32de8b1110 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4056,7 +4056,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed)
 
 static u32 get_nss_from_chainmask(u16 chain_mask)
 {
-	if ((chain_mask & 0x15) == 0x15)
+	if ((chain_mask & 0xf) == 0xf)
 		return 4;
 	else if ((chain_mask & 0x7) == 0x7)
 		return 3;

From c2ec9e3a2cfe513efcd08d0492295124a9677ae2 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Tue, 27 Oct 2015 13:13:38 +0100
Subject: [PATCH 086/201] s390/kernel: fix ptrace peek/poke for floating point
 registers

commit 55a423b6f105fa323168f15f4bb67f23b21da44e upstream.

git commit 155e839a814834a3b4b31e729f4716e59d3d2dd4
"s390/kernel: dynamically allocate FP register save area"
introduced a regression in regard to ptrace.

If the vector register extension is not present or unused the
ptrace peek of a floating pointer register return incorrect data
and the ptrace poke to a floating pointer register overwrites the
task structure starting at task->thread.fpu.fprs.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kernel/ptrace.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 8b1c8e33f184a..d319f36f7d1d6 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -244,7 +244,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(addr_t *)
-			       ((addr_t) &child->thread.fpu.fprs + offset);
+			       ((addr_t) child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -388,7 +388,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data)
 				child->thread.fpu.vxrs + 2*offset) = data;
 		else
 			*(addr_t *)((addr_t)
-				&child->thread.fpu.fprs + offset) = data;
+				child->thread.fpu.fprs + offset) = data;
 
 	} else if (addr < (addr_t) (&dummy->regs.per_info + 1)) {
 		/*
@@ -622,7 +622,7 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr)
 			       ((addr_t) child->thread.fpu.vxrs + 2*offset);
 		else
 			tmp = *(__u32 *)
-			       ((addr_t) &child->thread.fpu.fprs + offset);
+			       ((addr_t) child->thread.fpu.fprs + offset);
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*
@@ -747,7 +747,7 @@ static int __poke_user_compat(struct task_struct *child,
 				child->thread.fpu.vxrs + 2*offset) = tmp;
 		else
 			*(__u32 *)((addr_t)
-				&child->thread.fpu.fprs + offset) = tmp;
+				child->thread.fpu.fprs + offset) = tmp;
 
 	} else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) {
 		/*

From 46af83a483b3ddcd4bfff4889b2f8330806bfd7b Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 9 Oct 2015 11:07:06 +0200
Subject: [PATCH 087/201] s390/pci: reshuffle struct used to write debug data

commit 7cc8944e13c73374b6f33b39ca24c0891c87b077 upstream.

zpci_err_insn writes stale stack content to the debugfs.

Ensure that the struct in zpci_err_insn is ordered in a way that
we don't have uninitialized holes in it. In addition to that
add the packed attribute.

Fixes: 3d8258e (s390/pci: move debug messages to debugfs)
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Reviewed-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/pci/pci_insn.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index dcc2634ccbe29..10ca15dcab11f 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -16,11 +16,11 @@
 static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset)
 {
 	struct {
-		u8 cc;
-		u8 status;
 		u64 req;
 		u64 offset;
-	} data = {cc, status, req, offset};
+		u8 cc;
+		u8 status;
+	} __packed data = {req, offset, cc, status};
 
 	zpci_err_hex(&data, sizeof(data));
 }

From 0c590b83b0308375b0d5e34fe6c9756d007184b4 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Mon, 26 Oct 2015 08:41:29 +0100
Subject: [PATCH 088/201] KVM: s390: SCA must not cross page boundaries

commit c5c2c393468576bad6d10b2b5fefff8cd25df3f4 upstream.

We seemed to have missed a few corner cases in commit f6c137ff00a4
("KVM: s390: randomize sca address").

The SCA has a maximum size of 2112 bytes. By setting the sca_offset to
some unlucky numbers, we exceed the page.

0x7c0 (1984) -> Fits exactly
0x7d0 (2000) -> 16 bytes out
0x7e0 (2016) -> 32 bytes out
0x7f0 (2032) -> 48 bytes out

One VCPU entry is 32 bytes long.

For the last two cases, we actually write data to the other page.
1. The address of the VCPU.
2. Injection/delivery/clearing of SIGP externall calls via SIGP IF.

Especially the 2. happens regularly. So this could produce two problems:
1. The guest losing/getting external calls.
2. Random memory overwrites in the host.

So this problem happens on every 127 + 128 created VM with 64 VCPUs.

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kvm/kvm-s390.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0a67c40eece9b..2275413625dbc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -1120,7 +1120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (!kvm->arch.sca)
 		goto out_err;
 	spin_lock(&kvm_lock);
-	sca_offset = (sca_offset + 16) & 0x7f0;
+	sca_offset += 16;
+	if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE)
+		sca_offset = 0;
 	kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset);
 	spin_unlock(&kvm_lock);
 

From 752f0d74a7a63723e0baca91fdd9cb55f012911a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 5 Nov 2015 09:03:50 +0100
Subject: [PATCH 089/201] KVM: Provide function for VCPU lookup by id

commit db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 upstream.

Let's provide a function to lookup a VCPU by id.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
[split patch from refactoring patch]
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kvm_host.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 1bef9e21e7259..e480f9fbdd629 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -442,6 +442,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 	     (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \
 	     idx++)
 
+static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id)
+{
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		if (vcpu->vcpu_id == id)
+			return vcpu;
+	return NULL;
+}
+
 #define kvm_for_each_memslot(memslot, slots)	\
 	for (memslot = &slots->memslots[0];	\
 	      memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\

From 0c1396192a918b5d5879f8bdd59b97340e39818c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 5 Nov 2015 09:06:06 +0100
Subject: [PATCH 090/201] KVM: s390: fix wrong lookup of VCPUs by array index

commit 152e9f65d66f0a3891efc3869440becc0e7ff53f upstream.

For now, VCPUs were always created sequentially with incrementing
VCPU ids. Therefore, the index in the VCPUs array matched the id.

As sequential creation might change with cpu hotplug, let's use
the correct lookup function to find a VCPU by id, not array index.

Let's also use kvm_lookup_vcpu() for validation of the sending VCPU
on external call injection.

Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kvm/interrupt.c | 3 +--
 arch/s390/kvm/sigp.c      | 8 ++------
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 5c2c169395c3c..bb2468a8e7795 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1057,8 +1057,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
 				   src_id, 0);
 
 	/* sending vcpu invalid */
-	if (src_id >= KVM_MAX_VCPUS ||
-	    kvm_get_vcpu(vcpu->kvm, src_id) == NULL)
+	if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL)
 		return -EINVAL;
 
 	if (sclp.has_sigpif)
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index da690b69f9fe1..77c22d685c7a1 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
 			   u16 cpu_addr, u32 parameter, u64 *status_reg)
 {
 	int rc;
-	struct kvm_vcpu *dst_vcpu;
+	struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
 	if (!dst_vcpu)
 		return SIGP_CC_NOT_OPERATIONAL;
 
@@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
 	trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
 
 	if (order_code == SIGP_EXTERNAL_CALL) {
-		dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+		dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
 		BUG_ON(dest_vcpu == NULL);
 
 		kvm_s390_vcpu_wakeup(dest_vcpu);

From 705e4e3c2cd5b193ebf83e762fb303be7b256cea Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Thu, 5 Nov 2015 09:38:15 +0100
Subject: [PATCH 091/201] KVM: s390: avoid memory overwrites on emergency
 signal injection

commit b85de33a1a3433487b6a721cfdce25ec8673e622 upstream.

Commit 383d0b050106 ("KVM: s390: handle pending local interrupts via
bitmap") introduced a possible memory overwrite from user space.

User space could pass an invalid emergency signal code (sending VCPU)
and therefore exceed the bitmap. Let's take care of this case and
check that the id is in the valid range.

Reviewed-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kvm/interrupt.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index bb2468a8e7795..08de785c9e639 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1136,6 +1136,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
 				   irq->u.emerg.code, 0);
 
+	/* sending vcpu invalid */
+	if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL)
+		return -EINVAL;
+
 	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
 	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
 	atomic_or(CPUSTAT_EXT_INT, li->cpuflags);

From 5d19d22c7ba11df61e175dc289ef1058270b00ae Mon Sep 17 00:00:00 2001
From: David Hildenbrand <dahi@linux.vnet.ibm.com>
Date: Fri, 6 Nov 2015 12:08:48 +0100
Subject: [PATCH 092/201] KVM: s390: enable SIMD only when no VCPUs were
 created

commit 5967c17b118a2bd1dd1d554cc4eee16233e52bec upstream.

We should never allow to enable/disable any facilities for the guest
when other VCPUs were already created.

kvm_arch_vcpu_(load|put) relies on SIMD not changing during runtime.
If somebody would create and run VCPUs and then decides to enable
SIMD, undefined behaviour could be possible (e.g. vector save area
not being set up).

Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/s390/kvm/kvm-s390.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 2275413625dbc..1290af83417a3 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -342,12 +342,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		r = 0;
 		break;
 	case KVM_CAP_S390_VECTOR_REGISTERS:
-		if (MACHINE_HAS_VX) {
+		mutex_lock(&kvm->lock);
+		if (atomic_read(&kvm->online_vcpus)) {
+			r = -EBUSY;
+		} else if (MACHINE_HAS_VX) {
 			set_kvm_facility(kvm->arch.model.fac->mask, 129);
 			set_kvm_facility(kvm->arch.model.fac->list, 129);
 			r = 0;
 		} else
 			r = -EINVAL;
+		mutex_unlock(&kvm->lock);
 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
 			 r ? "(not available)" : "(success)");
 		break;

From 580c9bb0647dd6d75e28141942a0fd8a0d8962a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
Date: Mon, 31 Aug 2015 19:48:28 +0300
Subject: [PATCH 093/201] Revert "usb: dwc3: gadget: drop unnecessary loop when
 cleaning up TRBs"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit d115d7050a0d2c4967532f18c9cb522fea6b7280 upstream.

This reverts commit 8f2c9544aba636134303105ecb164190a39dece4.

As it breaks g_ether on my Baytrail FFRD8 device. Everything starts out
fine, but after a bit of data has been transferred it just stops
flowing.

Note that I do get a bunch of these "NOHZ: local_softirq_pending 08"
when booting the machine, but I'm not really sure if they're related
to this problem.

Cc: Felipe Balbi <balbi@ti.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linux-usb@vger.kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 1e8bdf8178113..11784de01ca88 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1872,27 +1872,32 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep,
 	unsigned int		i;
 	int			ret;
 
-	req = next_request(&dep->req_queued);
-	if (!req) {
-		WARN_ON_ONCE(1);
-		return 1;
-	}
-	i = 0;
 	do {
-		slot = req->start_slot + i;
-		if ((slot == DWC3_TRB_NUM - 1) &&
+		req = next_request(&dep->req_queued);
+		if (!req) {
+			WARN_ON_ONCE(1);
+			return 1;
+		}
+		i = 0;
+		do {
+			slot = req->start_slot + i;
+			if ((slot == DWC3_TRB_NUM - 1) &&
 				usb_endpoint_xfer_isoc(dep->endpoint.desc))
-			slot++;
-		slot %= DWC3_TRB_NUM;
-		trb = &dep->trb_pool[slot];
+				slot++;
+			slot %= DWC3_TRB_NUM;
+			trb = &dep->trb_pool[slot];
+
+			ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
+					event, status);
+			if (ret)
+				break;
+		} while (++i < req->request.num_mapped_sgs);
+
+		dwc3_gadget_giveback(dep, req, status);
 
-		ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb,
-				event, status);
 		if (ret)
 			break;
-	} while (++i < req->request.num_mapped_sgs);
-
-	dwc3_gadget_giveback(dep, req, status);
+	} while (1);
 
 	if (usb_endpoint_xfer_isoc(dep->endpoint.desc) &&
 			list_empty(&dep->req_queued)) {

From 5d8a3e3b16366682be5ef5d3f6b24ae39bef5b78 Mon Sep 17 00:00:00 2001
From: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Date: Mon, 19 Oct 2015 16:25:15 +0200
Subject: [PATCH 094/201] usb: gadget: net2280: restore ep_cfg after defect7374
 workaround

commit 81e9d14a53eb1abfbe6ac828a87a2deb4702b5f1 upstream.

Defect 7374 workaround enables all GPEP as endpoint 0. Restore
endpoint number when defect 7374 workaround is disabled. Otherwise,
check to match USB endpoint number to hardware endpoint number in
net2280_enable() fails.

Reported-by: Paul Jones <p.jones@teclyn.com>
Signed-off-by: Mian Yousaf Kaukab <yousaf.kaukab@intel.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/net2280.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c
index cf0ed42f5591c..6706aef907f4b 100644
--- a/drivers/usb/gadget/udc/net2280.c
+++ b/drivers/usb/gadget/udc/net2280.c
@@ -1913,7 +1913,7 @@ static void defect7374_disable_data_eps(struct net2280 *dev)
 
 	for (i = 1; i < 5; i++) {
 		ep = &dev->ep[i];
-		writel(0, &ep->cfg->ep_cfg);
+		writel(i, &ep->cfg->ep_cfg);
 	}
 
 	/* CSROUT, CSRIN, PCIOUT, PCIIN, STATIN, RCIN */

From f95102dd298de57c5c27b65271f4359dd24c30b2 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dgilbert@interlog.com>
Date: Mon, 16 Nov 2015 19:22:08 +0100
Subject: [PATCH 095/201] usb: gadget: atmel_usba_udc: Expose correct device
 speed

commit d134c48d889ddceadf4c990e6f3df16b816ed5d4 upstream.

Following changes that appeared in lk 4.0.0, the gadget udc driver for
some ARM based Atmel SoCs (e.g. at91sam9x5 and sama5d3 families)
incorrectly deduced full-speed USB link speed even when the hardware
had negotiated a high-speed link. The fix is to make sure that the
UDPHS Interrupt Enable Register value does not mask the SPEED bit
in the Interrupt Status Register.

For a mass storage gadget this problem lead to failures when the host
had a USB 3 port with the xhci_hcd driver. If the host was a USB 2
port using the ehci_hcd driver then the mass storage gadget worked
(but probably at a lower speed than it should have).

Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Fixes: 9870d895ad87 ("usb: atmel_usba_udc: Mask status with enabled irqs")
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/atmel_usba_udc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c
index f0f2b066ac083..f92f5aff0dd5e 100644
--- a/drivers/usb/gadget/udc/atmel_usba_udc.c
+++ b/drivers/usb/gadget/udc/atmel_usba_udc.c
@@ -1633,7 +1633,7 @@ static irqreturn_t usba_udc_irq(int irq, void *devid)
 	spin_lock(&udc->lock);
 
 	int_enb = usba_int_enb_get(udc);
-	status = usba_readl(udc, INT_STA) & int_enb;
+	status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED);
 	DBG(DBG_INT, "irq, status=%#08x\n", status);
 
 	if (status & USBA_DET_SUSPEND) {

From aaa9636572ef79ac6cd1088a0f32653ae242bca7 Mon Sep 17 00:00:00 2001
From: Ben McCauley <ben.mccauley@garmin.com>
Date: Mon, 16 Nov 2015 10:47:24 -0600
Subject: [PATCH 096/201] usb: dwc3: gadget: let us set lower max_speed

commit b9e51b2b1fda19143f48d182ed7a2943f21e1ae4 upstream.

In some SoCs, dwc3 is implemented as a USB2.0 only
core, meaning that it can't ever achieve SuperSpeed.

Currect driver always sets gadget.max_speed to
USB_SPEED_SUPER unconditionally. This can causes
issues to some Host stacks where the host will issue
a GetBOS() request and we will reply with a BOS
containing Superspeed Capability Descriptor.

At least Windows seems to be upset by this fact and
prints a warning that we should connect $this device
to another port.

[ balbi@ti.com : rewrote entire commit, including
source code comment to make a lot clearer what the
problem is ]

Signed-off-by: Ben McCauley <ben.mccauley@garmin.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 11784de01ca88..9c38e4b0a2622 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -2723,11 +2723,33 @@ int dwc3_gadget_init(struct dwc3 *dwc)
 	}
 
 	dwc->gadget.ops			= &dwc3_gadget_ops;
-	dwc->gadget.max_speed		= USB_SPEED_SUPER;
 	dwc->gadget.speed		= USB_SPEED_UNKNOWN;
 	dwc->gadget.sg_supported	= true;
 	dwc->gadget.name		= "dwc3-gadget";
 
+	/*
+	 * FIXME We might be setting max_speed to <SUPER, however versions
+	 * <2.20a of dwc3 have an issue with metastability (documented
+	 * elsewhere in this driver) which tells us we can't set max speed to
+	 * anything lower than SUPER.
+	 *
+	 * Because gadget.max_speed is only used by composite.c and function
+	 * drivers (i.e. it won't go into dwc3's registers) we are allowing this
+	 * to happen so we avoid sending SuperSpeed Capability descriptor
+	 * together with our BOS descriptor as that could confuse host into
+	 * thinking we can handle super speed.
+	 *
+	 * Note that, in fact, we won't even support GetBOS requests when speed
+	 * is less than super speed because we don't have means, yet, to tell
+	 * composite.c that we are USB 2.0 + LPM ECN.
+	 */
+	if (dwc->revision < DWC3_REVISION_220A)
+		dwc3_trace(trace_dwc3_gadget,
+				"Changing max_speed on rev %08x\n",
+				dwc->revision);
+
+	dwc->gadget.max_speed		= dwc->maximum_speed;
+
 	/*
 	 * Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize
 	 * on ep out.

From c0cd28e68d5862fbf92c36b18cf28e3457bc5053 Mon Sep 17 00:00:00 2001
From: Li Jun <B47624@freescale.com>
Date: Fri, 12 Dec 2014 09:11:42 +0800
Subject: [PATCH 097/201] usb: chipidea: otg: gadget module load and unload
 support

commit 85da852df66e5e0d3aba761b0fece7c958ff0685 upstream.

This patch is to support load and unload gadget driver in full OTG mode.

Signed-off-by: Li Jun <jun.li@freescale.com>
Signed-off-by: Peter Chen <peter.chen@freescale.com>
Tested-by: Jiada Wang <jiada_wang@mentor.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/udc.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 8223fe73ea859..391a1225b0ba3 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -1751,6 +1751,22 @@ static int ci_udc_start(struct usb_gadget *gadget,
 	return retval;
 }
 
+static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci)
+{
+	if (!ci_otg_is_fsm_mode(ci))
+		return;
+
+	mutex_lock(&ci->fsm.lock);
+	if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) {
+		ci->fsm.a_bidl_adis_tmout = 1;
+		ci_hdrc_otg_fsm_start(ci);
+	} else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) {
+		ci->fsm.protocol = PROTO_UNDEF;
+		ci->fsm.otg->state = OTG_STATE_UNDEFINED;
+	}
+	mutex_unlock(&ci->fsm.lock);
+}
+
 /**
  * ci_udc_stop: unregister a gadget driver
  */
@@ -1775,6 +1791,7 @@ static int ci_udc_stop(struct usb_gadget *gadget)
 	ci->driver = NULL;
 	spin_unlock_irqrestore(&ci->lock, flags);
 
+	ci_udc_stop_for_otg_fsm(ci);
 	return 0;
 }
 

From 6ef8be0f8815bb27a8cb9183ce0180442407a73d Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Fri, 7 Aug 2015 11:04:14 -0700
Subject: [PATCH 098/201] usb: dwc3: pci: Add the Synopsys HAPS AXI Product ID

commit 41adc59caece02aa2e988a0e8f9fe8e6f426f82e upstream.

This ID is for the Synopsys DWC_usb3 core with AXI interface on PCIe
HAPS platform. This core has the debug registers mapped at a separate
BAR in order to support enhanced hibernation.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index f62617999f3c7..361b7322548f4 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -27,6 +27,7 @@
 #include "platform_data.h"
 
 #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3	0xabcd
+#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce
 #define PCI_DEVICE_ID_INTEL_BYT		0x0f37
 #define PCI_DEVICE_ID_INTEL_MRFLD	0x119e
 #define PCI_DEVICE_ID_INTEL_BSW		0x22B7
@@ -178,6 +179,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS,
 				PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3),
 	},
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS,
+				PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI),
+	},
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), },

From 044668df6617ff9302975d6ea16fa53ef17f0149 Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Fri, 7 Aug 2015 11:47:25 -0700
Subject: [PATCH 099/201] usb: dwc3: pci: Add the PCI Product ID for Synopsys
 USB 3.1

commit e8095a25364a30216ad40dbe8893ed5c3c235949 upstream.

This adds the PCI product ID for the Synopsys USB 3.1 IP core
(DWC_usb31) on a HAPS-based PCI development platform.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 361b7322548f4..db67ed20f6255 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -28,6 +28,7 @@
 
 #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3	0xabcd
 #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce
+#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31 0xabcf
 #define PCI_DEVICE_ID_INTEL_BYT		0x0f37
 #define PCI_DEVICE_ID_INTEL_MRFLD	0x119e
 #define PCI_DEVICE_ID_INTEL_BSW		0x22B7
@@ -183,6 +184,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
 		PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS,
 				PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI),
 	},
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS,
+				PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31),
+	},
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), },
 	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), },

From ab6fc45ff2c2e9183d81901d20224b4d37820570 Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Fri, 4 Sep 2015 19:15:10 -0700
Subject: [PATCH 100/201] usb: dwc3: Support Synopsys USB 3.1 IP

commit 690fb3718a70c66004342f6f5e2e8a5f95b977db upstream.

This patch allows the dwc3 driver to run on the new Synopsys USB 3.1
IP core, albeit in USB 3.0 mode only.

The Synopsys USB 3.1 IP (DWC_usb31) retains mostly the same register
interface and programming model as the existing USB 3.0 controller IP
(DWC_usb3). However the GSNPSID and version numbers are different.

Add checking for the new ID to pass driver probe.

Also, since the DWC_usb31 version number is lower in value than the
full GSNPSID of the DWC_usb3 IP, we set the high bit to identify
DWC_usb31 and to ensure the values are higher.

Finally, add a documentation note about the revision numbering scheme.
Any future revision checks (for STARS, workarounds, and new features)
should take into consideration how it applies to both the 3.1/3.0 IP.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 10 ++++++++--
 drivers/usb/dwc3/core.h | 18 ++++++++++++++++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 064123e445664..386ea764d8b2c 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -507,12 +507,18 @@ static int dwc3_core_init(struct dwc3 *dwc)
 
 	reg = dwc3_readl(dwc->regs, DWC3_GSNPSID);
 	/* This should read as U3 followed by revision number */
-	if ((reg & DWC3_GSNPSID_MASK) != 0x55330000) {
+	if ((reg & DWC3_GSNPSID_MASK) == 0x55330000) {
+		/* Detected DWC_usb3 IP */
+		dwc->revision = reg;
+	} else if ((reg & DWC3_GSNPSID_MASK) == 0x33310000) {
+		/* Detected DWC_usb31 IP */
+		dwc->revision = dwc3_readl(dwc->regs, DWC3_VER_NUMBER);
+		dwc->revision |= DWC3_REVISION_IS_DWC31;
+	} else {
 		dev_err(dwc->dev, "this is not a DesignWare USB3 DRD Core\n");
 		ret = -ENODEV;
 		goto err0;
 	}
-	dwc->revision = reg;
 
 	/*
 	 * Write Linux Version Code to our GUID register so it's easy to figure
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 0447788845856..1841dd025ea7a 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -108,6 +108,9 @@
 #define DWC3_GPRTBIMAP_FS0	0xc188
 #define DWC3_GPRTBIMAP_FS1	0xc18c
 
+#define DWC3_VER_NUMBER		0xc1a0
+#define DWC3_VER_TYPE		0xc1a4
+
 #define DWC3_GUSB2PHYCFG(n)	(0xc200 + (n * 0x04))
 #define DWC3_GUSB2I2CCTL(n)	(0xc240 + (n * 0x04))
 
@@ -766,6 +769,14 @@ struct dwc3 {
 	u32			num_event_buffers;
 	u32			u1u2;
 	u32			maximum_speed;
+
+	/*
+	 * All 3.1 IP version constants are greater than the 3.0 IP
+	 * version constants. This works for most version checks in
+	 * dwc3. However, in the future, this may not apply as
+	 * features may be developed on newer versions of the 3.0 IP
+	 * that are not in the 3.1 IP.
+	 */
 	u32			revision;
 
 #define DWC3_REVISION_173A	0x5533173a
@@ -788,6 +799,13 @@ struct dwc3 {
 #define DWC3_REVISION_270A	0x5533270a
 #define DWC3_REVISION_280A	0x5533280a
 
+/*
+ * NOTICE: we're using bit 31 as a "is usb 3.1" flag. This is really
+ * just so dwc31 revisions are always larger than dwc3.
+ */
+#define DWC3_REVISION_IS_DWC31		0x80000000
+#define DWC3_USB31_REVISION_110A	(0x3131302a | DWC3_REVISION_IS_USB31)
+
 	enum dwc3_ep0_next	ep0_next_event;
 	enum dwc3_ep0_state	ep0state;
 	enum dwc3_link_state	link_state;

From a78bd89f08fdaf04c83ce4b4bc2a6cfd90acc15c Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Sat, 26 Sep 2015 00:11:15 -0700
Subject: [PATCH 101/201] usb: dwc3: pci: Add platform data for Synopsys HAPS

commit bb7f3d6d323a56b9c3b3e727380d1395a7f10107 upstream.

Add platform data and set usb3_lpm_capable and has_lpm_erratum.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index db67ed20f6255..0a3ccfc502c89 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -108,6 +108,21 @@ static int dwc3_pci_quirks(struct pci_dev *pdev)
 		}
 	}
 
+	if (pdev->vendor == PCI_VENDOR_ID_SYNOPSYS &&
+	    (pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 ||
+	     pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI ||
+	     pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31)) {
+
+		struct dwc3_platform_data pdata;
+
+		memset(&pdata, 0, sizeof(pdata));
+		pdata.usb3_lpm_capable = true;
+		pdata.has_lpm_erratum = true;
+
+		return platform_device_add_data(pci_get_drvdata(pdev), &pdata,
+						sizeof(pdata));
+	}
+
 	return 0;
 }
 

From 78e46d1ff1d6932628e5ca6bd1648596f9901588 Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Fri, 2 Oct 2015 20:30:57 -0700
Subject: [PATCH 102/201] usb: dwc3: Add dis_enblslpm_quirk

commit ec791d149bca4511e7d3a6a92bb3b030c5a443f9 upstream.

Add a quirk to clear the GUSB2PHYCFG.ENBLSLPM bit, which controls
whether the PHY receives the suspend signal from the controller.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/dwc3.txt | 2 ++
 drivers/usb/dwc3/core.c                        | 6 ++++++
 drivers/usb/dwc3/core.h                        | 4 ++++
 drivers/usb/dwc3/platform_data.h               | 1 +
 4 files changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt
index 0815eac5b1851..e12f3448846a2 100644
--- a/Documentation/devicetree/bindings/usb/dwc3.txt
+++ b/Documentation/devicetree/bindings/usb/dwc3.txt
@@ -35,6 +35,8 @@ Optional properties:
 			LTSSM during USB3 Compliance mode.
  - snps,dis_u3_susphy_quirk: when set core will disable USB3 suspend phy.
  - snps,dis_u2_susphy_quirk: when set core will disable USB2 suspend phy.
+ - snps,dis_enblslpm_quirk: when set clears the enblslpm in GUSB2PHYCFG,
+			disabling the suspend signal to the PHY.
  - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal
 			utmi_l1_suspend_n, false when asserts utmi_sleep_n
  - snps,hird-threshold: HIRD threshold
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 386ea764d8b2c..51156ec91c78c 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -488,6 +488,9 @@ static int dwc3_phy_setup(struct dwc3 *dwc)
 	if (dwc->dis_u2_susphy_quirk)
 		reg &= ~DWC3_GUSB2PHYCFG_SUSPHY;
 
+	if (dwc->dis_enblslpm_quirk)
+		reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM;
+
 	dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg);
 
 	return 0;
@@ -885,6 +888,8 @@ static int dwc3_probe(struct platform_device *pdev)
 				"snps,dis_u3_susphy_quirk");
 		dwc->dis_u2_susphy_quirk = of_property_read_bool(node,
 				"snps,dis_u2_susphy_quirk");
+	dwc->dis_enblslpm_quirk = device_property_read_bool(dev,
+				"snps,dis_enblslpm_quirk");
 
 		dwc->tx_de_emphasis_quirk = of_property_read_bool(node,
 				"snps,tx_de_emphasis_quirk");
@@ -915,6 +920,7 @@ static int dwc3_probe(struct platform_device *pdev)
 		dwc->rx_detect_poll_quirk = pdata->rx_detect_poll_quirk;
 		dwc->dis_u3_susphy_quirk = pdata->dis_u3_susphy_quirk;
 		dwc->dis_u2_susphy_quirk = pdata->dis_u2_susphy_quirk;
+		dwc->dis_enblslpm_quirk = pdata->dis_enblslpm_quirk;
 
 		dwc->tx_de_emphasis_quirk = pdata->tx_de_emphasis_quirk;
 		if (pdata->tx_de_emphasis)
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 1841dd025ea7a..6e53ce9ce3207 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -178,6 +178,7 @@
 #define DWC3_GUSB2PHYCFG_PHYSOFTRST	(1 << 31)
 #define DWC3_GUSB2PHYCFG_SUSPHY		(1 << 6)
 #define DWC3_GUSB2PHYCFG_ULPI_UTMI	(1 << 4)
+#define DWC3_GUSB2PHYCFG_ENBLSLPM	(1 << 8)
 
 /* Global USB2 PHY Vendor Control Register */
 #define DWC3_GUSB2PHYACC_NEWREGREQ	(1 << 25)
@@ -715,6 +716,8 @@ struct dwc3_scratchpad_array {
  * @rx_detect_poll_quirk: set if we enable rx_detect to polling lfps quirk
  * @dis_u3_susphy_quirk: set if we disable usb3 suspend phy
  * @dis_u2_susphy_quirk: set if we disable usb2 suspend phy
+ * @dis_enblslpm_quirk: set if we clear enblslpm in GUSB2PHYCFG,
+ *                      disabling the suspend signal to the PHY.
  * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk
  * @tx_de_emphasis: Tx de-emphasis value
  * 	0	- -6dB de-emphasis
@@ -859,6 +862,7 @@ struct dwc3 {
 	unsigned		rx_detect_poll_quirk:1;
 	unsigned		dis_u3_susphy_quirk:1;
 	unsigned		dis_u2_susphy_quirk:1;
+	unsigned		dis_enblslpm_quirk:1;
 
 	unsigned		tx_de_emphasis_quirk:1;
 	unsigned		tx_de_emphasis:2;
diff --git a/drivers/usb/dwc3/platform_data.h b/drivers/usb/dwc3/platform_data.h
index d3614ecbb9ca2..db29380022604 100644
--- a/drivers/usb/dwc3/platform_data.h
+++ b/drivers/usb/dwc3/platform_data.h
@@ -42,6 +42,7 @@ struct dwc3_platform_data {
 	unsigned rx_detect_poll_quirk:1;
 	unsigned dis_u3_susphy_quirk:1;
 	unsigned dis_u2_susphy_quirk:1;
+	unsigned dis_enblslpm_quirk:1;
 
 	unsigned tx_de_emphasis_quirk:1;
 	unsigned tx_de_emphasis:2;

From 61d93448a08b9d0c194f75269d940a7038a2fab8 Mon Sep 17 00:00:00 2001
From: John Youn <John.Youn@synopsys.com>
Date: Fri, 2 Oct 2015 20:32:17 -0700
Subject: [PATCH 103/201] usb: dwc3: pci: Set enblslpm quirk for Synopsys
 platforms

commit 94218ee31ba56fb3a8625978b393124ad660408e upstream.

Certain Synopsys prototyping PHY boards are not able to meet timings
constraints for LPM. This allows the PHY to meet those timings by
leaving the PHY clock running during suspend.

Signed-off-by: John Youn <johnyoun@synopsys.com>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 0a3ccfc502c89..2cd33cb07ed96 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -118,6 +118,7 @@ static int dwc3_pci_quirks(struct pci_dev *pdev)
 		memset(&pdata, 0, sizeof(pdata));
 		pdata.usb3_lpm_capable = true;
 		pdata.has_lpm_erratum = true;
+		pdata.dis_enblslpm_quirk = true;
 
 		return platform_device_add_data(pci_get_drvdata(pdev), &pdata,
 						sizeof(pdata));

From a46143c2744f7a35554b030295f928b4efdceb33 Mon Sep 17 00:00:00 2001
From: Peter Chen <peter.chen@freescale.com>
Date: Wed, 16 Sep 2015 09:40:51 +0800
Subject: [PATCH 104/201] usb: chipidea: imx: refine clock operations to adapt
 for all platforms

commit ae3e57ae26cdcc85728bb566f999bcb9a7cc6954 upstream.

Some i.mx platforms need three clocks to let controller work, but
others only need one, refine clock operation to adapt for all
platforms, it fixes a regression found at i.mx27.

Signed-off-by: Peter Chen <peter.chen@freescale.com>
Tested-by: Fabio Estevam <fabio.estevam@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/ci_hdrc_imx.c | 131 +++++++++++++++++++++++++----
 1 file changed, 113 insertions(+), 18 deletions(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index dcc50c878159e..ad53aed9b2806 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -73,6 +73,12 @@ struct ci_hdrc_imx_data {
 	struct imx_usbmisc_data *usbmisc_data;
 	bool supports_runtime_pm;
 	bool in_lpm;
+	/* SoC before i.mx6 (except imx23/imx28) needs three clks */
+	bool need_three_clks;
+	struct clk *clk_ipg;
+	struct clk *clk_ahb;
+	struct clk *clk_per;
+	/* --------------------------------- */
 };
 
 /* Common functions shared by usbmisc drivers */
@@ -124,6 +130,102 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev)
 }
 
 /* End of common functions shared by usbmisc drivers*/
+static int imx_get_clks(struct device *dev)
+{
+	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+	int ret = 0;
+
+	data->clk_ipg = devm_clk_get(dev, "ipg");
+	if (IS_ERR(data->clk_ipg)) {
+		/* If the platform only needs one clocks */
+		data->clk = devm_clk_get(dev, NULL);
+		if (IS_ERR(data->clk)) {
+			ret = PTR_ERR(data->clk);
+			dev_err(dev,
+				"Failed to get clks, err=%ld,%ld\n",
+				PTR_ERR(data->clk), PTR_ERR(data->clk_ipg));
+			return ret;
+		}
+		return ret;
+	}
+
+	data->clk_ahb = devm_clk_get(dev, "ahb");
+	if (IS_ERR(data->clk_ahb)) {
+		ret = PTR_ERR(data->clk_ahb);
+		dev_err(dev,
+			"Failed to get ahb clock, err=%d\n", ret);
+		return ret;
+	}
+
+	data->clk_per = devm_clk_get(dev, "per");
+	if (IS_ERR(data->clk_per)) {
+		ret = PTR_ERR(data->clk_per);
+		dev_err(dev,
+			"Failed to get per clock, err=%d\n", ret);
+		return ret;
+	}
+
+	data->need_three_clks = true;
+	return ret;
+}
+
+static int imx_prepare_enable_clks(struct device *dev)
+{
+	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+	int ret = 0;
+
+	if (data->need_three_clks) {
+		ret = clk_prepare_enable(data->clk_ipg);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable ipg clk, err=%d\n",
+				ret);
+			return ret;
+		}
+
+		ret = clk_prepare_enable(data->clk_ahb);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable ahb clk, err=%d\n",
+				ret);
+			clk_disable_unprepare(data->clk_ipg);
+			return ret;
+		}
+
+		ret = clk_prepare_enable(data->clk_per);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable per clk, err=%d\n",
+				ret);
+			clk_disable_unprepare(data->clk_ahb);
+			clk_disable_unprepare(data->clk_ipg);
+			return ret;
+		}
+	} else {
+		ret = clk_prepare_enable(data->clk);
+		if (ret) {
+			dev_err(dev,
+				"Failed to prepare/enable clk, err=%d\n",
+				ret);
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void imx_disable_unprepare_clks(struct device *dev)
+{
+	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
+
+	if (data->need_three_clks) {
+		clk_disable_unprepare(data->clk_per);
+		clk_disable_unprepare(data->clk_ahb);
+		clk_disable_unprepare(data->clk_ipg);
+	} else {
+		clk_disable_unprepare(data->clk);
+	}
+}
 
 static int ci_hdrc_imx_probe(struct platform_device *pdev)
 {
@@ -142,23 +244,18 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
 	if (!data)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, data);
 	data->usbmisc_data = usbmisc_get_init_data(&pdev->dev);
 	if (IS_ERR(data->usbmisc_data))
 		return PTR_ERR(data->usbmisc_data);
 
-	data->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(data->clk)) {
-		dev_err(&pdev->dev,
-			"Failed to get clock, err=%ld\n", PTR_ERR(data->clk));
-		return PTR_ERR(data->clk);
-	}
+	ret = imx_get_clks(&pdev->dev);
+	if (ret)
+		return ret;
 
-	ret = clk_prepare_enable(data->clk);
-	if (ret) {
-		dev_err(&pdev->dev,
-			"Failed to prepare or enable clock, err=%d\n", ret);
+	ret = imx_prepare_enable_clks(&pdev->dev);
+	if (ret)
 		return ret;
-	}
 
 	data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0);
 	if (IS_ERR(data->phy)) {
@@ -201,8 +298,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
 		goto disable_device;
 	}
 
-	platform_set_drvdata(pdev, data);
-
 	if (data->supports_runtime_pm) {
 		pm_runtime_set_active(&pdev->dev);
 		pm_runtime_enable(&pdev->dev);
@@ -215,7 +310,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
 disable_device:
 	ci_hdrc_remove_device(data->ci_pdev);
 err_clk:
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(&pdev->dev);
 	return ret;
 }
 
@@ -229,7 +324,7 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev)
 		pm_runtime_put_noidle(&pdev->dev);
 	}
 	ci_hdrc_remove_device(data->ci_pdev);
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(&pdev->dev);
 
 	return 0;
 }
@@ -241,7 +336,7 @@ static int imx_controller_suspend(struct device *dev)
 
 	dev_dbg(dev, "at %s\n", __func__);
 
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(dev);
 	data->in_lpm = true;
 
 	return 0;
@@ -259,7 +354,7 @@ static int imx_controller_resume(struct device *dev)
 		return 0;
 	}
 
-	ret = clk_prepare_enable(data->clk);
+	ret = imx_prepare_enable_clks(dev);
 	if (ret)
 		return ret;
 
@@ -274,7 +369,7 @@ static int imx_controller_resume(struct device *dev)
 	return 0;
 
 clk_disable:
-	clk_disable_unprepare(data->clk);
+	imx_disable_unprepare_clks(dev);
 	return ret;
 }
 

From 0330a1fbe51c995b5e75c3ea1440b2e4ab68ee8c Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@freescale.com>
Date: Tue, 13 Oct 2015 18:23:31 +0800
Subject: [PATCH 105/201] usb: chipidea: debug: disable usb irq while role
 switch

commit 251b3c8b57481bcecd3f753108e36e7389ce12ac upstream.

Since the ci->role will be set after the host role start is complete, there
will be nobody cared irq during start host if usb irq enabled. This error
can be reproduced on i.mx6 sololite EVK board by:
1. disable otg id irq(IDIE) and disable all real otg properties of usbotg1
   in dts.
2. boot up the board with ID cable and usb device connected.
3. echo gadget > /sys/kernel/debug/ci_hdrc.0/role
4. echo host > /sys/kernel/debug/ci_hdrc.0/role
5. irq 212: nobody cared.

Signed-off-by: Li Jun <jun.li@freescale.com>
Signed-off-by: Peter Chen <peter.chen@freescale.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/debug.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c
index 080b7be3daf03..58c8485a0715a 100644
--- a/drivers/usb/chipidea/debug.c
+++ b/drivers/usb/chipidea/debug.c
@@ -322,8 +322,10 @@ static ssize_t ci_role_write(struct file *file, const char __user *ubuf,
 		return -EINVAL;
 
 	pm_runtime_get_sync(ci->dev);
+	disable_irq(ci->irq);
 	ci_role_stop(ci);
 	ret = ci_role_start(ci, role);
+	enable_irq(ci->irq);
 	pm_runtime_put_sync(ci->dev);
 
 	return ret ? ret : count;

From 5a2fb41430f27c6f6c02246aea1c7d7bfad92cd3 Mon Sep 17 00:00:00 2001
From: Jurgen Kramer <gtmkramer@xs4all.nl>
Date: Mon, 9 Nov 2015 12:13:55 +0100
Subject: [PATCH 106/201] ALSA: usb: Add native DSD support for Aune X1S

commit 16771c7c704769c5f3d70c024630b6e5b3eafa67 upstream.

This patch adds native DSD support for the Aune X1S 32BIT/384 DSD DAC

Signed-off-by: Jurgen Kramer <gtmkramer@xs4all.nl>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/quirks.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 00ebc0ca008e0..528fa6e48293f 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1271,6 +1271,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
 	case USB_ID(0x20b1, 0x000a): /* Gustard DAC-X20U */
 	case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */
 	case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */
+	case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */
 		if (fp->altsetting == 3)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;

From 1207663d5b08f9e1de552ccdf19b014eeb284b28 Mon Sep 17 00:00:00 2001
From: Jonas Gorski <jogo@openwrt.org>
Date: Sun, 23 Aug 2015 15:01:08 +0200
Subject: [PATCH 107/201] usb: ehci-orion: fix probe for !GENERIC_PHY

commit db1319e166c5e872c4be54eac4e47454133708cf upstream.

Commit d445913ce0ab7f ("usb: ehci-orion: add optional PHY support")
added support for optional phys, but devm_phy_optional_get returns
-ENOSYS if GENERIC_PHY is not enabled.

This causes probe failures, even when there are no phys specified:

[    1.443365] orion-ehci f1058000.usb: init f1058000.usb fail, -38
[    1.449403] orion-ehci: probe of f1058000.usb failed with error -38

Similar to dwc3, treat -ENOSYS as no phy.

Fixes: d445913ce0ab7f ("usb: ehci-orion: add optional PHY support")

Signed-off-by: Jonas Gorski <jogo@openwrt.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-orion.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c
index bfcbb9aa8816b..ee8d5faa01947 100644
--- a/drivers/usb/host/ehci-orion.c
+++ b/drivers/usb/host/ehci-orion.c
@@ -224,7 +224,8 @@ static int ehci_orion_drv_probe(struct platform_device *pdev)
 	priv->phy = devm_phy_optional_get(&pdev->dev, "usb");
 	if (IS_ERR(priv->phy)) {
 		err = PTR_ERR(priv->phy);
-		goto err_phy_get;
+		if (err != -ENOSYS)
+			goto err_phy_get;
 	} else {
 		err = phy_init(priv->phy);
 		if (err)

From a0046cfd74f0d5a9556e20dc21393bf58e8bed7f Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Mon, 2 Nov 2015 10:27:00 +0100
Subject: [PATCH 108/201] usblp: do not set TASK_INTERRUPTIBLE before lock

commit 19cd80a214821f4b558560ebd76bfb2c38b4f3d8 upstream.

It is not permitted to set task state before lock. usblp_wwait sets
the state to TASK_INTERRUPTIBLE and calls mutex_lock_interruptible.
Upon return from that function, the state will be TASK_RUNNING again.

This is clearly a bug and a warning is generated with LOCKDEP too:
WARNING: CPU: 1 PID: 5109 at kernel/sched/core.c:7404 __might_sleep+0x7d/0x90()
do not call blocking ops when !TASK_RUNNING; state=1 set at [<ffffffffa0c588d0>] usblp_wwait+0xa0/0x310 [usblp]
Modules linked in: ...
CPU: 1 PID: 5109 Comm: captmon Tainted: G        W       4.2.5-0.gef2823b-default #1
Hardware name: LENOVO 23252SG/23252SG, BIOS G2ET33WW (1.13 ) 07/24/2012
 ffffffff81a4edce ffff880236ec7ba8 ffffffff81716651 0000000000000000
 ffff880236ec7bf8 ffff880236ec7be8 ffffffff8106e146 0000000000000282
 ffffffff81a50119 000000000000028b 0000000000000000 ffff8802dab7c508
Call Trace:
...
 [<ffffffff8106e1c6>] warn_slowpath_fmt+0x46/0x50
 [<ffffffff8109a8bd>] __might_sleep+0x7d/0x90
 [<ffffffff8171b20f>] mutex_lock_interruptible_nested+0x2f/0x4b0
 [<ffffffffa0c588fc>] usblp_wwait+0xcc/0x310 [usblp]
 [<ffffffffa0c58bb2>] usblp_write+0x72/0x350 [usblp]
 [<ffffffff8121ed98>] __vfs_write+0x28/0xf0
...

Commit 7f477358e2384c54b190cc3b6ce28277050a041b (usblp: Implement the
ENOSPC convention) moved the set prior locking. So move it back after
the lock.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Fixes: 7f477358e2 ("usblp: Implement the ENOSPC convention")
Acked-By: Pete Zaitcev <zaitcev@yahoo.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/usblp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 433bbc34a8a48..071964c7847f1 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -884,11 +884,11 @@ static int usblp_wwait(struct usblp *usblp, int nonblock)
 
 	add_wait_queue(&usblp->wwait, &waita);
 	for (;;) {
-		set_current_state(TASK_INTERRUPTIBLE);
 		if (mutex_lock_interruptible(&usblp->mut)) {
 			rc = -EINTR;
 			break;
 		}
+		set_current_state(TASK_INTERRUPTIBLE);
 		rc = usblp_wtest(usblp, nonblock);
 		mutex_unlock(&usblp->mut);
 		if (rc <= 0)

From dbfee9e41918f4c8adb5f27db959c81bb19567a4 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Tue, 27 Oct 2015 23:26:33 +0200
Subject: [PATCH 109/201] usb: phy: omap-otg: fix uninitialized pointer

commit 2c2025b41aeff57963f9ae2dd909fea704c625ab upstream.

otg_dev->extcon was referenced before otg_dev was initialized. Fix.

Fixes: a2fd2423240f ("usb: phy: omap-otg: Replace deprecated API of extcon")
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-omap-otg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c
index 1270906ccb958..c4bf2de6d14ec 100644
--- a/drivers/usb/phy/phy-omap-otg.c
+++ b/drivers/usb/phy/phy-omap-otg.c
@@ -105,7 +105,6 @@ static int omap_otg_probe(struct platform_device *pdev)
 	extcon = extcon_get_extcon_dev(config->extcon);
 	if (!extcon)
 		return -EPROBE_DEFER;
-	otg_dev->extcon = extcon;
 
 	otg_dev = devm_kzalloc(&pdev->dev, sizeof(*otg_dev), GFP_KERNEL);
 	if (!otg_dev)
@@ -115,6 +114,7 @@ static int omap_otg_probe(struct platform_device *pdev)
 	if (IS_ERR(otg_dev->base))
 		return PTR_ERR(otg_dev->base);
 
+	otg_dev->extcon = extcon;
 	otg_dev->id_nb.notifier_call = omap_otg_id_notifier;
 	otg_dev->vbus_nb.notifier_call = omap_otg_vbus_notifier;
 

From 6763f60ff3f0909f6e11ece3eb9634791427acec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20=C5=A0tetiar?= <ynezz@true.cz>
Date: Tue, 3 Nov 2015 11:25:28 +0100
Subject: [PATCH 110/201] USB: qcserial: Add support for Quectel EC20 Mini PCIe
 module
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 9d5b5ed796d7afd7e8d2ac4b4fb77c6a49463f4b upstream.

It seems like this device has same vendor and product IDs as G2K
devices, but it has different number of interfaces(4 vs 5) and also
different interface layout which makes it currently unusable:

	usbcore: registered new interface driver qcserial
	usbserial: USB Serial support registered for Qualcomm USB modem
	usb 2-1.2: unknown number of interfaces: 5

lsusb output:

	Bus 002 Device 003: ID 05c6:9215 Qualcomm, Inc. Acer Gobi 2000 Wireless
	Device Descriptor:
	  bLength                18
	  bDescriptorType         1
	  bcdUSB               2.00
	  bDeviceClass            0 (Defined at Interface level)
	  bDeviceSubClass         0
	  bDeviceProtocol         0
	  bMaxPacketSize0        64
	  idVendor           0x05c6 Qualcomm, Inc.
	  idProduct          0x9215 Acer Gobi 2000 Wireless Modem
	  bcdDevice            2.32
	  iManufacturer           1 Quectel
	  iProduct                2 Quectel LTE Module
	  iSerial                 0
	  bNumConfigurations      1
	  Configuration Descriptor:
	    bLength                 9
	    bDescriptorType         2
	    wTotalLength          209
	    bNumInterfaces          5
	    bConfigurationValue     1
	    iConfiguration          0
	    bmAttributes         0xa0
	      (Bus Powered)
	      Remote Wakeup
	    MaxPower              500mA

Signed-off-by: Petr Štetiar <ynezz@true.cz>
[johan: rename define and add comment ]
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/qcserial.c | 40 +++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index f49d262e926ba..1d7a2a48552c8 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -22,6 +22,8 @@
 #define DRIVER_AUTHOR "Qualcomm Inc"
 #define DRIVER_DESC "Qualcomm USB Serial driver"
 
+#define QUECTEL_EC20_PID	0x9215
+
 /* standard device layouts supported by this driver */
 enum qcserial_layouts {
 	QCSERIAL_G2K = 0,	/* Gobi 2000 */
@@ -169,6 +171,38 @@ static const struct usb_device_id id_table[] = {
 };
 MODULE_DEVICE_TABLE(usb, id_table);
 
+static int handle_quectel_ec20(struct device *dev, int ifnum)
+{
+	int altsetting = 0;
+
+	/*
+	 * Quectel EC20 Mini PCIe LTE module layout:
+	 * 0: DM/DIAG (use libqcdm from ModemManager for communication)
+	 * 1: NMEA
+	 * 2: AT-capable modem port
+	 * 3: Modem interface
+	 * 4: NDIS
+	 */
+	switch (ifnum) {
+	case 0:
+		dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n");
+		break;
+	case 1:
+		dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n");
+		break;
+	case 2:
+	case 3:
+		dev_dbg(dev, "Quectel EC20 Modem port found\n");
+		break;
+	case 4:
+		/* Don't claim the QMI/net interface */
+		altsetting = -1;
+		break;
+	}
+
+	return altsetting;
+}
+
 static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 {
 	struct usb_host_interface *intf = serial->interface->cur_altsetting;
@@ -237,6 +271,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 			altsetting = -1;
 		break;
 	case QCSERIAL_G2K:
+		/* handle non-standard layouts */
+		if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) {
+			altsetting = handle_quectel_ec20(dev, ifnum);
+			goto done;
+		}
+
 		/*
 		 * Gobi 2K+ USB layout:
 		 * 0: QMI/net

From 7413ab071feeb656dcd4b01b8b1b4b8d51313300 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Mon, 16 Nov 2015 13:15:46 +0100
Subject: [PATCH 111/201] USB: qcserial: Fix support for HP lt4112 LTE/HSPA+
 Gobi 4G Modem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 59536da34513c594af2a6fd35ba65ea45b6960a1 upstream.

The DEVICE_HWI type was added under the faulty assumption that Huawei
devices based on Qualcomm chipsets and firmware use the static USB
interface numbering known from Gobi devices.  But this model does
not apply to Huawei devices like the HP branded lt4112 (Huawei me906e).
Huawei firmwares will dynamically assign interface numbers. Functions
are renumbered when the firmware is reconfigured.

Fix by changing the DEVICE_HWI type to use a simplified version
of Huawei's subclass + protocol scheme: Blacklisting known network
interface combinations and assuming the rest are serial.

Reported-and-tested-by: Muri Nicanor <muri+libqmi@immerda.ch>
Tested-by: Martin Hauke <mardnh@gmx.de>
Fixes: e7181d005e84 ("USB: qcserial: Add support for HP lt4112 LTE/HSPA+ Gobi 4G Modem")
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/qcserial.c | 54 ++++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 20 deletions(-)

diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c
index 1d7a2a48552c8..514fa91cf74e4 100644
--- a/drivers/usb/serial/qcserial.c
+++ b/drivers/usb/serial/qcserial.c
@@ -212,6 +212,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 	__u8 ifnum;
 	int altsetting = -1;
 
+	/* we only support vendor specific functions */
+	if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC)
+		goto done;
+
 	nintf = serial->dev->actconfig->desc.bNumInterfaces;
 	dev_dbg(dev, "Num Interfaces = %d\n", nintf);
 	ifnum = intf->desc.bInterfaceNumber;
@@ -337,29 +341,39 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id)
 		break;
 	case QCSERIAL_HWI:
 		/*
-		 * Huawei layout:
-		 * 0: AT-capable modem port
-		 * 1: DM/DIAG
-		 * 2: AT-capable modem port
-		 * 3: CCID-compatible PCSC interface
-		 * 4: QMI/net
-		 * 5: NMEA
+		 * Huawei devices map functions by subclass + protocol
+		 * instead of interface numbers. The protocol identify
+		 * a specific function, while the subclass indicate a
+		 * specific firmware source
+		 *
+		 * This is a blacklist of functions known to be
+		 * non-serial.  The rest are assumed to be serial and
+		 * will be handled by this driver
 		 */
-		switch (ifnum) {
-		case 0:
-		case 2:
-			dev_dbg(dev, "Modem port found\n");
-			break;
-		case 1:
-			dev_dbg(dev, "DM/DIAG interface found\n");
-			break;
-		case 5:
-			dev_dbg(dev, "NMEA GPS interface found\n");
-			break;
-		default:
-			/* don't claim any unsupported interface */
+		switch (intf->desc.bInterfaceProtocol) {
+			/* QMI combined (qmi_wwan) */
+		case 0x07:
+		case 0x37:
+		case 0x67:
+			/* QMI data (qmi_wwan) */
+		case 0x08:
+		case 0x38:
+		case 0x68:
+			/* QMI control (qmi_wwan) */
+		case 0x09:
+		case 0x39:
+		case 0x69:
+			/* NCM like (huawei_cdc_ncm) */
+		case 0x16:
+		case 0x46:
+		case 0x76:
 			altsetting = -1;
 			break;
+		default:
+			dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n",
+				intf->desc.bInterfaceClass,
+				intf->desc.bInterfaceSubClass,
+				intf->desc.bInterfaceProtocol);
 		}
 		break;
 	default:

From b56ffb4c0719b374fbbcb99695fe7b0eea0bd96c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 23 Oct 2015 09:53:50 +0200
Subject: [PATCH 112/201] usb: musb: core: fix order of arguments to ulpi write
 callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 705e63d2b29c8bbf091119084544d353bda70393 upstream.

There is a bit of a mess in the order of arguments to the ulpi write
callback. There is

	int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val)

in drivers/usb/common/ulpi.c;

	struct usb_phy_io_ops {
		...
		int (*write)(struct usb_phy *x, u32 val, u32 reg);
	}

in include/linux/usb/phy.h.

The callback registered by the musb driver has to comply to the latter,
but up to now had "offset" first which effectively made the function
broken for correct users. So flip the order and while at it also
switch to the parameter names of struct usb_phy_io_ops's write.

Fixes: ffb865b1e460 ("usb: musb: add ulpi access operations")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Felipe Balbi <balbi@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 4a518ff123106..2c624a10748d9 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -132,7 +132,7 @@ static inline struct musb *dev_to_musb(struct device *dev)
 /*-------------------------------------------------------------------------*/
 
 #ifndef CONFIG_BLACKFIN
-static int musb_ulpi_read(struct usb_phy *phy, u32 offset)
+static int musb_ulpi_read(struct usb_phy *phy, u32 reg)
 {
 	void __iomem *addr = phy->io_priv;
 	int	i = 0;
@@ -151,7 +151,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset)
 	 * ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM.
 	 */
 
-	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset);
+	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg);
 	musb_writeb(addr, MUSB_ULPI_REG_CONTROL,
 			MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR);
 
@@ -176,7 +176,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset)
 	return ret;
 }
 
-static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data)
+static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg)
 {
 	void __iomem *addr = phy->io_priv;
 	int	i = 0;
@@ -191,8 +191,8 @@ static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data)
 	power &= ~MUSB_POWER_SUSPENDM;
 	musb_writeb(addr, MUSB_POWER, power);
 
-	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset);
-	musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data);
+	musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg);
+	musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val);
 	musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ);
 
 	while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL)

From b56df8407426969354832e5cdbe2001d14e61a13 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Sat, 14 Nov 2015 16:49:30 +0000
Subject: [PATCH 113/201] USB: ti_usb_3410_5052: Add Honeywell HGI80 ID

commit 1bcb49e663f88bccee35b8688e6a3da2bea31fd4 upstream.

The Honeywell HGI80 is a wireless interface to the evohome connected
thermostat. It uses a TI 3410 USB-serial port.

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/ti_usb_3410_5052.c | 2 ++
 drivers/usb/serial/ti_usb_3410_5052.h | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index e9da41d9fe7fc..2694df2f4559b 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -159,6 +159,7 @@ static const struct usb_device_id ti_id_table_3410[] = {
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) },
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) },
 	{ USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) },
+	{ USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) },
 	{ }	/* terminator */
 };
 
@@ -191,6 +192,7 @@ static const struct usb_device_id ti_id_table_combined[] = {
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) },
 	{ USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) },
 	{ USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) },
+	{ USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) },
 	{ }	/* terminator */
 };
 
diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h
index 4a2423e84d558..98f35c656c02d 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.h
+++ b/drivers/usb/serial/ti_usb_3410_5052.h
@@ -56,6 +56,10 @@
 #define ABBOTT_PRODUCT_ID		ABBOTT_STEREO_PLUG_ID
 #define ABBOTT_STRIP_PORT_ID		0x3420
 
+/* Honeywell vendor and product IDs */
+#define HONEYWELL_VENDOR_ID		0x10ac
+#define HONEYWELL_HGI80_PRODUCT_ID	0x0102  /* Honeywell HGI80 */
+
 /* Commands */
 #define TI_GET_VERSION			0x01
 #define TI_GET_PORT_STATUS		0x02

From 0399c7ca984120bbb3f215617e94c82003319e6b Mon Sep 17 00:00:00 2001
From: Aleksander Morgado <aleksander@aleksander.es>
Date: Wed, 11 Nov 2015 19:51:40 +0100
Subject: [PATCH 114/201] USB: serial: option: add support for Novatel MiFi
 USB620L
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit e07af133c3e2716db25e3e1e1d9f10c2088e9c1a upstream.

Also known as Verizon U620L.

The device is modeswitched from 1410:9020 to 1410:9022 by selecting the
4th USB configuration:

 $ sudo usb_modeswitch –v 0x1410 –p 0x9020 –u 4

This configuration provides a ECM interface as well as TTYs ('Enterprise
Mode' according to the U620 Linux integration guide).

Signed-off-by: Aleksander Morgado <aleksander@aleksander.es>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 6956c4f622161..82ae6afdd4d66 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -162,6 +162,7 @@ static void option_instat_callback(struct urb *urb);
 #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED	0x9001
 #define NOVATELWIRELESS_PRODUCT_E362		0x9010
 #define NOVATELWIRELESS_PRODUCT_E371		0x9011
+#define NOVATELWIRELESS_PRODUCT_U620L		0x9022
 #define NOVATELWIRELESS_PRODUCT_G2		0xA010
 #define NOVATELWIRELESS_PRODUCT_MC551		0xB001
 
@@ -1060,6 +1061,7 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) },
+	{ USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) },
 
 	{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) },
 	{ USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) },

From c88860cd6ec20ca7d518273f1bf01f1c0f3b2d80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Wed, 18 Nov 2015 21:12:33 +0100
Subject: [PATCH 115/201] USB: option: add XS Stick W100-2 from 4G Systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 638148e20c7f8f6e95017fdc13bce8549a6925e0 upstream.

Thomas reports
"
4gsystems sells two total different LTE-surfsticks under the same name.
..
The newer version of XS Stick W100 is from "omega"
..
Under windows the driver switches to the same ID, and uses MI03\6 for
network and MI01\6 for modem.
..
echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id
echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id

T:  Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#=  4 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=1c9e ProdID=9b01 Rev=02.32
S:  Manufacturer=USB Modem
S:  Product=USB Modem
S:  SerialNumber=
C:  #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
I:  If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage

Now all important things are there:

wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at)

There is also ttyUSB0, but it is not usable, at least not for at.

The device works well with qmi and ModemManager-NetworkManager.
"

Reported-by: Thomas Schäfer <tschaefer@t-online.de>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/option.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 82ae6afdd4d66..e945b5195258f 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -358,6 +358,7 @@ static void option_instat_callback(struct urb *urb);
 /* This is the 4G XS Stick W14 a.k.a. Mobilcom Debitel Surf-Stick *
  * It seems to contain a Qualcomm QSC6240/6290 chipset            */
 #define FOUR_G_SYSTEMS_PRODUCT_W14		0x9603
+#define FOUR_G_SYSTEMS_PRODUCT_W100		0x9b01
 
 /* iBall 3.5G connect wireless modem */
 #define IBALL_3_5G_CONNECT			0x9605
@@ -523,6 +524,11 @@ static const struct option_blacklist_info four_g_w14_blacklist = {
 	.sendsetup = BIT(0) | BIT(1),
 };
 
+static const struct option_blacklist_info four_g_w100_blacklist = {
+	.sendsetup = BIT(1) | BIT(2),
+	.reserved = BIT(3),
+};
+
 static const struct option_blacklist_info alcatel_x200_blacklist = {
 	.sendsetup = BIT(0) | BIT(1),
 	.reserved = BIT(4),
@@ -1655,6 +1661,9 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14),
   	  .driver_info = (kernel_ulong_t)&four_g_w14_blacklist
   	},
+	{ USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100),
+	  .driver_info = (kernel_ulong_t)&four_g_w100_blacklist
+	},
 	{ USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) },
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) },
 	{ USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) },

From 72655363eb6e000aef420780655f11f1a4c1c2d9 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Wed, 18 Nov 2015 10:48:21 +0200
Subject: [PATCH 116/201] usb: xhci: fix checking ep busy for CFC

commit 42df7215facf27be8d53e657dd4a12d4ebad0a44 upstream.

Function ep_ring_is_processing() checks the dequeue pointer
in endpoint context to know whether an endpoint is busy with
processing TRBs. This is not correct since dequeue pointer
field in an endpoint context is only valid when the endpoint
is in Halted or Stopped states. This buggy code causes audio
noise when playing sound with USB headset connected to host
controllers which support CFC (one of xhci 1.1 features).

This patch should exist in stable kernel since v4.3.

Reported-and-tested-by: YD Tseng <yd_tseng@asmedia.com.tw>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 32 ++++++--------------------------
 1 file changed, 6 insertions(+), 26 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 97ffe39972735..fe9e2d3a223c1 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3914,28 +3914,6 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	return ret;
 }
 
-static int ep_ring_is_processing(struct xhci_hcd *xhci,
-		int slot_id, unsigned int ep_index)
-{
-	struct xhci_virt_device *xdev;
-	struct xhci_ring *ep_ring;
-	struct xhci_ep_ctx *ep_ctx;
-	struct xhci_virt_ep *xep;
-	dma_addr_t hw_deq;
-
-	xdev = xhci->devs[slot_id];
-	xep = &xhci->devs[slot_id]->eps[ep_index];
-	ep_ring = xep->ring;
-	ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index);
-
-	if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) != EP_STATE_RUNNING)
-		return 0;
-
-	hw_deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK;
-	return (hw_deq !=
-		xhci_trb_virt_to_dma(ep_ring->enq_seg, ep_ring->enqueue));
-}
-
 /*
  * Check transfer ring to guarantee there is enough room for the urb.
  * Update ISO URB start_frame and interval.
@@ -4001,10 +3979,12 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags,
 	}
 
 	/* Calculate the start frame and put it in urb->start_frame. */
-	if (HCC_CFC(xhci->hcc_params) &&
-			ep_ring_is_processing(xhci, slot_id, ep_index)) {
-		urb->start_frame = xep->next_frame_id;
-		goto skip_start_over;
+	if (HCC_CFC(xhci->hcc_params) && !list_empty(&ep_ring->td_list)) {
+		if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) ==
+				EP_STATE_RUNNING) {
+			urb->start_frame = xep->next_frame_id;
+			goto skip_start_over;
+		}
 	}
 
 	start_frame = readl(&xhci->run_regs->microframe_index);

From da3e8b3f94a8f2c511dba4192ea994b6abcdaa59 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Sun, 15 Nov 2015 22:37:44 +0100
Subject: [PATCH 117/201] ALSA: usb-audio: add packet size quirk for the Medeli
 DD305

commit 98d362becb6621bebdda7ed0eac7ad7ec6c37898 upstream.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/midi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 417ebb11cf489..4bed82c70256b 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -1341,6 +1341,7 @@ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi,
 		 * Various chips declare a packet size larger than 4 bytes, but
 		 * do not actually work with larger packets:
 		 */
+	case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */
 	case USB_ID(0x0a92, 0x1020): /* ESI M4U */
 	case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */
 	case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */

From a90b6640f8dfea2ab42c9cce3fc1d45bb169ca65 Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Sun, 15 Nov 2015 22:38:29 +0100
Subject: [PATCH 118/201] ALSA: usb-audio: prevent CH345 multiport output SysEx
 corruption

commit 1ca8b201309d842642f221db7f02f71c0af5be2d upstream.

The CH345 USB MIDI chip has two output ports.  However, they are
multiplexed through one pin, and the number of ports cannot be reduced
even for hardware that implements only one connector, so for those
devices, data sent to either port ends up on the same hardware output.
This becomes a problem when both ports are used at the same time, as
longer MIDI commands (such as SysEx messages) are likely to be
interrupted by messages from the other port, and thus to get lost.

It would not be possible for the driver to detect how many ports the
device actually has, except that in practice, _all_ devices built with
the CH345 have only one port.  So we can just ignore the device's
descriptors, and hardcode one output port.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/midi.c         |  3 +++
 sound/usb/quirks-table.h | 11 +++++++++++
 sound/usb/quirks.c       |  1 +
 sound/usb/usbaudio.h     |  1 +
 4 files changed, 16 insertions(+)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 4bed82c70256b..5010aadb2534f 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -2374,6 +2374,9 @@ int snd_usbmidi_create(struct snd_card *card,
 		if (err < 0)
 			break;
 
+		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
+		break;
+	case QUIRK_MIDI_CH345:
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
 	default:
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index e4756651a52c8..ecc2a4ea014d2 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -2820,6 +2820,17 @@ YAMAHA_DEVICE(0x7010, "UB99"),
 	.idProduct = 0x1020,
 },
 
+/* QinHeng devices */
+{
+	USB_DEVICE(0x1a86, 0x752d),
+	.driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+		.vendor_name = "QinHeng",
+		.product_name = "CH345",
+		.ifnum = 1,
+		.type = QUIRK_MIDI_CH345
+	}
+},
+
 /* KeithMcMillen Stringport */
 {
 	USB_DEVICE(0x1f38, 0x0001),
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 528fa6e48293f..eef9b8e4b9498 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -535,6 +535,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip,
 		[QUIRK_MIDI_CME] = create_any_midi_quirk,
 		[QUIRK_MIDI_AKAI] = create_any_midi_quirk,
 		[QUIRK_MIDI_FTDI] = create_any_midi_quirk,
+		[QUIRK_MIDI_CH345] = create_any_midi_quirk,
 		[QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk,
 		[QUIRK_AUDIO_FIXED_ENDPOINT] = create_fixed_stream_quirk,
 		[QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk,
diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h
index 33a176437e2e4..4b4327b45606c 100644
--- a/sound/usb/usbaudio.h
+++ b/sound/usb/usbaudio.h
@@ -94,6 +94,7 @@ enum quirk_type {
 	QUIRK_MIDI_AKAI,
 	QUIRK_MIDI_US122L,
 	QUIRK_MIDI_FTDI,
+	QUIRK_MIDI_CH345,
 	QUIRK_AUDIO_STANDARD_INTERFACE,
 	QUIRK_AUDIO_FIXED_ENDPOINT,
 	QUIRK_AUDIO_EDIROL_UAXX,

From 7327b7fff4e17cfb785b4bc0d7754dd908d6930c Mon Sep 17 00:00:00 2001
From: Clemens Ladisch <clemens@ladisch.de>
Date: Sun, 15 Nov 2015 22:39:08 +0100
Subject: [PATCH 119/201] ALSA: usb-audio: work around CH345 input SysEx
 corruption

commit a91e627e3f0ed820b11d86cdc04df38f65f33a70 upstream.

One of the many faults of the QinHeng CH345 USB MIDI interface chip is
that it does not handle received SysEx messages correctly -- every second
event packet has a wrong code index number, which is the one from the last
seen message, instead of 4.  For example, the two messages "FE F0 01 02 03
04 05 06 07 08 09 0A 0B 0C 0D 0E F7" result in the following event
packets:

correct:       CH345:
0F FE 00 00    0F FE 00 00
04 F0 01 02    04 F0 01 02
04 03 04 05    0F 03 04 05
04 06 07 08    04 06 07 08
04 09 0A 0B    0F 09 0A 0B
04 0C 0D 0E    04 0C 0D 0E
05 F7 00 00    05 F7 00 00

A class-compliant driver must interpret an event packet with CIN 15 as
having a single data byte, so the other two bytes would be ignored.  The
message received by the host would then be missing two bytes out of six;
in this example, "F0 01 02 03 06 07 08 09 0C 0D 0E F7".

These corrupted SysEx event packages contain only data bytes, while the
CH345 uses event packets with a correct CIN value only for messages with
a status byte, so it is possible to distinguish between these two cases by
checking for the presence of this status byte.

(Other bugs in the CH345's input handling, such as the corruption resulting
from running status, cannot be worked around.)

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/usb/midi.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/sound/usb/midi.c b/sound/usb/midi.c
index 5010aadb2534f..bec63e0d26056 100644
--- a/sound/usb/midi.c
+++ b/sound/usb/midi.c
@@ -174,6 +174,8 @@ struct snd_usb_midi_in_endpoint {
 		u8 running_status_length;
 	} ports[0x10];
 	u8 seen_f5;
+	bool in_sysex;
+	u8 last_cin;
 	u8 error_resubmit;
 	int current_port;
 };
@@ -467,6 +469,39 @@ static void snd_usbmidi_maudio_broken_running_status_input(
 		}
 }
 
+/*
+ * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4
+ * but the previously seen CIN, but still with three data bytes.
+ */
+static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep,
+				     uint8_t *buffer, int buffer_length)
+{
+	unsigned int i, cin, length;
+
+	for (i = 0; i + 3 < buffer_length; i += 4) {
+		if (buffer[i] == 0 && i > 0)
+			break;
+		cin = buffer[i] & 0x0f;
+		if (ep->in_sysex &&
+		    cin == ep->last_cin &&
+		    (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0)
+			cin = 0x4;
+#if 0
+		if (buffer[i + 1] == 0x90) {
+			/*
+			 * Either a corrupted running status or a real note-on
+			 * message; impossible to detect reliably.
+			 */
+		}
+#endif
+		length = snd_usbmidi_cin_length[cin];
+		snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length);
+		ep->in_sysex = cin == 0x4;
+		if (!ep->in_sysex)
+			ep->last_cin = cin;
+	}
+}
+
 /*
  * CME protocol: like the standard protocol, but SysEx commands are sent as a
  * single USB packet preceded by a 0x0F byte.
@@ -660,6 +695,12 @@ static struct usb_protocol_ops snd_usbmidi_cme_ops = {
 	.output_packet = snd_usbmidi_output_standard_packet,
 };
 
+static struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = {
+	.input = ch345_broken_sysex_input,
+	.output = snd_usbmidi_standard_output,
+	.output_packet = snd_usbmidi_output_standard_packet,
+};
+
 /*
  * AKAI MPD16 protocol:
  *
@@ -2377,6 +2418,7 @@ int snd_usbmidi_create(struct snd_card *card,
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
 	case QUIRK_MIDI_CH345:
+		umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops;
 		err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints);
 		break;
 	default:

From 9f3de4e4d2150fe75a465fbc15b5c733ba7db5f2 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Tue, 6 Oct 2015 15:12:06 +0100
Subject: [PATCH 120/201] ttyFDC: Fix build problems due to use of
 module_{init,exit}
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 3e8137a185240fa6da0ff91cd9c604716371903b upstream.

Commit 0fd972a7d91d (module: relocate module_init from init.h to
module.h) broke the build of ttyFDC driver due to that driver's (mis)use
of module_mips_cdmm_driver() without first including module.h, for
example:

In file included from ./arch/mips/include/asm/cdmm.h +11 :0,
                 from drivers/tty/mips_ejtag_fdc.c +34 :
include/linux/device.h +1295 :1: warning: data definition has no type or storage class
./arch/mips/include/asm/cdmm.h +84 :2: note: in expansion of macro ‘module_driver’
drivers/tty/mips_ejtag_fdc.c +1157 :1: note: in expansion of macro ‘module_mips_cdmm_driver’
include/linux/device.h +1295 :1: error: type defaults to ‘int’ in declaration of ‘module_init’ [-Werror=implicit-int]
./arch/mips/include/asm/cdmm.h +84 :2: note: in expansion of macro ‘module_driver’
drivers/tty/mips_ejtag_fdc.c +1157 :1: note: in expansion of macro ‘module_mips_cdmm_driver’
drivers/tty/mips_ejtag_fdc.c +1157 :1: warning: parameter names (without types) in function declaration

Instead of just adding the module.h include, switch to using the new
builtin_mips_cdmm_driver() helper macro and drop the remove callback,
since it isn't needed. If module support is added later, the code can
always be resurrected.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: linux-mips@linux-mips.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/mips_ejtag_fdc.c | 35 +----------------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c
index a8c8cfd52a23b..57ff5d3157a65 100644
--- a/drivers/tty/mips_ejtag_fdc.c
+++ b/drivers/tty/mips_ejtag_fdc.c
@@ -1048,38 +1048,6 @@ static int mips_ejtag_fdc_tty_probe(struct mips_cdmm_device *dev)
 	return ret;
 }
 
-static int mips_ejtag_fdc_tty_remove(struct mips_cdmm_device *dev)
-{
-	struct mips_ejtag_fdc_tty *priv = mips_cdmm_get_drvdata(dev);
-	struct mips_ejtag_fdc_tty_port *dport;
-	int nport;
-	unsigned int cfg;
-
-	if (priv->irq >= 0) {
-		raw_spin_lock_irq(&priv->lock);
-		cfg = mips_ejtag_fdc_read(priv, REG_FDCFG);
-		/* Disable interrupts */
-		cfg &= ~(REG_FDCFG_TXINTTHRES | REG_FDCFG_RXINTTHRES);
-		cfg |= REG_FDCFG_TXINTTHRES_DISABLED;
-		cfg |= REG_FDCFG_RXINTTHRES_DISABLED;
-		mips_ejtag_fdc_write(priv, REG_FDCFG, cfg);
-		raw_spin_unlock_irq(&priv->lock);
-	} else {
-		priv->removing = true;
-		del_timer_sync(&priv->poll_timer);
-	}
-	kthread_stop(priv->thread);
-	if (dev->cpu == 0)
-		mips_ejtag_fdc_con.tty_drv = NULL;
-	tty_unregister_driver(priv->driver);
-	for (nport = 0; nport < NUM_TTY_CHANNELS; nport++) {
-		dport = &priv->ports[nport];
-		tty_port_destroy(&dport->port);
-	}
-	put_tty_driver(priv->driver);
-	return 0;
-}
-
 static int mips_ejtag_fdc_tty_cpu_down(struct mips_cdmm_device *dev)
 {
 	struct mips_ejtag_fdc_tty *priv = mips_cdmm_get_drvdata(dev);
@@ -1152,12 +1120,11 @@ static struct mips_cdmm_driver mips_ejtag_fdc_tty_driver = {
 		.name	= "mips_ejtag_fdc",
 	},
 	.probe		= mips_ejtag_fdc_tty_probe,
-	.remove		= mips_ejtag_fdc_tty_remove,
 	.cpu_down	= mips_ejtag_fdc_tty_cpu_down,
 	.cpu_up		= mips_ejtag_fdc_tty_cpu_up,
 	.id_table	= mips_ejtag_fdc_tty_ids,
 };
-module_mips_cdmm_driver(mips_ejtag_fdc_tty_driver);
+builtin_mips_cdmm_driver(mips_ejtag_fdc_tty_driver);
 
 static int __init mips_ejtag_fdc_init_console(void)
 {

From 45de0e37d23272442238afc0be3450494e4c853a Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Sun, 8 Nov 2015 08:52:31 -0500
Subject: [PATCH 121/201] tty: audit: Fix audit source
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 upstream.

The data to audit/record is in the 'from' buffer (ie., the input
read buffer).

Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode")
Cc: Miloslav Trmač <mitr@redhat.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Acked-by: Laura Abbott <labbott@fedoraproject.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c     | 2 +-
 drivers/tty/tty_audit.c | 2 +-
 include/linux/tty.h     | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index b09023b071696..a0285da0244cf 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty,
 {
 	struct n_tty_data *ldata = tty->disc_data;
 
-	tty_audit_add_data(tty, to, n, ldata->icanon);
+	tty_audit_add_data(tty, from, n, ldata->icanon);
 	return copy_to_user(to, from, n);
 }
 
diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index 90ca082935f63..3d245cd3d8e62 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty,
  *
  *	Audit @data of @size from @tty, if necessary.
  */
-void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+void tty_audit_add_data(struct tty_struct *tty, const void *data,
 			size_t size, unsigned icanon)
 {
 	struct tty_audit_buf *buf;
diff --git a/include/linux/tty.h b/include/linux/tty.h
index d072ded416786..9bddda029b459 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -605,7 +605,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops);
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
-extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data,
+extern void tty_audit_add_data(struct tty_struct *tty, const void *data,
 			       size_t size, unsigned icanon);
 extern void tty_audit_exit(void);
 extern void tty_audit_fork(struct signal_struct *sig);
@@ -613,8 +613,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch);
 extern void tty_audit_push(struct tty_struct *tty);
 extern int tty_audit_push_current(void);
 #else
-static inline void tty_audit_add_data(struct tty_struct *tty,
-		unsigned char *data, size_t size, unsigned icanon)
+static inline void tty_audit_add_data(struct tty_struct *tty, const void *data,
+				      size_t size, unsigned icanon)
 {
 }
 static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch)

From 2be56f43e40a65249132d153b14b8c33cb635d4d Mon Sep 17 00:00:00 2001
From: Peter Hurley <peter@hurleysoftware.com>
Date: Wed, 11 Nov 2015 08:03:54 -0500
Subject: [PATCH 122/201] tty: Fix tty_send_xchar() lock order inversion

commit ee0c1a65cf95230d5eb3d9de94fd2ead9a428c67 upstream.

The correct lock order is atomic_write_lock => termios_rwsem, as
established by tty_write() => n_tty_write().

Fixes: c274f6ef1c666 ("tty: Hold termios_rwsem for tcflow(TCIxxx)")
Reported-and-Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c    | 4 ++++
 drivers/tty/tty_ioctl.c | 4 ----
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 2eefaa6e3e3a4..f435977de740d 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -1282,18 +1282,22 @@ int tty_send_xchar(struct tty_struct *tty, char ch)
 	int	was_stopped = tty->stopped;
 
 	if (tty->ops->send_xchar) {
+		down_read(&tty->termios_rwsem);
 		tty->ops->send_xchar(tty, ch);
+		up_read(&tty->termios_rwsem);
 		return 0;
 	}
 
 	if (tty_write_lock(tty, 0) < 0)
 		return -ERESTARTSYS;
 
+	down_read(&tty->termios_rwsem);
 	if (was_stopped)
 		start_tty(tty);
 	tty->ops->write(tty, &ch, 1);
 	if (was_stopped)
 		stop_tty(tty);
+	up_read(&tty->termios_rwsem);
 	tty_write_unlock(tty);
 	return 0;
 }
diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c
index 9c5aebfe7053c..1445dd39aa622 100644
--- a/drivers/tty/tty_ioctl.c
+++ b/drivers/tty/tty_ioctl.c
@@ -1147,16 +1147,12 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file,
 			spin_unlock_irq(&tty->flow_lock);
 			break;
 		case TCIOFF:
-			down_read(&tty->termios_rwsem);
 			if (STOP_CHAR(tty) != __DISABLED_CHAR)
 				retval = tty_send_xchar(tty, STOP_CHAR(tty));
-			up_read(&tty->termios_rwsem);
 			break;
 		case TCION:
-			down_read(&tty->termios_rwsem);
 			if (START_CHAR(tty) != __DISABLED_CHAR)
 				retval = tty_send_xchar(tty, START_CHAR(tty));
-			up_read(&tty->termios_rwsem);
 			break;
 		default:
 			return -EINVAL;

From 7588e627c8891e53e86c600d312cef20acef81e5 Mon Sep 17 00:00:00 2001
From: Rajmohan Mani <rajmohan.mani@intel.com>
Date: Wed, 18 Nov 2015 10:48:20 +0200
Subject: [PATCH 123/201] xhci: Workaround to get Intel xHCI reset working more
 reliably

commit a5964396190d0c40dd549c23848c282fffa5d1f2 upstream.

Existing Intel xHCI controllers require a delay of 1 mS,
after setting the CMD_RESET bit in command register, before
accessing any HC registers. This allows the HC to complete
the reset operation and be ready for HC register access.
Without this delay, the subsequent HC register access,
may result in a system hang, very rarely.

Verified CherryView / Braswell platforms go through over
5000 warm reboot cycles (which was not possible without
this patch), without any xHCI reset hang.

Signed-off-by: Rajmohan Mani <rajmohan.mani@intel.com>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 9957bd96d4bc3..385f9f5d6715f 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -175,6 +175,16 @@ int xhci_reset(struct xhci_hcd *xhci)
 	command |= CMD_RESET;
 	writel(command, &xhci->op_regs->command);
 
+	/* Existing Intel xHCI controllers require a delay of 1 mS,
+	 * after setting the CMD_RESET bit, and before accessing any
+	 * HC registers. This allows the HC to complete the
+	 * reset operation and be ready for HC register access.
+	 * Without this delay, the subsequent HC register access,
+	 * may result in a system hang very rarely.
+	 */
+	if (xhci->quirks & XHCI_INTEL_HOST)
+		udelay(1000);
+
 	ret = xhci_handshake(&xhci->op_regs->command,
 			CMD_RESET, 0, 10 * 1000 * 1000);
 	if (ret)

From b2cb7afbb1693195b3171e85b10c0aa42ff37990 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sun, 27 Sep 2015 16:45:01 -0400
Subject: [PATCH 124/201] staging/lustre: use jiffies for lp_last_query times

commit 9f088dba3cc267ea11ec0da318cd0175575b5f9b upstream.

The recently introduced lnet_peer_set_alive() function uses
get_seconds() to read the current time into a shared variable,
but all other uses of that variable compare it to jiffies values.

This changes the current use to jiffies as well for consistency.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: af3fa7c71bf ("staging/lustre/lnet: peer aliveness status and NI status")
Cc: Liang Zhen <liang.zhen@intel.com>
Cc: James Simmons <uja.ornl@gmail.com>
Cc: Isaac Huang <he.huang@intel.com>
Signed-off-by: Oleg Drokin <oleg.drokin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
index a9c9a077c77d3..bc3d907fd20fd 100644
--- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
+++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h
@@ -680,7 +680,7 @@ void lnet_debug_peer(lnet_nid_t nid);
 static inline void
 lnet_peer_set_alive(lnet_peer_t *lp)
 {
-	lp->lp_last_alive = lp->lp_last_query = get_seconds();
+	lp->lp_last_alive = lp->lp_last_query = jiffies;
 	if (!lp->lp_alive)
 		lnet_notify_locked(lp, 0, 1, lp->lp_last_alive);
 }

From a8adab1a510bbb18be8cf2e35276dc933db814cc Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Fri, 20 Nov 2015 11:25:04 -0500
Subject: [PATCH 125/201] xen/events: Always allocate legacy interrupts on PV
 guests

commit b4ff8389ed14b849354b59ce9b360bdefcdbf99c upstream.

After commit 8c058b0b9c34 ("x86/irq: Probe for PIC presence before
allocating descs for legacy IRQs") early_irq_init() will no longer
preallocate descriptors for legacy interrupts if PIC does not
exist, which is the case for Xen PV guests.

Therefore we may need to allocate those descriptors ourselves.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/include/asm/irq.h       | 5 +++++
 arch/arm64/include/asm/irq.h     | 5 +++++
 drivers/xen/events/events_base.c | 5 +++--
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index be1d07d59ee97..1bd9510de1b9c 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -40,6 +40,11 @@ extern void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x)
 #endif
 
+static inline int nr_legacy_irqs(void)
+{
+	return NR_IRQS_LEGACY;
+}
+
 #endif
 
 #endif
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index bbb251b14746c..8b9bf54105b3e 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -21,4 +21,9 @@ static inline void acpi_irq_init(void)
 }
 #define acpi_irq_init acpi_irq_init
 
+static inline int nr_legacy_irqs(void)
+{
+	return 0;
+}
+
 #endif
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 6cd5e65c4aff0..fb236239972f7 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -39,6 +39,7 @@
 #include <asm/irq.h>
 #include <asm/idle.h>
 #include <asm/io_apic.h>
+#include <asm/i8259.h>
 #include <asm/xen/pci.h>
 #include <xen/page.h>
 #endif
@@ -420,7 +421,7 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
 		return xen_allocate_irq_dynamic();
 
 	/* Legacy IRQ descriptors are already allocated by the arch. */
-	if (gsi < NR_IRQS_LEGACY)
+	if (gsi < nr_legacy_irqs())
 		irq = gsi;
 	else
 		irq = irq_alloc_desc_at(gsi, -1);
@@ -446,7 +447,7 @@ static void xen_free_irq(unsigned irq)
 	kfree(info);
 
 	/* Legacy IRQ descriptors are managed by the arch. */
-	if (irq < NR_IRQS_LEGACY)
+	if (irq < nr_legacy_irqs())
 		return;
 
 	irq_free_desc(irq);

From 5f8b2364ca1fc71022ae7e33b6c517133b8267ea Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 9 Dec 2015 14:40:26 -0500
Subject: [PATCH 126/201] Linux 4.3.1

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index d5b37391195f8..266eeacc14904 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 3
-SUBLEVEL = 0
+SUBLEVEL = 1
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From 6b8b7d4fb9d71db1d78d29ce39323e198049e2c3 Mon Sep 17 00:00:00 2001
From: sudip <sudipm.mukherjee@gmail.com>
Date: Thu, 17 Sep 2015 13:12:51 +0530
Subject: [PATCH 127/201] crypto: asymmetric_keys - remove always false
 comparison

commit 4dd17c9c8a30c8d8cd1c9d4b94f08aca4b038d3e upstream.

hour, min and sec are unsigned int and they can never be less than zero.

Signed-off-by: Sudip Mukherjee <sudip@vectorindia.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/x509_cert_parser.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index af71878dc15bf..3000ea3b66874 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -546,9 +546,9 @@ int x509_decode_time(time64_t *_t,  size_t hdrlen,
 	if (year < 1970 ||
 	    mon < 1 || mon > 12 ||
 	    day < 1 || day > mon_len ||
-	    hour < 0 || hour > 23 ||
-	    min < 0 || min > 59 ||
-	    sec < 0 || sec > 59)
+	    hour > 23 ||
+	    min > 59 ||
+	    sec > 59)
 		goto invalid_time;
 	
 	*_t = mktime64(year, mon, day, hour, min, sec);

From 14a837d297366fed10bc5f6f2bbecf9400e84e15 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 12 Nov 2015 09:36:40 +0000
Subject: [PATCH 128/201] X.509: Fix the time validation [ver #2]

commit cc25b994acfbc901429da682d0f73c190e960206 upstream.

This fixes CVE-2015-5327.  It affects kernels from 4.3-rc1 onwards.

Fix the X.509 time validation to use month number-1 when looking up the
number of days in that month.  Also put the month number validation before
doing the lookup so as not to risk overrunning the array.

This can be tested by doing the following:

cat <<EOF | openssl x509 -outform DER | keyctl padd asymmetric "" @s
-----BEGIN CERTIFICATE-----
MIIDbjCCAlagAwIBAgIJAN/lUld+VR4hMA0GCSqGSIb3DQEBCwUAMCkxETAPBgNV
BAoMCGxvY2FsLWNhMRQwEgYDVQQDDAtzaWduaW5nIGtleTAeFw0xNTA5MDEyMTMw
MThaFw0xNjA4MzEyMTMwMThaMCkxETAPBgNVBAoMCGxvY2FsLWNhMRQwEgYDVQQD
DAtzaWduaW5nIGtleTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANrn
crcMfMeG67nagX4+m02Xk9rkmsMKI5XTUxbikROe7GSUVJ27sPVPZp4mgzoWlvhh
jfK8CC/qhEhwep8Pgg4EJZyWOjhZb7R97ckGvLIoUC6IO3FC2ZnR7WtmWDgo2Jcj
VlXwJdHhKU1VZwulh81O61N8IBKqz2r/kDhIWiicUCUkI/Do/RMRfKAoDBcSh86m
gOeIAGfq62vbiZhVsX5dOE8Oo2TK5weAvwUIOR7OuGBl5AqwFlPnXQolewiHzKry
THg9e44HfzG4Mi6wUvcJxVaQT1h5SrKD779Z5+8+wf1JLaooetcEUArvWyuxCU59
qxA4lsTjBwl4cmEki+cCAwEAAaOBmDCBlTAMBgNVHRMEBTADAQH/MAsGA1UdDwQE
AwIHgDAdBgNVHQ4EFgQUyND/eKUis7ep/hXMJ8iZMdUhI+IwWQYDVR0jBFIwUIAU
yND/eKUis7ep/hXMJ8iZMdUhI+KhLaQrMCkxETAPBgNVBAoMCGxvY2FsLWNhMRQw
EgYDVQQDDAtzaWduaW5nIGtleYIJAN/lUld+VR4hMA0GCSqGSIb3DQEBCwUAA4IB
AQAMqm1N1yD5pimUELLhT5eO2lRdGUfTozljRxc7e2QT3RLk2TtGhg65JFFN6eml
XS58AEPVcAsSLDlR6WpOpOLB2giM0+fV/eYFHHmh22yqTJl4YgkdUwyzPdCHNOZL
hmSKeY9xliHb6PNrNWWtZwhYYvRaO2DX4GXOMR0Oa2O4vaYu6/qGlZOZv3U6qZLY
wwHEJSrqeBDyMuwN+eANHpoSpiBzD77S4e+7hUDJnql4j6xzJ65+nWJ89fCrQypR
4sN5R3aGeIh3QAQUIKpHilwek0CtEaYERgc5m+jGyKSc1rezJW62hWRTaitOc+d5
G5hh+9YpnYcxQHEKnZ7rFNKJ
-----END CERTIFICATE-----
EOF

If it works, it emit a key ID; if it fails, it should give a bad message
error.

Reported-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: Mimi Zohar <zohar@linux.vnet.ibm.com>
Acked-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: James Morris <james.l.morris@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 crypto/asymmetric_keys/x509_cert_parser.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c
index 3000ea3b66874..021d39c0ba75a 100644
--- a/crypto/asymmetric_keys/x509_cert_parser.c
+++ b/crypto/asymmetric_keys/x509_cert_parser.c
@@ -531,7 +531,11 @@ int x509_decode_time(time64_t *_t,  size_t hdrlen,
 	if (*p != 'Z')
 		goto unsupported_time;
 
-	mon_len = month_lengths[mon];
+	if (year < 1970 ||
+	    mon < 1 || mon > 12)
+		goto invalid_time;
+
+	mon_len = month_lengths[mon - 1];
 	if (mon == 2) {
 		if (year % 4 == 0) {
 			mon_len = 29;
@@ -543,14 +547,12 @@ int x509_decode_time(time64_t *_t,  size_t hdrlen,
 		}
 	}
 
-	if (year < 1970 ||
-	    mon < 1 || mon > 12 ||
-	    day < 1 || day > mon_len ||
+	if (day < 1 || day > mon_len ||
 	    hour > 23 ||
 	    min > 59 ||
 	    sec > 59)
 		goto invalid_time;
-	
+
 	*_t = mktime64(year, mon, day, hour, min, sec);
 	return 0;
 

From 14fd7c710c1f9f31ec4d36413c3066092c71aa3d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Thu, 10 Dec 2015 18:11:40 -0500
Subject: [PATCH 129/201] Linux 4.3.2

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 266eeacc14904..1a4953b3e10ff 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 3
-SUBLEVEL = 1
+SUBLEVEL = 2
 EXTRAVERSION =
 NAME = Blurry Fish Butt
 

From bfa97da9146ac6b41f9b749e7dfd0921eeff9170 Mon Sep 17 00:00:00 2001
From: Paul Gortmaker <paul.gortmaker@windriver.com>
Date: Wed, 21 Oct 2015 14:04:47 +0100
Subject: [PATCH 130/201] certs: add .gitignore to stop git nagging about
 x509_certificate_list

commit 48dbc164b40dd9195dea8cd966e394819e420b64 upstream.

Currently we see this in "git status" if we build in the source dir:

Untracked files:
  (use "git add <file>..." to include in what will be committed)

        certs/x509_certificate_list

It looks like it used to live in kernel/ so we squash that .gitignore
entry at the same time.  I didn't bother to dig through git history to
see when it moved, since it is just a minor annoyance at most.

Cc: David Woodhouse <dwmw2@infradead.org>
Cc: keyrings@linux-nfs.org
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 certs/.gitignore  | 4 ++++
 kernel/.gitignore | 1 -
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 certs/.gitignore

diff --git a/certs/.gitignore b/certs/.gitignore
new file mode 100644
index 0000000000000..f51aea4a71ec8
--- /dev/null
+++ b/certs/.gitignore
@@ -0,0 +1,4 @@
+#
+# Generated files
+#
+x509_certificate_list
diff --git a/kernel/.gitignore b/kernel/.gitignore
index 790d83c7d1607..b3097bde4e9cb 100644
--- a/kernel/.gitignore
+++ b/kernel/.gitignore
@@ -5,4 +5,3 @@ config_data.h
 config_data.gz
 timeconst.h
 hz.bc
-x509_certificate_list

From 808e146b0ae32eb615e048c3b87016fcad3dd500 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?fran=C3=A7ois=20romieu?= <romieu@fr.zoreil.com>
Date: Wed, 11 Nov 2015 23:35:18 +0100
Subject: [PATCH 131/201] r8169: fix kasan reported skb use-after-free.

[ Upstream commit 39174291d8e8acfd1113214a943263aaa03c57c8 ]

Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Reported-by: Dave Jones <davej@codemonkey.org.uk>
Fixes: d7d2d89d4b0af ("r8169: Add software counter for multicast packages")
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Corinna Vinschen <vinschen@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/realtek/r8169.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index b4f21232019a9..79ef799f88ab1 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -7429,15 +7429,15 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 
 			rtl8169_rx_vlan_tag(desc, skb);
 
+			if (skb->pkt_type == PACKET_MULTICAST)
+				dev->stats.multicast++;
+
 			napi_gro_receive(&tp->napi, skb);
 
 			u64_stats_update_begin(&tp->rx_stats.syncp);
 			tp->rx_stats.packets++;
 			tp->rx_stats.bytes += pkt_size;
 			u64_stats_update_end(&tp->rx_stats.syncp);
-
-			if (skb->pkt_type == PACKET_MULTICAST)
-				dev->stats.multicast++;
 		}
 release_descriptor:
 		desc->opts2 = 0;

From 382c8e80e40bcadb90ba1a156f522f7b5a22109f Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 10 Nov 2015 16:23:15 +0100
Subject: [PATCH 132/201] af-unix: fix use-after-free with concurrent readers
 while splicing

[ Upstream commit 73ed5d25dce0354ea381d6dc93005c3085fae03d ]

During splicing an af-unix socket to a pipe we have to drop all
af-unix socket locks. While doing so we allow another reader to enter
unix_stream_read_generic which can read, copy and finally free another
skb. If exactly this skb is just in process of being spliced we get a
use-after-free report by kasan.

First, we must make sure to not have a free while the skb is used during
the splice operation. We simply increment its use counter before unlocking
the reader lock.

Stream sockets have the nice characteristic that we don't care about
zero length writes and they never reach the peer socket's queue. That
said, we can take the UNIXCB.consumed field as the indicator if the
skb was already freed from the socket's receive queue. If the skb was
fully consumed after we locked the reader side again we know it has been
dropped by a second reader. We indicate a short read to user space and
abort the current splice operation.

This bug has been found with syzkaller
(http://github.com/google/syzkaller) by Dmitry Vyukov.

Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/unix/af_unix.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 94f658235fb49..a5afe414a2d1d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -440,6 +440,7 @@ static void unix_release_sock(struct sock *sk, int embrion)
 		if (state == TCP_LISTEN)
 			unix_release_sock(skb->sk, 1);
 		/* passed fds are erased in the kfree_skb hook	      */
+		UNIXCB(skb).consumed = skb->len;
 		kfree_skb(skb);
 	}
 
@@ -2071,6 +2072,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 
 	do {
 		int chunk;
+		bool drop_skb;
 		struct sk_buff *skb, *last;
 
 		unix_state_lock(sk);
@@ -2151,7 +2153,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 		}
 
 		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
+		skb_get(skb);
 		chunk = state->recv_actor(skb, skip, chunk, state);
+		drop_skb = !unix_skb_len(skb);
+		/* skb is only safe to use if !drop_skb */
+		consume_skb(skb);
 		if (chunk < 0) {
 			if (copied == 0)
 				copied = -EFAULT;
@@ -2160,6 +2166,18 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 		copied += chunk;
 		size -= chunk;
 
+		if (drop_skb) {
+			/* the skb was touched by a concurrent reader;
+			 * we should not expect anything from this skb
+			 * anymore and assume it invalid - we can be
+			 * sure it was dropped from the socket queue
+			 *
+			 * let's report a short read
+			 */
+			err = 0;
+			break;
+		}
+
 		/* Mark read part of skb as used */
 		if (!(flags & MSG_PEEK)) {
 			UNIXCB(skb).consumed += chunk;

From a78f42950a48ed4f4721b29eb228d709ac91af9b Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 16 Nov 2015 16:25:56 +0100
Subject: [PATCH 133/201] af_unix: don't append consumed skbs to
 sk_receive_queue

[ Upstream commit 8844f97238ca6c1ca92a5d6c69f53efd361a266f ]

In case multiple writes to a unix stream socket race we could end up in a
situation where we pre-allocate a new skb for use in unix_stream_sendpage
but have to free it again in the locked section because another skb
has been appended meanwhile, which we must use. Accidentally we didn't
clear the pointer after consuming it and so we touched freed memory
while appending it to the sk_receive_queue. So, clear the pointer after
consuming the skb.

This bug has been found with syzkaller
(http://github.com/google/syzkaller) by Dmitry Vyukov.

Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/unix/af_unix.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index a5afe414a2d1d..3e2ca39f3b68c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1799,6 +1799,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 		 * this - does no harm
 		 */
 		consume_skb(newskb);
+		newskb = NULL;
 	}
 
 	if (skb_append_pagefrags(skb, page, offset, size)) {

From 26c04dfb4cfa10e6ed8cafd63313fef2be865fa0 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 17 Nov 2015 15:10:59 +0100
Subject: [PATCH 134/201] af_unix: take receive queue lock while appending new
 skb

[ Upstream commit a3a116e04cc6a94d595ead4e956ab1bc1d2f4746 ]

While possibly in future we don't necessarily need to use
sk_buff_head.lock this is a rather larger change, as it affects the
af_unix fd garbage collector, diag and socket cleanups. This is too much
for a stable patch.

For the time being grab sk_buff_head.lock without disabling bh and irqs,
so don't use locked skb_queue_tail.

Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support")
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Reported-by: Eric Dumazet <edumazet@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/unix/af_unix.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3e2ca39f3b68c..42ab2cc1f05d8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1812,8 +1812,11 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	skb->truesize += size;
 	atomic_add(size, &sk->sk_wmem_alloc);
 
-	if (newskb)
+	if (newskb) {
+		spin_lock(&other->sk_receive_queue.lock);
 		__skb_queue_tail(&other->sk_receive_queue, newskb);
+		spin_unlock(&other->sk_receive_queue.lock);
+	}
 
 	unix_state_unlock(other);
 	mutex_unlock(&unix_sk(other)->readlock);

From 58a6a46a036ce81a2a8ecaa6fc1537c894349e3f Mon Sep 17 00:00:00 2001
From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Date: Fri, 20 Nov 2015 22:07:23 +0000
Subject: [PATCH 135/201] unix: avoid use-after-free in ep_remove_wait_queue

[ Upstream commit 7d267278a9ece963d77eefec61630223fce08c6c ]

Rainer Weikusat <rweikusat@mobileactivedefense.com> writes:
An AF_UNIX datagram socket being the client in an n:1 association with
some server socket is only allowed to send messages to the server if the
receive queue of this socket contains at most sk_max_ack_backlog
datagrams. This implies that prospective writers might be forced to go
to sleep despite none of the message presently enqueued on the server
receive queue were sent by them. In order to ensure that these will be
woken up once space becomes again available, the present unix_dgram_poll
routine does a second sock_poll_wait call with the peer_wait wait queue
of the server socket as queue argument (unix_dgram_recvmsg does a wake
up on this queue after a datagram was received). This is inherently
problematic because the server socket is only guaranteed to remain alive
for as long as the client still holds a reference to it. In case the
connection is dissolved via connect or by the dead peer detection logic
in unix_dgram_sendmsg, the server socket may be freed despite "the
polling mechanism" (in particular, epoll) still has a pointer to the
corresponding peer_wait queue. There's no way to forcibly deregister a
wait queue with epoll.

Based on an idea by Jason Baron, the patch below changes the code such
that a wait_queue_t belonging to the client socket is enqueued on the
peer_wait queue of the server whenever the peer receive queue full
condition is detected by either a sendmsg or a poll. A wake up on the
peer queue is then relayed to the ordinary wait queue of the client
socket via wake function. The connection to the peer wait queue is again
dissolved if either a wake up is about to be relayed or the client
socket reconnects or a dead peer is detected or the client socket is
itself closed. This enables removing the second sock_poll_wait from
unix_dgram_poll, thus avoiding the use-after-free, while still ensuring
that no blocked writer sleeps forever.

Signed-off-by: Rainer Weikusat <rweikusat@mobileactivedefense.com>
Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets")
Reviewed-by: Jason Baron <jbaron@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/af_unix.h |   1 +
 net/unix/af_unix.c    | 183 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 165 insertions(+), 19 deletions(-)

diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index b36d837c701ec..2a91a0561a478 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
+	wait_queue_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(const struct sock *sk)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 42ab2cc1f05d8..153b2f2bbbb8b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,118 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
 	return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+				      void *key)
+{
+	struct unix_sock *u;
+	wait_queue_head_t *u_sleep;
+
+	u = container_of(q, struct unix_sock, peer_wake);
+
+	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+			    q);
+	u->peer_wake.private = NULL;
+
+	/* relaying can only happen while the wq still exists */
+	u_sleep = sk_sleep(&u->sk);
+	if (u_sleep)
+		wake_up_interruptible_poll(u_sleep, key);
+
+	return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (!u->peer_wake.private) {
+		u->peer_wake.private = other;
+		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+					    struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (u->peer_wake.private == other) {
+		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+		u->peer_wake.private = NULL;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+						   struct sock *other)
+{
+	unix_dgram_peer_wake_disconnect(sk, other);
+	wake_up_interruptible_poll(sk_sleep(sk),
+				   POLLOUT |
+				   POLLWRNORM |
+				   POLLWRBAND);
+}
+
+/* preconditions:
+ *	- unix_peer(sk) == other
+ *	- association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+	int connected;
+
+	connected = unix_dgram_peer_wake_connect(sk, other);
+
+	if (unix_recvq_full(other))
+		return 1;
+
+	if (connected)
+		unix_dgram_peer_wake_disconnect(sk, other);
+
+	return 0;
+}
+
 static inline int unix_writable(struct sock *sk)
 {
 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
@@ -430,6 +542,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
 			skpair->sk_state_change(skpair);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 		}
+
+		unix_dgram_peer_wake_disconnect(sk, skpair);
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
 	}
@@ -665,6 +779,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
+	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
@@ -1032,6 +1147,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
 		unix_peer(sk) = other;
+		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
 		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
@@ -1471,6 +1588,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	struct scm_cookie scm;
 	int max_level;
 	int data_len = 0;
+	int sk_locked;
 
 	wait_for_unix_gc();
 	err = scm_send(sock, msg, &scm, false);
@@ -1549,12 +1667,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 		goto out_free;
 	}
 
+	sk_locked = 0;
 	unix_state_lock(other);
+restart_locked:
 	err = -EPERM;
 	if (!unix_may_send(sk, other))
 		goto out_unlock;
 
-	if (sock_flag(other, SOCK_DEAD)) {
+	if (unlikely(sock_flag(other, SOCK_DEAD))) {
 		/*
 		 *	Check with 1003.1g - what should
 		 *	datagram error
@@ -1562,10 +1682,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 		unix_state_unlock(other);
 		sock_put(other);
 
+		if (!sk_locked)
+			unix_state_lock(sk);
+
 		err = 0;
-		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
 			unix_peer(sk) = NULL;
+			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
@@ -1591,21 +1715,38 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 			goto out_unlock;
 	}
 
-	if (unix_peer(other) != sk && unix_recvq_full(other)) {
-		if (!timeo) {
-			err = -EAGAIN;
-			goto out_unlock;
+	if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+		if (timeo) {
+			timeo = unix_wait_for_peer(other, timeo);
+
+			err = sock_intr_errno(timeo);
+			if (signal_pending(current))
+				goto out_free;
+
+			goto restart;
 		}
 
-		timeo = unix_wait_for_peer(other, timeo);
+		if (!sk_locked) {
+			unix_state_unlock(other);
+			unix_state_double_lock(sk, other);
+		}
 
-		err = sock_intr_errno(timeo);
-		if (signal_pending(current))
-			goto out_free;
+		if (unix_peer(sk) != other ||
+		    unix_dgram_peer_wake_me(sk, other)) {
+			err = -EAGAIN;
+			sk_locked = 1;
+			goto out_unlock;
+		}
 
-		goto restart;
+		if (!sk_locked) {
+			sk_locked = 1;
+			goto restart_locked;
+		}
 	}
 
+	if (unlikely(sk_locked))
+		unix_state_unlock(sk);
+
 	if (sock_flag(other, SOCK_RCVTSTAMP))
 		__net_timestamp(skb);
 	maybe_add_creds(skb, sock, other);
@@ -1619,6 +1760,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	return len;
 
 out_unlock:
+	if (sk_locked)
+		unix_state_unlock(sk);
 	unix_state_unlock(other);
 out_free:
 	kfree_skb(skb);
@@ -2475,14 +2618,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 		return mask;
 
 	writable = unix_writable(sk);
-	other = unix_peer_get(sk);
-	if (other) {
-		if (unix_peer(other) != sk) {
-			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-			if (unix_recvq_full(other))
-				writable = 0;
-		}
-		sock_put(other);
+	if (writable) {
+		unix_state_lock(sk);
+
+		other = unix_peer(sk);
+		if (other && unix_peer(other) != sk &&
+		    unix_recvq_full(other) &&
+		    unix_dgram_peer_wake_me(sk, other))
+			writable = 0;
+
+		unix_state_unlock(sk);
 	}
 
 	if (writable)

From 343f4f8c013a6e41ba51da7f00c3e5c95197fa59 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Thu, 26 Nov 2015 12:08:18 +0100
Subject: [PATCH 136/201] af-unix: passcred support for sendpage

[ Upstream commit 9490f886b192964796285907d777ff00fba1fa0f ]

sendpage did not care about credentials at all. This could lead to
situations in which because of fd passing between processes we could
append data to skbs with different scm data. It is illegal to splice those
skbs together. Instead we have to allocate a new skb and if requested
fill out the scm details.

Fixes: 869e7c62486ec ("net: af_unix: implement stream sendpage support")
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/unix/af_unix.c | 79 +++++++++++++++++++++++++++++++++++++---------
 1 file changed, 64 insertions(+), 15 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 153b2f2bbbb8b..128b0982c96b3 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1550,6 +1550,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen
 	return err;
 }
 
+static bool unix_passcred_enabled(const struct socket *sock,
+				  const struct sock *other)
+{
+	return test_bit(SOCK_PASSCRED, &sock->flags) ||
+	       !other->sk_socket ||
+	       test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
+}
+
 /*
  * Some apps rely on write() giving SCM_CREDENTIALS
  * We include credentials if source or destination socket
@@ -1560,14 +1568,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
 {
 	if (UNIXCB(skb).pid)
 		return;
-	if (test_bit(SOCK_PASSCRED, &sock->flags) ||
-	    !other->sk_socket ||
-	    test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
+	if (unix_passcred_enabled(sock, other)) {
 		UNIXCB(skb).pid  = get_pid(task_tgid(current));
 		current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
 	}
 }
 
+static int maybe_init_creds(struct scm_cookie *scm,
+			    struct socket *socket,
+			    const struct sock *other)
+{
+	int err;
+	struct msghdr msg = { .msg_controllen = 0 };
+
+	err = scm_send(socket, &msg, scm, false);
+	if (err)
+		return err;
+
+	if (unix_passcred_enabled(socket, other)) {
+		scm->pid = get_pid(task_tgid(current));
+		current_uid_gid(&scm->creds.uid, &scm->creds.gid);
+	}
+	return err;
+}
+
+static bool unix_skb_scm_eq(struct sk_buff *skb,
+			    struct scm_cookie *scm)
+{
+	const struct unix_skb_parms *u = &UNIXCB(skb);
+
+	return u->pid == scm->pid &&
+	       uid_eq(u->uid, scm->creds.uid) &&
+	       gid_eq(u->gid, scm->creds.gid) &&
+	       unix_secdata_eq(scm, skb);
+}
+
 /*
  *	Send AF_UNIX data.
  */
@@ -1883,8 +1918,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 				    int offset, size_t size, int flags)
 {
-	int err = 0;
-	bool send_sigpipe = true;
+	int err;
+	bool send_sigpipe = false;
+	bool init_scm = true;
+	struct scm_cookie scm;
 	struct sock *other, *sk = socket->sk;
 	struct sk_buff *skb, *newskb = NULL, *tail = NULL;
 
@@ -1902,7 +1939,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
 					      &err, 0);
 		if (!newskb)
-			return err;
+			goto err;
 	}
 
 	/* we must acquire readlock as we modify already present
@@ -1911,12 +1948,12 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	err = mutex_lock_interruptible(&unix_sk(other)->readlock);
 	if (err) {
 		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
-		send_sigpipe = false;
 		goto err;
 	}
 
 	if (sk->sk_shutdown & SEND_SHUTDOWN) {
 		err = -EPIPE;
+		send_sigpipe = true;
 		goto err_unlock;
 	}
 
@@ -1925,17 +1962,27 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	if (sock_flag(other, SOCK_DEAD) ||
 	    other->sk_shutdown & RCV_SHUTDOWN) {
 		err = -EPIPE;
+		send_sigpipe = true;
 		goto err_state_unlock;
 	}
 
+	if (init_scm) {
+		err = maybe_init_creds(&scm, socket, other);
+		if (err)
+			goto err_state_unlock;
+		init_scm = false;
+	}
+
 	skb = skb_peek_tail(&other->sk_receive_queue);
 	if (tail && tail == skb) {
 		skb = newskb;
-	} else if (!skb) {
-		if (newskb)
+	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
+		if (newskb) {
 			skb = newskb;
-		else
+		} else {
+			tail = skb;
 			goto alloc_skb;
+		}
 	} else if (newskb) {
 		/* this is fast path, we don't necessarily need to
 		 * call to kfree_skb even though with newskb == NULL
@@ -1956,6 +2003,9 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	atomic_add(size, &sk->sk_wmem_alloc);
 
 	if (newskb) {
+		err = unix_scm_to_skb(&scm, skb, false);
+		if (err)
+			goto err_state_unlock;
 		spin_lock(&other->sk_receive_queue.lock);
 		__skb_queue_tail(&other->sk_receive_queue, newskb);
 		spin_unlock(&other->sk_receive_queue.lock);
@@ -1965,7 +2015,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	mutex_unlock(&unix_sk(other)->readlock);
 
 	other->sk_data_ready(other);
-
+	scm_destroy(&scm);
 	return size;
 
 err_state_unlock:
@@ -1976,6 +2026,8 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
 	kfree_skb(newskb);
 	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
 		send_sig(SIGPIPE, current, 0);
+	if (!init_scm)
+		scm_destroy(&scm);
 	return err;
 }
 
@@ -2279,10 +2331,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state)
 
 		if (check_creds) {
 			/* Never glue messages from different writers */
-			if ((UNIXCB(skb).pid  != scm.pid) ||
-			    !uid_eq(UNIXCB(skb).uid, scm.creds.uid) ||
-			    !gid_eq(UNIXCB(skb).gid, scm.creds.gid) ||
-			    !unix_secdata_eq(&scm, skb))
+			if (!unix_skb_scm_eq(skb, &scm))
 				break;
 		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
 			/* Copy credentials */

From fdaac6a92778b95ad41e65b6f9a88b454899d8d4 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 Nov 2015 11:51:06 -0800
Subject: [PATCH 137/201] ipv6: Avoid creating RTF_CACHE from a rt that is not
 managed by fib6 tree

[ Upstream commit 0d3f6d297bfb7af24d0508460fdb3d1ec4903fa3 ]

The original bug report:
https://bugzilla.redhat.com/show_bug.cgi?id=1272571

The setup has a IPv4 GRE tunnel running in a IPSec.  The bug
happens when ndisc starts sending router solicitation at the gre
interface.  The simplified oops stack is like:

__lock_acquire+0x1b2/0x1c30
lock_acquire+0xb9/0x140
_raw_write_lock_bh+0x3f/0x50
__ip6_ins_rt+0x2e/0x60
ip6_ins_rt+0x49/0x50
~~~~~~~~
__ip6_rt_update_pmtu.part.54+0x145/0x250
ip6_rt_update_pmtu+0x2e/0x40
~~~~~~~~
ip_tunnel_xmit+0x1f1/0xf40
__gre_xmit+0x7a/0x90
ipgre_xmit+0x15a/0x220
dev_hard_start_xmit+0x2bd/0x480
__dev_queue_xmit+0x696/0x730
dev_queue_xmit+0x10/0x20
neigh_direct_output+0x11/0x20
ip6_finish_output2+0x21f/0x770
ip6_finish_output+0xa7/0x1d0
ip6_output+0x56/0x190
~~~~~~~~
ndisc_send_skb+0x1d9/0x400
ndisc_send_rs+0x88/0xc0
~~~~~~~~

The rt passed to ip6_rt_update_pmtu() is created by
icmp6_dst_alloc() and it is not managed by the fib6 tree,
so its rt6i_table == NULL.  When __ip6_rt_update_pmtu() creates
a RTF_CACHE clone, the newly created clone also has rt6i_table == NULL
and it causes the ip6_ins_rt() oops.

During pmtu update, we only want to create a RTF_CACHE clone
from a rt which is currently managed (or owned) by the
fib6 tree.  It means either rt->rt6i_node != NULL or
rt is a RTF_PCPU clone.

It is worth to note that rt6i_table may not be NULL even it is
not (yet) managed by the fib6 tree (e.g. addrconf_dst_alloc()).
Hence, rt6i_node is a better check instead of rt6i_table.

Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reported-by: Chris Siebenmann <cks-rhbugzilla@cs.toronto.edu>
Cc: Chris Siebenmann <cks-rhbugzilla@cs.toronto.edu>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/route.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 946880ad48acd..711ec7a46ec52 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1340,6 +1340,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
 	rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires);
 }
 
+static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
+{
+	return !(rt->rt6i_flags & RTF_CACHE) &&
+		(rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+}
+
 static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 				 const struct ipv6hdr *iph, u32 mtu)
 {
@@ -1353,7 +1359,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
 	if (mtu >= dst_mtu(dst))
 		return;
 
-	if (rt6->rt6i_flags & RTF_CACHE) {
+	if (!rt6_cache_allowed_for_pmtu(rt6)) {
 		rt6_do_update_pmtu(rt6, mtu);
 	} else {
 		const struct in6_addr *daddr, *saddr;

From dd91a7e65f8008ac989d61131670949feec08d23 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 Nov 2015 11:51:07 -0800
Subject: [PATCH 138/201] ipv6: Check expire on DST_NOCACHE route

[ Upstream commit 5973fb1e245086071bf71994c8b54d99526ded03 ]

Since the expires of the DST_NOCACHE rt can be set during
the ip6_rt_update_pmtu(), we also need to consider the expires
value when doing ip6_dst_check().

This patches creates __rt6_check_expired() to only
check the expire value (if one exists) of the current rt.

In rt6_dst_from_check(), it adds __rt6_check_expired() as
one of the condition check.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/route.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 711ec7a46ec52..ea892c19a1ba1 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -403,6 +403,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	}
 }
 
+static bool __rt6_check_expired(const struct rt6_info *rt)
+{
+	if (rt->rt6i_flags & RTF_EXPIRES)
+		return time_after(jiffies, rt->dst.expires);
+	else
+		return false;
+}
+
 static bool rt6_check_expired(const struct rt6_info *rt)
 {
 	if (rt->rt6i_flags & RTF_EXPIRES) {
@@ -1270,7 +1278,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
 
 static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
 {
-	if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
+	if (!__rt6_check_expired(rt) &&
+	    rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
 	    rt6_check((struct rt6_info *)(rt->dst.from), cookie))
 		return &rt->dst;
 	else

From 4ab43ae83f90543998b1640fb2cb556ce1a604c0 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Wed, 11 Nov 2015 11:51:08 -0800
Subject: [PATCH 139/201] ipv6: Check rt->dst.from for the DST_NOCACHE route

[ Upstrem commit 02bcf4e082e4dc634409a6a6cb7def8806d6e5e6 ]

All DST_NOCACHE rt6_info used to have rt->dst.from set to
its parent.

After commit 8e3d5be73681 ("ipv6: Avoid double dst_free"),
DST_NOCACHE is also set to rt6_info which does not have
a parent (i.e. rt->dst.from is NULL).

This patch catches the rt->dst.from == NULL case.

Fixes: 8e3d5be73681 ("ipv6: Avoid double dst_free")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip6_fib.h | 3 ++-
 net/ipv6/route.c      | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index aaf9700fc9e5f..fb961a576abe4 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
 
 static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 {
-	if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU ||
+	    (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from))
 		rt = (struct rt6_info *)(rt->dst.from);
 
 	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ea892c19a1ba1..d3773269d6e04 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1299,7 +1299,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	rt6_dst_from_metrics_check(rt);
 
-	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU ||
+	    (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from))
 		return rt6_dst_from_check(rt, cookie);
 	else
 		return rt6_check(rt, cookie);

From ea9b07516f19f4c6fc14697cb6c70bb9b74e3bfc Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Fri, 27 Nov 2015 18:17:05 +0100
Subject: [PATCH 140/201] Revert "ipv6: ndisc: inherit metadata dst when
 creating ndisc requests"

[ Upstream commit 304d888b29cf96f1dd53511ee686499cd8cdf249 ]

This reverts commit ab450605b35caa768ca33e86db9403229bf42be4.

In IPv6, we cannot inherit the dst of the original dst. ndisc packets
are IPv6 packets and may take another route than the original packet.

This patch breaks the following scenario: a packet comes from eth0 and
is forwarded through vxlan1. The encapsulated packet triggers an NS
which cannot be sent because of the wrong route.

CC: Jiri Benc <jbenc@redhat.com>
CC: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ndisc.h |  3 +--
 net/ipv6/addrconf.c |  2 +-
 net/ipv6/ndisc.c    | 10 +++-------
 net/ipv6/route.c    |  2 +-
 4 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index aba5695fadb00..b3a7751251b4c 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -182,8 +182,7 @@ int ndisc_rcv(struct sk_buff *skb);
 
 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		   const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr,
-		   struct sk_buff *oskb);
+		   const struct in6_addr *daddr, const struct in6_addr *saddr);
 
 void ndisc_send_rs(struct net_device *dev,
 		   const struct in6_addr *saddr, const struct in6_addr *daddr);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index dd00828863a0c..3939dd290c448 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3628,7 +3628,7 @@ static void addrconf_dad_work(struct work_struct *w)
 
 	/* send a neighbour solicitation for our addr */
 	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
-	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL);
+	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
 out:
 	in6_ifa_put(ifp);
 	rtnl_unlock();
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 64a71354b069d..9ad46cd7930d5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -553,8 +553,7 @@ static void ndisc_send_unsol_na(struct net_device *dev)
 
 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		   const struct in6_addr *solicit,
-		   const struct in6_addr *daddr, const struct in6_addr *saddr,
-		   struct sk_buff *oskb)
+		   const struct in6_addr *daddr, const struct in6_addr *saddr)
 {
 	struct sk_buff *skb;
 	struct in6_addr addr_buf;
@@ -590,9 +589,6 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
 		ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR,
 				       dev->dev_addr);
 
-	if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb)
-		skb_dst_copy(skb, oskb);
-
 	ndisc_send_skb(skb, daddr, saddr);
 }
 
@@ -679,12 +675,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
 				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",
 				  __func__, target);
 		}
-		ndisc_send_ns(dev, neigh, target, target, saddr, skb);
+		ndisc_send_ns(dev, neigh, target, target, saddr);
 	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {
 		neigh_app_ns(neigh);
 	} else {
 		addrconf_addr_solict_mult(target, &mcaddr);
-		ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb);
+		ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
 	}
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d3773269d6e04..fd0e6746d0cfc 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -546,7 +546,7 @@ static void rt6_probe_deferred(struct work_struct *w)
 		container_of(w, struct __rt6_probe_work, work);
 
 	addrconf_addr_solict_mult(&work->target, &mcaddr);
-	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL);
+	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
 	dev_put(work->dev);
 	kfree(work);
 }

From 55eabd378b2e07bc2b677f07fd142365be68cb2b Mon Sep 17 00:00:00 2001
From: Kamal Mostafa <kamal@canonical.com>
Date: Wed, 11 Nov 2015 14:24:27 -0800
Subject: [PATCH 141/201] tools/net: Use include/uapi with __EXPORTED_HEADERS__

[ Upstream commit d7475de58575c904818efa369c82e88c6648ce2e ]

Use the local uapi headers to keep in sync with "recently" added #define's
(e.g. SKF_AD_VLAN_TPID).  Refactored CFLAGS, and bpf_asm doesn't need -I.

Fixes: 3f356385e8a4 ("filter: bpf_asm: add minimal bpf asm tool")
Signed-off-by: Kamal Mostafa <kamal@canonical.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/net/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/net/Makefile b/tools/net/Makefile
index ee577ea03ba50..ddf8880106524 100644
--- a/tools/net/Makefile
+++ b/tools/net/Makefile
@@ -4,6 +4,9 @@ CC = gcc
 LEX = flex
 YACC = bison
 
+CFLAGS += -Wall -O2
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
 %.yacc.c: %.y
 	$(YACC) -o $@ -d $<
 
@@ -12,15 +15,13 @@ YACC = bison
 
 all : bpf_jit_disasm bpf_dbg bpf_asm
 
-bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm'
 bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
 bpf_jit_disasm : bpf_jit_disasm.o
 
-bpf_dbg : CFLAGS = -Wall -O2
 bpf_dbg : LDLIBS = -lreadline
 bpf_dbg : bpf_dbg.o
 
-bpf_asm : CFLAGS = -Wall -O2 -I.
 bpf_asm : LDLIBS =
 bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
 bpf_exp.lex.o : bpf_exp.yacc.c

From 1a43c235781426c2b4368bffb6478c379c4529aa Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:40 +0100
Subject: [PATCH 142/201] packet: do skb_probe_transport_header when we
 actually have data

[ Upstream commit efdfa2f7848f64517008136fb41f53c4a1faf93a ]

In tpacket_fill_skb() commit c1aad275b029 ("packet: set transport
header before doing xmit") and later on 40893fd0fd4e ("net: switch
to use skb_probe_transport_header()") was probing for a transport
header on the skb from a ring buffer slot, but at a time, where
the skb has _not even_ been filled with data yet. So that call into
the flow dissector is pretty useless. Lets do it after we've set
up the skb frags.

Fixes: c1aad275b029 ("packet: set transport header before doing xmit")
Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 27b2898f275c7..be038c91cf994 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2368,8 +2368,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, 0);
 	if (unlikely(po->tp_tx_has_off)) {
 		int off_min, off_max, off;
 		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2449,6 +2447,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
+	if (!packet_use_direct_xmit(po))
+		skb_probe_transport_header(skb, 0);
+
 	return tp_len;
 }
 

From 6b4a14f4265267d8d1ff6b2465e0be19da0c256a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:41 +0100
Subject: [PATCH 143/201] packet: always probe for transport header

[ Upstream commit 8fd6c80d9dd938ca338c70698533a7e304752846 ]

We concluded that the skb_probe_transport_header() should better be
called unconditionally. Avoiding the call into the flow dissector has
also not really much to do with the direct xmit mode.

While it seems that only virtio_net code makes use of GSO from non
RX/TX ring packet socket paths, we should probe for a transport header
nevertheless before they hit devices.

Reference: http://thread.gmane.org/gmane.linux.network/386173/
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index be038c91cf994..3059f518f298c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2447,8 +2447,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		len = ((to_write > len_max) ? len_max : to_write);
 	}
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, 0);
+	skb_probe_transport_header(skb, 0);
 
 	return tp_len;
 }
@@ -2800,8 +2799,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 		len += vnet_hdr_len;
 	}
 
-	if (!packet_use_direct_xmit(po))
-		skb_probe_transport_header(skb, reserve);
+	skb_probe_transport_header(skb, reserve);
+
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
 

From bbf14f8bb1041b5c11b9ccf998a075a03f04de3d Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:42 +0100
Subject: [PATCH 144/201] packet: only allow extra vlan len on ethernet devices

[ Upstream commit 3c70c132488794e2489ab045559b0ce0afcf17de ]

Packet sockets can be used by various net devices and are not
really restricted to ARPHRD_ETHER device types. However, when
currently checking for the extra 4 bytes that can be transmitted
in VLAN case, our assumption is that we generally probe on
ARPHRD_ETHER devices. Therefore, before looking into Ethernet
header, check the device type first.

This also fixes the issue where non-ARPHRD_ETHER devices could
have no dev->hard_header_len in TX_RING SOCK_RAW case, and thus
the check would test unfilled linear part of the skb (instead
of non-linear).

Fixes: 57f89bfa2140 ("network: Allow af_packet to transmit +4 bytes for VLAN packets.")
Fixes: 52f1454f629f ("packet: allow to transmit +4 byte in TX_RING slot for VLAN case")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 60 ++++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3059f518f298c..6e3cd2fe392ae 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk)
 		kfree_rcu(po->rollover, rcu);
 }
 
+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+					  struct sk_buff *skb)
+{
+	/* Earlier code assumed this would be a VLAN pkt, double-check
+	 * this now that we have the actual packet in hand. We can only
+	 * do this check on Ethernet devices.
+	 */
+	if (unlikely(dev->type != ARPHRD_ETHER))
+		return false;
+
+	skb_reset_mac_header(skb);
+	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
 static const struct proto_ops packet_ops;
 
 static const struct proto_ops packet_ops_spkt;
@@ -1902,18 +1916,10 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
 		goto retry;
 	}
 
-	if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_unlock;
-		}
+	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_unlock;
 	}
 
 	skb->protocol = proto;
@@ -2525,18 +2531,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
-		    tp_len > dev->mtu + dev->hard_header_len) {
-			struct ethhdr *ehdr;
-			/* Earlier code assumed this would be a VLAN pkt,
-			 * double-check this now that we have the actual
-			 * packet in hand.
-			 */
+		    tp_len > dev->mtu + dev->hard_header_len &&
+		    !packet_extra_vlan_len_allowed(dev, skb))
+			tp_len = -EMSGSIZE;
 
-			skb_reset_mac_header(skb);
-			ehdr = eth_hdr(skb);
-			if (ehdr->h_proto != htons(ETH_P_8021Q))
-				tp_len = -EMSGSIZE;
-		}
 		if (unlikely(tp_len < 0)) {
 			if (po->tp_loss) {
 				__packet_set_status(po, ph,
@@ -2757,18 +2755,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 
 	sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
 
-	if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
-		/* Earlier code assumed this would be a VLAN pkt,
-		 * double-check this now that we have the actual
-		 * packet in hand.
-		 */
-		struct ethhdr *ehdr;
-		skb_reset_mac_header(skb);
-		ehdr = eth_hdr(skb);
-		if (ehdr->h_proto != htons(ETH_P_8021Q)) {
-			err = -EMSGSIZE;
-			goto out_free;
-		}
+	if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+	    !packet_extra_vlan_len_allowed(dev, skb)) {
+		err = -EMSGSIZE;
+		goto out_free;
 	}
 
 	skb->protocol = proto;

From 263c8f423ade0e7bd72ea82e36da3a458fac1e30 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:43 +0100
Subject: [PATCH 145/201] packet: infer protocol from ethernet header if unset

[ Upstream commit c72219b75fde768efccf7666342282fab7f9e4e7 ]

In case no struct sockaddr_ll has been passed to packet
socket's sendmsg() when doing a TX_RING flush run, then
skb->protocol is set to po->num instead, which is the protocol
passed via socket(2)/bind(2).

Applications only xmitting can go the path of allocating the
socket as socket(PF_PACKET, <mode>, 0) and do a bind(2) on the
TX_RING with sll_protocol of 0. That way, register_prot_hook()
is neither called on creation nor on bind time, which saves
cycles when there's no interest in capturing anyway.

That leaves us however with po->num 0 instead and therefore
the TX_RING flush run sets skb->protocol to 0 as well. Eric
reported that this leads to problems when using tools like
trafgen over bonding device. I.e. the bonding's hash function
could invoke the kernel's flow dissector, which depends on
skb->protocol being properly set. In the current situation, all
the traffic is then directed to a single slave.

Fix it up by inferring skb->protocol from the Ethernet header
when not set and we have ARPHRD_ETHER device type. This is only
done in case of SOCK_RAW and where we have a dev->hard_header_len
length. In case of ARPHRD_ETHER devices, this is guaranteed to
cover ETH_HLEN, and therefore being accessed on the skb after
the skb_store_bits().

Reported-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6e3cd2fe392ae..45d419656309d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2338,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len)
 	return false;
 }
 
+static void tpacket_set_protocol(const struct net_device *dev,
+				 struct sk_buff *skb)
+{
+	if (dev->type == ARPHRD_ETHER) {
+		skb_reset_mac_header(skb);
+		skb->protocol = eth_hdr(skb)->h_proto;
+	}
+}
+
 static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 		void *frame, struct net_device *dev, int size_max,
 		__be16 proto, unsigned char *addr, int hlen)
@@ -2419,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 				dev->hard_header_len);
 		if (unlikely(err))
 			return err;
+		if (!skb->protocol)
+			tpacket_set_protocol(dev, skb);
 
 		data += dev->hard_header_len;
 		to_write -= dev->hard_header_len;

From 377204f209fd51a24a359873ad44a97e4b5b72c8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 11 Nov 2015 23:25:44 +0100
Subject: [PATCH 146/201] packet: fix tpacket_snd max frame len

[ Upstream commit 5cfb4c8d05b4409c4044cb9c05b19705c1d9818b ]

Since it's introduction in commit 69e3c75f4d54 ("net: TX_RING and
packet mmap"), TX_RING could be used from SOCK_DGRAM and SOCK_RAW
side. When used with SOCK_DGRAM only, the size_max > dev->mtu +
reserve check should have reserve as 0, but currently, this is
unconditionally set (in it's original form as dev->hard_header_len).

I think this is not correct since tpacket_fill_skb() would then
take dev->mtu and dev->hard_header_len into account for SOCK_DGRAM,
the extra VLAN_HLEN could be possible in both cases. Presumably, the
reserve code was copied from packet_snd(), but later on missed the
check. Make it similar as we have it in packet_snd().

Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 45d419656309d..4695a36eeca35 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2510,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 	if (unlikely(!(dev->flags & IFF_UP)))
 		goto out_put;
 
-	reserve = dev->hard_header_len + VLAN_HLEN;
+	if (po->sk.sk_socket->type == SOCK_RAW)
+		reserve = dev->hard_header_len;
 	size_max = po->tx_ring.frame_size
 		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
 
-	if (size_max > dev->mtu + reserve)
-		size_max = dev->mtu + reserve;
+	if (size_max > dev->mtu + reserve + VLAN_HLEN)
+		size_max = dev->mtu + reserve + VLAN_HLEN;
 
 	do {
 		ph = packet_current_frame(po, &po->tx_ring,
@@ -2542,7 +2543,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
 		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
 					  addr, hlen);
 		if (likely(tp_len >= 0) &&
-		    tp_len > dev->mtu + dev->hard_header_len &&
+		    tp_len > dev->mtu + reserve &&
 		    !packet_extra_vlan_len_allowed(dev, skb))
 			tp_len = -EMSGSIZE;
 

From 176cec782335b95d7830bce4a7e6a1fe0b644594 Mon Sep 17 00:00:00 2001
From: lucien <lucien.xin@gmail.com>
Date: Thu, 12 Nov 2015 13:07:07 +0800
Subject: [PATCH 147/201] sctp: translate host order to network order when
 setting a hmacid

[ Upstream commit ed5a377d87dc4c87fb3e1f7f698cba38cd893103 ]

now sctp auth cannot work well when setting a hmacid manually, which
is caused by that we didn't use the network order for hmacid, so fix
it by adding the transformation in sctp_auth_ep_set_hmacs.

even we set hmacid with the network order in userspace, it still
can't work, because of this condition in sctp_auth_ep_set_hmacs():

		if (id > SCTP_AUTH_HMAC_ID_MAX)
			return -EOPNOTSUPP;

so this wasn't working before and thus it won't break compatibility.

Fixes: 65b07e5d0d09 ("[SCTP]: API updates to suport SCTP-AUTH extensions.")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vyasevich@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/auth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index 4f15b7d730e13..1543e39f47c33 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
 	if (!has_sha1)
 		return -EINVAL;
 
-	memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
-		hmacs->shmac_num_idents * sizeof(__u16));
+	for (i = 0; i < hmacs->shmac_num_idents; i++)
+		ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
 	ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
 				hmacs->shmac_num_idents * sizeof(__u16));
 	return 0;

From 713a813ed2fab144e436a9a3e64445a441d9846d Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:26 +0200
Subject: [PATCH 148/201] net/mlx5e: Added self loopback prevention

[ Upstream commit 66189961e986e53ae39822898fc2ce88f44c61bb ]

Prevent outgoing multicast frames from looping back to the RX queue.

By introducing new HW capability self_lb_en_modifiable, which indicates
the support to modify self_lb_en bit in modify_tir command.

When this capability is set we can prevent TIRs from sending back
loopback multicast traffic to their own RQs, by "refreshing TIRs" with
modify_tir command, on every time new channels (SQs/RQs) are created at
device open.
This is needed since TIRs are static and only allocated once on driver
load, and the loopback decision is under their responsibility.

Fixes issues of the kind:
"IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!"
The issue is seen since the IPv6 solicitations multicast messages are
loopedback and the network stack thinks they are coming from another host.

Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 56 ++++++++++++++++++-
 include/linux/mlx5/mlx5_ifc.h                 | 24 ++++----
 2 files changed, 68 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 59874d666cfff..443632df20101 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt)
 	return err;
 }
 
+static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev,
+						  u32 tirn)
+{
+	void *in;
+	int inlen;
+	int err;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_tir_in);
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
+
+	err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+
+static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < MLX5E_NUM_TT; i++) {
+		err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev,
+							     priv->tirn[i]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int mlx5e_set_dev_port_mtu(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -1367,13 +1403,20 @@ int mlx5e_open_locked(struct net_device *netdev)
 
 	err = mlx5e_set_dev_port_mtu(netdev);
 	if (err)
-		return err;
+		goto err_clear_state_opened_flag;
 
 	err = mlx5e_open_channels(priv);
 	if (err) {
 		netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n",
 			   __func__, err);
-		return err;
+		goto err_clear_state_opened_flag;
+	}
+
+	err = mlx5e_refresh_tirs_self_loopback_enable(priv);
+	if (err) {
+		netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
+			   __func__, err);
+		goto err_close_channels;
 	}
 
 	mlx5e_update_carrier(priv);
@@ -1382,6 +1425,12 @@ int mlx5e_open_locked(struct net_device *netdev)
 	schedule_delayed_work(&priv->update_stats_work, 0);
 
 	return 0;
+
+err_close_channels:
+	mlx5e_close_channels(priv);
+err_clear_state_opened_flag:
+	clear_bit(MLX5E_STATE_OPENED, &priv->state);
+	return err;
 }
 
 static int mlx5e_open(struct net_device *netdev)
@@ -1899,6 +1948,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
 			       "Not creating net device, some required device capabilities are missing\n");
 		return -ENOTSUPP;
 	}
+	if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable))
+		mlx5_core_warn(mdev, "Self loop back prevention is not supported\n");
+
 	return 0;
 }
 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index dd2097455a2e3..1565324eb620c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         lro_cap[0x1];
 	u8         lro_psh_flag[0x1];
 	u8         lro_time_stamp[0x1];
-	u8         reserved_0[0x6];
+	u8         reserved_0[0x3];
+	u8         self_lb_en_modifiable[0x1];
+	u8         reserved_1[0x2];
 	u8         max_lso_cap[0x5];
-	u8         reserved_1[0x4];
+	u8         reserved_2[0x4];
 	u8         rss_ind_tbl_cap[0x4];
-	u8         reserved_2[0x3];
+	u8         reserved_3[0x3];
 	u8         tunnel_lso_const_out_ip_id[0x1];
-	u8         reserved_3[0x2];
+	u8         reserved_4[0x2];
 	u8         tunnel_statless_gre[0x1];
 	u8         tunnel_stateless_vxlan[0x1];
 
-	u8         reserved_4[0x20];
+	u8         reserved_5[0x20];
 
-	u8         reserved_5[0x10];
+	u8         reserved_6[0x10];
 	u8         lro_min_mss_size[0x10];
 
-	u8         reserved_6[0x120];
+	u8         reserved_7[0x120];
 
 	u8         lro_timer_supported_periods[4][0x20];
 
-	u8         reserved_7[0x600];
+	u8         reserved_8[0x600];
 };
 
 struct mlx5_ifc_roce_cap_bits {
@@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits {
 };
 
 struct mlx5_ifc_modify_tir_bitmask_bits {
-	u8	   reserved[0x20];
+	u8	   reserved_0[0x20];
 
-	u8         reserved1[0x1f];
+	u8         reserved_1[0x1b];
+	u8         self_lb_en[0x1];
+	u8         reserved_2[0x3];
 	u8         lro[0x1];
 };
 

From 289defb1e093535624376d7098c5364f8804fdc7 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 12 Nov 2015 19:35:29 +0200
Subject: [PATCH 149/201] net/mlx4_core: Fix sleeping while holding spinlock at
 rem_slave_counters

[ Upstream commit f5adbfee72282bb1f456d52b04adacd4fe6ac502 ]

When cleaning slave's counter resources, we hold a spinlock that
protects the slave's counters list. As part of the clean, we call
__mlx4_clear_if_stat which calls mlx4_alloc_cmd_mailbox which is a
sleepable function.

In order to fix this issue, hold the spinlock, and copy all counter
indices into a temporary array, and release the spinlock. Afterwards,
iterate over this array and free every counter. Repeat this scenario
until the original list is empty (a new counter might have been added
while releasing the counters from the temporary array).

Fixes: b72ca7e96acf ("net/mlx4_core: Reset counters data when freed")
Reported-by: Moni Shoua <monis@mellanox.com>
Tested-by: Moni Shoua <monis@mellanox.com>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ethernet/mellanox/mlx4/resource_tracker.c | 39 +++++++++++++------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 731423ca575da..8bead97373abd 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -4934,26 +4934,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave)
 	struct res_counter *counter;
 	struct res_counter *tmp;
 	int err;
-	int index;
+	int *counters_arr = NULL;
+	int i, j;
 
 	err = move_all_busy(dev, slave, RES_COUNTER);
 	if (err)
 		mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n",
 			  slave);
 
-	spin_lock_irq(mlx4_tlock(dev));
-	list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
-		if (counter->com.owner == slave) {
-			index = counter->com.res_id;
-			rb_erase(&counter->com.node,
-				 &tracker->res_tree[RES_COUNTER]);
-			list_del(&counter->com.list);
-			kfree(counter);
-			__mlx4_counter_free(dev, index);
+	counters_arr = kmalloc_array(dev->caps.max_counters,
+				     sizeof(*counters_arr), GFP_KERNEL);
+	if (!counters_arr)
+		return;
+
+	do {
+		i = 0;
+		j = 0;
+		spin_lock_irq(mlx4_tlock(dev));
+		list_for_each_entry_safe(counter, tmp, counter_list, com.list) {
+			if (counter->com.owner == slave) {
+				counters_arr[i++] = counter->com.res_id;
+				rb_erase(&counter->com.node,
+					 &tracker->res_tree[RES_COUNTER]);
+				list_del(&counter->com.list);
+				kfree(counter);
+			}
+		}
+		spin_unlock_irq(mlx4_tlock(dev));
+
+		while (j < i) {
+			__mlx4_counter_free(dev, counters_arr[j++]);
 			mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0);
 		}
-	}
-	spin_unlock_irq(mlx4_tlock(dev));
+	} while (i);
+
+	kfree(counters_arr);
 }
 
 static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave)

From 5c81f50e7fe60ed1d0820820df63f9193c6d5d0b Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Thu, 12 Nov 2015 17:35:58 +0100
Subject: [PATCH 150/201] ip_tunnel: disable preemption when updating per-cpu
 tstats

[ Upstream commit b4fe85f9c9146f60457e9512fb6055e69e6a7a65 ]

Drivers like vxlan use the recently introduced
udp_tunnel_xmit_skb/udp_tunnel6_xmit_skb APIs. udp_tunnel6_xmit_skb
makes use of ip6tunnel_xmit, and ip6tunnel_xmit, after sending the
packet, updates the struct stats using the usual
u64_stats_update_begin/end calls on this_cpu_ptr(dev->tstats).
udp_tunnel_xmit_skb makes use of iptunnel_xmit, which doesn't touch
tstats, so drivers like vxlan, immediately after, call
iptunnel_xmit_stats, which does the same thing - calls
u64_stats_update_begin/end on this_cpu_ptr(dev->tstats).

While vxlan is probably fine (I don't know?), calling a similar function
from, say, an unbound workqueue, on a fully preemptable kernel causes
real issues:

[  188.434537] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u8:0/6
[  188.435579] caller is debug_smp_processor_id+0x17/0x20
[  188.435583] CPU: 0 PID: 6 Comm: kworker/u8:0 Not tainted 4.2.6 #2
[  188.435607] Call Trace:
[  188.435611]  [<ffffffff8234e936>] dump_stack+0x4f/0x7b
[  188.435615]  [<ffffffff81915f3d>] check_preemption_disabled+0x19d/0x1c0
[  188.435619]  [<ffffffff81915f77>] debug_smp_processor_id+0x17/0x20

The solution would be to protect the whole
this_cpu_ptr(dev->tstats)/u64_stats_update_begin/end blocks with
disabling preemption and then reenabling it.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ip6_tunnel.h | 3 ++-
 include/net/ip_tunnels.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index fa915fa0f703d..d49a8f8fae459 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb,
 	err = ip6_local_out_sk(sk, skb);
 
 	if (net_xmit_eval(err) == 0) {
-		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+		struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats);
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->tx_bytes += pkt_len;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+		put_cpu_ptr(tstats);
 	} else {
 		stats->tx_errors++;
 		stats->tx_aborted_errors++;
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index f6dafec9102c5..62a750a6a8f8c 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err,
 				       struct pcpu_sw_netstats __percpu *stats)
 {
 	if (err > 0) {
-		struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats);
+		struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats);
 
 		u64_stats_update_begin(&tstats->syncp);
 		tstats->tx_bytes += err;
 		tstats->tx_packets++;
 		u64_stats_update_end(&tstats->syncp);
+		put_cpu_ptr(tstats);
 	} else if (err < 0) {
 		err_stats->tx_errors++;
 		err_stats->tx_aborted_errors++;

From 5eceaad8488b24f98ada6d5dfd5a6ec2df875d07 Mon Sep 17 00:00:00 2001
From: Dragos Tatulea <dragos@endocode.com>
Date: Mon, 16 Nov 2015 10:52:48 +0100
Subject: [PATCH 151/201] net: switchdev: fix return code of fdb_dump stub

[ Upstream commit 24cb7055a3066634a0f3fa0cd6a4780652905d35 ]

rtnl_fdb_dump always expects an index to be returned by the ndo_fdb_dump op,
but when CONFIG_NET_SWITCHDEV is off, it returns an error.

Fix that by returning the given unmodified idx.

A similar fix was 0890cf6cb6ab ("switchdev: fix return value of
switchdev_port_fdb_dump in case of error") but for the CONFIG_NET_SWITCHDEV=y
case.

Fixes: 45d4122ca7cd ("switchdev: add support for fdb add/del/dump via switchdev_port_obj ops.")
Signed-off-by: Dragos Tatulea <dragos@endocode.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/switchdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 319baab3b48ef..731c40e34bf25 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -272,7 +272,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb,
 					  struct net_device *filter_dev,
 					  int idx)
 {
-	return -EOPNOTSUPP;
+       return idx;
 }
 
 static inline void switchdev_port_fwd_mark_set(struct net_device *dev,

From ce2ceca37c846767abceee497f8adbb3328a8dd9 Mon Sep 17 00:00:00 2001
From: Pavel Fedin <p.fedin@samsung.com>
Date: Mon, 16 Nov 2015 17:51:34 +0300
Subject: [PATCH 152/201] net: thunder: Check for driver data in nicvf_remove()

[ Upstream commit 7750130d93decff06120df0d8ea024ff8a038a21 ]

In some cases the crash is caused by nicvf_remove() being called from
outside. For example, if we try to feed the device to vfio after the
probe has failed for some reason. So, move the check to better place.

Signed-off-by: Pavel Fedin <p.fedin@samsung.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index a9377727c11c3..7f709cbdcd87d 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -1583,8 +1583,14 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 static void nicvf_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct nicvf *nic = netdev_priv(netdev);
-	struct net_device *pnetdev = nic->pnicvf->netdev;
+	struct nicvf *nic;
+	struct net_device *pnetdev;
+
+	if (!netdev)
+		return;
+
+	nic = netdev_priv(netdev);
+	pnetdev = nic->pnicvf->netdev;
 
 	/* Check if this Qset is assigned to different VF.
 	 * If yes, clean primary and all secondary Qsets.

From 3f7a8d274e209d0e5dba464618a78c37ec00e899 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 16 Nov 2015 13:09:10 -0500
Subject: [PATCH 153/201] snmp: Remove duplicate OUTMCAST stat increment

[ Upstream commit 41033f029e393a64e81966cbe34d66c6cf8a2e7e ]

the OUTMCAST stat is double incremented, getting bumped once in the mcast code
itself, and again in the common ip output path.  Remove the mcast bump, as its
not needed

Validated by the reporter, with good results

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Reported-by: Claus Jensen <claus.jensen@microsemi.com>
CC: Claus Jensen <claus.jensen@microsemi.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/mcast.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 083b2927fc67a..41e3b5ee8d0b6 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1651,7 +1651,6 @@ static void mld_sendpack(struct sk_buff *skb)
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
 	} else {
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 	}
@@ -2014,7 +2013,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
-		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
 	} else
 		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
 

From d603fa768b63905317e9cf2cfb647ddff03d9715 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 18 Nov 2015 16:40:19 +0100
Subject: [PATCH 154/201] net/ip6_tunnel: fix dst leak

[ Upstream commit 206b49500df558dbc15d8836b09f6397ec5ed8bb ]

the commit cdf3464e6c6b ("ipv6: Fix dst_entry refcnt bugs in ip6_tunnel")
introduced percpu storage for ip6_tunnel dst cache, but while clearing
such cache it used raw_cpu_ptr to walk the per cpu entries, so cached
dst on non current cpu are not actually reset.

This patch replaces raw_cpu_ptr with per_cpu_ptr, properly cleaning
such storage.

Fixes: cdf3464e6c6b ("ipv6: Fix dst_entry refcnt bugs in ip6_tunnel")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index eabffbb89795d..137fca42aaa6b 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t)
 	int i;
 
 	for_each_possible_cpu(i)
-		ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL);
+		ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
 }
 EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset);
 

From e8afa3cd3e6dfe713e0d7f8ca8062aea50a14725 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= <bjorn@mork.no>
Date: Wed, 18 Nov 2015 21:13:07 +0100
Subject: [PATCH 155/201] net: qmi_wwan: add XS Stick W100-2 from 4G Systems
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 68242a5a1e2edce39b069385cbafb82304eac0f1 ]

Thomas reports
"
4gsystems sells two total different LTE-surfsticks under the same name.
..
The newer version of XS Stick W100 is from "omega"
..
Under windows the driver switches to the same ID, and uses MI03\6 for
network and MI01\6 for modem.
..
echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id
echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id

T:  Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#=  4 Spd=480 MxCh= 0
D:  Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs=  1
P:  Vendor=1c9e ProdID=9b01 Rev=02.32
S:  Manufacturer=USB Modem
S:  Product=USB Modem
S:  SerialNumber=
C:  #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA
I:  If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
I:  If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan
I:  If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage

Now all important things are there:

wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at)

There is also ttyUSB0, but it is not usable, at least not for at.

The device works well with qmi and ModemManager-NetworkManager.
"

Reported-by: Thomas Schäfer <tschaefer@t-online.de>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 2a7c1be23c4f2..66e0853d1680d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -775,6 +775,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x2357, 0x9000, 4)},	/* TP-LINK MA260 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1200, 5)},	/* Telit LE920 */
 	{QMI_FIXED_INTF(0x1bc7, 0x1201, 2)},	/* Telit LE920 */
+	{QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)},	/* XS Stick W100-2 from 4G Systems */
 	{QMI_FIXED_INTF(0x0b3c, 0xc000, 4)},	/* Olivetti Olicard 100 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc001, 4)},	/* Olivetti Olicard 120 */
 	{QMI_FIXED_INTF(0x0b3c, 0xc002, 4)},	/* Olivetti Olicard 140 */

From 4ef5bc914bbd40811ebc0514d84175d303fb8b4f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Nov 2015 12:40:13 -0800
Subject: [PATCH 156/201] tcp: md5: fix lockdep annotation

[ Upstream commit 1b8e6a01e19f001e9f93b39c32387961c91ed3cc ]

When a passive TCP is created, we eventually call tcp_md5_do_add()
with sk pointing to the child. It is not owner by the user yet (we
will add this socket into listener accept queue a bit later anyway)

But we do own the spinlock, so amend the lockdep annotation to avoid
following splat :

[ 8451.090932] net/ipv4/tcp_ipv4.c:923 suspicious rcu_dereference_protected() usage!
[ 8451.090932]
[ 8451.090932] other info that might help us debug this:
[ 8451.090932]
[ 8451.090934]
[ 8451.090934] rcu_scheduler_active = 1, debug_locks = 1
[ 8451.090936] 3 locks held by socket_sockopt_/214795:
[ 8451.090936]  #0:  (rcu_read_lock){.+.+..}, at: [<ffffffff855c6ac1>] __netif_receive_skb_core+0x151/0xe90
[ 8451.090947]  #1:  (rcu_read_lock){.+.+..}, at: [<ffffffff85618143>] ip_local_deliver_finish+0x43/0x2b0
[ 8451.090952]  #2:  (slock-AF_INET){+.-...}, at: [<ffffffff855acda5>] sk_clone_lock+0x1c5/0x500
[ 8451.090958]
[ 8451.090958] stack backtrace:
[ 8451.090960] CPU: 7 PID: 214795 Comm: socket_sockopt_

[ 8451.091215] Call Trace:
[ 8451.091216]  <IRQ>  [<ffffffff856fb29c>] dump_stack+0x55/0x76
[ 8451.091229]  [<ffffffff85123b5b>] lockdep_rcu_suspicious+0xeb/0x110
[ 8451.091235]  [<ffffffff8564544f>] tcp_md5_do_add+0x1bf/0x1e0
[ 8451.091239]  [<ffffffff85645751>] tcp_v4_syn_recv_sock+0x1f1/0x4c0
[ 8451.091242]  [<ffffffff85642b27>] ? tcp_v4_md5_hash_skb+0x167/0x190
[ 8451.091246]  [<ffffffff85647c78>] tcp_check_req+0x3c8/0x500
[ 8451.091249]  [<ffffffff856451ae>] ? tcp_v4_inbound_md5_hash+0x11e/0x190
[ 8451.091253]  [<ffffffff85647170>] tcp_v4_rcv+0x3c0/0x9f0
[ 8451.091256]  [<ffffffff85618143>] ? ip_local_deliver_finish+0x43/0x2b0
[ 8451.091260]  [<ffffffff856181b6>] ip_local_deliver_finish+0xb6/0x2b0
[ 8451.091263]  [<ffffffff85618143>] ? ip_local_deliver_finish+0x43/0x2b0
[ 8451.091267]  [<ffffffff85618d38>] ip_local_deliver+0x48/0x80
[ 8451.091270]  [<ffffffff85618510>] ip_rcv_finish+0x160/0x700
[ 8451.091273]  [<ffffffff8561900e>] ip_rcv+0x29e/0x3d0
[ 8451.091277]  [<ffffffff855c74b7>] __netif_receive_skb_core+0xb47/0xe90

Fixes: a8afca0329988 ("tcp: md5: protects md5sig_info with RCU")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_ipv4.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 93898e093d4e6..a7739c83aa84a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -922,7 +922,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
 	}
 
 	md5sig = rcu_dereference_protected(tp->md5sig_info,
-					   sock_owned_by_user(sk));
+					   sock_owned_by_user(sk) ||
+					   lockdep_is_held(&sk->sk_lock.slock));
 	if (!md5sig) {
 		md5sig = kmalloc(sizeof(*md5sig), gfp);
 		if (!md5sig)

From 6f5aa0f4aa406332521bde78e2b6290886f3961a Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 18 Nov 2015 18:17:30 -0800
Subject: [PATCH 157/201] tcp: disable Fast Open on timeouts after handshake

[ Upstream commit 0e45f4da5981895e885dd72fe912a3f8e32bae73 ]

Some middle-boxes black-hole the data after the Fast Open handshake
(https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf).
The exact reason is unknown. The work-around is to disable Fast Open
temporarily after multiple recurring timeouts with few or no data
delivered in the established state.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_timer.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 7149ebc820c7d..04f0a052b524d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk)
 		syn_set = true;
 	} else {
 		if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) {
+			/* Some middle-boxes may black-hole Fast Open _after_
+			 * the handshake. Therefore we conservatively disable
+			 * Fast Open on this path on recurring timeouts with
+			 * few or zero bytes acked after Fast Open.
+			 */
+			if (tp->syn_data_acked &&
+			    tp->bytes_acked <= tp->rx_opt.mss_clamp) {
+				tcp_fastopen_cache_set(sk, 0, NULL, true, 0);
+				if (icsk->icsk_retransmits == sysctl_tcp_retries1)
+					NET_INC_STATS_BH(sock_net(sk),
+							 LINUX_MIB_TCPFASTOPENACTIVEFAIL);
+			}
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);
 

From 11617ee108ae5bccaf3064d2304b8608874c85f9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 18 Nov 2015 21:03:33 -0800
Subject: [PATCH 158/201] tcp: fix potential huge kmalloc() calls in TCP_REPAIR

[ Upstream commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9 ]

tcp_send_rcvq() is used for re-injecting data into tcp receive queue.

Problems :

- No check against size is performed, allowed user to fool kernel in
  attempting very large memory allocations, eventually triggering
  OOM when memory is fragmented.

- In case of fault during the copy we do not return correct errno.

Lets use alloc_skb_with_frags() to cook optimal skbs.

Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c")
Fixes: c0e88ff0f256 ("tcp: Repair socket queues")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index a8f515bb19c4b..cc6bd43f7f89f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4457,19 +4457,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	struct sk_buff *skb;
+	int err = -ENOMEM;
+	int data_len = 0;
 	bool fragstolen;
 
 	if (size == 0)
 		return 0;
 
-	skb = alloc_skb(size, sk->sk_allocation);
+	if (size > PAGE_SIZE) {
+		int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+		data_len = npages << PAGE_SHIFT;
+		size = data_len + (size & ~PAGE_MASK);
+	}
+	skb = alloc_skb_with_frags(size - data_len, data_len,
+				   PAGE_ALLOC_COSTLY_ORDER,
+				   &err, sk->sk_allocation);
 	if (!skb)
 		goto err;
 
+	skb_put(skb, size - data_len);
+	skb->data_len = data_len;
+	skb->len = size;
+
 	if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
 		goto err_free;
 
-	if (memcpy_from_msg(skb_put(skb, size), msg, size))
+	err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
+	if (err)
 		goto err_free;
 
 	TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4485,7 +4500,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
 err_free:
 	kfree_skb(skb);
 err:
-	return -ENOMEM;
+	return err;
+
 }
 
 static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)

From b2d8d14ba2c37e3ff2d1267d4a32a32365859b8b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 26 Nov 2015 08:18:14 -0800
Subject: [PATCH 159/201] tcp: initialize tp->copied_seq in case of cross SYN
 connection

[ Upstream commit 142a2e7ece8d8ac0e818eb2c91f99ca894730e2a ]

Dmitry provided a syzkaller (http://github.com/google/syzkaller)
generated program that triggers the WARNING at
net/ipv4/tcp.c:1729 in tcp_recvmsg() :

WARN_ON(tp->copied_seq != tp->rcv_nxt &&
        !(flags & (MSG_PEEK | MSG_TRUNC)));

His program is specifically attempting a Cross SYN TCP exchange,
that we support (for the pleasure of hackers ?), but it looks we
lack proper tcp->copied_seq initialization.

Thanks again Dmitry for your report and testings.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Tested-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_input.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cc6bd43f7f89f..0a2b61dbcd4e8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5659,6 +5659,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		}
 
 		tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+		tp->copied_seq = tp->rcv_nxt;
 		tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 
 		/* RFC1323: The window in SYN & SYN/ACK segments is

From 609d60122e815cdad20f887de8d2d60d989ef2cb Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 20 Nov 2015 00:11:56 +0100
Subject: [PATCH 160/201] net, scm: fix PaX detected msg_controllen overflow in
 scm_detach_fds

[ Upstream commit 6900317f5eff0a7070c5936e5383f589e0de7a09 ]

David and HacKurx reported a following/similar size overflow triggered
in a grsecurity kernel, thanks to PaX's gcc size overflow plugin:

(Already fixed in later grsecurity versions by Brad and PaX Team.)

[ 1002.296137] PAX: size overflow detected in function scm_detach_fds net/core/scm.c:314
               cicus.202_127 min, count: 4, decl: msg_controllen; num: 0; context: msghdr;
[ 1002.296145] CPU: 0 PID: 3685 Comm: scm_rights_recv Not tainted 4.2.3-grsec+ #7
[ 1002.296149] Hardware name: Apple Inc. MacBookAir5,1/Mac-66F35F19FE2A0D05, [...]
[ 1002.296153]  ffffffff81c27366 0000000000000000 ffffffff81c27375 ffffc90007843aa8
[ 1002.296162]  ffffffff818129ba 0000000000000000 ffffffff81c27366 ffffc90007843ad8
[ 1002.296169]  ffffffff8121f838 fffffffffffffffc fffffffffffffffc ffffc90007843e60
[ 1002.296176] Call Trace:
[ 1002.296190]  [<ffffffff818129ba>] dump_stack+0x45/0x57
[ 1002.296200]  [<ffffffff8121f838>] report_size_overflow+0x38/0x60
[ 1002.296209]  [<ffffffff816a979e>] scm_detach_fds+0x2ce/0x300
[ 1002.296220]  [<ffffffff81791899>] unix_stream_read_generic+0x609/0x930
[ 1002.296228]  [<ffffffff81791c9f>] unix_stream_recvmsg+0x4f/0x60
[ 1002.296236]  [<ffffffff8178dc00>] ? unix_set_peek_off+0x50/0x50
[ 1002.296243]  [<ffffffff8168fac7>] sock_recvmsg+0x47/0x60
[ 1002.296248]  [<ffffffff81691522>] ___sys_recvmsg+0xe2/0x1e0
[ 1002.296257]  [<ffffffff81693496>] __sys_recvmsg+0x46/0x80
[ 1002.296263]  [<ffffffff816934fc>] SyS_recvmsg+0x2c/0x40
[ 1002.296271]  [<ffffffff8181a3ab>] entry_SYSCALL_64_fastpath+0x12/0x85

Further investigation showed that this can happen when an *odd* number of
fds are being passed over AF_UNIX sockets.

In these cases CMSG_LEN(i * sizeof(int)) and CMSG_SPACE(i * sizeof(int)),
where i is the number of successfully passed fds, differ by 4 bytes due
to the extra CMSG_ALIGN() padding in CMSG_SPACE() to an 8 byte boundary
on 64 bit. The padding is used to align subsequent cmsg headers in the
control buffer.

When the control buffer passed in from the receiver side *lacks* these 4
bytes (e.g. due to buggy/wrong API usage), then msg->msg_controllen will
overflow in scm_detach_fds():

  int cmlen = CMSG_LEN(i * sizeof(int));  <--- cmlen w/o tail-padding
  err = put_user(SOL_SOCKET, &cm->cmsg_level);
  if (!err)
    err = put_user(SCM_RIGHTS, &cm->cmsg_type);
  if (!err)
    err = put_user(cmlen, &cm->cmsg_len);
  if (!err) {
    cmlen = CMSG_SPACE(i * sizeof(int));  <--- cmlen w/ 4 byte extra tail-padding
    msg->msg_control += cmlen;
    msg->msg_controllen -= cmlen;         <--- iff no tail-padding space here ...
  }                                            ... wrap-around

F.e. it will wrap to a length of 18446744073709551612 bytes in case the
receiver passed in msg->msg_controllen of 20 bytes, and the sender
properly transferred 1 fd to the receiver, so that its CMSG_LEN results
in 20 bytes and CMSG_SPACE in 24 bytes.

In case of MSG_CMSG_COMPAT (scm_detach_fds_compat()), I haven't seen an
issue in my tests as alignment seems always on 4 byte boundary. Same
should be in case of native 32 bit, where we end up with 4 byte boundaries
as well.

In practice, passing msg->msg_controllen of 20 to recvmsg() while receiving
a single fd would mean that on successful return, msg->msg_controllen is
being set by the kernel to 24 bytes instead, thus more than the input
buffer advertised. It could f.e. become an issue if such application later
on zeroes or copies the control buffer based on the returned msg->msg_controllen
elsewhere.

Maximum number of fds we can send is a hard upper limit SCM_MAX_FD (253).

Going over the code, it seems like msg->msg_controllen is not being read
after scm_detach_fds() in scm_recv() anymore by the kernel, good!

Relevant recvmsg() handler are unix_dgram_recvmsg() (unix_seqpacket_recvmsg())
and unix_stream_recvmsg(). Both return back to their recvmsg() caller,
and ___sys_recvmsg() places the updated length, that is, new msg_control -
old msg_control pointer into msg->msg_controllen (hence the 24 bytes seen
in the example).

Long time ago, Wei Yongjun fixed something related in commit 1ac70e7ad24a
("[NET]: Fix function put_cmsg() which may cause usr application memory
overflow").

RFC3542, section 20.2. says:

  The fields shown as "XX" are possible padding, between the cmsghdr
  structure and the data, and between the data and the next cmsghdr
  structure, if required by the implementation. While sending an
  application may or may not include padding at the end of last
  ancillary data in msg_controllen and implementations must accept both
  as valid. On receiving a portable application must provide space for
  padding at the end of the last ancillary data as implementations may
  copy out the padding at the end of the control message buffer and
  include it in the received msg_controllen. When recvmsg() is called
  if msg_controllen is too small for all the ancillary data items
  including any trailing padding after the last item an implementation
  may set MSG_CTRUNC.

Since we didn't place MSG_CTRUNC for already quite a long time, just do
the same as in 1ac70e7ad24a to avoid an overflow.

Btw, even man-page author got this wrong :/ See db939c9b26e9 ("cmsg.3: Fix
error in SCM_RIGHTS code sample"). Some people must have copied this (?),
thus it got triggered in the wild (reported several times during boot by
David and HacKurx).

No Fixes tag this time as pre 2002 (that is, pre history tree).

Reported-by: David Sterba <dave@jikos.cz>
Reported-by: HacKurx <hackurx@gmail.com>
Cc: PaX Team <pageexec@freemail.hu>
Cc: Emese Revfy <re.emese@gmail.com>
Cc: Brad Spengler <spender@grsecurity.net>
Cc: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
Cc: Eric Dumazet <edumazet@google.com>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/scm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/scm.c b/net/core/scm.c
index 3b6899b7d810d..8a1741b14302b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 			err = put_user(cmlen, &cm->cmsg_len);
 		if (!err) {
 			cmlen = CMSG_SPACE(i*sizeof(int));
+			if (msg->msg_controllen < cmlen)
+				cmlen = msg->msg_controllen;
 			msg->msg_control += cmlen;
 			msg->msg_controllen -= cmlen;
 		}

From 47f706657d85e1fbfb2e5ed8c7b90ff880bd4d6e Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 20 Nov 2015 13:54:19 +0100
Subject: [PATCH 161/201] net: ipmr: fix static mfc/dev leaks on table
 destruction

[ Upstream commit 0e615e9601a15efeeb8942cf7cd4dadba0c8c5a7 ]

When destroying an mrt table the static mfc entries and the static
devices are kept, which leads to devices that can never be destroyed
(because of refcnt taken) and leaked memory, for example:
unreferenced object 0xffff880034c144c0 (size 192):
  comm "mfc-broken", pid 4777, jiffies 4320349055 (age 46001.964s)
  hex dump (first 32 bytes):
    98 53 f0 34 00 88 ff ff 98 53 f0 34 00 88 ff ff  .S.4.....S.4....
    ef 0a 0a 14 01 02 03 04 00 00 00 00 01 00 00 00  ................
  backtrace:
    [<ffffffff815c1b9e>] kmemleak_alloc+0x4e/0xb0
    [<ffffffff811ea6e0>] kmem_cache_alloc+0x190/0x300
    [<ffffffff815931cb>] ip_mroute_setsockopt+0x5cb/0x910
    [<ffffffff8153d575>] do_ip_setsockopt.isra.11+0x105/0xff0
    [<ffffffff8153e490>] ip_setsockopt+0x30/0xa0
    [<ffffffff81564e13>] raw_setsockopt+0x33/0x90
    [<ffffffff814d1e14>] sock_common_setsockopt+0x14/0x20
    [<ffffffff814d0b51>] SyS_setsockopt+0x71/0xc0
    [<ffffffff815cdbf6>] entry_SYSCALL_64_fastpath+0x16/0x7a
    [<ffffffffffffffff>] 0xffffffffffffffff

Make sure that everything is cleaned on netns destruction.

Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Reviewed-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ipmr.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8e8203d5c520a..ef7e2c4342cbb 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			      struct mfc_cache *c, struct rtmsg *rtm);
 static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
 				 int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 static void ipmr_free_table(struct mr_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
+	mroute_clean_tables(mrt, true);
 	kfree(mrt);
 }
 
@@ -1208,7 +1208,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr_table *mrt)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1217,8 +1217,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
 	/* Shut down all active vif entries */
 
 	for (i = 0; i < mrt->maxvif; i++) {
-		if (!(mrt->vif_table[i].flags & VIFF_STATIC))
-			vif_delete(mrt, i, 0, &list);
+		if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+			continue;
+		vif_delete(mrt, i, 0, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1226,7 +1227,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
 
 	for (i = 0; i < MFC_LINES; i++) {
 		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
-			if (c->mfc_flags & MFC_STATIC)
+			if (!all && (c->mfc_flags & MFC_STATIC))
 				continue;
 			list_del_rcu(&c->list);
 			mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1261,7 +1262,7 @@ static void mrtsock_destruct(struct sock *sk)
 						    NETCONFA_IFINDEX_ALL,
 						    net->ipv4.devconf_all);
 			RCU_INIT_POINTER(mrt->mroute_sk, NULL);
-			mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt, false);
 		}
 	}
 	rtnl_unlock();

From 4eb5e5c531d96b2397ad2b02fc5a496307b2e143 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Fri, 20 Nov 2015 13:54:20 +0100
Subject: [PATCH 162/201] net: ip6mr: fix static mfc/dev leaks on table
 destruction

[ Upstream commit 4c6980462f32b4f282c5d8e5f7ea8070e2937725 ]

Similar to ipv4, when destroying an mrt table the static mfc entries and
the static devices are kept, which leads to devices that can never be
destroyed (because of refcnt taken) and leaked memory. Make sure that
everything is cleaned up on netns destruction.

Fixes: 8229efdaef1e ("netns: ip6mr: enable namespace support in ipv6 multicast forwarding code")
CC: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Reviewed-by: Cong Wang <cwang@twopensource.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6mr.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 0e004cc42a22b..35eee72ab4af9 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
 			      int cmd);
 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
 			       struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
 static void ip6mr_free_table(struct mr6_table *mrt)
 {
 	del_timer_sync(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
+	mroute_clean_tables(mrt, true);
 	kfree(mrt);
 }
 
@@ -1542,7 +1542,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1552,8 +1552,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
 	 *	Shut down all active vif entries
 	 */
 	for (i = 0; i < mrt->maxvif; i++) {
-		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
-			mif6_delete(mrt, i, &list);
+		if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+			continue;
+		mif6_delete(mrt, i, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1562,7 +1563,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
 	 */
 	for (i = 0; i < MFC6_LINES; i++) {
 		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
-			if (c->mfc_flags & MFC_STATIC)
+			if (!all && (c->mfc_flags & MFC_STATIC))
 				continue;
 			write_lock_bh(&mrt_lock);
 			list_del(&c->list);
@@ -1625,7 +1626,7 @@ int ip6mr_sk_done(struct sock *sk)
 						     net->ipv6.devconf_all);
 			write_unlock_bh(&mrt_lock);
 
-			mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt, false);
 			err = 0;
 			break;
 		}

From b3abad339f8e268bb261e5844ab68b18a7797c29 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Date: Sat, 21 Nov 2015 19:46:19 +0100
Subject: [PATCH 163/201] vrf: fix double free and memory corruption on
 register_netdevice failure

[ Upstream commit 7f109f7cc37108cba7243bc832988525b0d85909 ]

When vrf's ->newlink is called, if register_netdevice() fails then it
does free_netdev(), but that's also done by rtnl_newlink() so a second
free happens and memory gets corrupted, to reproduce execute the
following line a couple of times (1 - 5 usually is enough):
$ for i in `seq 1 5`; do ip link add vrf: type vrf table 1; done;
This works because we fail in register_netdevice() because of the wrong
name "vrf:".

And here's a trace of one crash:
[   28.792157] ------------[ cut here ]------------
[   28.792407] kernel BUG at fs/namei.c:246!
[   28.792608] invalid opcode: 0000 [#1] SMP
[   28.793240] Modules linked in: vrf nfsd auth_rpcgss oid_registry
nfs_acl nfs lockd grace sunrpc crct10dif_pclmul crc32_pclmul
crc32c_intel qxl drm_kms_helper ttm drm aesni_intel aes_x86_64 psmouse
glue_helper lrw evdev gf128mul i2c_piix4 ablk_helper cryptd ppdev
parport_pc parport serio_raw pcspkr virtio_balloon virtio_console
i2c_core acpi_cpufreq button 9pnet_virtio 9p 9pnet fscache ipv6 autofs4
ext4 crc16 mbcache jbd2 virtio_blk virtio_net sg sr_mod cdrom
ata_generic ehci_pci uhci_hcd ehci_hcd e1000 usbcore usb_common ata_piix
libata virtio_pci virtio_ring virtio scsi_mod floppy
[   28.796016] CPU: 0 PID: 1148 Comm: ld-linux-x86-64 Not tainted
4.4.0-rc1+ #24
[   28.796016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.8.1-20150318_183358- 04/01/2014
[   28.796016] task: ffff8800352561c0 ti: ffff88003592c000 task.ti:
ffff88003592c000
[   28.796016] RIP: 0010:[<ffffffff812187b3>]  [<ffffffff812187b3>]
putname+0x43/0x60
[   28.796016] RSP: 0018:ffff88003592fe88  EFLAGS: 00010246
[   28.796016] RAX: 0000000000000000 RBX: ffff8800352561c0 RCX:
0000000000000001
[   28.796016] RDX: 0000000000000000 RSI: 0000000000000000 RDI:
ffff88003784f000
[   28.796016] RBP: ffff88003592ff08 R08: 0000000000000001 R09:
0000000000000000
[   28.796016] R10: 0000000000000000 R11: 0000000000000001 R12:
0000000000000000
[   28.796016] R13: 000000000000047c R14: ffff88003784f000 R15:
ffff8800358c4a00
[   28.796016] FS:  0000000000000000(0000) GS:ffff88003fc00000(0000)
knlGS:0000000000000000
[   28.796016] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   28.796016] CR2: 00007ffd583bc2d9 CR3: 0000000035a99000 CR4:
00000000000406f0
[   28.796016] Stack:
[   28.796016]  ffffffff8121045d ffffffff812102d3 ffff8800352561c0
ffff880035a91660
[   28.796016]  ffff8800008a9880 0000000000000000 ffffffff81a49940
00ffffff81218684
[   28.796016]  ffff8800352561c0 000000000000047c 0000000000000000
ffff880035b36d80
[   28.796016] Call Trace:
[   28.796016]  [<ffffffff8121045d>] ?
do_execveat_common.isra.34+0x74d/0x930
[   28.796016]  [<ffffffff812102d3>] ?
do_execveat_common.isra.34+0x5c3/0x930
[   28.796016]  [<ffffffff8121066c>] do_execve+0x2c/0x30
[   28.796016]  [<ffffffff810939a0>]
call_usermodehelper_exec_async+0xf0/0x140
[   28.796016]  [<ffffffff810938b0>] ? umh_complete+0x40/0x40
[   28.796016]  [<ffffffff815cb1af>] ret_from_fork+0x3f/0x70
[   28.796016] Code: 48 8d 47 1c 48 89 e5 53 48 8b 37 48 89 fb 48 39 c6
74 1a 48 8b 3d 7e e9 8f 00 e8 49 fa fc ff 48 89 df e8 f1 01 fd ff 5b 5d
f3 c3 <0f> 0b 48 89 fe 48 8b 3d 61 e9 8f 00 e8 2c fa fc ff 5b 5d eb e9
[   28.796016] RIP  [<ffffffff812187b3>] putname+0x43/0x60
[   28.796016]  RSP <ffff88003592fe88>

Fixes: 193125dbd8eb ("net: Introduce VRF device driver")
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/vrf.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 488c6f50df736..c9e309cd9d82d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -581,7 +581,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct net_vrf *vrf = netdev_priv(dev);
 	struct net_vrf_dev *vrf_ptr;
-	int err;
 
 	if (!data || !data[IFLA_VRF_TABLE])
 		return -EINVAL;
@@ -590,26 +589,16 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 
 	dev->priv_flags |= IFF_VRF_MASTER;
 
-	err = -ENOMEM;
 	vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL);
 	if (!vrf_ptr)
-		goto out_fail;
+		return -ENOMEM;
 
 	vrf_ptr->ifindex = dev->ifindex;
 	vrf_ptr->tb_id = vrf->tb_id;
 
-	err = register_netdevice(dev);
-	if (err < 0)
-		goto out_fail;
-
 	rcu_assign_pointer(dev->vrf_ptr, vrf_ptr);
 
-	return 0;
-
-out_fail:
-	kfree(vrf_ptr);
-	free_netdev(dev);
-	return err;
+	return register_netdev(dev);
 }
 
 static size_t vrf_nl_getsize(const struct net_device *dev)

From 309ef6d135b8d7baeb06afdea48e611bb16c9ee0 Mon Sep 17 00:00:00 2001
From: Aaro Koskinen <aaro.koskinen@iki.fi>
Date: Sun, 22 Nov 2015 01:08:54 +0200
Subject: [PATCH 164/201] broadcom: fix PHY_ID_BCM5481 entry in the id table

[ Upstream commit 3c25a860d17b7378822f35d8c9141db9507e3beb ]

Commit fcb26ec5b18d ("broadcom: move all PHY_ID's to header")
updated broadcom_tbl to use PHY_IDs, but incorrectly replaced 0x0143bca0
with PHY_ID_BCM5482 (making a duplicate entry, and completely omitting
the original). Fix that.

Fixes: fcb26ec5b18d ("broadcom: move all PHY_ID's to header")
Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/phy/broadcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 9c71295f2fefb..85e640440bd9b 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -675,7 +675,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
 	{ PHY_ID_BCM5461, 0xfffffff0 },
 	{ PHY_ID_BCM54616S, 0xfffffff0 },
 	{ PHY_ID_BCM5464, 0xfffffff0 },
-	{ PHY_ID_BCM5482, 0xfffffff0 },
+	{ PHY_ID_BCM5481, 0xfffffff0 },
 	{ PHY_ID_BCM5482, 0xfffffff0 },
 	{ PHY_ID_BCM50610, 0xfffffff0 },
 	{ PHY_ID_BCM50610M, 0xfffffff0 },

From 126bb499631ca8c61db5628bdaaa7294ae73d04f Mon Sep 17 00:00:00 2001
From: Ying Xue <ying.xue@windriver.com>
Date: Tue, 24 Nov 2015 13:57:57 +0800
Subject: [PATCH 165/201] tipc: fix error handling of expanding buffer headroom

[ Upstream commit 7098356baca723513e97ca0020df4e18bc353be3 ]

Coverity says:
*** CID 1338065:  Error handling issues  (CHECKED_RETURN)
/net/tipc/udp_media.c: 162 in tipc_udp_send_msg()
156             struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
157             struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
158             struct sk_buff *clone;
159             struct rtable *rt;
160
161             if (skb_headroom(skb) < UDP_MIN_HEADROOM)
>>>     CID 1338065:  Error handling issues  (CHECKED_RETURN)
>>>     Calling "pskb_expand_head" without checking return value (as is done elsewhere 51 out of 56
+times).
162                     pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
163
164             clone = skb_clone(skb, GFP_ATOMIC);
165             skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
166             ub = rcu_dereference_rtnl(b->media_ptr);
167             if (!ub) {

When expanding buffer headroom over udp tunnel with pskb_expand_head(),
it's unfortunate that we don't check its return value. As a result, if
the function returns an error code due to the lack of memory, it may
cause unpredictable consequence as we unconditionally consider that
it's always successful.

Fixes: e53567948f82 ("tipc: conditionally expand buffer headroom over udp tunnel")
Reported-by: <scan-admin@coverity.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/tipc/udp_media.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
index cd7c5f131e722..86f2e7c44694a 100644
--- a/net/tipc/udp_media.c
+++ b/net/tipc/udp_media.c
@@ -159,8 +159,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
 	struct sk_buff *clone;
 	struct rtable *rt;
 
-	if (skb_headroom(skb) < UDP_MIN_HEADROOM)
-		pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+	if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
+		err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
+		if (err)
+			goto tx_error;
+	}
 
 	clone = skb_clone(skb, GFP_ATOMIC);
 	skb_set_inner_protocol(clone, htons(ETH_P_TIPC));

From edd6db9dee45cdfd7b76f23ea513f5bab816c0bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= <mkubecek@suse.cz>
Date: Tue, 24 Nov 2015 15:07:11 +0100
Subject: [PATCH 166/201] ipv6: distinguish frag queues by device for multicast
 and link-local packets

[ Upstream commit 264640fc2c5f4f913db5c73fa3eb1ead2c45e9d7 ]

If a fragmented multicast packet is received on an ethernet device which
has an active macvlan on top of it, each fragment is duplicated and
received both on the underlying device and the macvlan. If some
fragments for macvlan are processed before the whole packet for the
underlying device is reassembled, the "overlapping fragments" test in
ip6_frag_queue() discards the whole fragment queue.

To resolve this, add device ifindex to the search key and require it to
match reassembling multicast packets and packets to link-local
addresses.

Note: similar patch has been already submitted by Yoshifuji Hideaki in

  http://patchwork.ozlabs.org/patch/220979/

but got lost and forgotten for some reason.

Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/ipv6.h                      |  1 +
 net/ipv6/netfilter/nf_conntrack_reasm.c |  5 +++--
 net/ipv6/reassembly.c                   | 10 +++++++---
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 711cca428cc8c..1b6371745e451 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -490,6 +490,7 @@ struct ip6_create_arg {
 	u32 user;
 	const struct in6_addr *src;
 	const struct in6_addr *dst;
+	int iif;
 	u8 ecn;
 };
 
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index c7196ad1d69f8..dc50143f50f2a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
 /* Creation primitives. */
 static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 					 u32 user, struct in6_addr *src,
-					 struct in6_addr *dst, u8 ecn)
+					 struct in6_addr *dst, int iif, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
 	arg.user = user;
 	arg.src = src;
 	arg.dst = dst;
+	arg.iif = iif;
 	arg.ecn = ecn;
 
 	local_bh_disable();
@@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
 	fhdr = (struct frag_hdr *)skb_transport_header(clone);
 
 	fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
-		     ip6_frag_ecn(hdr));
+		     skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
 	if (fq == NULL) {
 		pr_debug("Can't find and can't create new queue\n");
 		goto ret_orig;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index f1159bb76e0a5..04013a910ce5f 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
 	return	fq->id == arg->id &&
 		fq->user == arg->user &&
 		ipv6_addr_equal(&fq->saddr, arg->src) &&
-		ipv6_addr_equal(&fq->daddr, arg->dst);
+		ipv6_addr_equal(&fq->daddr, arg->dst) &&
+		(arg->iif == fq->iif ||
+		 !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+					       IPV6_ADDR_LINKLOCAL)));
 }
 EXPORT_SYMBOL(ip6_frag_match);
 
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
 
 static struct frag_queue *
 fq_find(struct net *net, __be32 id, const struct in6_addr *src,
-	const struct in6_addr *dst, u8 ecn)
+	const struct in6_addr *dst, int iif, u8 ecn)
 {
 	struct inet_frag_queue *q;
 	struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
 	arg.user = IP6_DEFRAG_LOCAL_DELIVER;
 	arg.src = src;
 	arg.dst = dst;
+	arg.iif = iif;
 	arg.ecn = ecn;
 
 	hash = inet6_hash_frag(id, src, dst);
@@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
 	}
 
 	fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
-		     ip6_frag_ecn(hdr));
+		     skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
 	if (fq) {
 		int ret;
 

From 0eaa7b64f7c307249fc28f2a57ff20aa905910bb Mon Sep 17 00:00:00 2001
From: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Date: Tue, 24 Nov 2015 17:13:21 -0500
Subject: [PATCH 167/201] RDS: fix race condition when sending a message on
 unbound socket

[ Upstream commit 8c7188b23474cca017b3ef354c4a58456f68303a ]

Sasha's found a NULL pointer dereference in the RDS connection code when
sending a message to an apparently unbound socket.  The problem is caused
by the code checking if the socket is bound in rds_sendmsg(), which checks
the rs_bound_addr field without taking a lock on the socket.  This opens a
race where rs_bound_addr is temporarily set but where the transport is not
in rds_bind(), leading to a NULL pointer dereference when trying to
dereference 'trans' in __rds_conn_create().

Vegard wrote a reproducer for this issue, so kindly ask him to share if
you're interested.

I cannot reproduce the NULL pointer dereference using Vegard's reproducer
with this patch, whereas I could without.

Complete earlier incomplete fix to CVE-2015-6937:

  74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection")

Cc: David S. Miller <davem@davemloft.net>
Cc: stable@vger.kernel.org

Reviewed-by: Vegard Nossum <vegard.nossum@oracle.com>
Reviewed-by: Sasha Levin <sasha.levin@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: Quentin Casasnovas <quentin.casasnovas@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/rds/connection.c | 6 ------
 net/rds/send.c       | 4 +++-
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 49adeef8090ca..9b2de5e67d795 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -190,12 +190,6 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		}
 	}
 
-	if (trans == NULL) {
-		kmem_cache_free(rds_conn_slab, conn);
-		conn = ERR_PTR(-ENODEV);
-		goto out;
-	}
-
 	conn->c_trans = trans;
 
 	ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/send.c b/net/rds/send.c
index 4df61a515b83d..859de6f32521d 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 		release_sock(sk);
 	}
 
-	/* racing with another thread binding seems ok here */
+	lock_sock(sk);
 	if (daddr == 0 || rs->rs_bound_addr == 0) {
+		release_sock(sk);
 		ret = -ENOTCONN; /* XXX not a great errno */
 		goto out;
 	}
+	release_sock(sk);
 
 	if (payload_len > rds_sk_sndbuf(rs)) {
 		ret = -EMSGSIZE;

From 90d19ad685d03197418f5c8e970772192a032a87 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Mon, 30 Nov 2015 13:02:56 +0100
Subject: [PATCH 168/201] bpf, array: fix heap out-of-bounds access when
 updating elements

[ Upstream commit fbca9d2d35c6ef1b323fae75cc9545005ba25097 ]

During own review but also reported by Dmitry's syzkaller [1] it has been
noticed that we trigger a heap out-of-bounds access on eBPF array maps
when updating elements. This happens with each map whose map->value_size
(specified during map creation time) is not multiple of 8 bytes.

In array_map_alloc(), elem_size is round_up(attr->value_size, 8) and
used to align array map slots for faster access. However, in function
array_map_update_elem(), we update the element as ...

memcpy(array->value + array->elem_size * index, value, array->elem_size);

... where we access 'value' out-of-bounds, since it was allocated from
map_update_elem() from syscall side as kmalloc(map->value_size, GFP_USER)
and later on copied through copy_from_user(value, uvalue, map->value_size).
Thus, up to 7 bytes, we can access out-of-bounds.

Same could happen from within an eBPF program, where in worst case we
access beyond an eBPF program's designated stack.

Since 1be7f75d1668 ("bpf: enable non-root eBPF programs") didn't hit an
official release yet, it only affects priviledged users.

In case of array_map_lookup_elem(), the verifier prevents eBPF programs
from accessing beyond map->value_size through check_map_access(). Also
from syscall side map_lookup_elem() only copies map->value_size back to
user, so nothing could leak.

  [1] http://github.com/google/syzkaller

Fixes: 28fbcfa08d8e ("bpf: add array type of eBPF maps")
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/bpf/arraymap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 29ace107f2365..7a0decf471100 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -104,7 +104,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 		/* all elements already exist */
 		return -EEXIST;
 
-	memcpy(array->value + array->elem_size * index, value, array->elem_size);
+	memcpy(array->value + array->elem_size * index, value, map->value_size);
 	return 0;
 }
 

From 645e3f33c73ad1153db0680b6833cf70d0d4dce3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 29 Nov 2015 19:37:57 -0800
Subject: [PATCH 169/201] ipv6: add complete rcu protection around np->opt

[ Upstream commit 45f6fad84cc305103b28d73482b344d7f5b76f39 ]

This patch addresses multiple problems :

UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions
while socket is not locked : Other threads can change np->opt
concurrently. Dmitry posted a syzkaller
(http://github.com/google/syzkaller) program desmonstrating
use-after-free.

Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock()
and dccp_v6_request_recv_sock() also need to use RCU protection
to dereference np->opt once (before calling ipv6_dup_options())

This patch adds full RCU protection to np->opt

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ipv6.h             |  2 +-
 include/net/ipv6.h               | 21 +++++++++++++++++++-
 net/dccp/ipv6.c                  | 33 ++++++++++++++++++++------------
 net/ipv6/af_inet6.c              | 13 +++++++++----
 net/ipv6/datagram.c              |  4 +++-
 net/ipv6/exthdrs.c               |  3 ++-
 net/ipv6/inet6_connection_sock.c | 11 ++++++++---
 net/ipv6/ipv6_sockglue.c         | 33 +++++++++++++++++++++-----------
 net/ipv6/raw.c                   |  8 ++++++--
 net/ipv6/syncookies.c            |  2 +-
 net/ipv6/tcp_ipv6.c              | 28 ++++++++++++++++-----------
 net/ipv6/udp.c                   |  8 ++++++--
 net/l2tp/l2tp_ip6.c              |  8 ++++++--
 13 files changed, 122 insertions(+), 52 deletions(-)

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f1f32af6d9b96..3e4ff3f1d3140 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -227,7 +227,7 @@ struct ipv6_pinfo {
 	struct ipv6_ac_socklist	*ipv6_ac_list;
 	struct ipv6_fl_socklist __rcu *ipv6_fl_list;
 
-	struct ipv6_txoptions	*opt;
+	struct ipv6_txoptions __rcu	*opt;
 	struct sk_buff		*pktoptions;
 	struct sk_buff		*rxpmtu;
 	struct inet6_cork	cork;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 1b6371745e451..b14e1581c4774 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock;
  */
 
 struct ipv6_txoptions {
+	atomic_t		refcnt;
 	/* Length of this structure */
 	int			tot_len;
 
@@ -217,7 +218,7 @@ struct ipv6_txoptions {
 	struct ipv6_opt_hdr	*dst0opt;
 	struct ipv6_rt_hdr	*srcrt;	/* Routing Header */
 	struct ipv6_opt_hdr	*dst1opt;
-
+	struct rcu_head		rcu;
 	/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
 };
 
@@ -252,6 +253,24 @@ struct ipv6_fl_socklist {
 	struct rcu_head			rcu;
 };
 
+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
+{
+	struct ipv6_txoptions *opt;
+
+	rcu_read_lock();
+	opt = rcu_dereference(np->opt);
+	if (opt && !atomic_inc_not_zero(&opt->refcnt))
+		opt = NULL;
+	rcu_read_unlock();
+	return opt;
+}
+
+static inline void txopt_put(struct ipv6_txoptions *opt)
+{
+	if (opt && atomic_dec_and_test(&opt->refcnt))
+		kfree_rcu(opt, rcu);
+}
+
 struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
 struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
 					 struct ip6_flowlabel *fl,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 5165571f397aa..a0490508d2135 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -202,7 +202,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 	security_req_classify_flow(req, flowi6_to_flowi(&fl6));
 
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
@@ -219,7 +221,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
 							 &ireq->ir_v6_loc_addr,
 							 &ireq->ir_v6_rmt_addr);
 		fl6.daddr = ireq->ir_v6_rmt_addr;
-		err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+		rcu_read_lock();
+		err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+			       np->tclass);
+		rcu_read_unlock();
 		err = net_xmit_eval(err);
 	}
 
@@ -415,6 +420,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt;
 	struct inet_sock *newinet;
 	struct dccp6_sock *newdp6;
 	struct sock *newsk;
@@ -534,13 +540,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 	 * Yes, keeping reference count would be much more clever, but we make
 	 * one more one thing there: reattach optmem to newsk.
 	 */
-	if (np->opt != NULL)
-		newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+	opt = rcu_dereference(np->opt);
+	if (opt) {
+		opt = ipv6_dup_options(newsk, opt);
+		RCU_INIT_POINTER(newnp->opt, opt);
+	}
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newnp->opt != NULL)
-		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
-						     newnp->opt->opt_flen);
+	if (opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+						    opt->opt_flen;
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
@@ -793,6 +801,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct ipv6_txoptions *opt;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
@@ -892,7 +901,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl6.fl6_sport = inet->inet_sport;
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	if (IS_ERR(dst)) {
@@ -912,9 +922,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	__ip6_dst_store(sk, dst, NULL, NULL);
 
 	icsk->icsk_ext_hdr_len = 0;
-	if (np->opt != NULL)
-		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
-					  np->opt->opt_nflen);
+	if (opt)
+		icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
 
 	inet->inet_dport = usin->sin6_port;
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 44bb66bde0e2d..38d66ddfb9375 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk)
 
 	/* Free tx options */
 
-	opt = xchg(&np->opt, NULL);
-	if (opt)
-		sock_kfree_s(sk, opt, opt->tot_len);
+	opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+	if (opt) {
+		atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+		txopt_put(opt);
+	}
 }
 EXPORT_SYMBOL_GPL(inet6_destroy_sock);
 
@@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
 		fl6.fl6_sport = inet->inet_sport;
 		security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-		final_p = fl6_update_dst(&fl6, np->opt, &final);
+		rcu_read_lock();
+		final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+					 &final);
+		rcu_read_unlock();
 
 		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 		if (IS_ERR(dst)) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 9aadd57808a51..a42a673aa5473 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -167,8 +167,10 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a
 
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
-	opt = flowlabel ? flowlabel->opt : np->opt;
+	rcu_read_lock();
+	opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
 	final_p = fl6_update_dst(&fl6, opt, &final);
+	rcu_read_unlock();
 
 	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
 	err = 0;
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index ce203b0402bea..ea7c4d64a00ad 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
 			*((char **)&opt2->dst1opt) += dif;
 		if (opt2->srcrt)
 			*((char **)&opt2->srcrt) += dif;
+		atomic_set(&opt2->refcnt, 1);
 	}
 	return opt2;
 }
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
 		return ERR_PTR(-ENOBUFS);
 
 	memset(opt2, 0, tot_len);
-
+	atomic_set(&opt2->refcnt, 1);
 	opt2->tot_len = tot_len;
 	p = (char *)(opt2 + 1);
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 6927f3fb5597f..9beed302eb36f 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 	memset(fl6, 0, sizeof(*fl6));
 	fl6->flowi6_proto = IPPROTO_TCP;
 	fl6->daddr = ireq->ir_v6_rmt_addr;
-	final_p = fl6_update_dst(fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 	fl6->saddr = ireq->ir_v6_loc_addr;
 	fl6->flowi6_oif = ireq->ir_iif;
 	fl6->flowi6_mark = ireq->ir_mark;
@@ -207,7 +209,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
 	fl6->fl6_dport = inet->inet_dport;
 	security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
 
-	final_p = fl6_update_dst(fl6, np->opt, &final);
+	rcu_read_lock();
+	final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+	rcu_read_unlock();
 
 	dst = __inet6_csk_dst_check(sk, np->dst_cookie);
 	if (!dst) {
@@ -240,7 +244,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
 	/* Restore final destination back after routing done */
 	fl6.daddr = sk->sk_v6_daddr;
 
-	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+	res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+		       np->tclass);
 	rcu_read_unlock();
 	return res;
 }
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 63e6956917c9c..4449ad1f81147 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
 			icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		}
 	}
-	opt = xchg(&inet6_sk(sk)->opt, opt);
+	opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+		   opt);
 	sk_dst_reset(sk);
 
 	return opt;
@@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 				sk->sk_socket->ops = &inet_dgram_ops;
 				sk->sk_family = PF_INET;
 			}
-			opt = xchg(&np->opt, NULL);
-			if (opt)
-				sock_kfree_s(sk, opt, opt->tot_len);
+			opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+				   NULL);
+			if (opt) {
+				atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+				txopt_put(opt);
+			}
 			pktopt = xchg(&np->pktoptions, NULL);
 			kfree_skb(pktopt);
 
@@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
 			break;
 
-		opt = ipv6_renew_options(sk, np->opt, optname,
+		opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+		opt = ipv6_renew_options(sk, opt, optname,
 					 (struct ipv6_opt_hdr __user *)optval,
 					 optlen);
 		if (IS_ERR(opt)) {
@@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		retv = 0;
 		opt = ipv6_update_options(sk, opt);
 sticky_done:
-		if (opt)
-			sock_kfree_s(sk, opt, opt->tot_len);
+		if (opt) {
+			atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+			txopt_put(opt);
+		}
 		break;
 	}
 
@@ -486,6 +493,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 			break;
 
 		memset(opt, 0, sizeof(*opt));
+		atomic_set(&opt->refcnt, 1);
 		opt->tot_len = sizeof(*opt) + optlen;
 		retv = -EFAULT;
 		if (copy_from_user(opt+1, optval, optlen))
@@ -502,8 +510,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		retv = 0;
 		opt = ipv6_update_options(sk, opt);
 done:
-		if (opt)
-			sock_kfree_s(sk, opt, opt->tot_len);
+		if (opt) {
+			atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+			txopt_put(opt);
+		}
 		break;
 	}
 	case IPV6_UNICAST_HOPS:
@@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 	case IPV6_RTHDR:
 	case IPV6_DSTOPTS:
 	{
+		struct ipv6_txoptions *opt;
 
 		lock_sock(sk);
-		len = ipv6_getsockopt_sticky(sk, np->opt,
-					     optname, optval, len);
+		opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+		len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
 		release_sock(sk);
 		/* check if ipv6_getsockopt_sticky() returns err code */
 		if (len < 0)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index fdbada1569a37..fe977299551ee 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -732,6 +732,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
 
 static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
+	struct ipv6_txoptions *opt_to_free = NULL;
 	struct ipv6_txoptions opt_space;
 	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 	struct in6_addr *daddr, *final_p, final;
@@ -838,8 +839,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		if (!(opt->opt_nflen|opt->opt_flen))
 			opt = NULL;
 	}
-	if (!opt)
-		opt = np->opt;
+	if (!opt) {
+		opt = txopt_get(np);
+		opt_to_free = opt;
+		}
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
@@ -905,6 +908,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	dst_release(dst);
 out:
 	fl6_sock_release(flowlabel);
+	txopt_put(opt_to_free);
 	return err < 0 ? err : len;
 do_confirm:
 	dst_confirm(dst);
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 0909f4e0d53c2..f30bfdcdea541 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -225,7 +225,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_TCP;
 		fl6.daddr = ireq->ir_v6_rmt_addr;
-		final_p = fl6_update_dst(&fl6, np->opt, &final);
+		final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
 		fl6.saddr = ireq->ir_v6_loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 97d9314ea3611..9e9b77bd2d0a7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p, final;
+	struct ipv6_txoptions *opt;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
 	int addr_type;
@@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
 
-	final_p = fl6_update_dst(&fl6, np->opt, &final);
+	opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+	final_p = fl6_update_dst(&fl6, opt, &final);
 
 	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
 
@@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		tcp_fetch_timewait_stamp(sk, dst);
 
 	icsk->icsk_ext_hdr_len = 0;
-	if (np->opt)
-		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
-					  np->opt->opt_nflen);
+	if (opt)
+		icsk->icsk_ext_hdr_len = opt->opt_flen +
+					 opt->opt_nflen;
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
@@ -461,7 +463,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
 
 		skb_set_queue_mapping(skb, queue_mapping);
-		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+		err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+			       np->tclass);
 		err = net_xmit_eval(err);
 	}
 
@@ -991,6 +994,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	struct inet_request_sock *ireq;
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
+	struct ipv6_txoptions *opt;
 	struct inet_sock *newinet;
 	struct tcp_sock *newtp;
 	struct sock *newsk;
@@ -1126,13 +1130,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	   but we make one more one thing there: reattach optmem
 	   to newsk.
 	 */
-	if (np->opt)
-		newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+	opt = rcu_dereference(np->opt);
+	if (opt) {
+		opt = ipv6_dup_options(newsk, opt);
+		RCU_INIT_POINTER(newnp->opt, opt);
+	}
 	inet_csk(newsk)->icsk_ext_hdr_len = 0;
-	if (newnp->opt)
-		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
-						     newnp->opt->opt_flen);
+	if (opt)
+		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+						    opt->opt_flen;
 
 	tcp_ca_openreq_child(newsk, dst);
 
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 0aba654f5b91c..8379fc2f4b1d6 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1107,6 +1107,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
 	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_txoptions *opt = NULL;
+	struct ipv6_txoptions *opt_to_free = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct flowi6 fl6;
 	struct dst_entry *dst;
@@ -1260,8 +1261,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			opt = NULL;
 		connected = 0;
 	}
-	if (!opt)
-		opt = np->opt;
+	if (!opt) {
+		opt = txopt_get(np);
+		opt_to_free = opt;
+	}
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
@@ -1370,6 +1373,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 out:
 	dst_release(dst);
 	fl6_sock_release(flowlabel);
+	txopt_put(opt_to_free);
 	if (!err)
 		return len;
 	/*
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d1ded3777815e..0ce9da948ad7f 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
 	struct in6_addr *daddr, *final_p, final;
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6_txoptions *opt_to_free = NULL;
 	struct ipv6_txoptions *opt = NULL;
 	struct ip6_flowlabel *flowlabel = NULL;
 	struct dst_entry *dst = NULL;
@@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			opt = NULL;
 	}
 
-	if (opt == NULL)
-		opt = np->opt;
+	if (!opt) {
+		opt = txopt_get(np);
+		opt_to_free = opt;
+	}
 	if (flowlabel)
 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
 	opt = ipv6_fixup_options(&opt_space, opt);
@@ -631,6 +634,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	dst_release(dst);
 out:
 	fl6_sock_release(flowlabel);
+	txopt_put(opt_to_free);
 
 	return err < 0 ? err : len;
 

From a9c680b65484ecb6d1d257f761b2f04e1d9e2d6e Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <koct9i@gmail.com>
Date: Tue, 1 Dec 2015 01:14:48 +0300
Subject: [PATCH 170/201] net/neighbour: fix crash at dumping device-agnostic
 proxy entries

[ Upstream commit 6adc5fd6a142c6e2c80574c1db0c7c17dedaa42e ]

Proxy entries could have null pointer to net-device.

Signed-off-by: Konstantin Khlebnikov <koct9i@gmail.com>
Fixes: 84920c1420e2 ("net: Allow ipv6 proxies and arp proxies be shown with iproute2")
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/neighbour.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b515ba7e94f4..c169bba44e059 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
 	ndm->ndm_pad2    = 0;
 	ndm->ndm_flags	 = pn->flags | NTF_PROXY;
 	ndm->ndm_type	 = RTN_UNICAST;
-	ndm->ndm_ifindex = pn->dev->ifindex;
+	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
 	ndm->ndm_state	 = NUD_NONE;
 
 	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2290,7 +2290,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 		if (h > s_h)
 			s_idx = 0;
 		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
-			if (dev_net(n->dev) != net)
+			if (pneigh_net(n) != net)
 				continue;
 			if (idx < s_idx)
 				goto next;

From 7a439c4a677039b91804b6779de18ddab6d6ebf9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 1 Dec 2015 07:20:07 -0800
Subject: [PATCH 171/201] ipv6: sctp: implement sctp_v6_destroy_sock()

[ Upstream commit 602dd62dfbda3e63a2d6a3cbde953ebe82bf5087 ]

Dmitry Vyukov reported a memory leak using IPV6 SCTP sockets.

We need to call inet6_destroy_sock() to properly release
inet6 specific fields.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/socket.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 17bef01b9aa3e..3ec88be0faec8 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7375,6 +7375,13 @@ struct proto sctp_prot = {
 
 #if IS_ENABLED(CONFIG_IPV6)
 
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+	sctp_destroy_sock(sk);
+	inet6_destroy_sock(sk);
+}
+
 struct proto sctpv6_prot = {
 	.name		= "SCTPv6",
 	.owner		= THIS_MODULE,
@@ -7384,7 +7391,7 @@ struct proto sctpv6_prot = {
 	.accept		= sctp_accept,
 	.ioctl		= sctp_ioctl,
 	.init		= sctp_init_sock,
-	.destroy	= sctp_destroy_sock,
+	.destroy	= sctp_v6_destroy_sock,
 	.shutdown	= sctp_shutdown,
 	.setsockopt	= sctp_setsockopt,
 	.getsockopt	= sctp_getsockopt,

From bae56d7e3f815b339421306fc3b2e00107b56987 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Tue, 1 Dec 2015 18:33:36 +0100
Subject: [PATCH 172/201] openvswitch: fix hangup on vxlan/gre/geneve device
 deletion

[ Upstream commit 13175303024c8f4cd09e51079a8fcbbe572111ec ]

Each openvswitch tunnel vport (vxlan,gre,geneve) holds a reference
to the underlying tunnel device, but never released it when such
device is deleted.
Deleting the underlying device via the ip tool cause the kernel to
hangup in the netdev_wait_allrefs() loop.
This commit ensure that on device unregistration dp_detach_port_notify()
is called for all vports that hold the device reference, properly
releasing it.

Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device")
Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport")
Fixes: 6b001e682e90 ("openvswitch: Use Geneve device.")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Flavio Leitner <fbl@sysclose.org>
Acked-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/openvswitch/dp_notify.c    | 2 +-
 net/openvswitch/vport-netdev.c | 8 ++++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index a7a80a6b77b0a..653d073bae453 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work)
 			struct hlist_node *n;
 
 			hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) {
-				if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
+				if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL)
 					continue;
 
 				if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH))
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
index f7e8dcce7adae..ac14c488669c5 100644
--- a/net/openvswitch/vport-netdev.c
+++ b/net/openvswitch/vport-netdev.c
@@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport)
 	if (vport->dev->priv_flags & IFF_OVS_DATAPATH)
 		ovs_netdev_detach_dev(vport);
 
-	/* Early release so we can unregister the device */
+	/* We can be invoked by both explicit vport deletion and
+	 * underlying netdev deregistration; delete the link only
+	 * if it's not already shutting down.
+	 */
+	if (vport->dev->reg_state == NETREG_REGISTERED)
+		rtnl_delete_link(vport->dev);
 	dev_put(vport->dev);
-	rtnl_delete_link(vport->dev);
 	vport->dev = NULL;
 	rtnl_unlock();
 

From 36204ef3c3a43e1dc015d2401068e4bfb45e1e09 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 1 Dec 2015 20:08:51 -0800
Subject: [PATCH 173/201] net_sched: fix qdisc_tree_decrease_qlen() races

[ Upstream commit 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 ]

qdisc_tree_decrease_qlen() suffers from two problems on multiqueue
devices.

One problem is that it updates sch->q.qlen and sch->qstats.drops
on the mq/mqprio root qdisc, while it should not : Daniele
reported underflows errors :
[  681.774821] PAX: sch->q.qlen: 0 n: 1
[  681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head;
[  681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G           O    4.2.6.201511282239-1-grsec #1
[  681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015
[  681.774956]  ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c
[  681.774959]  ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b
[  681.774960]  ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001
[  681.774962] Call Trace:
[  681.774967]  [<ffffffffa95d2810>] dump_stack+0x4c/0x7f
[  681.774970]  [<ffffffffa91a44f4>] report_size_overflow+0x34/0x50
[  681.774972]  [<ffffffffa94d17e2>] qdisc_tree_decrease_qlen+0x152/0x160
[  681.774976]  [<ffffffffc02694b1>] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel]
[  681.774978]  [<ffffffffc02680a0>] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel]
[  681.774980]  [<ffffffffa94cd92d>] __qdisc_run+0x4d/0x1d0
[  681.774983]  [<ffffffffa949b2b2>] net_tx_action+0xc2/0x160
[  681.774985]  [<ffffffffa90664c1>] __do_softirq+0xf1/0x200
[  681.774987]  [<ffffffffa90665ee>] run_ksoftirqd+0x1e/0x30
[  681.774989]  [<ffffffffa90896b0>] smpboot_thread_fn+0x150/0x260
[  681.774991]  [<ffffffffa9089560>] ? sort_range+0x40/0x40
[  681.774992]  [<ffffffffa9085fe4>] kthread+0xe4/0x100
[  681.774994]  [<ffffffffa9085f00>] ? kthread_worker_fn+0x170/0x170
[  681.774995]  [<ffffffffa95d8d1e>] ret_from_fork+0x3e/0x70

mq/mqprio have their own ways to report qlen/drops by folding stats on
all their queues, with appropriate locking.

A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup()
without proper locking : concurrent qdisc updates could corrupt the list
that qdisc_match_from_root() parses to find a qdisc given its handle.

Fix first problem adding a TCQ_F_NOPARENT qdisc flag that
qdisc_tree_decrease_qlen() can use to abort its tree traversal,
as soon as it meets a mq/mqprio qdisc children.

Second problem can be fixed by RCU protection.
Qdisc are already freed after RCU grace period, so qdisc_list_add() and
qdisc_list_del() simply have to use appropriate rcu list variants.

A future patch will add a per struct netdev_queue list anchor, so that
qdisc_tree_decrease_qlen() can have more efficient lookups.

Reported-by: Daniele Fucini <dfucini@gmail.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Cong Wang <cwang@twopensource.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/sch_generic.h |  3 +++
 net/sched/sch_api.c       | 27 ++++++++++++++++++---------
 net/sched/sch_generic.c   |  2 +-
 net/sched/sch_mq.c        |  4 ++--
 net/sched/sch_mqprio.c    |  4 ++--
 5 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 444faa89a55fd..f1ad8f8fd4f19 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -61,6 +61,9 @@ struct Qdisc {
 				      */
 #define TCQ_F_WARN_NONWC	(1 << 16)
 #define TCQ_F_CPUSTATS		0x20 /* run using percpu statistics */
+#define TCQ_F_NOPARENT		0x40 /* root of its hierarchy :
+				      * qdisc_tree_decrease_qlen() should stop.
+				      */
 	u32			limit;
 	const struct Qdisc_ops	*ops;
 	struct qdisc_size_table	__rcu *stab;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f43c8f33f09ef..7ec667dd4ce1d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -253,7 +253,8 @@ int qdisc_set_default(const char *name)
 }
 
 /* We know handle. Find qdisc among all qdisc's attached to device
-   (root qdisc, all its children, children of children etc.)
+ * (root qdisc, all its children, children of children etc.)
+ * Note: caller either uses rtnl or rcu_read_lock()
  */
 
 static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
@@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
 	    root->handle == handle)
 		return root;
 
-	list_for_each_entry(q, &root->list, list) {
+	list_for_each_entry_rcu(q, &root->list, list) {
 		if (q->handle == handle)
 			return q;
 	}
@@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q)
 		struct Qdisc *root = qdisc_dev(q)->qdisc;
 
 		WARN_ON_ONCE(root == &noop_qdisc);
-		list_add_tail(&q->list, &root->list);
+		ASSERT_RTNL();
+		list_add_tail_rcu(&q->list, &root->list);
 	}
 }
 EXPORT_SYMBOL(qdisc_list_add);
 
 void qdisc_list_del(struct Qdisc *q)
 {
-	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
-		list_del(&q->list);
+	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
+		ASSERT_RTNL();
+		list_del_rcu(&q->list);
+	}
 }
 EXPORT_SYMBOL(qdisc_list_del);
 
@@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 	if (n == 0)
 		return;
 	drops = max_t(int, n, 0);
+	rcu_read_lock();
 	while ((parentid = sch->parent)) {
 		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
-			return;
+			break;
 
+		if (sch->flags & TCQ_F_NOPARENT)
+			break;
+		/* TODO: perform the search on a per txq basis */
 		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
 		if (sch == NULL) {
-			WARN_ON(parentid != TC_H_ROOT);
-			return;
+			WARN_ON_ONCE(parentid != TC_H_ROOT);
+			break;
 		}
 		cops = sch->ops->cl_ops;
 		if (cops->qlen_notify) {
@@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 		sch->q.qlen -= n;
 		__qdisc_qstats_drop(sch, drops);
 	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
@@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 		}
 		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 		if (!netif_is_multiqueue(dev))
-			sch->flags |= TCQ_F_ONETXQUEUE;
+			sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}
 
 	sch->handle = handle;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index cb5d4ad32946c..e82a1ad80aa52 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev,
 		return;
 	}
 	if (!netif_is_multiqueue(dev))
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	dev_queue->qdisc_sleeping = qdisc;
 }
 
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f3cbaecd283af..3e82f047caaf4 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt)
 		if (qdisc == NULL)
 			goto err;
 		priv->qdiscs[ntx] = qdisc;
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}
 
 	sch->flags |= TCQ_F_MQROOT;
@@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 
 	*old = dev_graft_qdisc(dev_queue, new);
 	if (new)
-		new->flags |= TCQ_F_ONETXQUEUE;
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);
 	return 0;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 3811a745452cf..ad70ecf57ce79 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
 			goto err;
 		}
 		priv->qdiscs[i] = qdisc;
-		qdisc->flags |= TCQ_F_ONETXQUEUE;
+		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	}
 
 	/* If the mqprio options indicate that hardware should own
@@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
 	*old = dev_graft_qdisc(dev_queue, new);
 
 	if (new)
-		new->flags |= TCQ_F_ONETXQUEUE;
+		new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 
 	if (dev->flags & IFF_UP)
 		dev_activate(dev);

From b855aa43eb1c47d6e10fd90df2f0fb8ab6423c12 Mon Sep 17 00:00:00 2001
From: Robin Ruede <r.ruede@gmail.com>
Date: Wed, 30 Sep 2015 21:23:33 +0200
Subject: [PATCH 174/201] btrfs: fix resending received snapshot with parent

commit b96b1db039ebc584d03a9933b279e0d3e704c528 upstream.

This fixes a regression introduced by 37b8d27d between v4.1 and v4.2.

When a snapshot is received, its received_uuid is set to the original
uuid of the subvolume. When that snapshot is then resent to a third
filesystem, it's received_uuid is set to the second uuid
instead of the original one. The same was true for the parent_uuid.
This behaviour was partially changed in 37b8d27d, but in that patch
only the parent_uuid was taken from the real original,
not the uuid itself, causing the search for the parent to fail in
the case below.

This happens for example when trying to send a series of linked
snapshots (e.g. created by snapper) from the backup file system back
to the original one.

The following commands reproduce the issue in v4.2.1
(no error in 4.1.6)

    # setup three test file systems
    for i in 1 2 3; do
	    truncate -s 50M fs$i
	    mkfs.btrfs fs$i
	    mkdir $i
	    mount fs$i $i
    done
    echo "content" > 1/testfile
    btrfs su snapshot -r 1/ 1/snap1
    echo "changed content" > 1/testfile
    btrfs su snapshot -r 1/ 1/snap2

    # works fine:
    btrfs send 1/snap1 | btrfs receive 2/
    btrfs send -p 1/snap1 1/snap2 | btrfs receive 2/

    # ERROR: could not find parent subvolume
    btrfs send 2/snap1 | btrfs receive 3/
    btrfs send -p 2/snap1 2/snap2 | btrfs receive 3/

Signed-off-by: Robin Ruede <rruede+git@gmail.com>
Fixes: 37b8d27de5d0 ("Btrfs: use received_uuid of parent during send")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Tested-by: Ed Tomlinson <edt@aei.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/send.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index a739b825bdd36..23bb2e4b911b3 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -2353,8 +2353,14 @@ static int send_subvol_begin(struct send_ctx *sctx)
 	}
 
 	TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen);
-	TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
-			sctx->send_root->root_item.uuid);
+
+	if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid))
+		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+			    sctx->send_root->root_item.received_uuid);
+	else
+		TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID,
+			    sctx->send_root->root_item.uuid);
+
 	TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID,
 		    le64_to_cpu(sctx->send_root->root_item.ctransid));
 	if (parent_root) {

From 8b15aa67a2a49cf332363186f1c8249bdf2059ca Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 13 Oct 2015 15:15:00 +0100
Subject: [PATCH 175/201] Btrfs: fix file corruption and data loss after
 cloning inline extents

commit 8039d87d9e473aeb740d4fdbd59b9d2f89b2ced9 upstream.

Currently the clone ioctl allows to clone an inline extent from one file
to another that already has other (non-inlined) extents. This is a problem
because btrfs is not designed to deal with files having inline and regular
extents, if a file has an inline extent then it must be the only extent
in the file and must start at file offset 0. Having a file with an inline
extent followed by regular extents results in EIO errors when doing reads
or writes against the first 4K of the file.

Also, the clone ioctl allows one to lose data if the source file consists
of a single inline extent, with a size of N bytes, and the destination
file consists of a single inline extent with a size of M bytes, where we
have M > N. In this case the clone operation removes the inline extent
from the destination file and then copies the inline extent from the
source file into the destination file - we lose the M - N bytes from the
destination file, a read operation will get the value 0x00 for any bytes
in the the range [N, M] (the destination inode's i_size remained as M,
that's why we can read past N bytes).

So fix this by not allowing such destructive operations to happen and
return errno EOPNOTSUPP to user space.

Currently the fstest btrfs/035 tests the data loss case but it totally
ignores this - i.e. expects the operation to succeed and does not check
the we got data loss.

The following test case for fstests exercises all these cases that result
in file corruption and data loss:

  seq=`basename $0`
  seqres=$RESULT_DIR/$seq
  echo "QA output created by $seq"
  tmp=/tmp/$$
  status=1	# failure is the default!
  trap "_cleanup; exit \$status" 0 1 2 3 15

  _cleanup()
  {
      rm -f $tmp.*
  }

  # get standard environment, filters and checks
  . ./common/rc
  . ./common/filter

  # real QA test starts here
  _need_to_be_root
  _supported_fs btrfs
  _supported_os Linux
  _require_scratch
  _require_cloner
  _require_btrfs_fs_feature "no_holes"
  _require_btrfs_mkfs_feature "no-holes"

  rm -f $seqres.full

  test_cloning_inline_extents()
  {
      local mkfs_opts=$1
      local mount_opts=$2

      _scratch_mkfs $mkfs_opts >>$seqres.full 2>&1
      _scratch_mount $mount_opts

      # File bar, the source for all the following clone operations, consists
      # of a single inline extent (50 bytes).
      $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 50" $SCRATCH_MNT/bar \
          | _filter_xfs_io

      # Test cloning into a file with an extent (non-inlined) where the
      # destination offset overlaps that extent. It should not be possible to
      # clone the inline extent from file bar into this file.
      $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 16K" $SCRATCH_MNT/foo \
          | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo

      # Doing IO against any range in the first 4K of the file should work.
      # Due to a past clone ioctl bug which allowed cloning the inline extent,
      # these operations resulted in EIO errors.
      echo "File foo data after clone operation:"
      # All bytes should have the value 0xaa (clone operation failed and did
      # not modify our file).
      od -t x1 $SCRATCH_MNT/foo
      $XFS_IO_PROG -c "pwrite -S 0xcc 0 100" $SCRATCH_MNT/foo | _filter_xfs_io

      # Test cloning the inline extent against a file which has a hole in its
      # first 4K followed by a non-inlined extent. It should not be possible
      # as well to clone the inline extent from file bar into this file.
      $XFS_IO_PROG -f -c "pwrite -S 0xdd 4K 12K" $SCRATCH_MNT/foo2 \
          | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo2

      # Doing IO against any range in the first 4K of the file should work.
      # Due to a past clone ioctl bug which allowed cloning the inline extent,
      # these operations resulted in EIO errors.
      echo "File foo2 data after clone operation:"
      # All bytes should have the value 0x00 (clone operation failed and did
      # not modify our file).
      od -t x1 $SCRATCH_MNT/foo2
      $XFS_IO_PROG -c "pwrite -S 0xee 0 90" $SCRATCH_MNT/foo2 | _filter_xfs_io

      # Test cloning the inline extent against a file which has a size of zero
      # but has a prealloc extent. It should not be possible as well to clone
      # the inline extent from file bar into this file.
      $XFS_IO_PROG -f -c "falloc -k 0 1M" $SCRATCH_MNT/foo3 | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo3

      # Doing IO against any range in the first 4K of the file should work.
      # Due to a past clone ioctl bug which allowed cloning the inline extent,
      # these operations resulted in EIO errors.
      echo "First 50 bytes of foo3 after clone operation:"
      # Should not be able to read any bytes, file has 0 bytes i_size (the
      # clone operation failed and did not modify our file).
      od -t x1 $SCRATCH_MNT/foo3
      $XFS_IO_PROG -c "pwrite -S 0xff 0 90" $SCRATCH_MNT/foo3 | _filter_xfs_io

      # Test cloning the inline extent against a file which consists of a
      # single inline extent that has a size not greater than the size of
      # bar's inline extent (40 < 50).
      # It should be possible to do the extent cloning from bar to this file.
      $XFS_IO_PROG -f -c "pwrite -S 0x01 0 40" $SCRATCH_MNT/foo4 \
          | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo4

      # Doing IO against any range in the first 4K of the file should work.
      echo "File foo4 data after clone operation:"
      # Must match file bar's content.
      od -t x1 $SCRATCH_MNT/foo4
      $XFS_IO_PROG -c "pwrite -S 0x02 0 90" $SCRATCH_MNT/foo4 | _filter_xfs_io

      # Test cloning the inline extent against a file which consists of a
      # single inline extent that has a size greater than the size of bar's
      # inline extent (60 > 50).
      # It should not be possible to clone the inline extent from file bar
      # into this file.
      $XFS_IO_PROG -f -c "pwrite -S 0x03 0 60" $SCRATCH_MNT/foo5 \
          | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo5

      # Reading the file should not fail.
      echo "File foo5 data after clone operation:"
      # Must have a size of 60 bytes, with all bytes having a value of 0x03
      # (the clone operation failed and did not modify our file).
      od -t x1 $SCRATCH_MNT/foo5

      # Test cloning the inline extent against a file which has no extents but
      # has a size greater than bar's inline extent (16K > 50).
      # It should not be possible to clone the inline extent from file bar
      # into this file.
      $XFS_IO_PROG -f -c "truncate 16K" $SCRATCH_MNT/foo6 | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo6

      # Reading the file should not fail.
      echo "File foo6 data after clone operation:"
      # Must have a size of 16K, with all bytes having a value of 0x00 (the
      # clone operation failed and did not modify our file).
      od -t x1 $SCRATCH_MNT/foo6

      # Test cloning the inline extent against a file which has no extents but
      # has a size not greater than bar's inline extent (30 < 50).
      # It should be possible to clone the inline extent from file bar into
      # this file.
      $XFS_IO_PROG -f -c "truncate 30" $SCRATCH_MNT/foo7 | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo7

      # Reading the file should not fail.
      echo "File foo7 data after clone operation:"
      # Must have a size of 50 bytes, with all bytes having a value of 0xbb.
      od -t x1 $SCRATCH_MNT/foo7

      # Test cloning the inline extent against a file which has a size not
      # greater than the size of bar's inline extent (20 < 50) but has
      # a prealloc extent that goes beyond the file's size. It should not be
      # possible to clone the inline extent from bar into this file.
      $XFS_IO_PROG -f -c "falloc -k 0 1M" \
                      -c "pwrite -S 0x88 0 20" \
                      $SCRATCH_MNT/foo8 | _filter_xfs_io
      $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo8

      echo "File foo8 data after clone operation:"
      # Must have a size of 20 bytes, with all bytes having a value of 0x88
      # (the clone operation did not modify our file).
      od -t x1 $SCRATCH_MNT/foo8

      _scratch_unmount
  }

  echo -e "\nTesting without compression and without the no-holes feature...\n"
  test_cloning_inline_extents

  echo -e "\nTesting with compression and without the no-holes feature...\n"
  test_cloning_inline_extents "" "-o compress"

  echo -e "\nTesting without compression and with the no-holes feature...\n"
  test_cloning_inline_extents "-O no-holes" ""

  echo -e "\nTesting with compression and with the no-holes feature...\n"
  test_cloning_inline_extents "-O no-holes" "-o compress"

  status=0
  exit

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/ioctl.c | 195 ++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 152 insertions(+), 43 deletions(-)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8d20f3b1cab0a..30dca32804eeb 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3328,6 +3328,150 @@ static void clone_update_extent_map(struct inode *inode,
 			&BTRFS_I(inode)->runtime_flags);
 }
 
+/*
+ * Make sure we do not end up inserting an inline extent into a file that has
+ * already other (non-inline) extents. If a file has an inline extent it can
+ * not have any other extents and the (single) inline extent must start at the
+ * file offset 0. Failing to respect these rules will lead to file corruption,
+ * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc
+ *
+ * We can have extents that have been already written to disk or we can have
+ * dirty ranges still in delalloc, in which case the extent maps and items are
+ * created only when we run delalloc, and the delalloc ranges might fall outside
+ * the range we are currently locking in the inode's io tree. So we check the
+ * inode's i_size because of that (i_size updates are done while holding the
+ * i_mutex, which we are holding here).
+ * We also check to see if the inode has a size not greater than "datal" but has
+ * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are
+ * protected against such concurrent fallocate calls by the i_mutex).
+ *
+ * If the file has no extents but a size greater than datal, do not allow the
+ * copy because we would need turn the inline extent into a non-inline one (even
+ * with NO_HOLES enabled). If we find our destination inode only has one inline
+ * extent, just overwrite it with the source inline extent if its size is less
+ * than the source extent's size, or we could copy the source inline extent's
+ * data into the destination inode's inline extent if the later is greater then
+ * the former.
+ */
+static int clone_copy_inline_extent(struct inode *src,
+				    struct inode *dst,
+				    struct btrfs_trans_handle *trans,
+				    struct btrfs_path *path,
+				    struct btrfs_key *new_key,
+				    const u64 drop_start,
+				    const u64 datal,
+				    const u64 skip,
+				    const u64 size,
+				    char *inline_data)
+{
+	struct btrfs_root *root = BTRFS_I(dst)->root;
+	const u64 aligned_end = ALIGN(new_key->offset + datal,
+				      root->sectorsize);
+	int ret;
+	struct btrfs_key key;
+
+	if (new_key->offset > 0)
+		return -EOPNOTSUPP;
+
+	key.objectid = btrfs_ino(dst);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = 0;
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		return ret;
+	} else if (ret > 0) {
+		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
+			ret = btrfs_next_leaf(root, path);
+			if (ret < 0)
+				return ret;
+			else if (ret > 0)
+				goto copy_inline_extent;
+		}
+		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+		if (key.objectid == btrfs_ino(dst) &&
+		    key.type == BTRFS_EXTENT_DATA_KEY) {
+			ASSERT(key.offset > 0);
+			return -EOPNOTSUPP;
+		}
+	} else if (i_size_read(dst) <= datal) {
+		struct btrfs_file_extent_item *ei;
+		u64 ext_len;
+
+		/*
+		 * If the file size is <= datal, make sure there are no other
+		 * extents following (can happen do to an fallocate call with
+		 * the flag FALLOC_FL_KEEP_SIZE).
+		 */
+		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+				    struct btrfs_file_extent_item);
+		/*
+		 * If it's an inline extent, it can not have other extents
+		 * following it.
+		 */
+		if (btrfs_file_extent_type(path->nodes[0], ei) ==
+		    BTRFS_FILE_EXTENT_INLINE)
+			goto copy_inline_extent;
+
+		ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
+		if (ext_len > aligned_end)
+			return -EOPNOTSUPP;
+
+		ret = btrfs_next_item(root, path);
+		if (ret < 0) {
+			return ret;
+		} else if (ret == 0) {
+			btrfs_item_key_to_cpu(path->nodes[0], &key,
+					      path->slots[0]);
+			if (key.objectid == btrfs_ino(dst) &&
+			    key.type == BTRFS_EXTENT_DATA_KEY)
+				return -EOPNOTSUPP;
+		}
+	}
+
+copy_inline_extent:
+	/*
+	 * We have no extent items, or we have an extent at offset 0 which may
+	 * or may not be inlined. All these cases are dealt the same way.
+	 */
+	if (i_size_read(dst) > datal) {
+		/*
+		 * If the destination inode has an inline extent...
+		 * This would require copying the data from the source inline
+		 * extent into the beginning of the destination's inline extent.
+		 * But this is really complex, both extents can be compressed
+		 * or just one of them, which would require decompressing and
+		 * re-compressing data (which could increase the new compressed
+		 * size, not allowing the compressed data to fit anymore in an
+		 * inline extent).
+		 * So just don't support this case for now (it should be rare,
+		 * we are not really saving space when cloning inline extents).
+		 */
+		return -EOPNOTSUPP;
+	}
+
+	btrfs_release_path(path);
+	ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
+	if (ret)
+		return ret;
+	ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
+	if (ret)
+		return ret;
+
+	if (skip) {
+		const u32 start = btrfs_file_extent_calc_inline_size(0);
+
+		memmove(inline_data + start, inline_data + start + skip, datal);
+	}
+
+	write_extent_buffer(path->nodes[0], inline_data,
+			    btrfs_item_ptr_offset(path->nodes[0],
+						  path->slots[0]),
+			    size);
+	inode_add_bytes(dst, datal);
+
+	return 0;
+}
+
 /**
  * btrfs_clone() - clone a range from inode file to another
  *
@@ -3594,21 +3738,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
 				u64 skip = 0;
 				u64 trim = 0;
-				u64 aligned_end = 0;
-
-				/*
-				 * Don't copy an inline extent into an offset
-				 * greater than zero. Having an inline extent
-				 * at such an offset results in chaos as btrfs
-				 * isn't prepared for such cases. Just skip
-				 * this case for the same reasons as commented
-				 * at btrfs_ioctl_clone().
-				 */
-				if (last_dest_end > 0) {
-					ret = -EOPNOTSUPP;
-					btrfs_end_transaction(trans, root);
-					goto out;
-				}
 
 				if (off > key.offset) {
 					skip = off - key.offset;
@@ -3626,42 +3755,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 				size -= skip + trim;
 				datal -= skip + trim;
 
-				aligned_end = ALIGN(new_key.offset + datal,
-						    root->sectorsize);
-				ret = btrfs_drop_extents(trans, root, inode,
-							 drop_start,
-							 aligned_end,
-							 1);
+				ret = clone_copy_inline_extent(src, inode,
+							       trans, path,
+							       &new_key,
+							       drop_start,
+							       datal,
+							       skip, size, buf);
 				if (ret) {
 					if (ret != -EOPNOTSUPP)
 						btrfs_abort_transaction(trans,
-							root, ret);
-					btrfs_end_transaction(trans, root);
-					goto out;
-				}
-
-				ret = btrfs_insert_empty_item(trans, root, path,
-							      &new_key, size);
-				if (ret) {
-					btrfs_abort_transaction(trans, root,
-								ret);
+									root,
+									ret);
 					btrfs_end_transaction(trans, root);
 					goto out;
 				}
-
-				if (skip) {
-					u32 start =
-					  btrfs_file_extent_calc_inline_size(0);
-					memmove(buf+start, buf+start+skip,
-						datal);
-				}
-
 				leaf = path->nodes[0];
 				slot = path->slots[0];
-				write_extent_buffer(leaf, buf,
-					    btrfs_item_ptr_offset(leaf, slot),
-					    size);
-				inode_add_bytes(inode, datal);
 			}
 
 			/* If we have an implicit hole (NO_HOLES feature). */

From 90291b48b1d907425d8741861fff1dfe4cf7156f Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 16 Oct 2015 12:34:25 +0100
Subject: [PATCH 176/201] Btrfs: fix truncation of compressed and inlined
 extents

commit 0305cd5f7fca85dae392b9ba85b116896eb7c1c7 upstream.

When truncating a file to a smaller size which consists of an inline
extent that is compressed, we did not discard (or made unusable) the
data between the new file size and the old file size, wasting metadata
space and allowing for the truncated data to be leaked and the data
corruption/loss mentioned below.
We were also not correctly decrementing the number of bytes used by the
inode, we were setting it to zero, giving a wrong report for callers of
the stat(2) syscall. The fsck tool also reported an error about a mismatch
between the nbytes of the file versus the real space used by the file.

Now because we weren't discarding the truncated region of the file, it
was possible for a caller of the clone ioctl to actually read the data
that was truncated, allowing for a security breach without requiring root
access to the system, using only standard filesystem operations. The
scenario is the following:

   1) User A creates a file which consists of an inline and compressed
      extent with a size of 2000 bytes - the file is not accessible to
      any other users (no read, write or execution permission for anyone
      else);

   2) The user truncates the file to a size of 1000 bytes;

   3) User A makes the file world readable;

   4) User B creates a file consisting of an inline extent of 2000 bytes;

   5) User B issues a clone operation from user A's file into its own
      file (using a length argument of 0, clone the whole range);

   6) User B now gets to see the 1000 bytes that user A truncated from
      its file before it made its file world readbale. User B also lost
      the bytes in the range [1000, 2000[ bytes from its own file, but
      that might be ok if his/her intention was reading stale data from
      user A that was never supposed to be public.

Note that this contrasts with the case where we truncate a file from 2000
bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In
this case reading any byte from the range [1000, 2000[ will return a value
of 0x00, instead of the original data.

This problem exists since the clone ioctl was added and happens both with
and without my recent data loss and file corruption fixes for the clone
ioctl (patch "Btrfs: fix file corruption and data loss after cloning
inline extents").

So fix this by truncating the compressed inline extents as we do for the
non-compressed case, which involves decompressing, if the data isn't already
in the page cache, compressing the truncated version of the extent, writing
the compressed content into the inline extent and then truncate it.

The following test case for fstests reproduces the problem. In order for
the test to pass both this fix and my previous fix for the clone ioctl
that forbids cloning a smaller inline extent into a larger one,
which is titled "Btrfs: fix file corruption and data loss after cloning
inline extents", are needed. Without that other fix the test fails in a
different way that does not leak the truncated data, instead part of
destination file gets replaced with zeroes (because the destination file
has a larger inline extent than the source).

  seq=`basename $0`
  seqres=$RESULT_DIR/$seq
  echo "QA output created by $seq"
  tmp=/tmp/$$
  status=1	# failure is the default!
  trap "_cleanup; exit \$status" 0 1 2 3 15

  _cleanup()
  {
      rm -f $tmp.*
  }

  # get standard environment, filters and checks
  . ./common/rc
  . ./common/filter

  # real QA test starts here
  _need_to_be_root
  _supported_fs btrfs
  _supported_os Linux
  _require_scratch
  _require_cloner

  rm -f $seqres.full

  _scratch_mkfs >>$seqres.full 2>&1
  _scratch_mount "-o compress"

  # Create our test files. File foo is going to be the source of a clone operation
  # and consists of a single inline extent with an uncompressed size of 512 bytes,
  # while file bar consists of a single inline extent with an uncompressed size of
  # 256 bytes. For our test's purpose, it's important that file bar has an inline
  # extent with a size smaller than foo's inline extent.
  $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128"   \
          -c "pwrite -S 0x2a 128 384" \
          $SCRATCH_MNT/foo | _filter_xfs_io
  $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io

  # Now durably persist all metadata and data. We do this to make sure that we get
  # on disk an inline extent with a size of 512 bytes for file foo.
  sync

  # Now truncate our file foo to a smaller size. Because it consists of a
  # compressed and inline extent, btrfs did not shrink the inline extent to the
  # new size (if the extent was not compressed, btrfs would shrink it to 128
  # bytes), it only updates the inode's i_size to 128 bytes.
  $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo

  # Now clone foo's inline extent into bar.
  # This clone operation should fail with errno EOPNOTSUPP because the source
  # file consists only of an inline extent and the file's size is smaller than
  # the inline extent of the destination (128 bytes < 256 bytes). However the
  # clone ioctl was not prepared to deal with a file that has a size smaller
  # than the size of its inline extent (something that happens only for compressed
  # inline extents), resulting in copying the full inline extent from the source
  # file into the destination file.
  #
  # Note that btrfs' clone operation for inline extents consists of removing the
  # inline extent from the destination inode and copy the inline extent from the
  # source inode into the destination inode, meaning that if the destination
  # inode's inline extent is larger (N bytes) than the source inode's inline
  # extent (M bytes), some bytes (N - M bytes) will be lost from the destination
  # file. Btrfs could copy the source inline extent's data into the destination's
  # inline extent so that we would not lose any data, but that's currently not
  # done due to the complexity that would be needed to deal with such cases
  # (specially when one or both extents are compressed), returning EOPNOTSUPP, as
  # it's normally not a very common case to clone very small files (only case
  # where we get inline extents) and copying inline extents does not save any
  # space (unlike for normal, non-inlined extents).
  $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar

  # Now because the above clone operation used to succeed, and due to foo's inline
  # extent not being shinked by the truncate operation, our file bar got the whole
  # inline extent copied from foo, making us lose the last 128 bytes from bar
  # which got replaced by the bytes in range [128, 256[ from foo before foo was
  # truncated - in other words, data loss from bar and being able to read old and
  # stale data from foo that should not be possible to read anymore through normal
  # filesystem operations. Contrast with the case where we truncate a file from a
  # size N to a smaller size M, truncate it back to size N and then read the range
  # [M, N[, we should always get the value 0x00 for all the bytes in that range.

  # We expected the clone operation to fail with errno EOPNOTSUPP and therefore
  # not modify our file's bar data/metadata. So its content should be 256 bytes
  # long with all bytes having the value 0xbb.
  #
  # Without the btrfs bug fix, the clone operation succeeded and resulted in
  # leaking truncated data from foo, the bytes that belonged to its range
  # [128, 256[, and losing data from bar in that same range. So reading the
  # file gave us the following content:
  #
  # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1
  # *
  # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a
  # *
  # 0000400
  echo "File bar's content after the clone operation:"
  od -t x1 $SCRATCH_MNT/bar

  # Also because the foo's inline extent was not shrunk by the truncate
  # operation, btrfs' fsck, which is run by the fstests framework everytime a
  # test completes, failed reporting the following error:
  #
  #  root 5 inode 257 errors 400, nbytes wrong

  status=0
  exit

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c | 82 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 68 insertions(+), 14 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 611b66d73e80b..1141090423e7e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4217,6 +4217,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
 
 }
 
+static int truncate_inline_extent(struct inode *inode,
+				  struct btrfs_path *path,
+				  struct btrfs_key *found_key,
+				  const u64 item_end,
+				  const u64 new_size)
+{
+	struct extent_buffer *leaf = path->nodes[0];
+	int slot = path->slots[0];
+	struct btrfs_file_extent_item *fi;
+	u32 size = (u32)(new_size - found_key->offset);
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+
+	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+	if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
+		loff_t offset = new_size;
+		loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+
+		/*
+		 * Zero out the remaining of the last page of our inline extent,
+		 * instead of directly truncating our inline extent here - that
+		 * would be much more complex (decompressing all the data, then
+		 * compressing the truncated data, which might be bigger than
+		 * the size of the inline extent, resize the extent, etc).
+		 * We release the path because to get the page we might need to
+		 * read the extent item from disk (data not in the page cache).
+		 */
+		btrfs_release_path(path);
+		return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+	}
+
+	btrfs_set_file_extent_ram_bytes(leaf, fi, size);
+	size = btrfs_file_extent_calc_inline_size(size);
+	btrfs_truncate_item(root, path, size, 1);
+
+	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
+		inode_sub_bytes(inode, item_end + 1 - new_size);
+
+	return 0;
+}
+
 /*
  * this can truncate away extent items, csum items and directory items.
  * It starts at a high offset and removes keys until it can't find
@@ -4411,27 +4452,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			 * special encodings
 			 */
 			if (!del_item &&
-			    btrfs_file_extent_compression(leaf, fi) == 0 &&
 			    btrfs_file_extent_encryption(leaf, fi) == 0 &&
 			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {
-				u32 size = new_size - found_key.offset;
-
-				if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
-					inode_sub_bytes(inode, item_end + 1 -
-							new_size);
 
 				/*
-				 * update the ram bytes to properly reflect
-				 * the new size of our item
+				 * Need to release path in order to truncate a
+				 * compressed extent. So delete any accumulated
+				 * extent items so far.
 				 */
-				btrfs_set_file_extent_ram_bytes(leaf, fi, size);
-				size =
-				    btrfs_file_extent_calc_inline_size(size);
-				btrfs_truncate_item(root, path, size, 1);
+				if (btrfs_file_extent_compression(leaf, fi) !=
+				    BTRFS_COMPRESS_NONE && pending_del_nr) {
+					err = btrfs_del_items(trans, root, path,
+							      pending_del_slot,
+							      pending_del_nr);
+					if (err) {
+						btrfs_abort_transaction(trans,
+									root,
+									err);
+						goto error;
+					}
+					pending_del_nr = 0;
+				}
+
+				err = truncate_inline_extent(inode, path,
+							     &found_key,
+							     item_end,
+							     new_size);
+				if (err) {
+					btrfs_abort_transaction(trans,
+								root, err);
+					goto error;
+				}
 			} else if (test_bit(BTRFS_ROOT_REF_COWS,
 					    &root->state)) {
-				inode_sub_bytes(inode, item_end + 1 -
-						found_key.offset);
+				inode_sub_bytes(inode, item_end + 1 - new_size);
 			}
 		}
 delete:

From fcb184a168452e315676f306b054c908e8ff94ab Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Thu, 22 Oct 2015 09:47:34 +0100
Subject: [PATCH 177/201] Btrfs: fix regression when running delayed references
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 2c3cf7d5f6105bb957df125dfce61d4483b8742d upstream.

In the kernel 4.2 merge window we had a refactoring/rework of the delayed
references implementation in order to fix certain problems with qgroups.
However that rework introduced one more regression that leads to the
following trace when running delayed references for metadata:

[35908.064664] kernel BUG at fs/btrfs/extent-tree.c:1832!
[35908.065201] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC
[35908.065201] Modules linked in: dm_flakey dm_mod btrfs crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc psmouse i2
[35908.065201] CPU: 14 PID: 15014 Comm: kworker/u32:9 Tainted: G        W       4.3.0-rc5-btrfs-next-17+ #1
[35908.065201] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014
[35908.065201] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs]
[35908.065201] task: ffff880114b7d780 ti: ffff88010c4c8000 task.ti: ffff88010c4c8000
[35908.065201] RIP: 0010:[<ffffffffa04928b5>]  [<ffffffffa04928b5>] insert_inline_extent_backref+0x52/0xb1 [btrfs]
[35908.065201] RSP: 0018:ffff88010c4cbb08  EFLAGS: 00010293
[35908.065201] RAX: 0000000000000000 RBX: ffff88008a661000 RCX: 0000000000000000
[35908.065201] RDX: ffffffffa04dd58f RSI: 0000000000000001 RDI: 0000000000000000
[35908.065201] RBP: ffff88010c4cbb40 R08: 0000000000001000 R09: ffff88010c4cb9f8
[35908.065201] R10: 0000000000000000 R11: 000000000000002c R12: 0000000000000000
[35908.065201] R13: ffff88020a74c578 R14: 0000000000000000 R15: 0000000000000000
[35908.065201] FS:  0000000000000000(0000) GS:ffff88023edc0000(0000) knlGS:0000000000000000
[35908.065201] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[35908.065201] CR2: 00000000015e8708 CR3: 0000000102185000 CR4: 00000000000006e0
[35908.065201] Stack:
[35908.065201]  ffff88010c4cbb18 0000000000000f37 ffff88020a74c578 ffff88015a408000
[35908.065201]  ffff880154a44000 0000000000000000 0000000000000005 ffff88010c4cbbd8
[35908.065201]  ffffffffa0492b9a 0000000000000005 0000000000000000 0000000000000000
[35908.065201] Call Trace:
[35908.065201]  [<ffffffffa0492b9a>] __btrfs_inc_extent_ref+0x8b/0x208 [btrfs]
[35908.065201]  [<ffffffffa0497117>] ? __btrfs_run_delayed_refs+0x4d4/0xd33 [btrfs]
[35908.065201]  [<ffffffffa049773d>] __btrfs_run_delayed_refs+0xafa/0xd33 [btrfs]
[35908.065201]  [<ffffffffa04a976a>] ? join_transaction.isra.10+0x25/0x41f [btrfs]
[35908.065201]  [<ffffffffa04a97ed>] ? join_transaction.isra.10+0xa8/0x41f [btrfs]
[35908.065201]  [<ffffffffa049914d>] btrfs_run_delayed_refs+0x75/0x1dd [btrfs]
[35908.065201]  [<ffffffffa04992f1>] delayed_ref_async_start+0x3c/0x7b [btrfs]
[35908.065201]  [<ffffffffa04d4b4f>] normal_work_helper+0x14c/0x32a [btrfs]
[35908.065201]  [<ffffffffa04d4e93>] btrfs_extent_refs_helper+0x12/0x14 [btrfs]
[35908.065201]  [<ffffffff81063b23>] process_one_work+0x24a/0x4ac
[35908.065201]  [<ffffffff81064285>] worker_thread+0x206/0x2c2
[35908.065201]  [<ffffffff8106407f>] ? rescuer_thread+0x2cb/0x2cb
[35908.065201]  [<ffffffff8106407f>] ? rescuer_thread+0x2cb/0x2cb
[35908.065201]  [<ffffffff8106904d>] kthread+0xef/0xf7
[35908.065201]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
[35908.065201]  [<ffffffff8147d10f>] ret_from_fork+0x3f/0x70
[35908.065201]  [<ffffffff81068f5e>] ? kthread_parkme+0x24/0x24
[35908.065201] Code: 6a 01 41 56 41 54 ff 75 10 41 51 4d 89 c1 49 89 c8 48 8d 4d d0 e8 f6 f1 ff ff 48 83 c4 28 85 c0 75 2c 49 81 fc ff 00 00 00 77 02 <0f> 0b 4c 8b 45 30 8b 4d 28 45 31
[35908.065201] RIP  [<ffffffffa04928b5>] insert_inline_extent_backref+0x52/0xb1 [btrfs]
[35908.065201]  RSP <ffff88010c4cbb08>
[35908.310885] ---[ end trace fe4299baf0666457 ]---

This happens because the new delayed references code no longer merges
delayed references that have different sequence values. The following
steps are an example sequence leading to this issue:

1) Transaction N starts, fs_info->tree_mod_seq has value 0;

2) Extent buffer (btree node) A is allocated, delayed reference Ref1 for
   bytenr A is created, with a value of 1 and a seq value of 0;

3) fs_info->tree_mod_seq is incremented to 1;

4) Extent buffer A is deleted through btrfs_del_items(), which calls
   btrfs_del_leaf(), which in turn calls btrfs_free_tree_block(). The
   later returns the metadata extent associated to extent buffer A to
   the free space cache (the range is not pinned), because the extent
   buffer was created in the current transaction (N) and writeback never
   happened for the extent buffer (flag BTRFS_HEADER_FLAG_WRITTEN not set
   in the extent buffer).
   This creates the delayed reference Ref2 for bytenr A, with a value
   of -1 and a seq value of 1;

5) Delayed reference Ref2 is not merged with Ref1 when we create it,
   because they have different sequence numbers (decided at
   add_delayed_ref_tail_merge());

6) fs_info->tree_mod_seq is incremented to 2;

7) Some task attempts to allocate a new extent buffer (done at
   extent-tree.c:find_free_extent()), but due to heavy fragmentation
   and running low on metadata space the clustered allocation fails
   and we fall back to unclustered allocation, which finds the
   extent at offset A, so a new extent buffer at offset A is allocated.
   This creates delayed reference Ref3 for bytenr A, with a value of 1
   and a seq value of 2;

8) Ref3 is not merged neither with Ref2 nor Ref1, again because they
   all have different seq values;

9) We start running the delayed references (__btrfs_run_delayed_refs());

10) The delayed Ref1 is the first one being applied, which ends up
    creating an inline extent backref in the extent tree;

10) Next the delayed reference Ref3 is selected for execution, and not
    Ref2, because select_delayed_ref() always gives a preference for
    positive references (that have an action of BTRFS_ADD_DELAYED_REF);

11) When running Ref3 we encounter alreay the inline extent backref
    in the extent tree at insert_inline_extent_backref(), which makes
    us hit the following BUG_ON:

        BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);

    This is always true because owner corresponds to the level of the
    extent buffer/btree node in the btree.

For the scenario described above we hit the BUG_ON because we never merge
references that have different seq values.

We used to do the merging before the 4.2 kernel, more specifically, before
the commmits:

  c6fc24549960 ("btrfs: delayed-ref: Use list to replace the ref_root in ref_head.")
  c43d160fcd5e ("btrfs: delayed-ref: Cleanup the unneeded functions.")

This issue became more exposed after the following change that was added
to 4.2 as well:

  cffc3374e567 ("Btrfs: fix order by which delayed references are run")

Which in turn fixed another regression by the two commits previously
mentioned.

So fix this by bringing back the delayed reference merge code, with the
proper adaptations so that it operates against the new data structure
(linked list vs old red black tree implementation).

This issue was hit running fstest btrfs/063 in a loop. Several people have
reported this issue in the mailing list when running on kernels 4.2+.

Very special thanks to Stéphane Lesimple for helping debugging this issue
and testing this fix on his multi terabyte filesystem (which took more
than one day to balance alone, plus fsck, etc).

Fixes: c6fc24549960 ("btrfs: delayed-ref: Use list to replace the ref_root in ref_head.")
Reported-by: Peter Becker <floyd.net@gmail.com>
Reported-by: Stéphane Lesimple <stephane_btrfs@lesimple.fr>
Tested-by: Stéphane Lesimple <stephane_btrfs@lesimple.fr>
Reported-by: Malte Schröder <malte@tnxip.de>
Reported-by: Derek Dongray <derek@valedon.co.uk>
Reported-by: Erkki Seppala <flux-btrfs@inside.org>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/delayed-ref.c | 113 +++++++++++++++++++++++++++++++++++++++++
 fs/btrfs/extent-tree.c |  14 +++++
 2 files changed, 127 insertions(+)

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ac3e81da6d4ed..4832943aaa5cd 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
 		trans->delayed_ref_updates--;
 }
 
+static bool merge_ref(struct btrfs_trans_handle *trans,
+		      struct btrfs_delayed_ref_root *delayed_refs,
+		      struct btrfs_delayed_ref_head *head,
+		      struct btrfs_delayed_ref_node *ref,
+		      u64 seq)
+{
+	struct btrfs_delayed_ref_node *next;
+	bool done = false;
+
+	next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
+				list);
+	while (!done && &next->list != &head->ref_list) {
+		int mod;
+		struct btrfs_delayed_ref_node *next2;
+
+		next2 = list_next_entry(next, list);
+
+		if (next == ref)
+			goto next;
+
+		if (seq && next->seq >= seq)
+			goto next;
+
+		if (next->type != ref->type || next->no_quota != ref->no_quota)
+			goto next;
+
+		if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+		     ref->type == BTRFS_SHARED_BLOCK_REF_KEY) &&
+		    comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref),
+				   btrfs_delayed_node_to_tree_ref(next),
+				   ref->type))
+			goto next;
+		if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY ||
+		     ref->type == BTRFS_SHARED_DATA_REF_KEY) &&
+		    comp_data_refs(btrfs_delayed_node_to_data_ref(ref),
+				   btrfs_delayed_node_to_data_ref(next)))
+			goto next;
+
+		if (ref->action == next->action) {
+			mod = next->ref_mod;
+		} else {
+			if (ref->ref_mod < next->ref_mod) {
+				swap(ref, next);
+				done = true;
+			}
+			mod = -next->ref_mod;
+		}
+
+		drop_delayed_ref(trans, delayed_refs, head, next);
+		ref->ref_mod += mod;
+		if (ref->ref_mod == 0) {
+			drop_delayed_ref(trans, delayed_refs, head, ref);
+			done = true;
+		} else {
+			/*
+			 * Can't have multiples of the same ref on a tree block.
+			 */
+			WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
+				ref->type == BTRFS_SHARED_BLOCK_REF_KEY);
+		}
+next:
+		next = next2;
+	}
+
+	return done;
+}
+
+void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans,
+			      struct btrfs_fs_info *fs_info,
+			      struct btrfs_delayed_ref_root *delayed_refs,
+			      struct btrfs_delayed_ref_head *head)
+{
+	struct btrfs_delayed_ref_node *ref;
+	u64 seq = 0;
+
+	assert_spin_locked(&head->lock);
+
+	if (list_empty(&head->ref_list))
+		return;
+
+	/* We don't have too many refs to merge for data. */
+	if (head->is_data)
+		return;
+
+	spin_lock(&fs_info->tree_mod_seq_lock);
+	if (!list_empty(&fs_info->tree_mod_seq_list)) {
+		struct seq_list *elem;
+
+		elem = list_first_entry(&fs_info->tree_mod_seq_list,
+					struct seq_list, list);
+		seq = elem->seq;
+	}
+	spin_unlock(&fs_info->tree_mod_seq_lock);
+
+	ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node,
+			       list);
+	while (&ref->list != &head->ref_list) {
+		if (seq && ref->seq >= seq)
+			goto next;
+
+		if (merge_ref(trans, delayed_refs, head, ref, seq)) {
+			if (list_empty(&head->ref_list))
+				break;
+			ref = list_first_entry(&head->ref_list,
+					       struct btrfs_delayed_ref_node,
+					       list);
+			continue;
+		}
+next:
+		ref = list_next_entry(ref, list);
+	}
+}
+
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 			    struct btrfs_delayed_ref_root *delayed_refs,
 			    u64 seq)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 601d7d45d164a..00158bfd1771e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2433,7 +2433,21 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 			}
 		}
 
+		/*
+		 * We need to try and merge add/drops of the same ref since we
+		 * can run into issues with relocate dropping the implicit ref
+		 * and then it being added back again before the drop can
+		 * finish.  If we merged anything we need to re-loop so we can
+		 * get a good ref.
+		 * Or we can get node references of the same type that weren't
+		 * merged when created due to bumps in the tree mod seq, and
+		 * we need to merge them to prevent adding an inline extent
+		 * backref before dropping it (triggering a BUG_ON at
+		 * insert_inline_extent_backref()).
+		 */
 		spin_lock(&locked_ref->lock);
+		btrfs_merge_delayed_refs(trans, fs_info, delayed_refs,
+					 locked_ref);
 
 		/*
 		 * locked_ref is the head node, so we have to go one

From 280bd68e66a0fe027ee186a01a9edd869700a295 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 6 Nov 2015 13:33:33 +0000
Subject: [PATCH 178/201] Btrfs: fix race leading to incorrect item deletion
 when dropping extents

commit aeafbf8486c9e2bd53f5cc3c10c0b7fd7149d69c upstream.

While running a stress test I got the following warning triggered:

  [191627.672810] ------------[ cut here ]------------
  [191627.673949] WARNING: CPU: 8 PID: 8447 at fs/btrfs/file.c:779 __btrfs_drop_extents+0x391/0xa50 [btrfs]()
  (...)
  [191627.701485] Call Trace:
  [191627.702037]  [<ffffffff8145f077>] dump_stack+0x4f/0x7b
  [191627.702992]  [<ffffffff81095de5>] ? console_unlock+0x356/0x3a2
  [191627.704091]  [<ffffffff8104b3b0>] warn_slowpath_common+0xa1/0xbb
  [191627.705380]  [<ffffffffa0664499>] ? __btrfs_drop_extents+0x391/0xa50 [btrfs]
  [191627.706637]  [<ffffffff8104b46d>] warn_slowpath_null+0x1a/0x1c
  [191627.707789]  [<ffffffffa0664499>] __btrfs_drop_extents+0x391/0xa50 [btrfs]
  [191627.709155]  [<ffffffff8115663c>] ? cache_alloc_debugcheck_after.isra.32+0x171/0x1d0
  [191627.712444]  [<ffffffff81155007>] ? kmemleak_alloc_recursive.constprop.40+0x16/0x18
  [191627.714162]  [<ffffffffa06570c9>] insert_reserved_file_extent.constprop.40+0x83/0x24e [btrfs]
  [191627.715887]  [<ffffffffa065422b>] ? start_transaction+0x3bb/0x610 [btrfs]
  [191627.717287]  [<ffffffffa065b604>] btrfs_finish_ordered_io+0x273/0x4e2 [btrfs]
  [191627.728865]  [<ffffffffa065b888>] finish_ordered_fn+0x15/0x17 [btrfs]
  [191627.730045]  [<ffffffffa067d688>] normal_work_helper+0x14c/0x32c [btrfs]
  [191627.731256]  [<ffffffffa067d96a>] btrfs_endio_write_helper+0x12/0x14 [btrfs]
  [191627.732661]  [<ffffffff81061119>] process_one_work+0x24c/0x4ae
  [191627.733822]  [<ffffffff810615b0>] worker_thread+0x206/0x2c2
  [191627.734857]  [<ffffffff810613aa>] ? process_scheduled_works+0x2f/0x2f
  [191627.736052]  [<ffffffff810613aa>] ? process_scheduled_works+0x2f/0x2f
  [191627.737349]  [<ffffffff810669a6>] kthread+0xef/0xf7
  [191627.738267]  [<ffffffff810f3b3a>] ? time_hardirqs_on+0x15/0x28
  [191627.739330]  [<ffffffff810668b7>] ? __kthread_parkme+0xad/0xad
  [191627.741976]  [<ffffffff81465592>] ret_from_fork+0x42/0x70
  [191627.743080]  [<ffffffff810668b7>] ? __kthread_parkme+0xad/0xad
  [191627.744206] ---[ end trace bbfddacb7aaada8d ]---

  $ cat -n fs/btrfs/file.c
  691  int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
  (...)
  758                  btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
  759                  if (key.objectid > ino ||
  760                      key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
  761                          break;
  762
  763                  fi = btrfs_item_ptr(leaf, path->slots[0],
  764                                      struct btrfs_file_extent_item);
  765                  extent_type = btrfs_file_extent_type(leaf, fi);
  766
  767                  if (extent_type == BTRFS_FILE_EXTENT_REG ||
  768                      extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
  (...)
  774                  } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
  (...)
  778                  } else {
  779                          WARN_ON(1);
  780                          extent_end = search_start;
  781                  }
  (...)

This happened because the item we were processing did not match a file
extent item (its key type != BTRFS_EXTENT_DATA_KEY), and even on this
case we cast the item to a struct btrfs_file_extent_item pointer and
then find a type field value that does not match any of the expected
values (BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]). This scenario happens
due to a tiny time window where a race can happen as exemplified below.
For example, consider the following scenario where we're using the
NO_HOLES feature and we have the following two neighbour leafs:

               Leaf X (has N items)                    Leaf Y

[ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ]  [ (257 EXTENT_DATA 8192), ... ]
          slot N - 2         slot N - 1              slot 0

Our inode 257 has an implicit hole in the range [0, 8K[ (implicit rather
than explicit because NO_HOLES is enabled). Now if our inode has an
ordered extent for the range [4K, 8K[ that is finishing, the following
can happen:

          CPU 1                                       CPU 2

  btrfs_finish_ordered_io()
    insert_reserved_file_extent()
      __btrfs_drop_extents()
         Searches for the key
          (257 EXTENT_DATA 4096) through
          btrfs_lookup_file_extent()

         Key not found and we get a path where
         path->nodes[0] == leaf X and
         path->slots[0] == N

         Because path->slots[0] is >=
         btrfs_header_nritems(leaf X), we call
         btrfs_next_leaf()

         btrfs_next_leaf() releases the path

                                                  inserts key
                                                  (257 INODE_REF 4096)
                                                  at the end of leaf X,
                                                  leaf X now has N + 1 keys,
                                                  and the new key is at
                                                  slot N

         btrfs_next_leaf() searches for
         key (257 INODE_REF 256), with
         path->keep_locks set to 1,
         because it was the last key it
         saw in leaf X

           finds it in leaf X again and
           notices it's no longer the last
           key of the leaf, so it returns 0
           with path->nodes[0] == leaf X and
           path->slots[0] == N (which is now
           < btrfs_header_nritems(leaf X)),
           pointing to the new key
           (257 INODE_REF 4096)

         __btrfs_drop_extents() casts the
         item at path->nodes[0], slot
         path->slots[0], to a struct
         btrfs_file_extent_item - it does
         not skip keys for the target
         inode with a type less than
         BTRFS_EXTENT_DATA_KEY
         (BTRFS_INODE_REF_KEY < BTRFS_EXTENT_DATA_KEY)

         sees a bogus value for the type
         field triggering the WARN_ON in
         the trace shown above, and sets
         extent_end = search_start (4096)

         does the if-then-else logic to
         fixup 0 length extent items created
         by a past bug from hole punching:

           if (extent_end == key.offset &&
               extent_end >= search_start)
               goto delete_extent_item;

         that evaluates to true and it ends
         up deleting the key pointed to by
         path->slots[0], (257 INODE_REF 4096),
         from leaf X

The same could happen for example for a xattr that ends up having a key
with an offset value that matches search_start (very unlikely but not
impossible).

So fix this by ensuring that keys smaller than BTRFS_EXTENT_DATA_KEY are
skipped, never casted to struct btrfs_file_extent_item and never deleted
by accident. Also protect against the unexpected case of getting a key
for a lower inode number by skipping that key and issuing a warning.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/file.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 8c6f247ba81d4..10b1471e17905 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 		}
 
 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
-		if (key.objectid > ino ||
-		    key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+
+		if (key.objectid > ino)
+			break;
+		if (WARN_ON_ONCE(key.objectid < ino) ||
+		    key.type < BTRFS_EXTENT_DATA_KEY) {
+			ASSERT(del_nr == 0);
+			path->slots[0]++;
+			goto next_slot;
+		}
+		if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
 			break;
 
 		fi = btrfs_item_ptr(leaf, path->slots[0],
@@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 				btrfs_file_extent_inline_len(leaf,
 						     path->slots[0], fi);
 		} else {
-			WARN_ON(1);
-			extent_end = search_start;
+			/* can't happen */
+			BUG();
 		}
 
 		/*

From 42c6cb15b97363f55f34f4845fe2c1503a9558a4 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 9 Nov 2015 00:33:58 +0000
Subject: [PATCH 179/201] Btrfs: fix race leading to BUG_ON when running
 delalloc for nodatacow

commit 1d512cb77bdbda80f0dd0620a3b260d697fd581d upstream.

If we are using the NO_HOLES feature, we have a tiny time window when
running delalloc for a nodatacow inode where we can race with a concurrent
link or xattr add operation leading to a BUG_ON.

This happens because at run_delalloc_nocow() we end up casting a leaf item
of type BTRFS_INODE_[REF|EXTREF]_KEY or of type BTRFS_XATTR_ITEM_KEY to a
file extent item (struct btrfs_file_extent_item) and then analyse its
extent type field, which won't match any of the expected extent types
(values BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]) and therefore trigger an
explicit BUG_ON(1).

The following sequence diagram shows how the race happens when running a
no-cow dellaloc range [4K, 8K[ for inode 257 and we have the following
neighbour leafs:

             Leaf X (has N items)                    Leaf Y

 [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ]  [ (257 EXTENT_DATA 8192), ... ]
              slot N - 2         slot N - 1              slot 0

 (Note the implicit hole for inode 257 regarding the [0, 8K[ range)

       CPU 1                                         CPU 2

 run_dealloc_nocow()
   btrfs_lookup_file_extent()
     --> searches for a key with value
         (257 EXTENT_DATA 4096) in the
         fs/subvol tree
     --> returns us a path with
         path->nodes[0] == leaf X and
         path->slots[0] == N

   because path->slots[0] is >=
   btrfs_header_nritems(leaf X), it
   calls btrfs_next_leaf()

   btrfs_next_leaf()
     --> releases the path

                                              hard link added to our inode,
                                              with key (257 INODE_REF 500)
                                              added to the end of leaf X,
                                              so leaf X now has N + 1 keys

     --> searches for the key
         (257 INODE_REF 256), because
         it was the last key in leaf X
         before it released the path,
         with path->keep_locks set to 1

     --> ends up at leaf X again and
         it verifies that the key
         (257 INODE_REF 256) is no longer
         the last key in the leaf, so it
         returns with path->nodes[0] ==
         leaf X and path->slots[0] == N,
         pointing to the new item with
         key (257 INODE_REF 500)

   the loop iteration of run_dealloc_nocow()
   does not break out the loop and continues
   because the key referenced in the path
   at path->nodes[0] and path->slots[0] is
   for inode 257, its type is < BTRFS_EXTENT_DATA_KEY
   and its offset (500) is less then our delalloc
   range's end (8192)

   the item pointed by the path, an inode reference item,
   is (incorrectly) interpreted as a file extent item and
   we get an invalid extent type, leading to the BUG_ON(1):

   if (extent_type == BTRFS_FILE_EXTENT_REG ||
      extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
       (...)
   } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
       (...)
   } else {
       BUG_ON(1)
   }

The same can happen if a xattr is added concurrently and ends up having
a key with an offset smaller then the delalloc's range end.

So fix this by skipping keys with a type smaller than
BTRFS_EXTENT_DATA_KEY.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1141090423e7e..e0c17e072a7a7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1294,8 +1294,14 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 		num_bytes = 0;
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
-		if (found_key.objectid > ino ||
-		    found_key.type > BTRFS_EXTENT_DATA_KEY ||
+		if (found_key.objectid > ino)
+			break;
+		if (WARN_ON_ONCE(found_key.objectid < ino) ||
+		    found_key.type < BTRFS_EXTENT_DATA_KEY) {
+			path->slots[0]++;
+			goto next_slot;
+		}
+		if (found_key.type > BTRFS_EXTENT_DATA_KEY ||
 		    found_key.offset > end)
 			break;
 

From 82c82fadbd75f62bfb9d83eb3a308b55f4c4507d Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 9 Nov 2015 18:06:38 +0000
Subject: [PATCH 180/201] Btrfs: fix race when listing an inode's xattrs

commit f1cd1f0b7d1b5d4aaa5711e8f4e4898b0045cb6d upstream.

When listing a inode's xattrs we have a time window where we race against
a concurrent operation for adding a new hard link for our inode that makes
us not return any xattr to user space. In order for this to happen, the
first xattr of our inode needs to be at slot 0 of a leaf and the previous
leaf must still have room for an inode ref (or extref) item, and this can
happen because an inode's listxattrs callback does not lock the inode's
i_mutex (nor does the VFS does it for us), but adding a hard link to an
inode makes the VFS lock the inode's i_mutex before calling the inode's
link callback.

If we have the following leafs:

               Leaf X (has N items)                    Leaf Y

 [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ]  [ (257 XATTR_ITEM 12345), ... ]
           slot N - 2         slot N - 1              slot 0

The race illustrated by the following sequence diagram is possible:

       CPU 1                                               CPU 2

  btrfs_listxattr()

    searches for key (257 XATTR_ITEM 0)

    gets path with path->nodes[0] == leaf X
    and path->slots[0] == N

    because path->slots[0] is >=
    btrfs_header_nritems(leaf X), it calls
    btrfs_next_leaf()

    btrfs_next_leaf()
      releases the path

                                                   adds key (257 INODE_REF 666)
                                                   to the end of leaf X (slot N),
                                                   and leaf X now has N + 1 items

      searches for the key (257 INODE_REF 256),
      with path->keep_locks == 1, because that
      is the last key it saw in leaf X before
      releasing the path

      ends up at leaf X again and it verifies
      that the key (257 INODE_REF 256) is no
      longer the last key in leaf X, so it
      returns with path->nodes[0] == leaf X
      and path->slots[0] == N, pointing to
      the new item with key (257 INODE_REF 666)

    btrfs_listxattr's loop iteration sees that
    the type of the key pointed by the path is
    different from the type BTRFS_XATTR_ITEM_KEY
    and so it breaks the loop and stops looking
    for more xattr items
      --> the application doesn't get any xattr
          listed for our inode

So fix this by breaking the loop only if the key's type is greater than
BTRFS_XATTR_ITEM_KEY and skip the current key if its type is smaller.

Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/xattr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 6f518c90e1c14..1fcd7b6e7564d 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		/* check to make sure this item is what we want */
 		if (found_key.objectid != key.objectid)
 			break;
-		if (found_key.type != BTRFS_XATTR_ITEM_KEY)
+		if (found_key.type > BTRFS_XATTR_ITEM_KEY)
 			break;
+		if (found_key.type < BTRFS_XATTR_ITEM_KEY)
+			goto next;
 
 		di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
 		if (verify_dir_item(root, leaf, di))

From f0009492b51e67fc498ab6bcc127c9b418806e71 Mon Sep 17 00:00:00 2001
From: David Sterba <dsterba@suse.com>
Date: Mon, 9 Nov 2015 11:44:45 +0100
Subject: [PATCH 181/201] btrfs: fix signed overflows in btrfs_sync_file

commit 9dcbeed4d7e11e1dcf5e55475de3754f0855d1c2 upstream.

The calculation of range length in btrfs_sync_file leads to signed
overflow. This was caught by PaX gcc SIZE_OVERFLOW plugin.

https://forums.grsecurity.net/viewtopic.php?f=1&t=4284

The fsync call passes 0 and LLONG_MAX, the range length does not fit to
loff_t and overflows, but the value is converted to u64 so it silently
works as expected.

The minimal fix is a typecast to u64, switching functions to take
(start, end) instead of (start, len) would be more intrusive.

Coccinelle script found that there's one more opencoded calculation of
the length.

<smpl>
@@
loff_t start, end;
@@
* end - start
</smpl>

Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/file.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 10b1471e17905..6e80b78f797f8 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1876,8 +1876,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	struct btrfs_log_ctx ctx;
 	int ret = 0;
 	bool full_sync = 0;
-	const u64 len = end - start + 1;
+	u64 len;
 
+	/*
+	 * The range length can be represented by u64, we have to do the typecasts
+	 * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync()
+	 */
+	len = (u64)end - (u64)start + 1;
 	trace_btrfs_sync_file(file, datasync);
 
 	/*
@@ -2065,8 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 			}
 		}
 		if (!full_sync) {
-			ret = btrfs_wait_ordered_range(inode, start,
-						       end - start + 1);
+			ret = btrfs_wait_ordered_range(inode, start, len);
 			if (ret) {
 				btrfs_end_transaction(trans, root);
 				goto out;

From 4d8dce2c10dfaba12c767e48d254082333424c7e Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 27 Nov 2015 19:23:24 +0100
Subject: [PATCH 182/201] rbd: don't put snap_context twice in
 rbd_queue_workfn()

commit 70b16db86f564977df074072143284aec2cb1162 upstream.

Commit 4e752f0ab0e8 ("rbd: access snapshot context and mapping size
safely") moved ceph_get_snap_context() out of rbd_img_request_create()
and into rbd_queue_workfn(), adding a ceph_put_snap_context() to the
error path in rbd_queue_workfn().  However, rbd_img_request_create()
consumes a ref on snapc, so calling ceph_put_snap_context() after
a successful rbd_img_request_create() leads to an extra put.  Fix it.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Josh Durgin <jdurgin@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/rbd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 128e7df5b8072..8630a77ea4625 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3444,6 +3444,7 @@ static void rbd_queue_workfn(struct work_struct *work)
 		goto err_rq;
 	}
 	img_request->rq = rq;
+	snapc = NULL; /* img_request consumes a ref */
 
 	if (op_type == OBJ_OP_DISCARD)
 		result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA,

From 99d17a1f12d6e56fa197f9e6c131236df50ef391 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 2 Oct 2015 23:54:58 -0400
Subject: [PATCH 183/201] ext4 crypto: fix memory leak in ext4_bio_write_page()

commit 937d7b84dca58f2565715f2c8e52f14c3d65fb22 upstream.

There are times when ext4_bio_write_page() is called even though we
don't actually need to do any I/O.  This happens when ext4_writepage()
gets called by the jbd2 commit path when an inode needs to force its
pages written out in order to provide data=ordered guarantees --- and
a page is backed by an unwritten (e.g., uninitialized) block on disk,
or if delayed allocation means the page's backing store hasn't been
allocated yet.  In that case, we need to skip the call to
ext4_encrypt_page(), since in addition to wasting CPU, it leads to a
bounce page and an ext4 crypto context getting leaked.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/page-io.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 84ba4d2b3a35f..17fbe3882b8eb 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 	struct buffer_head *bh, *head;
 	int ret = 0;
 	int nr_submitted = 0;
+	int nr_to_submit = 0;
 
 	blocksize = 1 << inode->i_blkbits;
 
@@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
 		}
 		set_buffer_async_write(bh);
+		nr_to_submit++;
 	} while ((bh = bh->b_this_page) != head);
 
 	bh = head = page_buffers(page);
 
-	if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+	if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
+	    nr_to_submit) {
 		data_page = ext4_encrypt(inode, page);
 		if (IS_ERR(data_page)) {
 			ret = PTR_ERR(data_page);

From bcdde051c4086a43f97119fefd70735b26f924b6 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 3 Oct 2015 10:49:29 -0400
Subject: [PATCH 184/201] ext4 crypto: fix bugs in ext4_encrypted_zeroout()

commit 36086d43f6575c081067de9855786a2fc91df77b upstream.

Fix multiple bugs in ext4_encrypted_zeroout(), including one that
could cause us to write an encrypted zero page to the wrong location
on disk, potentially causing data and file system corruption.
Fortunately, this tends to only show up in stress tests, but even with
these fixes, we are seeing some test failures with generic/127 --- but
these are now caused by data failures instead of metadata corruption.

Since ext4_encrypted_zeroout() is only used for some optimizations to
keep the extent tree from being too fragmented, and
ext4_encrypted_zeroout() itself isn't all that optimized from a time
or IOPS perspective, disable the extent tree optimization for
encrypted inodes for now.  This prevents the data corruption issues
reported by generic/127 until we can figure out what's going wrong.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/crypto.c  | 23 +++++++++++++++++++----
 fs/ext4/extents.c |  3 +++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 45731558138c8..2fab243a4c9e9 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -411,7 +411,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
 	ext4_lblk_t		lblk = ex->ee_block;
 	ext4_fsblk_t		pblk = ext4_ext_pblock(ex);
 	unsigned int		len = ext4_ext_get_actual_len(ex);
-	int			err = 0;
+	int			ret, err = 0;
+
+#if 0
+	ext4_msg(inode->i_sb, KERN_CRIT,
+		 "ext4_encrypted_zeroout ino %lu lblk %u len %u",
+		 (unsigned long) inode->i_ino, lblk, len);
+#endif
 
 	BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
 
@@ -437,17 +443,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
 			goto errout;
 		}
 		bio->bi_bdev = inode->i_sb->s_bdev;
-		bio->bi_iter.bi_sector = pblk;
-		err = bio_add_page(bio, ciphertext_page,
+		bio->bi_iter.bi_sector =
+			pblk << (inode->i_sb->s_blocksize_bits - 9);
+		ret = bio_add_page(bio, ciphertext_page,
 				   inode->i_sb->s_blocksize, 0);
-		if (err) {
+		if (ret != inode->i_sb->s_blocksize) {
+			/* should never happen! */
+			ext4_msg(inode->i_sb, KERN_ERR,
+				 "bio_add_page failed: %d", ret);
+			WARN_ON(1);
 			bio_put(bio);
+			err = -EIO;
 			goto errout;
 		}
 		err = submit_bio_wait(WRITE, bio);
+		if ((err == 0) && bio->bi_error)
+			err = -EIO;
 		bio_put(bio);
 		if (err)
 			goto errout;
+		lblk++; pblk++;
 	}
 	err = 0;
 errout:
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 2553aa8b608d8..7f486e350d15d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		max_zeroout = sbi->s_extent_max_zeroout_kb >>
 			(inode->i_sb->s_blocksize_bits - 10);
 
+	if (ext4_encrypted_inode(inode))
+		max_zeroout = 0;
+
 	/* If extent is less than s_max_zeroout_kb, zeroout directly */
 	if (max_zeroout && (ee_len <= max_zeroout)) {
 		err = ext4_ext_zeroout(inode, ex);

From 5a4ead78e6a00d20924ea1485d51529d9d6c335f Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Sat, 17 Oct 2015 22:57:06 -0400
Subject: [PATCH 185/201] ext4: fix potential use after free in
 __ext4_journal_stop

commit 6934da9238da947628be83635e365df41064b09b upstream.

There is a use-after-free possibility in __ext4_journal_stop() in the
case that we free the handle in the first jbd2_journal_stop() because
we're referencing handle->h_err afterwards. This was introduced in
9705acd63b125dee8b15c705216d7186daea4625 and it is wrong. Fix it by
storing the handle->h_err value beforehand and avoid referencing
potentially freed handle.

Fixes: 9705acd63b125dee8b15c705216d7186daea4625
Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4_jbd2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index d418431818187..e770c1ee4613e 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
 		return 0;
 	}
 
+	err = handle->h_err;
 	if (!handle->h_transaction) {
-		err = jbd2_journal_stop(handle);
-		return handle->h_err ? handle->h_err : err;
+		rc = jbd2_journal_stop(handle);
+		return err ? err : rc;
 	}
 
 	sb = handle->h_transaction->t_journal->j_private;
-	err = handle->h_err;
 	rc = jbd2_journal_stop(handle);
 
 	if (!err)

From 5cbe4871e7fd45910472918126b9f1e77a5bda84 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daeho.jeong@samsung.com>
Date: Sun, 18 Oct 2015 17:02:56 -0400
Subject: [PATCH 186/201] ext4, jbd2: ensure entering into panic after
 recording an error in superblock

commit 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 upstream.

If a EXT4 filesystem utilizes JBD2 journaling and an error occurs, the
journaling will be aborted first and the error number will be recorded
into JBD2 superblock and, finally, the system will enter into the
panic state in "errors=panic" option.  But, in the rare case, this
sequence is little twisted like the below figure and it will happen
that the system enters into panic state, which means the system reset
in mobile environment, before completion of recording an error in the
journal superblock. In this case, e2fsck cannot recognize that the
filesystem failure occurred in the previous run and the corruption
wouldn't be fixed.

Task A                        Task B
ext4_handle_error()
-> jbd2_journal_abort()
  -> __journal_abort_soft()
    -> __jbd2_journal_abort_hard()
    | -> journal->j_flags |= JBD2_ABORT;
    |
    |                         __ext4_abort()
    |                         -> jbd2_journal_abort()
    |                         | -> __journal_abort_soft()
    |                         |   -> if (journal->j_flags & JBD2_ABORT)
    |                         |           return;
    |                         -> panic()
    |
    -> jbd2_journal_update_sb_errno()

Tested-by: Hobin Woo <hobin.woo@samsung.com>
Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c      | 12 ++++++++++--
 fs/jbd2/journal.c    |  6 +++++-
 include/linux/jbd2.h |  1 +
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a63c7b0a10cfc..df84bd256c9f5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -394,9 +394,13 @@ static void ext4_handle_error(struct super_block *sb)
 		smp_wmb();
 		sb->s_flags |= MS_RDONLY;
 	}
-	if (test_opt(sb, ERRORS_PANIC))
+	if (test_opt(sb, ERRORS_PANIC)) {
+		if (EXT4_SB(sb)->s_journal &&
+		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+			return;
 		panic("EXT4-fs (device %s): panic forced after error\n",
 			sb->s_id);
+	}
 }
 
 #define ext4_error_ratelimit(sb)					\
@@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function,
 			jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
 		save_error_info(sb, function, line);
 	}
-	if (test_opt(sb, ERRORS_PANIC))
+	if (test_opt(sb, ERRORS_PANIC)) {
+		if (EXT4_SB(sb)->s_journal &&
+		  !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR))
+			return;
 		panic("EXT4-fs panic from previous error\n");
+	}
 }
 
 void __ext4_msg(struct super_block *sb,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8270fe9e3641e..37023d0bdae49 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2071,8 +2071,12 @@ static void __journal_abort_soft (journal_t *journal, int errno)
 
 	__jbd2_journal_abort_hard(journal);
 
-	if (errno)
+	if (errno) {
 		jbd2_journal_update_sb_errno(journal);
+		write_lock(&journal->j_state_lock);
+		journal->j_flags |= JBD2_REC_ERR;
+		write_unlock(&journal->j_state_lock);
+	}
 }
 
 /**
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index df07e78487d56..1abeb820a630b 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1046,6 +1046,7 @@ struct journal_s
 #define JBD2_ABORT_ON_SYNCDATA_ERR	0x040	/* Abort the journal on file
 						 * data write error in ordered
 						 * mode */
+#define JBD2_REC_ERR	0x080	/* The errno in the sb has been recorded */
 
 /*
  * Function declarations for the journaling transaction and buffer

From eee26fbfd7a6970ad29daa83f5d7d83df17b017c Mon Sep 17 00:00:00 2001
From: Stefan Richter <stefanr@s5r6.in-berlin.de>
Date: Tue, 3 Nov 2015 01:46:21 +0100
Subject: [PATCH 187/201] firewire: ohci: fix JMicron JMB38x IT context
 discovery

commit 100ceb66d5c40cc0c7018e06a9474302470be73c upstream.

Reported by Clifford and Craig for JMicron OHCI-1394 + SDHCI combo
controllers:  Often or even most of the time, the controller is
initialized with the message "added OHCI v1.10 device as card 0, 4 IR +
0 IT contexts, quirks 0x10".  With 0 isochronous transmit DMA contexts
(IT contexts), applications like audio output are impossible.

However, OHCI-1394 demands that at least 4 IT contexts are implemented
by the link layer controller, and indeed JMicron JMB38x do implement
four of them.  Only their IsoXmitIntMask register is unreliable at early
access.

With my own JMB381 single function controller I found:
  - I can reproduce the problem with a lower probability than Craig's.
  - If I put a loop around the section which clears and reads
    IsoXmitIntMask, then either the first or the second attempt will
    return the correct initial mask of 0x0000000f.  I never encountered
    a case of needing more than a second attempt.
  - Consequently, if I put a dummy reg_read(...IsoXmitIntMaskSet)
    before the first write, the subsequent read will return the correct
    result.
  - If I merely ignore a wrong read result and force the known real
    result, later isochronous transmit DMA usage works just fine.

So let's just fix this chip bug up by the latter method.  Tested with
JMB381 on kernel 3.13 and 4.3.

Since OHCI-1394 generally requires 4 IT contexts at a minium, this
workaround is simply applied whenever the initial read of IsoXmitIntMask
returns 0, regardless whether it's a JMicron chip or not.  I never heard
of this issue together with any other chip though.

I am not 100% sure that this fix works on the OHCI-1394 part of JMB380
and JMB388 combo controllers exactly the same as on the JMB381 single-
function controller, but so far I haven't had a chance to let an owner
of a combo chip run a patched kernel.

Strangely enough, IsoRecvIntMask is always reported correctly, even
though it is probed right before IsoXmitIntMask.

Reported-by: Clifford Dunn
Reported-by: Craig Moore <craig.moore@qenos.com>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firewire/ohci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index f51d376d10ba6..c2f5117fd8cb0 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev,
 
 	reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0);
 	ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet);
+	/* JMicron JMB38x often shows 0 at first read, just ignore it */
+	if (!ohci->it_context_support) {
+		ohci_notice(ohci, "overriding IsoXmitIntMask\n");
+		ohci->it_context_support = 0xf;
+	}
 	reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0);
 	ohci->it_context_mask = ohci->it_context_support;
 	ohci->n_it = hweight32(ohci->it_context_mask);

From 9a4a72786164f9aa3ca99189ef2948e020d31068 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Thu, 17 Sep 2015 07:47:08 -0400
Subject: [PATCH 188/201] nfsd: serialize state seqid morphing operations

commit 35a92fe8770ce54c5eb275cd76128645bea2d200 upstream.

Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were
running in parallel. The server would receive the OPEN_DOWNGRADE first
and check its seqid, but then an OPEN would race in and bump it. The
OPEN_DOWNGRADE would then complete and bump the seqid again.  The result
was that the OPEN_DOWNGRADE would be applied after the OPEN, even though
it should have been rejected since the seqid changed.

The only recourse we have here I think is to serialize operations that
bump the seqid in a stateid, particularly when we're given a seqid in
the call. To address this, we add a new rw_semaphore to the
nfs4_ol_stateid struct. We do a down_write prior to checking the seqid
after looking up the stateid to ensure that nothing else is going to
bump it while we're operating on it.

In the case of OPEN, we do a down_read, as the call doesn't contain a
seqid. Those can run in parallel -- we just need to serialize them when
there is a concurrent OPEN_DOWNGRADE or CLOSE.

LOCK and LOCKU however always take the write lock as there is no
opportunity for parallelizing those.

Reported-and-Tested-by: Andrew W Elble <aweits@rit.edu>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4state.c | 33 ++++++++++++++++++++++++++++-----
 fs/nfsd/state.h     | 19 ++++++++++---------
 2 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 0f1d5691b7957..1b39edf10b67d 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3360,6 +3360,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
 	stp->st_access_bmap = 0;
 	stp->st_deny_bmap = 0;
 	stp->st_openstp = NULL;
+	init_rwsem(&stp->st_rwsem);
 	spin_lock(&oo->oo_owner.so_client->cl_lock);
 	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
 	spin_lock(&fp->fi_lock);
@@ -4187,15 +4188,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	 */
 	if (stp) {
 		/* Stateid was found, this is an OPEN upgrade */
+		down_read(&stp->st_rwsem);
 		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
-		if (status)
+		if (status) {
+			up_read(&stp->st_rwsem);
 			goto out;
+		}
 	} else {
 		stp = open->op_stp;
 		open->op_stp = NULL;
 		init_open_stateid(stp, fp, open);
+		down_read(&stp->st_rwsem);
 		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
 		if (status) {
+			up_read(&stp->st_rwsem);
 			release_open_stateid(stp);
 			goto out;
 		}
@@ -4207,6 +4213,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
 	}
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+	up_read(&stp->st_rwsem);
 
 	if (nfsd4_has_session(&resp->cstate)) {
 		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4819,10 +4826,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
 		 * revoked delegations are kept only for free_stateid.
 		 */
 		return nfserr_bad_stateid;
+	down_write(&stp->st_rwsem);
 	status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
-	if (status)
-		return status;
-	return nfs4_check_fh(current_fh, &stp->st_stid);
+	if (status == nfs_ok)
+		status = nfs4_check_fh(current_fh, &stp->st_stid);
+	if (status != nfs_ok)
+		up_write(&stp->st_rwsem);
+	return status;
 }
 
 /* 
@@ -4869,6 +4879,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
 		return status;
 	oo = openowner(stp->st_stateowner);
 	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
+		up_write(&stp->st_rwsem);
 		nfs4_put_stid(&stp->st_stid);
 		return nfserr_bad_stateid;
 	}
@@ -4899,11 +4910,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out;
 	oo = openowner(stp->st_stateowner);
 	status = nfserr_bad_stateid;
-	if (oo->oo_flags & NFS4_OO_CONFIRMED)
+	if (oo->oo_flags & NFS4_OO_CONFIRMED) {
+		up_write(&stp->st_rwsem);
 		goto put_stateid;
+	}
 	oo->oo_flags |= NFS4_OO_CONFIRMED;
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+	up_write(&stp->st_rwsem);
 	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
 		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
 
@@ -4982,6 +4996,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
 	memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 	status = nfs_ok;
 put_stateid:
+	up_write(&stp->st_rwsem);
 	nfs4_put_stid(&stp->st_stid);
 out:
 	nfsd4_bump_seqid(cstate, status);
@@ -5035,6 +5050,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		goto out; 
 	update_stateid(&stp->st_stid.sc_stateid);
 	memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
+	up_write(&stp->st_rwsem);
 
 	nfsd4_close_open_stateid(stp);
 
@@ -5260,6 +5276,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
 	stp->st_access_bmap = 0;
 	stp->st_deny_bmap = open_stp->st_deny_bmap;
 	stp->st_openstp = open_stp;
+	init_rwsem(&stp->st_rwsem);
 	list_add(&stp->st_locks, &open_stp->st_locks);
 	list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
 	spin_lock(&fp->fi_lock);
@@ -5428,6 +5445,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 					&open_stp, nn);
 		if (status)
 			goto out;
+		up_write(&open_stp->st_rwsem);
 		open_sop = openowner(open_stp->st_stateowner);
 		status = nfserr_bad_stateid;
 		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5435,6 +5453,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 			goto out;
 		status = lookup_or_create_lock_state(cstate, open_stp, lock,
 							&lock_stp, &new);
+		if (status == nfs_ok)
+			down_write(&lock_stp->st_rwsem);
 	} else {
 		status = nfs4_preprocess_seqid_op(cstate,
 				       lock->lk_old_lock_seqid,
@@ -5540,6 +5560,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 		    seqid_mutating_err(ntohl(status)))
 			lock_sop->lo_owner.so_seqid++;
 
+		up_write(&lock_stp->st_rwsem);
+
 		/*
 		 * If this is a new, never-before-used stateid, and we are
 		 * returning an error, then just go ahead and release it.
@@ -5709,6 +5731,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 fput:
 	fput(filp);
 put_stateid:
+	up_write(&stp->st_rwsem);
 	nfs4_put_stid(&stp->st_stid);
 out:
 	nfsd4_bump_seqid(cstate, status);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 583ffc13cae27..31bde12feefee 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -534,15 +534,16 @@ struct nfs4_file {
  * Better suggestions welcome.
  */
 struct nfs4_ol_stateid {
-	struct nfs4_stid    st_stid; /* must be first field */
-	struct list_head              st_perfile;
-	struct list_head              st_perstateowner;
-	struct list_head              st_locks;
-	struct nfs4_stateowner      * st_stateowner;
-	struct nfs4_clnt_odstate    * st_clnt_odstate;
-	unsigned char                 st_access_bmap;
-	unsigned char                 st_deny_bmap;
-	struct nfs4_ol_stateid         * st_openstp;
+	struct nfs4_stid		st_stid;
+	struct list_head		st_perfile;
+	struct list_head		st_perstateowner;
+	struct list_head		st_locks;
+	struct nfs4_stateowner		*st_stateowner;
+	struct nfs4_clnt_odstate	*st_clnt_odstate;
+	unsigned char			st_access_bmap;
+	unsigned char			st_deny_bmap;
+	struct nfs4_ol_stateid		*st_openstp;
+	struct rw_semaphore		st_rwsem;
 };
 
 static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)

From 4e9f452926507ecabd3030efda61413a7cd9bef2 Mon Sep 17 00:00:00 2001
From: Andrew Elble <aweits@rit.edu>
Date: Thu, 15 Oct 2015 12:07:28 -0400
Subject: [PATCH 189/201] nfsd: eliminate sending duplicate and repeated
 delegations

commit 34ed9872e745fa56f10e9bef2cf3d2336c6c8816 upstream.

We've observed the nfsd server in a state where there are
multiple delegations on the same nfs4_file for the same client.
The nfs client does attempt to DELEGRETURN these when they are presented to
it - but apparently under some (unknown) circumstances the client does not
manage to return all of them. This leads to the eventual
attempt to CB_RECALL more than one delegation with the same nfs
filehandle to the same client. The first recall will succeed, but the
next recall will fail with NFS4ERR_BADHANDLE. This leads to the server
having delegations on cl_revoked that the client has no way to FREE
or DELEGRETURN, with resulting inability to recover. The state manager
on the server will continually assert SEQ4_STATUS_RECALLABLE_STATE_REVOKED,
and the state manager on the client will be looping unable to satisfy
the server.

List discussion also reports a race between OPEN and DELEGRETURN that
will be avoided by only sending the delegation once to the
client. This is also logically in accordance with RFC5561 9.1.1 and 10.2.

So, let's:

1.) Not hand out duplicate delegations.
2.) Only send them to the client once.

RFC 5561:

9.1.1:
"Delegations and layouts, on the other hand, are not associated with a
specific owner but are associated with the client as a whole
(identified by a client ID)."

10.2:
"...the stateid for a delegation is associated with a client ID and may be
used on behalf of all the open-owners for the given client.  A
delegation is made to the client as a whole and not to any specific
process or thread of control within it."

Reported-by: Eric Meddaugh <etmsys@rit.edu>
Cc: Trond Myklebust <trond.myklebust@primarydata.com>
Cc: Olga Kornievskaia <aglo@umich.edu>
Signed-off-by: Andrew Elble <aweits@rit.edu>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 84 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1b39edf10b67d..0dea0c254ddf1 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s)
 	s->sc_type = 0;
 }
 
-static void
+/**
+ * nfs4_get_existing_delegation - Discover if this delegation already exists
+ * @clp:     a pointer to the nfs4_client we're granting a delegation to
+ * @fp:      a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ *      On success: NULL if an existing delegation was not found.
+ *
+ *      On error: -EAGAIN if one was previously granted to this nfs4_client
+ *                 for this nfs4_file.
+ *
+ */
+
+static int
+nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp)
+{
+	struct nfs4_delegation *searchdp = NULL;
+	struct nfs4_client *searchclp = NULL;
+
+	lockdep_assert_held(&state_lock);
+	lockdep_assert_held(&fp->fi_lock);
+
+	list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) {
+		searchclp = searchdp->dl_stid.sc_client;
+		if (clp == searchclp) {
+			return -EAGAIN;
+		}
+	}
+	return 0;
+}
+
+/**
+ * hash_delegation_locked - Add a delegation to the appropriate lists
+ * @dp:     a pointer to the nfs4_delegation we are adding.
+ * @fp:     a pointer to the nfs4_file we're granting a delegation on
+ *
+ * Return:
+ *      On success: NULL if the delegation was successfully hashed.
+ *
+ *      On error: -EAGAIN if one was previously granted to this
+ *                 nfs4_client for this nfs4_file. Delegation is not hashed.
+ *
+ */
+
+static int
 hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
 {
+	int status;
+	struct nfs4_client *clp = dp->dl_stid.sc_client;
+
 	lockdep_assert_held(&state_lock);
 	lockdep_assert_held(&fp->fi_lock);
 
+	status = nfs4_get_existing_delegation(clp, fp);
+	if (status)
+		return status;
+	++fp->fi_delegees;
 	atomic_inc(&dp->dl_stid.sc_count);
 	dp->dl_stid.sc_type = NFS4_DELEG_STID;
 	list_add(&dp->dl_perfile, &fp->fi_delegations);
-	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+	list_add(&dp->dl_perclnt, &clp->cl_delegations);
+	return 0;
 }
 
 static bool
@@ -3946,6 +3998,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
 	return fl;
 }
 
+/**
+ * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer
+ * @dp:   a pointer to the nfs4_delegation we're adding.
+ *
+ * Return:
+ *      On success: Return code will be 0 on success.
+ *
+ *      On error: -EAGAIN if there was an existing delegation.
+ *                 nonzero if there is an error in other cases.
+ *
+ */
+
 static int nfs4_setlease(struct nfs4_delegation *dp)
 {
 	struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -3977,16 +4041,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
 		goto out_unlock;
 	/* Race breaker */
 	if (fp->fi_deleg_file) {
-		status = 0;
-		++fp->fi_delegees;
-		hash_delegation_locked(dp, fp);
+		status = hash_delegation_locked(dp, fp);
 		goto out_unlock;
 	}
 	fp->fi_deleg_file = filp;
-	fp->fi_delegees = 1;
-	hash_delegation_locked(dp, fp);
+	fp->fi_delegees = 0;
+	status = hash_delegation_locked(dp, fp);
 	spin_unlock(&fp->fi_lock);
 	spin_unlock(&state_lock);
+	if (status) {
+		/* Should never happen, this is a new fi_deleg_file  */
+		WARN_ON_ONCE(1);
+		goto out_fput;
+	}
 	return 0;
 out_unlock:
 	spin_unlock(&fp->fi_lock);
@@ -4006,6 +4073,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 	if (fp->fi_had_conflict)
 		return ERR_PTR(-EAGAIN);
 
+	spin_lock(&state_lock);
+	spin_lock(&fp->fi_lock);
+	status = nfs4_get_existing_delegation(clp, fp);
+	spin_unlock(&fp->fi_lock);
+	spin_unlock(&state_lock);
+
+	if (status)
+		return ERR_PTR(status);
+
 	dp = alloc_init_deleg(clp, fh, odstate);
 	if (!dp)
 		return ERR_PTR(-ENOMEM);
@@ -4024,9 +4100,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 		status = -EAGAIN;
 		goto out_unlock;
 	}
-	++fp->fi_delegees;
-	hash_delegation_locked(dp, fp);
-	status = 0;
+	status = hash_delegation_locked(dp, fp);
 out_unlock:
 	spin_unlock(&fp->fi_lock);
 	spin_unlock(&state_lock);

From 8489733094edbec8f3d62f96cd8b15402e66350a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Thu, 5 Nov 2015 00:01:51 +0100
Subject: [PATCH 190/201] debugfs: fix refcount imbalance in start_creating

commit 0ee9608c89e81a1ccee52ecb58a7ff040e2522d9 upstream.

In debugfs' start_creating(), we pin the file system to safely access
its root. When we failed to create a file, we unpin the file system via
failed_creating() to release the mount count and eventually the reference
of the vfsmount.

However, when we run into an error during lookup_one_len() when still
in start_creating(), we only release the parent's mutex but not so the
reference on the mount. Looks like it was done in the past, but after
splitting portions of __create_file() into start_creating() and
end_creating() via 190afd81e4a5 ("debugfs: split the beginning and the
end of __create_file() off"), this seemed missed. Noticed during code
review.

Fixes: 190afd81e4a5 ("debugfs: split the beginning and the end of __create_file() off")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/debugfs/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index c711be8d6a3cc..9c8d23316da18 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
 		dput(dentry);
 		dentry = ERR_PTR(-EEXIST);
 	}
-	if (IS_ERR(dentry))
+
+	if (IS_ERR(dentry)) {
 		mutex_unlock(&d_inode(parent)->i_mutex);
+		simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+	}
+
 	return dentry;
 }
 

From 9a08c6f60bbc067199214b4730df27979716f242 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 20 Nov 2015 09:56:20 -0500
Subject: [PATCH 191/201] nfs4: start callback_ident at idr 1

commit c68a027c05709330fe5b2f50c50d5fa02124b5d8 upstream.

If clp->cl_cb_ident is zero, then nfs_cb_idr_remove_locked() skips removing
it when the nfs_client is freed.  A decoding or server bug can then find
and try to put that first nfs_client which would lead to a crash.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Fixes: d6870312659d ("nfs4client: convert to idr_alloc()")
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 223bedda64ae4..10410e8b58530 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
 		return ret;
 	idr_preload(GFP_KERNEL);
 	spin_lock(&nn->nfs_client_lock);
-	ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT);
+	ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT);
 	if (ret >= 0)
 		clp->cl_cb_ident = ret;
 	spin_unlock(&nn->nfs_client_lock);

From 4ffcc6f992ca1bef2e710ebe2fde80ced92ec263 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Wed, 25 Nov 2015 13:43:14 -0500
Subject: [PATCH 192/201] nfs4: resend LAYOUTGET when there is a race that
 changes the seqid

commit 4f2e9dce0c6348a95eaa56ade9bab18572221088 upstream.

pnfs_layout_process will check the returned layout stateid against what
the kernel has in-core. If it turns out that the stateid we received is
older, then we should resend the LAYOUTGET instead of falling back to
MDS I/O.

Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/pnfs.c | 56 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 25 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8abe27165ad04..abf5caea20c93 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo,
 
 	dprintk("--> %s\n", __func__);
 
-	lgp = kzalloc(sizeof(*lgp), gfp_flags);
-	if (lgp == NULL)
-		return NULL;
+	/*
+	 * Synchronously retrieve layout information from server and
+	 * store in lseg. If we race with a concurrent seqid morphing
+	 * op, then re-send the LAYOUTGET.
+	 */
+	do {
+		lgp = kzalloc(sizeof(*lgp), gfp_flags);
+		if (lgp == NULL)
+			return NULL;
+
+		i_size = i_size_read(ino);
+
+		lgp->args.minlength = PAGE_CACHE_SIZE;
+		if (lgp->args.minlength > range->length)
+			lgp->args.minlength = range->length;
+		if (range->iomode == IOMODE_READ) {
+			if (range->offset >= i_size)
+				lgp->args.minlength = 0;
+			else if (i_size - range->offset < lgp->args.minlength)
+				lgp->args.minlength = i_size - range->offset;
+		}
+		lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
+		lgp->args.range = *range;
+		lgp->args.type = server->pnfs_curr_ld->id;
+		lgp->args.inode = ino;
+		lgp->args.ctx = get_nfs_open_context(ctx);
+		lgp->gfp_flags = gfp_flags;
+		lgp->cred = lo->plh_lc_cred;
 
-	i_size = i_size_read(ino);
+		lseg = nfs4_proc_layoutget(lgp, gfp_flags);
+	} while (lseg == ERR_PTR(-EAGAIN));
 
-	lgp->args.minlength = PAGE_CACHE_SIZE;
-	if (lgp->args.minlength > range->length)
-		lgp->args.minlength = range->length;
-	if (range->iomode == IOMODE_READ) {
-		if (range->offset >= i_size)
-			lgp->args.minlength = 0;
-		else if (i_size - range->offset < lgp->args.minlength)
-			lgp->args.minlength = i_size - range->offset;
-	}
-	lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
-	lgp->args.range = *range;
-	lgp->args.type = server->pnfs_curr_ld->id;
-	lgp->args.inode = ino;
-	lgp->args.ctx = get_nfs_open_context(ctx);
-	lgp->gfp_flags = gfp_flags;
-	lgp->cred = lo->plh_lc_cred;
-
-	/* Synchronously retrieve layout information from server and
-	 * store in lseg.
-	 */
-	lseg = nfs4_proc_layoutget(lgp, gfp_flags);
 	if (IS_ERR(lseg)) {
 		switch (PTR_ERR(lseg)) {
 		case -ENOMEM:
@@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
 		/* existing state ID, make sure the sequence number matches. */
 		if (pnfs_layout_stateid_blocked(lo, &res->stateid)) {
 			dprintk("%s forget reply due to sequence\n", __func__);
+			status = -EAGAIN;
 			goto out_forget_reply;
 		}
 		pnfs_set_layout_stateid(lo, &res->stateid, false);

From 3994c2dee338ddc51adc2426d1208b41f04d15eb Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@poochiereds.net>
Date: Wed, 25 Nov 2015 13:50:11 -0500
Subject: [PATCH 193/201] nfs: if we have no valid attrs, then don't declare
 the attribute cache valid

commit c812012f9ca7cf89c9e1a1cd512e6c3b5be04b85 upstream.

If we pass in an empty nfs_fattr struct to nfs_update_inode, it will
(correctly) not update any of the attributes, but it then clears the
NFS_INO_INVALID_ATTR flag, which indicates that the attributes are
up to date. Don't clear the flag if the fattr struct has no valid
attrs to apply.

Reviewed-by: Steve French <steve.french@primarydata.com>
Signed-off-by: Jeff Layton <jeff.layton@primarydata.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 326d9e10d8337..ffdf9b9e88ab6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1824,7 +1824,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
 			nfsi->attr_gencount = fattr->gencount;
 	}
-	invalid &= ~NFS_INO_INVALID_ATTR;
+
+	/* Don't declare attrcache up to date if there were no attrs! */
+	if (fattr->valid != 0)
+		invalid &= ~NFS_INO_INVALID_ATTR;
+
 	/* Don't invalidate the data if we were to blame */
 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
 				|| S_ISLNK(inode->i_mode)))

From eeec50cb5826af4c70564ed49d88626439598327 Mon Sep 17 00:00:00 2001
From: Junxiao Bi <junxiao.bi@oracle.com>
Date: Fri, 20 Nov 2015 15:57:30 -0800
Subject: [PATCH 194/201] ocfs2: fix umask ignored issue

commit 8f1eb48758aacf6c1ffce18179295adbf3bd7640 upstream.

New created file's mode is not masked with umask, and this makes umask not
work for ocfs2 volume.

Fixes: 702e5bc ("ocfs2: use generic posix ACL infrastructure")
Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Gang He <ghe@suse.com>
Cc: Mark Fasheh <mfasheh@suse.de>
Cc: Joel Becker <jlbec@evilplan.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/namei.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index b7dfac226b1e2..12bfa9ca55832 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -374,6 +374,8 @@ static int ocfs2_mknod(struct inode *dir,
 		mlog_errno(status);
 		goto leave;
 	}
+	/* update inode->i_mode after mask with "umask". */
+	inode->i_mode = mode;
 
 	handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
 							    S_ISDIR(mode),

From 278b54df14299e1cfbe6175e2a098bf0b6616611 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@canonical.com>
Date: Tue, 24 Nov 2015 10:35:29 +0800
Subject: [PATCH 195/201] block: fix segment split

commit 578270bfbd2803dc7b0b03fbc2ac119efbc73195 upstream.

Inside blk_bio_segment_split(), previous bvec pointer(bvprvp)
always points to the iterator local variable, which is obviously
wrong, so fix it by pointing to the local variable of 'bvprv'.

Fixes: 5014c311baa2b(block: fix bogus compiler warnings in blk-merge.c)
Reported-by: Michael Ellerman <mpe@ellerman.id.au>
Reported-by: Mark Salter <msalter@redhat.com>
Tested-by: Laurent Dufour <ldufour@linux.vnet.ibm.com>
Tested-by: Mark Salter <msalter@redhat.com>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/blk-merge.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index c4e9c37f3e381..0e5f4fc124491 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -91,7 +91,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 
 			seg_size += bv.bv_len;
 			bvprv = bv;
-			bvprvp = &bv;
+			bvprvp = &bvprv;
 			sectors += bv.bv_len >> 9;
 			continue;
 		}
@@ -101,7 +101,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q,
 
 		nsegs++;
 		bvprv = bv;
-		bvprvp = &bv;
+		bvprvp = &bvprv;
 		seg_size = bv.bv_len;
 		sectors += bv.bv_len >> 9;
 	}

From 4bf8855ea964d7faa5dece0a3dd032d47f4b9a42 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 30 Sep 2015 15:04:42 +0200
Subject: [PATCH 196/201] ceph: fix message length computation

commit 777d738a5e58ba3b6f3932ab1543ce93703f4873 upstream.

create_request_message() computes the maximum length of a message,
but uses the wrong type for the time stamp: sizeof(struct timespec)
may be 8 or 16 depending on the architecture, while sizeof(struct
ceph_timespec) is always 8, and that is what gets put into the
message.

Found while auditing the uses of timespec for y2038 problems.

Fixes: b8e69066d8af ("ceph: include time stamp in every MDS request")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ceph/mds_client.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 51cb02da75d98..fe2c982764e78 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1935,7 +1935,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 
 	len = sizeof(*head) +
 		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
-		sizeof(struct timespec);
+		sizeof(struct ceph_timespec);
 
 	/* calculate (max) length for cap releases */
 	len += sizeof(struct ceph_mds_request_release) *

From 4ba2bd2c4729caaf6702c5426ffb7c65b9b1405d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 12 Nov 2015 12:13:57 -0800
Subject: [PATCH 197/201] ALSA: pci: depend on ZONE_DMA

commit 2db1a57986d37653583e67ccbf13082aadc8f25d upstream.

There are several sound drivers that 'select ZONE_DMA'.  This is
backwards as ZONE_DMA is an architecture capability exported to drivers.
Switch the polarity of the dependency to disable these drivers when the
architecture does not support ZONE_DMA.  This was discovered in the
context of testing/enabling devm_memremap_pages() which depends on
ZONE_DEVICE.  ZONE_DEVICE in turn depends on !ZONE_DMA.

Reported-by: Jeff Moyer <jmoyer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/Kconfig | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index edfc1b8d553ee..656ce39bddbc5 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -25,7 +25,7 @@ config SND_ALS300
 	select SND_PCM
 	select SND_AC97_CODEC
 	select SND_OPL3_LIB
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say 'Y' or 'M' to include support for Avance Logic ALS300/ALS300+
 
@@ -50,7 +50,7 @@ config SND_ALI5451
 	tristate "ALi M5451 PCI Audio Controller"
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for the integrated AC97 sound
 	  device on motherboards using the ALi M5451 Audio Controller
@@ -155,7 +155,7 @@ config SND_AZT3328
 	select SND_PCM
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for Aztech AZF3328 (PCI168)
 	  soundcards.
@@ -463,7 +463,7 @@ config SND_EMU10K1
 	select SND_HWDEP
 	select SND_RAWMIDI
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y to include support for Sound Blaster PCI 512, Live!,
 	  Audigy and E-mu APS (partially supported) soundcards.
@@ -479,7 +479,7 @@ config SND_EMU10K1X
 	tristate "Emu10k1X (Dell OEM Version)"
 	select SND_AC97_CODEC
 	select SND_RAWMIDI
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for the Dell OEM version of the
 	  Sound Blaster Live!.
@@ -513,7 +513,7 @@ config SND_ES1938
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for soundcards based on ESS Solo-1
 	  (ES1938, ES1946, ES1969) chips.
@@ -525,7 +525,7 @@ config SND_ES1968
 	tristate "ESS ES1968/1978 (Maestro-1/2/2E)"
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for soundcards based on ESS Maestro
 	  1/2/2E chips.
@@ -612,7 +612,7 @@ config SND_ICE1712
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
 	select BITREVERSE
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for soundcards based on the
 	  ICE1712 (Envy24) chip.
@@ -700,7 +700,7 @@ config SND_LX6464ES
 config SND_MAESTRO3
 	tristate "ESS Allegro/Maestro3"
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for soundcards based on ESS Maestro 3
 	  (Allegro) chips.
@@ -806,7 +806,7 @@ config SND_SIS7019
 	tristate "SiS 7019 Audio Accelerator"
 	depends on X86_32
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for the SiS 7019 Audio Accelerator.
 
@@ -818,7 +818,7 @@ config SND_SONICVIBES
 	select SND_OPL3_LIB
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for soundcards based on the S3
 	  SonicVibes chip.
@@ -830,7 +830,7 @@ config SND_TRIDENT
 	tristate "Trident 4D-Wave DX/NX; SiS 7018"
 	select SND_MPU401_UART
 	select SND_AC97_CODEC
-	select ZONE_DMA
+	depends on ZONE_DMA
 	help
 	  Say Y here to include support for soundcards based on Trident
 	  4D-Wave DX/NX or SiS 7018 chips.

From de48652f0e6acd07b4fa0e58a5e76f43477f8e2a Mon Sep 17 00:00:00 2001
From: "Lu, Han" <han.lu@intel.com>
Date: Wed, 11 Nov 2015 16:54:27 +0800
Subject: [PATCH 198/201] ALSA: hda/hdmi - apply Skylake fix-ups to Broxton
 display codec

commit e2656412f2a7343ecfd13eb74bac0a6e6e9c5aad upstream.

Broxton and Skylake have the same behavior on display audio. So this patch
applys Skylake fix-ups to Broxton.

Signed-off-by: Lu, Han <han.lu@intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 sound/pci/hda/patch_hdmi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index acbfbe087ee86..f22f5c4094473 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -50,8 +50,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 #define is_haswell(codec)  ((codec)->core.vendor_id == 0x80862807)
 #define is_broadwell(codec)    ((codec)->core.vendor_id == 0x80862808)
 #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809)
+#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a)
 #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
-					|| is_skylake(codec))
+				|| is_skylake(codec) || is_broxton(codec))
 
 #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883)

From 4f2347e64803932bc354a12830e3cd10d86e5b49 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 21 Sep 2015 08:42:04 -0300
Subject: [PATCH 199/201] cobalt: fix Kconfig dependency

commit fc88dd16a0e430f57458e6bd9b62a631c6ea53a1 upstream.

The cobalt driver should depend on VIDEO_V4L2_SUBDEV_API.

This fixes this kbuild error:

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
head:   99bc7215bc60f6cd414cf1b85cd9d52cc596cccb
commit: 85756a069c55e0315ac5990806899cfb607b987f [media] cobalt: add new driver
config: x86_64-randconfig-s0-09201514 (attached as .config)
reproduce:
  git checkout 85756a069c55e0315ac5990806899cfb607b987f
  # save the attached .config to linux build tree
  make ARCH=x86_64

All error/warnings (new ones prefixed by >>):

   drivers/media/i2c/adv7604.c: In function 'adv76xx_get_format':
>> drivers/media/i2c/adv7604.c:1853:9: error: implicit declaration of function 'v4l2_subdev_get_try_format' [-Werror=implicit-function-declaration]
      fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
            ^
   drivers/media/i2c/adv7604.c:1853:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
          ^
   drivers/media/i2c/adv7604.c: In function 'adv76xx_set_format':
   drivers/media/i2c/adv7604.c:1882:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion]
      fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad);
          ^
   cc1: some warnings being treated as errors

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/media/pci/cobalt/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/pci/cobalt/Kconfig b/drivers/media/pci/cobalt/Kconfig
index 1f88ccc174daa..a01f0cc745cc3 100644
--- a/drivers/media/pci/cobalt/Kconfig
+++ b/drivers/media/pci/cobalt/Kconfig
@@ -1,6 +1,6 @@
 config VIDEO_COBALT
 	tristate "Cisco Cobalt support"
-	depends on VIDEO_V4L2 && I2C && MEDIA_CONTROLLER
+	depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API
 	depends on PCI_MSI && MTD_COMPLEX_MAPPINGS
 	depends on GPIOLIB || COMPILE_TEST
 	depends on SND

From e0aa614dc9ce697f51a0992507b36b3edbc7a641 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 23 Oct 2015 07:52:54 +0100
Subject: [PATCH 200/201] Btrfs: fix regression running delayed references when
 using qgroups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit b06c4bf5c874a57254b197f53ddf588e7a24a2bf upstream.

In the kernel 4.2 merge window we had a big changes to the implementation
of delayed references and qgroups which made the no_quota field of delayed
references not used anymore. More specifically the no_quota field is not
used anymore as of:

  commit 0ed4792af0e8 ("btrfs: qgroup: Switch to new extent-oriented qgroup mechanism.")

Leaving the no_quota field actually prevents delayed references from
getting merged, which in turn cause the following BUG_ON(), at
fs/btrfs/extent-tree.c, to be hit when qgroups are enabled:

  static int run_delayed_tree_ref(...)
  {
     (...)
     BUG_ON(node->ref_mod != 1);
     (...)
  }

This happens on a scenario like the following:

  1) Ref1 bytenr X, action = BTRFS_ADD_DELAYED_REF, no_quota = 1, added.

  2) Ref2 bytenr X, action = BTRFS_DROP_DELAYED_REF, no_quota = 0, added.
     It's not merged with Ref1 because Ref1->no_quota != Ref2->no_quota.

  3) Ref3 bytenr X, action = BTRFS_ADD_DELAYED_REF, no_quota = 1, added.
     It's not merged with the reference at the tail of the list of refs
     for bytenr X because the reference at the tail, Ref2 is incompatible
     due to Ref2->no_quota != Ref3->no_quota.

  4) Ref4 bytenr X, action = BTRFS_DROP_DELAYED_REF, no_quota = 0, added.
     It's not merged with the reference at the tail of the list of refs
     for bytenr X because the reference at the tail, Ref3 is incompatible
     due to Ref3->no_quota != Ref4->no_quota.

  5) We run delayed references, trigger merging of delayed references,
     through __btrfs_run_delayed_refs() -> btrfs_merge_delayed_refs().

  6) Ref1 and Ref3 are merged as Ref1->no_quota = Ref3->no_quota and
     all other conditions are satisfied too. So Ref1 gets a ref_mod
     value of 2.

  7) Ref2 and Ref4 are merged as Ref2->no_quota = Ref4->no_quota and
     all other conditions are satisfied too. So Ref2 gets a ref_mod
     value of 2.

  8) Ref1 and Ref2 aren't merged, because they have different values
     for their no_quota field.

  9) Delayed reference Ref1 is picked for running (select_delayed_ref()
     always prefers references with an action == BTRFS_ADD_DELAYED_REF).
     So run_delayed_tree_ref() is called for Ref1 which triggers the
     BUG_ON because Ref1->red_mod != 1 (equals 2).

So fix this by removing the no_quota field, as it's not used anymore as
of commit 0ed4792af0e8 ("btrfs: qgroup: Switch to new extent-oriented
qgroup mechanism.").

The use of no_quota was also buggy in at least two places:

1) At delayed-refs.c:btrfs_add_delayed_tree_ref() - we were setting
   no_quota to 0 instead of 1 when the following condition was true:
   is_fstree(ref_root) || !fs_info->quota_enabled

2) At extent-tree.c:__btrfs_inc_extent_ref() - we were attempting to
   reset a node's no_quota when the condition "!is_fstree(root_objectid)
   || !root->fs_info->quota_enabled" was true but we did it only in
   an unused local stack variable, that is, we never reset the no_quota
   value in the node itself.

This fixes the remainder of problems several people have been having when
running delayed references, mostly while a balance is running in parallel,
on a 4.2+ kernel.

Very special thanks to Stéphane Lesimple for helping debugging this issue
and testing this fix on his multi terabyte filesystem (which took more
than one day to balance alone, plus fsck, etc).

Also, this fixes deadlock issue when using the clone ioctl with qgroups
enabled, as reported by Elias Probst in the mailing list. The deadlock
happens because after calling btrfs_insert_empty_item we have our path
holding a write lock on a leaf of the fs/subvol tree and then before
releasing the path we called check_ref() which did backref walking, when
qgroups are enabled, and tried to read lock the same leaf. The trace for
this case is the following:

  INFO: task systemd-nspawn:6095 blocked for more than 120 seconds.
  (...)
  Call Trace:
    [<ffffffff86999201>] schedule+0x74/0x83
    [<ffffffff863ef64c>] btrfs_tree_read_lock+0xc0/0xea
    [<ffffffff86137ed7>] ? wait_woken+0x74/0x74
    [<ffffffff8639f0a7>] btrfs_search_old_slot+0x51a/0x810
    [<ffffffff863a129b>] btrfs_next_old_leaf+0xdf/0x3ce
    [<ffffffff86413a00>] ? ulist_add_merge+0x1b/0x127
    [<ffffffff86411688>] __resolve_indirect_refs+0x62a/0x667
    [<ffffffff863ef546>] ? btrfs_clear_lock_blocking_rw+0x78/0xbe
    [<ffffffff864122d3>] find_parent_nodes+0xaf3/0xfc6
    [<ffffffff86412838>] __btrfs_find_all_roots+0x92/0xf0
    [<ffffffff864128f2>] btrfs_find_all_roots+0x45/0x65
    [<ffffffff8639a75b>] ? btrfs_get_tree_mod_seq+0x2b/0x88
    [<ffffffff863e852e>] check_ref+0x64/0xc4
    [<ffffffff863e9e01>] btrfs_clone+0x66e/0xb5d
    [<ffffffff863ea77f>] btrfs_ioctl_clone+0x48f/0x5bb
    [<ffffffff86048a68>] ? native_sched_clock+0x28/0x77
    [<ffffffff863ed9b0>] btrfs_ioctl+0xabc/0x25cb
  (...)

The problem goes away by eleminating check_ref(), which no longer is
needed as its purpose was to get a value for the no_quota field of
a delayed reference (this patch removes the no_quota field as mentioned
earlier).

Reported-by: Stéphane Lesimple <stephane_btrfs@lesimple.fr>
Tested-by: Stéphane Lesimple <stephane_btrfs@lesimple.fr>
Reported-by: Elias Probst <mail@eliasprobst.eu>
Reported-by: Peter Becker <floyd.net@gmail.com>
Reported-by: Malte Schröder <malte@tnxip.de>
Reported-by: Derek Dongray <derek@valedon.co.uk>
Reported-by: Erkki Seppala <flux-btrfs@inside.org>
Cc: stable@vger.kernel.org  # 4.2+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
---
 fs/btrfs/ctree.h       |  4 +--
 fs/btrfs/delayed-ref.c | 28 ++++++-------------
 fs/btrfs/delayed-ref.h |  7 ++---
 fs/btrfs/extent-tree.c | 45 +++++++++++-------------------
 fs/btrfs/file.c        | 10 +++----
 fs/btrfs/inode.c       |  4 +--
 fs/btrfs/ioctl.c       | 62 +-----------------------------------------
 fs/btrfs/relocation.c  | 16 +++++------
 fs/btrfs/tree-log.c    |  2 +-
 9 files changed, 44 insertions(+), 134 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 938efe33be809..94eea1f432801 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3398,7 +3398,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 int btrfs_free_extent(struct btrfs_trans_handle *trans,
 		      struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-		      u64 owner, u64 offset, int no_quota);
+		      u64 owner, u64 offset);
 
 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len,
 			       int delalloc);
@@ -3411,7 +3411,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 owner, u64 offset, int no_quota);
+			 u64 root_objectid, u64 owner, u64 offset);
 
 int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 4832943aaa5cd..7832031fef68d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -220,7 +220,7 @@ static bool merge_ref(struct btrfs_trans_handle *trans,
 		if (seq && next->seq >= seq)
 			goto next;
 
-		if (next->type != ref->type || next->no_quota != ref->no_quota)
+		if (next->type != ref->type)
 			goto next;
 
 		if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -405,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans,
 	exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node,
 			   list);
 	/* No need to compare bytenr nor is_head */
-	if (exist->type != ref->type || exist->no_quota != ref->no_quota ||
-	    exist->seq != ref->seq)
+	if (exist->type != ref->type || exist->seq != ref->seq)
 		goto add_tail;
 
 	if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY ||
@@ -637,7 +636,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 		     struct btrfs_delayed_ref_head *head_ref,
 		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
 		     u64 num_bytes, u64 parent, u64 ref_root, int level,
-		     int action, int no_quota)
+		     int action)
 {
 	struct btrfs_delayed_tree_ref *full_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
@@ -659,7 +658,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	ref->action = action;
 	ref->is_head = 0;
 	ref->in_tree = 1;
-	ref->no_quota = no_quota;
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -692,7 +690,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 		     struct btrfs_delayed_ref_head *head_ref,
 		     struct btrfs_delayed_ref_node *ref, u64 bytenr,
 		     u64 num_bytes, u64 parent, u64 ref_root, u64 owner,
-		     u64 offset, int action, int no_quota)
+		     u64 offset, int action)
 {
 	struct btrfs_delayed_data_ref *full_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
@@ -715,7 +713,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	ref->action = action;
 	ref->is_head = 0;
 	ref->in_tree = 1;
-	ref->no_quota = no_quota;
 	ref->seq = seq;
 
 	full_ref = btrfs_delayed_node_to_data_ref(ref);
@@ -746,17 +743,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 			       struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes, u64 parent,
 			       u64 ref_root,  int level, int action,
-			       struct btrfs_delayed_extent_op *extent_op,
-			       int no_quota)
+			       struct btrfs_delayed_extent_op *extent_op)
 {
 	struct btrfs_delayed_tree_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
 
-	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
-		no_quota = 0;
-
 	BUG_ON(extent_op && extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
 	if (!ref)
@@ -785,8 +778,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 					bytenr, num_bytes, action, 0);
 
 	add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr,
-				   num_bytes, parent, ref_root, level, action,
-				   no_quota);
+			     num_bytes, parent, ref_root, level, action);
 	spin_unlock(&delayed_refs->lock);
 
 	return 0;
@@ -807,17 +799,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 			       u64 bytenr, u64 num_bytes,
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, int action,
-			       struct btrfs_delayed_extent_op *extent_op,
-			       int no_quota)
+			       struct btrfs_delayed_extent_op *extent_op)
 {
 	struct btrfs_delayed_data_ref *ref;
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 	struct btrfs_qgroup_extent_record *record = NULL;
 
-	if (!is_fstree(ref_root) || !fs_info->quota_enabled)
-		no_quota = 0;
-
 	BUG_ON(extent_op && !extent_op->is_data);
 	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
 	if (!ref)
@@ -853,7 +841,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 
 	add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr,
 				   num_bytes, parent, ref_root, owner, offset,
-				   action, no_quota);
+				   action);
 	spin_unlock(&delayed_refs->lock);
 
 	return 0;
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 13fb5e6090fe0..930887a4275f5 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node {
 
 	unsigned int action:8;
 	unsigned int type:8;
-	unsigned int no_quota:1;
 	/* is this node still in the rbtree? */
 	unsigned int is_head:1;
 	unsigned int in_tree:1;
@@ -233,15 +232,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 			       struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes, u64 parent,
 			       u64 ref_root, int level, int action,
-			       struct btrfs_delayed_extent_op *extent_op,
-			       int no_quota);
+			       struct btrfs_delayed_extent_op *extent_op);
 int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 			       struct btrfs_trans_handle *trans,
 			       u64 bytenr, u64 num_bytes,
 			       u64 parent, u64 ref_root,
 			       u64 owner, u64 offset, int action,
-			       struct btrfs_delayed_extent_op *extent_op,
-			       int no_quota);
+			       struct btrfs_delayed_extent_op *extent_op);
 int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 				struct btrfs_trans_handle *trans,
 				u64 bytenr, u64 num_bytes,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 00158bfd1771e..cadacf643bd0a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -95,8 +95,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     u64 parent, u64 root_objectid,
 				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins,
-				     int no_quota);
+				     int level, struct btrfs_key *ins);
 static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *extent_root, u64 flags,
 			  int force);
@@ -2009,8 +2008,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root,
 			 u64 bytenr, u64 num_bytes, u64 parent,
-			 u64 root_objectid, u64 owner, u64 offset,
-			 int no_quota)
+			 u64 root_objectid, u64 owner, u64 offset)
 {
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -2022,12 +2020,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
 					num_bytes,
 					parent, root_objectid, (int)owner,
-					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
+					BTRFS_ADD_DELAYED_REF, NULL);
 	} else {
 		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
 					num_bytes,
 					parent, root_objectid, owner, offset,
-					BTRFS_ADD_DELAYED_REF, NULL, no_quota);
+					BTRFS_ADD_DELAYED_REF, NULL);
 	}
 	return ret;
 }
@@ -2048,15 +2046,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 	u64 num_bytes = node->num_bytes;
 	u64 refs;
 	int ret;
-	int no_quota = node->no_quota;
 
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 
-	if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled)
-		no_quota = 1;
-
 	path->reada = 1;
 	path->leave_spinning = 1;
 	/* this will setup the path even if it fails to insert the back ref */
@@ -2291,8 +2285,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 						parent, ref_root,
 						extent_op->flags_to_set,
 						&extent_op->key,
-						ref->level, &ins,
-						node->no_quota);
+						ref->level, &ins);
 	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
 		ret = __btrfs_inc_extent_ref(trans, root, node,
 					     parent, ref_root,
@@ -3123,7 +3116,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 	int level;
 	int ret = 0;
 	int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *,
-			    u64, u64, u64, u64, u64, u64, int);
+			    u64, u64, u64, u64, u64, u64);
 
 
 	if (btrfs_test_is_dummy_root(root))
@@ -3164,15 +3157,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			key.offset -= btrfs_file_extent_offset(buf, fi);
 			ret = process_func(trans, root, bytenr, num_bytes,
 					   parent, ref_root, key.objectid,
-					   key.offset, 1);
+					   key.offset);
 			if (ret)
 				goto fail;
 		} else {
 			bytenr = btrfs_node_blockptr(buf, i);
 			num_bytes = root->nodesize;
 			ret = process_func(trans, root, bytenr, num_bytes,
-					   parent, ref_root, level - 1, 0,
-					   1);
+					   parent, ref_root, level - 1, 0);
 			if (ret)
 				goto fail;
 		}
@@ -6247,7 +6239,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	int extent_slot = 0;
 	int found_extent = 0;
 	int num_to_del = 1;
-	int no_quota = node->no_quota;
 	u32 item_size;
 	u64 refs;
 	u64 bytenr = node->bytenr;
@@ -6256,9 +6247,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 	bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
 						 SKINNY_METADATA);
 
-	if (!info->quota_enabled || !is_fstree(root_objectid))
-		no_quota = 1;
-
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -6584,7 +6572,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 					buf->start, buf->len,
 					parent, root->root_key.objectid,
 					btrfs_header_level(buf),
-					BTRFS_DROP_DELAYED_REF, NULL, 0);
+					BTRFS_DROP_DELAYED_REF, NULL);
 		BUG_ON(ret); /* -ENOMEM */
 	}
 
@@ -6632,7 +6620,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 /* Can return -ENOMEM */
 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		      u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
-		      u64 owner, u64 offset, int no_quota)
+		      u64 owner, u64 offset)
 {
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -6655,13 +6643,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
 					num_bytes,
 					parent, root_objectid, (int)owner,
-					BTRFS_DROP_DELAYED_REF, NULL, no_quota);
+					BTRFS_DROP_DELAYED_REF, NULL);
 	} else {
 		ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
 						num_bytes,
 						parent, root_objectid, owner,
 						offset, BTRFS_DROP_DELAYED_REF,
-						NULL, no_quota);
+						NULL);
 	}
 	return ret;
 }
@@ -7443,8 +7431,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root,
 				     u64 parent, u64 root_objectid,
 				     u64 flags, struct btrfs_disk_key *key,
-				     int level, struct btrfs_key *ins,
-				     int no_quota)
+				     int level, struct btrfs_key *ins)
 {
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -7534,7 +7521,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
 	ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid,
 					 ins->offset, 0,
 					 root_objectid, owner, offset,
-					 BTRFS_ADD_DELAYED_EXTENT, NULL, 0);
+					 BTRFS_ADD_DELAYED_EXTENT, NULL);
 	return ret;
 }
 
@@ -7748,7 +7735,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
 						 ins.objectid, ins.offset,
 						 parent, root_objectid, level,
 						 BTRFS_ADD_DELAYED_EXTENT,
-						 extent_op, 0);
+						 extent_op);
 		if (ret)
 			goto out_free_delayed;
 	}
@@ -8296,7 +8283,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
 			}
 		}
 		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
-				root->root_key.objectid, level - 1, 0, 0);
+				root->root_key.objectid, level - 1, 0);
 		BUG_ON(ret); /* -ENOMEM */
 	}
 	btrfs_tree_unlock(next);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 6e80b78f797f8..e27ea7ae7f26a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -855,7 +855,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 						disk_bytenr, num_bytes, 0,
 						root->root_key.objectid,
 						new_key.objectid,
-						start - extent_offset, 1);
+						start - extent_offset);
 				BUG_ON(ret); /* -ENOMEM */
 			}
 			key.offset = start;
@@ -933,7 +933,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 						disk_bytenr, num_bytes, 0,
 						root->root_key.objectid,
 						key.objectid, key.offset -
-						extent_offset, 0);
+						extent_offset);
 				BUG_ON(ret); /* -ENOMEM */
 				inode_sub_bytes(inode,
 						extent_end - key.offset);
@@ -1212,7 +1212,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 
 		ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
 					   root->root_key.objectid,
-					   ino, orig_offset, 1);
+					   ino, orig_offset);
 		BUG_ON(ret); /* -ENOMEM */
 
 		if (split == start) {
@@ -1239,7 +1239,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 		del_nr++;
 		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
 					0, root->root_key.objectid,
-					ino, orig_offset, 0);
+					ino, orig_offset);
 		BUG_ON(ret); /* -ENOMEM */
 	}
 	other_start = 0;
@@ -1256,7 +1256,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
 		del_nr++;
 		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
 					0, root->root_key.objectid,
-					ino, orig_offset, 0);
+					ino, orig_offset);
 		BUG_ON(ret); /* -ENOMEM */
 	}
 	if (del_nr == 0) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index e0c17e072a7a7..396e3d5c4e835 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2579,7 +2579,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path,
 	ret = btrfs_inc_extent_ref(trans, root, new->bytenr,
 			new->disk_len, 0,
 			backref->root_id, backref->inum,
-			new->file_pos, 0);	/* start - extent_offset */
+			new->file_pos);	/* start - extent_offset */
 	if (ret) {
 		btrfs_abort_transaction(trans, root, ret);
 		goto out_free_path;
@@ -4521,7 +4521,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			ret = btrfs_free_extent(trans, root, extent_start,
 						extent_num_bytes, 0,
 						btrfs_header_owner(leaf),
-						ino, extent_offset, 0);
+						ino, extent_offset);
 			BUG_ON(ret);
 			if (btrfs_should_throttle_delayed_refs(trans, root))
 				btrfs_async_run_delayed_refs(root,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 30dca32804eeb..6548a36823bc9 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3203,41 +3203,6 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
 	return ret;
 }
 
-/* Helper to check and see if this root currently has a ref on the given disk
- * bytenr.  If it does then we need to update the quota for this root.  This
- * doesn't do anything if quotas aren't enabled.
- */
-static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		     u64 disko)
-{
-	struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem);
-	struct ulist *roots;
-	struct ulist_iterator uiter;
-	struct ulist_node *root_node = NULL;
-	int ret;
-
-	if (!root->fs_info->quota_enabled)
-		return 1;
-
-	btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
-	ret = btrfs_find_all_roots(trans, root->fs_info, disko,
-				   tree_mod_seq_elem.seq, &roots);
-	if (ret < 0)
-		goto out;
-	ret = 0;
-	ULIST_ITER_INIT(&uiter);
-	while ((root_node = ulist_next(roots, &uiter))) {
-		if (root_node->val == root->objectid) {
-			ret = 1;
-			break;
-		}
-	}
-	ulist_free(roots);
-out:
-	btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem);
-	return ret;
-}
-
 static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
 				     struct inode *inode,
 				     u64 endoff,
@@ -3496,9 +3461,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 	u32 nritems;
 	int slot;
 	int ret;
-	int no_quota;
 	const u64 len = olen_aligned;
-	u64 last_disko = 0;
 	u64 last_dest_end = destoff;
 
 	ret = -ENOMEM;
@@ -3544,7 +3507,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 
 		nritems = btrfs_header_nritems(path->nodes[0]);
 process_slot:
-		no_quota = 1;
 		if (path->slots[0] >= nritems) {
 			ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
 			if (ret < 0)
@@ -3696,35 +3658,13 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 				btrfs_set_file_extent_num_bytes(leaf, extent,
 								datal);
 
-				/*
-				 * We need to look up the roots that point at
-				 * this bytenr and see if the new root does.  If
-				 * it does not we need to make sure we update
-				 * quotas appropriately.
-				 */
-				if (disko && root != BTRFS_I(src)->root &&
-				    disko != last_disko) {
-					no_quota = check_ref(trans, root,
-							     disko);
-					if (no_quota < 0) {
-						btrfs_abort_transaction(trans,
-									root,
-									ret);
-						btrfs_end_transaction(trans,
-								      root);
-						ret = no_quota;
-						goto out;
-					}
-				}
-
 				if (disko) {
 					inode_add_bytes(inode, datal);
 					ret = btrfs_inc_extent_ref(trans, root,
 							disko, diskl, 0,
 							root->root_key.objectid,
 							btrfs_ino(inode),
-							new_key.offset - datao,
-							no_quota);
+							new_key.offset - datao);
 					if (ret) {
 						btrfs_abort_transaction(trans,
 									root,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 303babeef5057..ab507e3d536bf 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
 		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
 					   num_bytes, parent,
 					   btrfs_header_owner(leaf),
-					   key.objectid, key.offset, 1);
+					   key.objectid, key.offset);
 		if (ret) {
 			btrfs_abort_transaction(trans, root, ret);
 			break;
@@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
 
 		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
 					parent, btrfs_header_owner(leaf),
-					key.objectid, key.offset, 1);
+					key.objectid, key.offset);
 		if (ret) {
 			btrfs_abort_transaction(trans, root, ret);
 			break;
@@ -1900,23 +1900,21 @@ int replace_path(struct btrfs_trans_handle *trans,
 
 		ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
 					path->nodes[level]->start,
-					src->root_key.objectid, level - 1, 0,
-					1);
+					src->root_key.objectid, level - 1, 0);
 		BUG_ON(ret);
 		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
 					0, dest->root_key.objectid, level - 1,
-					0, 1);
+					0);
 		BUG_ON(ret);
 
 		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
 					path->nodes[level]->start,
-					src->root_key.objectid, level - 1, 0,
-					1);
+					src->root_key.objectid, level - 1, 0);
 		BUG_ON(ret);
 
 		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
 					0, dest->root_key.objectid, level - 1,
-					0, 1);
+					0);
 		BUG_ON(ret);
 
 		btrfs_unlock_up_safe(path, 0);
@@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
 						node->eb->start, blocksize,
 						upper->eb->start,
 						btrfs_header_owner(upper->eb),
-						node->level, 0, 1);
+						node->level, 0);
 			BUG_ON(ret);
 
 			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1bbaace733838..6f8af2de5912c 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -691,7 +691,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
 				ret = btrfs_inc_extent_ref(trans, root,
 						ins.objectid, ins.offset,
 						0, root->root_key.objectid,
-						key->objectid, offset, 0);
+						key->objectid, offset);
 				if (ret)
 					goto out;
 			} else {

From 35483418917d63df90bae5b2d0b7b047d7ed8ec7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 14 Dec 2015 21:41:43 -0800
Subject: [PATCH 201/201] Linux 4.3.3

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 1a4953b3e10ff..2070d16bb5a4d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
 VERSION = 4
 PATCHLEVEL = 3
-SUBLEVEL = 2
+SUBLEVEL = 3
 EXTRAVERSION =
 NAME = Blurry Fish Butt