From 7585db5b763975f7bfb00215fef1663df795c524 Mon Sep 17 00:00:00 2001 From: "H. Nikolaus Schaller" Date: Wed, 28 Oct 2015 19:00:26 +0100 Subject: [PATCH 001/201] ARM: 8449/1: fix bug in vdsomunge swab32 macro commit 38850d786a799c3ff2de0dc1980902c3263698dc upstream. Commit 8a603f91cc48 ("ARM: 8445/1: fix vdsomunge not to depend on glibc specific byteswap.h") unfortunately introduced a bug created but not found during discussion and patch simplification. Reported-by: Efraim Yawitz Signed-off-by: H. Nikolaus Schaller Fixes: 8a603f91cc48 ("ARM: 8445/1: fix vdsomunge not to depend on glibc specific byteswap.h") Signed-off-by: Nathan Lynch Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/vdso/vdsomunge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 0cebd98cd88c3..f6455273b2f87 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -66,7 +66,7 @@ ((((x) & 0x000000ff) << 24) | \ (((x) & 0x0000ff00) << 8) | \ (((x) & 0x00ff0000) >> 8) | \ - (((x) & 0xff000000) << 24)) + (((x) & 0xff000000) >> 24)) #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define HOST_ORDER ELFDATA2LSB From 0bbb0285d4a496282e11b8619ee10f449e90f39b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 22 Oct 2015 14:24:24 +0200 Subject: [PATCH 002/201] USB: qcserial: add Sierra Wireless MC74xx/EM74xx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f504ab1888026d15b5be8f9c262bf4ae9cacd177 upstream. New device IDs shamelessly lifted from the vendor driver. Signed-off-by: Bjørn Mork Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index ebcec8cda8584..f49d262e926ba 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -153,6 +153,8 @@ static const struct usb_device_id id_table[] = { {DEVICE_SWI(0x1199, 0x9056)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9060)}, /* Sierra Wireless Modem */ {DEVICE_SWI(0x1199, 0x9061)}, /* Sierra Wireless Modem */ + {DEVICE_SWI(0x1199, 0x9070)}, /* Sierra Wireless MC74xx/EM74xx */ + {DEVICE_SWI(0x1199, 0x9071)}, /* Sierra Wireless MC74xx/EM74xx */ {DEVICE_SWI(0x413c, 0x81a2)}, /* Dell Wireless 5806 Gobi(TM) 4G LTE Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a3)}, /* Dell Wireless 5570 HSPA+ (42Mbps) Mobile Broadband Card */ {DEVICE_SWI(0x413c, 0x81a4)}, /* Dell Wireless 5570e HSPA+ (42Mbps) Mobile Broadband Card */ From 1f9ba7bc8b7ede093b8083cbd4f9bf1ddc36f324 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Wed, 28 Oct 2015 13:09:53 -0400 Subject: [PATCH 003/201] tipc: linearize arriving NAME_DISTR and LINK_PROTO buffers [ Upstream commit 5cbb28a4bf65c7e4daa6c25b651fed8eb888c620 ] Testing of the new UDP bearer has revealed that reception of NAME_DISTRIBUTOR, LINK_PROTOCOL/RESET and LINK_PROTOCOL/ACTIVATE message buffers is not prepared for the case that those may be non-linear. We now linearize all such buffers before they are delivered up to the generic reception layer. In order for the commit to apply cleanly to 'net' and 'stable', we do the change in the function tipc_udp_recv() for now. Later, we will post a commit to 'net-next' moving the linearization to generic code, in tipc_named_rcv() and tipc_link_proto_rcv(). Fixes: commit d0f91938bede ("tipc: add ip/udp media type") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/udp_media.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 6e648d90297a9..cd7c5f131e722 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -48,6 +48,7 @@ #include #include "core.h" #include "bearer.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -222,6 +223,10 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + int usr = msg_user(buf_msg(skb)); + + if ((usr == LINK_PROTOCOL) || (usr == NAME_DISTRIBUTOR)) + skb_linearize(skb); ub = rcu_dereference_sk_user_data(sk); if (!ub) { From cfb2f132a8fad8c3b81f023d3259c3964a22c471 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 29 Oct 2015 18:11:35 -0700 Subject: [PATCH 004/201] net: bcmgenet: Software reset EPHY after power on [ Upstream commit 5dbebbb44a6ad94aab2cd1a46f7676f255403f64 ] The EPHY on GENET v1->v3 is extremely finicky, and will show occasional failures based on the timing and reset sequence, ranging from duplicate packets, to extremely high latencies. Perform an additional software reset, and re-configuration to make sure it is in a consistent and working state. Fixes: 6ac3ce8295e6 ("net: bcmgenet: Remove excessive PHY reset") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 +++- drivers/net/ethernet/broadcom/genet/bcmgenet.h | 1 + drivers/net/ethernet/broadcom/genet/bcmmii.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 1805541b4240e..5e3cd76cb69bd 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -907,8 +907,10 @@ static void bcmgenet_power_up(struct bcmgenet_priv *priv, } bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); - if (mode == GENET_POWER_PASSIVE) + if (mode == GENET_POWER_PASSIVE) { bcmgenet_phy_power_set(priv->dev, true); + bcmgenet_mii_reset(priv->dev); + } } /* ioctl handle special commands that are not present in ethtool. */ diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h index 7299d10754226..c739f7ebc9929 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h @@ -674,6 +674,7 @@ int bcmgenet_mii_init(struct net_device *dev); int bcmgenet_mii_config(struct net_device *dev); int bcmgenet_mii_probe(struct net_device *dev); void bcmgenet_mii_exit(struct net_device *dev); +void bcmgenet_mii_reset(struct net_device *dev); void bcmgenet_phy_power_set(struct net_device *dev, bool enable); void bcmgenet_mii_setup(struct net_device *dev); diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c index c8affad76f368..8bdfe53754ba8 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmmii.c +++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c @@ -163,6 +163,7 @@ void bcmgenet_mii_setup(struct net_device *dev) phy_print_status(phydev); } + static int bcmgenet_fixed_phy_link_update(struct net_device *dev, struct fixed_phy_status *status) { @@ -172,6 +173,22 @@ static int bcmgenet_fixed_phy_link_update(struct net_device *dev, return 0; } +/* Perform a voluntary PHY software reset, since the EPHY is very finicky about + * not doing it and will start corrupting packets + */ +void bcmgenet_mii_reset(struct net_device *dev) +{ + struct bcmgenet_priv *priv = netdev_priv(dev); + + if (GENET_IS_V4(priv)) + return; + + if (priv->phydev) { + phy_init_hw(priv->phydev); + phy_start_aneg(priv->phydev); + } +} + void bcmgenet_phy_power_set(struct net_device *dev, bool enable) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -214,6 +231,7 @@ static void bcmgenet_internal_phy_setup(struct net_device *dev) reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT); reg |= EXT_PWR_DN_EN_LD; bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT); + bcmgenet_mii_reset(dev); } static void bcmgenet_moca_phy_setup(struct bcmgenet_priv *priv) From 795d2a6ddbe561cbac89bce6f54be39b72040655 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 30 Oct 2015 10:23:33 +0200 Subject: [PATCH 005/201] ipv4: fix to not remove local route on link down [ Upstream commit 4f823defdd5b106a5e89745ee8b163c71855de1e ] When fib_netdev_event calls fib_disable_ip on NETDEV_DOWN event we should not delete the local routes if the local address is still present. The confusion comes from the fact that both fib_netdev_event and fib_inetaddr_event use the NETDEV_DOWN constant. Fix it by returning back the variable 'force'. Steps to reproduce: modprobe dummy ifconfig dummy0 192.168.168.1 up ifconfig dummy0 down ip route list table local | grep dummy | grep host local 192.168.168.1 dev dummy0 proto kernel scope host src 192.168.168.1 Fixes: 8a3d03166f19 ("net: track link-status of ipv4 nexthops") Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 13 +++++++------ net/ipv4/fib_semantics.c | 11 ++++++++--- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 727d6e9a96856..965fa5b1a274e 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -317,7 +317,7 @@ void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); -int fib_sync_down_dev(struct net_device *dev, unsigned long event); +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net *net, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); void fib_select_multipath(struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 690bcbc59f26d..457b2cd75b85b 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1110,9 +1110,10 @@ static void nl_fib_lookup_exit(struct net *net) net->ipv4.fibnl = NULL; } -static void fib_disable_ip(struct net_device *dev, unsigned long event) +static void fib_disable_ip(struct net_device *dev, unsigned long event, + bool force) { - if (fib_sync_down_dev(dev, event)) + if (fib_sync_down_dev(dev, event, force)) fib_flush(dev_net(dev)); rt_cache_flush(dev_net(dev)); arp_ifdown(dev); @@ -1140,7 +1141,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, /* Last address was deleted from this interface. * Disable IP. */ - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, true); } else { rt_cache_flush(dev_net(dev)); } @@ -1157,7 +1158,7 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo unsigned int flags; if (event == NETDEV_UNREGISTER) { - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, true); rt_flush_dev(dev); return NOTIFY_DONE; } @@ -1178,14 +1179,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo rt_cache_flush(net); break; case NETDEV_DOWN: - fib_disable_ip(dev, event); + fib_disable_ip(dev, event, false); break; case NETDEV_CHANGE: flags = dev_get_flags(dev); if (flags & (IFF_RUNNING | IFF_LOWER_UP)) fib_sync_up(dev, RTNH_F_LINKDOWN); else - fib_sync_down_dev(dev, event); + fib_sync_down_dev(dev, event, false); /* fall through */ case NETDEV_CHANGEMTU: rt_cache_flush(net); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 064bd3caaa4f1..2aa5b5e7da75a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1281,7 +1281,13 @@ int fib_sync_down_addr(struct net *net, __be32 local) return ret; } -int fib_sync_down_dev(struct net_device *dev, unsigned long event) +/* Event force Flags Description + * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host + * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host + * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed + * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed + */ +int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -1290,8 +1296,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event) struct hlist_head *head = &fib_info_devhash[hash]; struct fib_nh *nh; - if (event == NETDEV_UNREGISTER || - event == NETDEV_DOWN) + if (force) scope = -1; hlist_for_each_entry(nh, head, nh_hash) { From 7b4d18bbbd1549def1317b9229e822379f09c921 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 30 Oct 2015 10:23:34 +0200 Subject: [PATCH 006/201] ipv4: update RTNH_F_LINKDOWN flag on UP event [ Upstream commit c9b3292eeb52c6834e972eb5b8fe38914771ed12 ] When nexthop is part of multipath route we should clear the LINKDOWN flag when link goes UP or when first address is added. This is needed because we always set LINKDOWN flag when DEAD flag was set but now on UP the nexthop is not dead anymore. Examples when LINKDOWN bit can be forgotten when no NETDEV_CHANGE is delivered: - link goes down (LINKDOWN is set), then link goes UP and device shows carrier OK but LINKDOWN remains set - last address is deleted (LINKDOWN is set), then address is added and device shows carrier OK but LINKDOWN remains set Steps to reproduce: modprobe dummy ifconfig dummy0 192.168.168.1 up here add a multipath route where one nexthop is for dummy0: ip route add 1.2.3.4 nexthop dummy0 nexthop SOME_OTHER_DEVICE ifconfig dummy0 down ifconfig dummy0 up now ip route shows nexthop that is not dead. Now set the sysctl var: echo 1 > /proc/sys/net/ipv4/conf/dummy0/ignore_routes_with_linkdown now ip route will show a dead nexthop because the forgotten RTNH_F_LINKDOWN is propagated as RTNH_F_DEAD. Fixes: 8a3d03166f19 ("net: track link-status of ipv4 nexthops") Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2aa5b5e7da75a..e966f8599b4a1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1445,6 +1445,13 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) if (!(dev->flags & IFF_UP)) return 0; + if (nh_flags & RTNH_F_DEAD) { + unsigned int flags = dev_get_flags(dev); + + if (flags & (IFF_RUNNING | IFF_LOWER_UP)) + nh_flags |= RTNH_F_LINKDOWN; + } + prev_fi = NULL; hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; From b8f93a05cbad22868ec327683a915cc9eece584e Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Fri, 30 Oct 2015 16:43:55 +0800 Subject: [PATCH 007/201] stmmac: Correctly report PTP capabilities. [ Upstream commit e6dbe1eb2db0d7a14991c06278dd3030c45fb825 ] priv->hwts_*_en indicate if timestamping is enabled/disabled at run time. But priv->dma_cap.time_stamp and priv->dma_cap.atime_stamp indicates HW is support for PTPv1/PTPv2. Signed-off-by: Phil Reid Acked-by: Richard Cochran Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 771cda2a48b2a..2e51b816a7e81 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -721,10 +721,13 @@ static int stmmac_get_ts_info(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if ((priv->hwts_tx_en) && (priv->hwts_rx_en)) { + if ((priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) { - info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; if (priv->ptp_clock) From e02e04ff6036fa7afe7e59cddcc86c598002bc76 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Fri, 30 Oct 2015 16:54:31 -0700 Subject: [PATCH 008/201] ipmr: fix possible race resulting from improper usage of IP_INC_STATS_BH() in preemptible context. [ Upstream commit 44f49dd8b5a606870a1f21101522a0f9c4414784 ] Fixes the following kernel BUG : BUG: using __this_cpu_add() in preemptible [00000000] code: bash/2758 caller is __this_cpu_preempt_check+0x13/0x15 CPU: 0 PID: 2758 Comm: bash Tainted: P O 3.18.19 #2 ffffffff8170eaca ffff880110d1b788 ffffffff81482b2a 0000000000000000 0000000000000000 ffff880110d1b7b8 ffffffff812010ae ffff880007cab800 ffff88001a060800 ffff88013a899108 ffff880108b84240 ffff880110d1b7c8 Call Trace: [] dump_stack+0x52/0x80 [] check_preemption_disabled+0xce/0xe1 [] __this_cpu_preempt_check+0x13/0x15 [] ipmr_queue_xmit+0x647/0x70c [] ip_mr_forward+0x32f/0x34e [] ip_mroute_setsockopt+0xe03/0x108c [] ? get_parent_ip+0x11/0x42 [] ? pollwake+0x4d/0x51 [] ? default_wake_function+0x0/0xf [] ? get_parent_ip+0x11/0x42 [] ? __wake_up_common+0x45/0x77 [] ? _raw_spin_unlock_irqrestore+0x1d/0x32 [] ? __wake_up_sync_key+0x4a/0x53 [] ? sock_def_readable+0x71/0x75 [] do_ip_setsockopt+0x9d/0xb55 [] ? unix_seqpacket_sendmsg+0x3f/0x41 [] ? sock_sendmsg+0x6d/0x86 [] ? sockfd_lookup_light+0x12/0x5d [] ? SyS_sendto+0xf3/0x11b [] ? new_sync_read+0x82/0xaa [] compat_ip_setsockopt+0x3b/0x99 [] compat_raw_setsockopt+0x11/0x32 [] compat_sock_common_setsockopt+0x18/0x1f [] compat_SyS_setsockopt+0x1a9/0x1cf [] compat_SyS_socketcall+0x180/0x1e3 [] cstar_dispatch+0x7/0x1e Signed-off-by: Ani Sinha Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 866ee89f5254a..8e8203d5c520a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1682,8 +1682,8 @@ static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); - IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); + IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_ADD_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len); if (unlikely(opt->optlen)) ip_forward_options(skb); @@ -1745,7 +1745,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * to blackhole. */ - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS); ip_rt_put(rt); goto out_free; } From 684d640a5b73bf4994bd2100894841abbb924bca Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Nov 2015 17:08:19 -0800 Subject: [PATCH 009/201] sit: fix sit0 percpu double allocations [ Upstream commit 4ece9009774596ee3df0acba65a324b7ea79387c ] sit0 device allocates its percpu storage twice : - One time in ipip6_tunnel_init() - One time in ipip6_fb_tunnel_init() Thus we leak 48 bytes per possible cpu per network namespace dismantle. ipip6_fb_tunnel_init() can be much simpler and does not return an error, and should be called after register_netdev() Note that ipip6_tunnel_clone_6rd() also needs to be called after register_netdev() (calling ipip6_tunnel_init()) Fixes: ebe084aafb7e ("sit: Use ipip6_tunnel_init as the ndo_init function.") Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Cc: Steffen Klassert Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/sit.c | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 94428fd85b2f9..dcccae86190f2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev) return 0; } -static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) +static void __net_init ipip6_fb_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - tunnel->dev = dev; - tunnel->net = dev_net(dev); - iph->version = 4; iph->protocol = IPPROTO_IPV6; iph->ihl = 5; iph->ttl = 64; - dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); - if (!dev->tstats) - return -ENOMEM; - - tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); - if (!tunnel->dst_cache) { - free_percpu(dev->tstats); - return -ENOMEM; - } - dev_hold(dev); rcu_assign_pointer(sitn->tunnels_wc[0], tunnel); - return 0; } static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1831,23 +1817,19 @@ static int __net_init sit_init_net(struct net *net) */ sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; - err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - if (err) - goto err_dev_free; - - ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); err = register_netdev(sitn->fb_tunnel_dev); if (err) goto err_reg_dev; + ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); + ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); + t = netdev_priv(sitn->fb_tunnel_dev); strcpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: - dev_put(sitn->fb_tunnel_dev); -err_dev_free: ipip6_dev_free(sitn->fb_tunnel_dev); err_alloc_dev: return err; From c555478720e688d9b9fda6afb51253fc9d3f71c6 Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Mon, 2 Nov 2015 12:51:31 +0000 Subject: [PATCH 010/201] sfc: push partner queue for skb->xmit_more [ Upstream commit b2663a4f30e85ec606b806f5135413e6d5c78d1e ] When the IP stack passes SKBs the sfc driver puts them in 2 different TX queues (called partners), one for checksummed and one for not checksummed. If the SKB has xmit_more set the driver will delay pushing the work to the NIC. When later it does decide to push the buffers this patch ensures it also pushes the partner queue, if that also has any delayed work. Before this fix the work in the partner queue would be left for a long time and cause a netdev watchdog. Fixes: 70b33fb ("sfc: add support for skb->xmit_more") Reported-by: Jianlin Shi Signed-off-by: Martin Habets Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/sfc/ef10.c | 4 +++- drivers/net/ethernet/sfc/farch.c | 4 +++- drivers/net/ethernet/sfc/net_driver.h | 2 ++ drivers/net/ethernet/sfc/tx.c | 30 +++++++++++++++++++++++++-- 4 files changed, 36 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index ff649ebef6374..286cc6b69d579 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1849,7 +1849,9 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) unsigned int write_ptr; efx_qword_t *txd; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index f08266f0eca23..5a1c5a8f278ad 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -321,7 +321,9 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) unsigned write_ptr; unsigned old_write_count = tx_queue->write_count; - BUG_ON(tx_queue->write_count == tx_queue->insert_count); + tx_queue->xmit_more_available = false; + if (unlikely(tx_queue->write_count == tx_queue->insert_count)) + return; do { write_ptr = tx_queue->write_count & tx_queue->ptr_mask; diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index c530e1c4cb4f1..24038ef96d9ff 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -219,6 +219,7 @@ struct efx_tx_buffer { * @tso_packets: Number of packets via the TSO xmit path * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used + * @xmit_more_available: Are any packets waiting to be pushed to the NIC * @empty_read_count: If the completion path has seen the queue as empty * and the transmission path has not yet checked this, the value of * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. @@ -253,6 +254,7 @@ struct efx_tx_queue { unsigned int tso_packets; unsigned int pushes; unsigned int pio_packets; + bool xmit_more_available; /* Statistics to supplement MAC stats */ unsigned long tx_packets; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 1833a01465711..67f6afaa022f4 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -431,8 +431,20 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those + * SKBs had skb->xmit_more set. If we do not push those they + * could be left for a long time and cause a netdev watchdog. + */ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tx_packets++; @@ -722,6 +734,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->read_count = 0; tx_queue->old_read_count = 0; tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; + tx_queue->xmit_more_available = false; /* Set up TX descriptor ring */ efx_nic_init_tx(tx_queue); @@ -747,6 +760,7 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue) ++tx_queue->read_count; } + tx_queue->xmit_more_available = false; netdev_tx_reset_queue(tx_queue->core_txq); } @@ -1302,8 +1316,20 @@ static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, efx_tx_maybe_stop_queue(tx_queue); /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) + if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { + struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); + + /* There could be packets left on the partner queue if those + * SKBs had skb->xmit_more set. If we do not push those they + * could be left for a long time and cause a netdev watchdog. + */ + if (txq2->xmit_more_available) + efx_nic_push_buffers(txq2); + efx_nic_push_buffers(tx_queue); + } else { + tx_queue->xmit_more_available = skb->xmit_more; + } tx_queue->tso_bursts++; return NETDEV_TX_OK; From 2e05a9e795039ff20784f0be4a609bfc28ca9d20 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 2 Nov 2015 07:50:07 -0800 Subject: [PATCH 011/201] net: avoid NULL deref in inet_ctl_sock_destroy() [ Upstream commit 8fa677d2706d325d71dab91bf6e6512c05214e37 ] Under low memory conditions, tcp_sk_init() and icmp_sk_init() can both iterate on all possible cpus and call inet_ctl_sock_destroy(), with eventual NULL pointer. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/inet_common.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_common.h b/include/net/inet_common.h index 279f83591971b..109e3ee9108c0 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -41,7 +41,8 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, static inline void inet_ctl_sock_destroy(struct sock *sk) { - sock_release(sk->sk_socket); + if (sk) + sock_release(sk->sk_socket); } #endif From 35b2aa1c3c36ba20d7f58b77a1e590af7b7d6c1f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Wed, 4 Nov 2015 14:47:53 +0100 Subject: [PATCH 012/201] ipv6: clean up dev_snmp6 proc entry when we fail to initialize inet6_dev [ Upstream commit 2a189f9e57650e9f310ddf4aad75d66c1233a064 ] In ipv6_add_dev, when addrconf_sysctl_register fails, we do not clean up the dev_snmp6 entry that we have already registered for this device. Call snmp6_unregister_dev in this case. Fixes: a317a2f19da7d ("ipv6: fail early when creating netdev named all or default") Reported-by: Dmitry Vyukov Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/addrconf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 36b85bd05ac8a..dd00828863a0c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -417,6 +417,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) if (err) { ipv6_mc_destroy_dev(ndev); del_timer(&ndev->regen_timer); + snmp6_unregister_dev(ndev); goto err_release; } /* protected by rtnl_lock */ From 10b1965171b100b1348fa03e801a77d0178687bd Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 3 Nov 2015 14:32:57 -0800 Subject: [PATCH 013/201] ipv4: disable BH when changing ip local port range [ Upstream commit 4ee3bd4a8c7463cdef0b82ebc33fc94a9170a7e0 ] This fixes the following lockdep warning: [ INFO: inconsistent lock state ] 4.3.0-rc7+ #1197 Not tainted --------------------------------- inconsistent {IN-SOFTIRQ-R} -> {SOFTIRQ-ON-W} usage. sysctl/1019 [HC0[0]:SC0[0]:HE1:SE1] takes: (&(&net->ipv4.ip_local_ports.lock)->seqcount){+.+-..}, at: [] ipv4_local_port_range+0xb4/0x12a {IN-SOFTIRQ-R} state was registered at: [] __lock_acquire+0x2f6/0xdf0 [] lock_acquire+0x11c/0x1a4 [] inet_get_local_port_range+0x4e/0xae [] udp_flow_src_port.constprop.40+0x23/0x116 [] vxlan_xmit_one+0x219/0xa6a [] vxlan_xmit+0xa6b/0xaa5 [] dev_hard_start_xmit+0x2ae/0x465 [] __dev_queue_xmit+0x531/0x633 [] dev_queue_xmit_sk+0x13/0x15 [] neigh_resolve_output+0x12f/0x14d [] ip6_finish_output2+0x344/0x39f [] ip6_finish_output+0x88/0x8e [] ip6_output+0x91/0xe5 [] dst_output_sk+0x47/0x4c [] NF_HOOK_THRESH.constprop.30+0x38/0x82 [] mld_sendpack+0x189/0x266 [] mld_ifc_timer_expire+0x1ef/0x223 [] call_timer_fn+0xfb/0x28c [] run_timer_softirq+0x1c7/0x1f1 Fixes: b8f1a55639e6 ("udp: Add function to make source port for UDP tunnels") Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/sysctl_net_ipv4.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 894da3a70aff9..ade773744b988 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -48,14 +48,14 @@ static void set_local_port_range(struct net *net, int range[2]) { bool same_parity = !((range[0] ^ range[1]) & 1); - write_seqlock(&net->ipv4.ip_local_ports.lock); + write_seqlock_bh(&net->ipv4.ip_local_ports.lock); if (same_parity && !net->ipv4.ip_local_ports.warned) { net->ipv4.ip_local_ports.warned = true; pr_err_ratelimited("ip_local_port_range: prefer different parity for start/end values.\n"); } net->ipv4.ip_local_ports.range[0] = range[0]; net->ipv4.ip_local_ports.range[1] = range[1]; - write_sequnlock(&net->ipv4.ip_local_ports.lock); + write_sequnlock_bh(&net->ipv4.ip_local_ports.lock); } /* Validate changes from /proc interface. */ From 0c04f7b0d0d109016317fddb16dccdbc7ae2c4fa Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 3 Nov 2015 15:59:28 -0800 Subject: [PATCH 014/201] net: Fix prefsrc lookups [ Upstream commit e1b8d903c6c3862160d2d5036806a94786c8fc4e ] A bug report (https://bugzilla.kernel.org/show_bug.cgi?id=107071) noted that the follwoing ip command is failing with v4.3: $ ip route add 10.248.5.0/24 dev bond0.250 table vlan_250 src 10.248.5.154 RTNETLINK answers: Invalid argument 021dd3b8a142d changed the lookup of the given preferred source address to use the table id passed in, but this assumes the local entries are in the given table which is not necessarily true for non-VRF use cases. When validating the preferred source fallback to the local table on failure. Fixes: 021dd3b8a142d ("net: Add routes to the table associated with the device") Signed-off-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/fib_semantics.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e966f8599b4a1..ef5892f5e046a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -864,14 +864,21 @@ static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || fib_prefsrc != cfg->fc_dst) { u32 tb_id = cfg->fc_table; + int rc; if (tb_id == RT_TABLE_MAIN) tb_id = RT_TABLE_LOCAL; - if (inet_addr_type_table(cfg->fc_nlinfo.nl_net, - fib_prefsrc, tb_id) != RTN_LOCAL) { - return false; + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, tb_id); + + if (rc != RTN_LOCAL && tb_id != RT_TABLE_LOCAL) { + rc = inet_addr_type_table(cfg->fc_nlinfo.nl_net, + fib_prefsrc, RT_TABLE_LOCAL); } + + if (rc != RTN_LOCAL) + return false; } return true; } From 46d85c56f04ce31e80b52786c5b739a56f0ef3ae Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 4 Nov 2015 13:49:49 +0100 Subject: [PATCH 015/201] tun_dst: Fix potential NULL dereference [ Upstream commit f63ce5b6fa5e9a0faf7a0e1ef2993a502878c78a ] In tun_dst_unclone() the return value of skb_metadata_dst() is checked for being NULL after it is dereferenced. Fix this by moving the dereference after the NULL check. Found by the Coverity scanner (CID 1338068). Fixes: fc4099f17240 ("openvswitch: Fix egress tunnel info.") Cc: Pravin B Shelar Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/dst_metadata.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index ce009710120ca..6816f0fa5693d 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -63,12 +63,13 @@ static inline struct metadata_dst *tun_rx_dst(int md_size) static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); - int md_size = md_dst->u.tun_info.options_len; + int md_size; struct metadata_dst *new_md; if (!md_dst) return ERR_PTR(-EINVAL); + md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); From 1e2d4ae7356d809aa52a6fa406d441b321ac7827 Mon Sep 17 00:00:00 2001 From: Francesco Ruggeri Date: Thu, 5 Nov 2015 08:16:14 -0800 Subject: [PATCH 016/201] packet: race condition in packet_bind [ Upstream commit 30f7ea1c2b5f5fb7462c5ae44fe2e40cb2d6a474 ] There is a race conditions between packet_notifier and packet_bind{_spkt}. It happens if packet_notifier(NETDEV_UNREGISTER) executes between the time packet_bind{_spkt} takes a reference on the new netdevice and the time packet_do_bind sets po->ifindex. In this case the notification can be missed. If this happens during a dev_change_net_namespace this can result in the netdevice to be moved to the new namespace while the packet_sock in the old namespace still holds a reference on it. When the netdevice is later deleted in the new namespace the deletion hangs since the packet_sock is not found in the new namespace' &net->packet.sklist. It can be reproduced with the script below. This patch makes packet_do_bind check again for the presence of the netdevice in the packet_sock's namespace after the synchronize_net in unregister_prot_hook. More in general it also uses the rcu lock for the duration of the bind to stop dev_change_net_namespace/rollback_registered_many from going past the synchronize_net following unlist_netdevice, so that no NETDEV_UNREGISTER notifications can happen on the new netdevice while the bind is executing. In order to do this some code from packet_bind{_spkt} is consolidated into packet_do_dev. import socket, os, time, sys proto=7 realDev='em1' vlanId=400 if len(sys.argv) > 1: vlanId=int(sys.argv[1]) dev='vlan%d' % vlanId os.system('taskset -p 0x10 %d' % os.getpid()) s = socket.socket(socket.PF_PACKET, socket.SOCK_RAW, proto) os.system('ip link add link %s name %s type vlan id %d' % (realDev, dev, vlanId)) os.system('ip netns add dummy') pid=os.fork() if pid == 0: # dev should be moved while packet_do_bind is in synchronize net os.system('taskset -p 0x20000 %d' % os.getpid()) os.system('ip link set %s netns dummy' % dev) os.system('ip netns exec dummy ip link del %s' % dev) s.close() sys.exit(0) time.sleep(.004) try: s.bind(('%s' % dev, proto+1)) except: print 'Could not bind socket' s.close() os.system('ip netns del dummy') sys.exit(0) os.waitpid(pid, 0) s.close() os.system('ip netns del dummy') sys.exit(0) Signed-off-by: Francesco Ruggeri Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 80 ++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 31 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index aa4b15c358844..27b2898f275c7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2903,22 +2903,40 @@ static int packet_release(struct socket *sock) * Attach a packet hook. */ -static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) +static int packet_do_bind(struct sock *sk, const char *name, int ifindex, + __be16 proto) { struct packet_sock *po = pkt_sk(sk); struct net_device *dev_curr; __be16 proto_curr; bool need_rehook; + struct net_device *dev = NULL; + int ret = 0; + bool unlisted = false; - if (po->fanout) { - if (dev) - dev_put(dev); - + if (po->fanout) return -EINVAL; - } lock_sock(sk); spin_lock(&po->bind_lock); + rcu_read_lock(); + + if (name) { + dev = dev_get_by_name_rcu(sock_net(sk), name); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } else if (ifindex) { + dev = dev_get_by_index_rcu(sock_net(sk), ifindex); + if (!dev) { + ret = -ENODEV; + goto out_unlock; + } + } + + if (dev) + dev_hold(dev); proto_curr = po->prot_hook.type; dev_curr = po->prot_hook.dev; @@ -2926,14 +2944,29 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) need_rehook = proto_curr != proto || dev_curr != dev; if (need_rehook) { - unregister_prot_hook(sk, true); + if (po->running) { + rcu_read_unlock(); + __unregister_prot_hook(sk, true); + rcu_read_lock(); + dev_curr = po->prot_hook.dev; + if (dev) + unlisted = !dev_get_by_index_rcu(sock_net(sk), + dev->ifindex); + } po->num = proto; po->prot_hook.type = proto; - po->prot_hook.dev = dev; - po->ifindex = dev ? dev->ifindex : 0; - packet_cached_dev_assign(po, dev); + if (unlikely(unlisted)) { + dev_put(dev); + po->prot_hook.dev = NULL; + po->ifindex = -1; + packet_cached_dev_reset(po); + } else { + po->prot_hook.dev = dev; + po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + } } if (dev_curr) dev_put(dev_curr); @@ -2941,7 +2974,7 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) if (proto == 0 || !need_rehook) goto out_unlock; - if (!dev || (dev->flags & IFF_UP)) { + if (!unlisted && (!dev || (dev->flags & IFF_UP))) { register_prot_hook(sk); } else { sk->sk_err = ENETDOWN; @@ -2950,9 +2983,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto) } out_unlock: + rcu_read_unlock(); spin_unlock(&po->bind_lock); release_sock(sk); - return 0; + return ret; } /* @@ -2964,8 +2998,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, { struct sock *sk = sock->sk; char name[15]; - struct net_device *dev; - int err = -ENODEV; /* * Check legality @@ -2975,19 +3007,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; strlcpy(name, uaddr->sa_data, sizeof(name)); - dev = dev_get_by_name(sock_net(sk), name); - if (dev) - err = packet_do_bind(sk, dev, pkt_sk(sk)->num); - return err; + return packet_do_bind(sk, name, 0, pkt_sk(sk)->num); } static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; struct sock *sk = sock->sk; - struct net_device *dev = NULL; - int err; - /* * Check legality @@ -2998,16 +3024,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len if (sll->sll_family != AF_PACKET) return -EINVAL; - if (sll->sll_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex); - if (dev == NULL) - goto out; - } - err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num); - -out: - return err; + return packet_do_bind(sk, NULL, sll->sll_ifindex, + sll->sll_protocol ? : pkt_sk(sk)->num); } static struct proto packet_proto = { From 1c88ba236f4040530a98ca81d0316bd03fb5a41f Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Fri, 6 Nov 2015 17:23:23 -0800 Subject: [PATCH 017/201] bonding: fix panic on non-ARPHRD_ETHER enslave failure [ Upstream commit 40baec225765c54eefa870530dd613bad9829bb7 ] Since commit 7d5cd2ce529b, when bond_enslave fails on devices that are not ARPHRD_ETHER, if needed, it resets the bonding device back to ARPHRD_ETHER by calling ether_setup. Unfortunately, ether_setup clobbers dev->flags, clearing IFF_UP if the bond device is up, leaving it in a quasi-down state without having actually gone through dev_close. For bonding, if any periodic work queue items are active (miimon, arp_interval, etc), those will remain running, as they are stopped by bond_close. At this point, if the bonding module is unloaded or the bond is deleted, the system will panic when the work function is called. This panic is resolved by calling dev_close on the bond itself prior to calling ether_setup. Cc: Nikolay Aleksandrov Signed-off-by: Jay Vosburgh Fixes: 7d5cd2ce5292 ("bonding: correctly handle bonding type change on enslave failure") Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/bonding/bond_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 771a449d2f563..bcd7bddbe312b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1749,6 +1749,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) slave_dev->dev_addr)) eth_hw_addr_random(bond_dev); if (bond_dev->type != ARPHRD_ETHER) { + dev_close(bond_dev); ether_setup(bond_dev); bond_dev->flags |= IFF_MASTER; bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; From d71ab4e1efcb5cdc25bc5e20b655e16ef0927c01 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 9 Nov 2015 17:51:23 -0800 Subject: [PATCH 018/201] net: fix a race in dst_release() [ Upstream commit d69bbf88c8d0b367cf3e3a052f6daadf630ee566 ] Only cpu seeing dst refcount going to 0 can safely dereference dst->flags. Otherwise an other cpu might already have freed the dst. Fixes: 27b75c95f10d ("net: avoid RCU for NOCACHE dst") Reported-by: Greg Thelen Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/dst.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dst.c b/net/core/dst.c index 0771c8cb9307c..d6a5a0bc7df5d 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -306,7 +306,7 @@ void dst_release(struct dst_entry *dst) if (unlikely(newrefcnt < 0)) net_warn_ratelimited("%s: dst:%p refcnt:%d\n", __func__, dst, newrefcnt); - if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt) + if (!newrefcnt && unlikely(dst->flags & DST_NOCACHE)) call_rcu(&dst->rcu_head, dst_destroy_rcu); } } From 9837c4f9a02a877d094bc0c21d3f3b6d242d7749 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Aug 2015 09:41:39 +0100 Subject: [PATCH 019/201] ARM: 8426/1: dma-mapping: add missing range check in dma_mmap() commit 371f0f085f629fc0f66695f572373ca4445a67ad upstream. dma_mmap() function in IOMMU-based dma-mapping implementation lacked a check for valid range of mmap parameters (offset and buffer size), what might have caused access beyond the allocated buffer. This patch fixes this issue. Signed-off-by: Marek Szyprowski Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1a7815e5421b6..c0865867ef0de 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1407,12 +1407,17 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, unsigned long uaddr = vma->vm_start; unsigned long usize = vma->vm_end - vma->vm_start; struct page **pages = __iommu_get_pages(cpu_addr, attrs); + unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot); if (!pages) return -ENXIO; + if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) + return -ENXIO; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { From 53c5c971742f3e4340239a8a47cb0509eb460318 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 28 Aug 2015 09:42:09 +0100 Subject: [PATCH 020/201] ARM: 8427/1: dma-mapping: add support for offset parameter in dma_mmap() commit 7e31210349e9e03a9a4dff31ab5f2bc83e8e84f5 upstream. IOMMU-based dma_mmap() implementation lacked proper support for offset parameter used in mmap call (it always assumed that mapping starts from offset zero). This patch adds support for offset parameter to IOMMU-based implementation. Signed-off-by: Marek Szyprowski Signed-off-by: Russell King Signed-off-by: Greg Kroah-Hartman --- arch/arm/mm/dma-mapping.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index c0865867ef0de..ad4eb2d26e169 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1418,6 +1418,8 @@ static int arm_iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, if (off >= nr_pages || (usize >> PAGE_SHIFT) > nr_pages - off) return -ENXIO; + pages += off; + do { int ret = vm_insert_page(vma, uaddr, *pages++); if (ret) { From 6b98f5acaab04d568c5bc9d003bb3c90221360c5 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:43 +0300 Subject: [PATCH 021/201] ARM: common: edma: Fix channel parameter for irq callbacks commit 696d8b70c09dd421c4d037fab04341e5b30585cf upstream. In case when the interrupt happened for the second eDMA the channel number was incorrectly passed to the client driver. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman --- arch/arm/common/edma.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/common/edma.c b/arch/arm/common/edma.c index 873dbfcc7dc9d..56fc339571f92 100644 --- a/arch/arm/common/edma.c +++ b/arch/arm/common/edma.c @@ -406,7 +406,8 @@ static irqreturn_t dma_irq_handler(int irq, void *data) BIT(slot)); if (edma_cc[ctlr]->intr_data[channel].callback) edma_cc[ctlr]->intr_data[channel].callback( - channel, EDMA_DMA_COMPLETE, + EDMA_CTLR_CHAN(ctlr, channel), + EDMA_DMA_COMPLETE, edma_cc[ctlr]->intr_data[channel].data); } } while (sh_ipr); @@ -460,7 +461,8 @@ static irqreturn_t dma_ccerr_handler(int irq, void *data) if (edma_cc[ctlr]->intr_data[k]. callback) { edma_cc[ctlr]->intr_data[k]. - callback(k, + callback( + EDMA_CTLR_CHAN(ctlr, k), EDMA_DMA_CC_ERROR, edma_cc[ctlr]->intr_data [k].data); From c09ea0445eec7604f4855a1340bd0a831b2b6831 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 16 Sep 2015 09:35:06 +0800 Subject: [PATCH 022/201] ARM: dts: imx27.dtsi: change the clock information for usb commit facf47ee6b4d07d43c3bfd6f0762f1b28f64703a upstream. For imx27, it needs three clocks to let the controller work, the old code is wrong, and usbmisc has not included clock handling code any more. Without this patch, it will cause below data abort when accessing usbmisc registers. usbcore: registered new interface driver usb-storage Unhandled fault: external abort on non-linefetch (0x008) at 0xf4424600 pgd = c0004000 [f4424600] *pgd=10000452(bad) Internal error: : 8 [#1] PREEMPT ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 4.1.0-next-20150701-dirty #3089 Hardware name: Freescale i.MX27 (Device Tree Support) task: c7832b60 ti: c783e000 task.ti: c783e000 PC is at usbmisc_imx27_init+0x4c/0xbc LR is at usbmisc_imx27_init+0x40/0xbc pc : [] lr : [] psr: 60000093 sp : c783fe08 ip : 00000000 fp : 00000000 r10: c0576434 r9 : 0000009c r8 : c7a773a0 r7 : 01000000 r6 : 60000013 r5 : c7a776f0 r4 : c7a773f0 r3 : f4424600 r2 : 00000000 r1 : 00000001 r0 : 00000001 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment kernel Control: 0005317f Table: a0004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc783e190) Stack: (0xc783fe08 to 0xc7840000) Signed-off-by: Peter Chen Reported-by: Fabio Estevam Tested-by: Fabio Estevam Acked-by: Shawn Guo Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/imx27.dtsi | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/imx27.dtsi b/arch/arm/boot/dts/imx27.dtsi index feb9d34b239c8..f818ea483aeb5 100644 --- a/arch/arm/boot/dts/imx27.dtsi +++ b/arch/arm/boot/dts/imx27.dtsi @@ -486,7 +486,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024000 0x200>; interrupts = <56>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 0>; status = "disabled"; }; @@ -495,7 +498,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024200 0x200>; interrupts = <54>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 1>; dr_mode = "host"; status = "disabled"; @@ -505,7 +511,10 @@ compatible = "fsl,imx27-usb"; reg = <0x10024400 0x200>; interrupts = <55>; - clocks = <&clks IMX27_CLK_USB_IPG_GATE>; + clocks = <&clks IMX27_CLK_USB_IPG_GATE>, + <&clks IMX27_CLK_USB_AHB_GATE>, + <&clks IMX27_CLK_USB_DIV>; + clock-names = "ipg", "ahb", "per"; fsl,usbmisc = <&usbmisc 2>; dr_mode = "host"; status = "disabled"; @@ -515,7 +524,6 @@ #index-cells = <1>; compatible = "fsl,imx27-usbmisc"; reg = <0x10024600 0x200>; - clocks = <&clks IMX27_CLK_USB_AHB_GATE>; }; sahara2: sahara@10025000 { From 73982e966793c4c38cc19af4c193fdd88fd90213 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Tue, 30 Jun 2015 17:15:50 +0300 Subject: [PATCH 023/201] ARM: tegra: paz00: use con_id's to refer GPIO's in gpiod_lookup table commit e77b675f8786f38d40fc1562e1275875daf67fef upstream. Commit 72daceb9a10a ("net: rfkill: gpio: Add default GPIO driver mappings for ACPI") removed possibility to request GPIO by table index for non-ACPI platforms without changing its users. As result "shutdown" GPIO request will fail if request for "reset" GPIO succeeded or "reset" will be requested instead of "shutdown" if "reset" wasn't defined. Fix it by making gpiod_lookup_table use con_id's instead of indexes. Signed-off-by: Dmitry Osipenko Fixes: 72daceb (net: rfkill: gpio: Add default GPIO driver mappings for ACPI) Acked-by: Alexandre Courbot Reviewed-by: Marc Dietrich Tested-by: Marc Dietrich Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-tegra/board-paz00.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-tegra/board-paz00.c b/arch/arm/mach-tegra/board-paz00.c index fbe74c6806f3b..49d1110cff534 100644 --- a/arch/arm/mach-tegra/board-paz00.c +++ b/arch/arm/mach-tegra/board-paz00.c @@ -39,8 +39,8 @@ static struct platform_device wifi_rfkill_device = { static struct gpiod_lookup_table wifi_gpio_lookup = { .dev_id = "rfkill_gpio", .table = { - GPIO_LOOKUP_IDX("tegra-gpio", 25, NULL, 0, 0), - GPIO_LOOKUP_IDX("tegra-gpio", 85, NULL, 1, 0), + GPIO_LOOKUP("tegra-gpio", 25, "reset", 0), + GPIO_LOOKUP("tegra-gpio", 85, "shutdown", 0), { }, }, }; From 7fbe6208f715497642c80da028565a9f8c5f117b Mon Sep 17 00:00:00 2001 From: Holger Busse Date: Wed, 26 Aug 2015 10:45:45 +0200 Subject: [PATCH 024/201] ARM: at91/dt: corrections to i2c1 declaration to sama5d4 commit d1a9c24ad16ab2b26f1574bc3f2c165a7beff5df upstream. Correcting the dma declaration for i2c1 dma. Signed-off-by: Holger Busse Signed-off-by: Nicolas Ferre Fixes: 4cc7cdf35c5f ("ARM: at91/dt: add i2c1 declaration to sama5d4") Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sama5d4.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi index 8d1de29e8da10..fb92481e60d46 100644 --- a/arch/arm/boot/dts/sama5d4.dtsi +++ b/arch/arm/boot/dts/sama5d4.dtsi @@ -939,11 +939,11 @@ reg = <0xf8018000 0x4000>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 6>; dmas = <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(4)>, + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(4))>, <&dma1 - (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1)) - AT91_XDMAC_DT_PERID(5)>; + (AT91_XDMAC_DT_MEM_IF(0) | AT91_XDMAC_DT_PER_IF(1) + | AT91_XDMAC_DT_PERID(5))>; dma-names = "tx", "rx"; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_i2c1>; From 48d6d4f8e499a546f3bf03b1afefba59a5216e28 Mon Sep 17 00:00:00 2001 From: Patrick Doyle Date: Fri, 16 Oct 2015 12:39:05 +0200 Subject: [PATCH 025/201] ARM: at91: pm: at91_pm_suspend_in_sram() must be 8-byte aligned commit 5fcf8d1a0e84792b2bc44922c5d833dab96a9c1e upstream. fncpy() requires that the source and the destination are both 8-byte aligned. Signed-off-by: Patrick Doyle Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Fixes: d94e688cae56 ("ARM: at91/pm: move the copying the sram function to the sram initialization phase") Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-at91/pm_suspend.S | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index 0d95f488b47a7..a25defda3d226 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -80,6 +80,8 @@ tmp2 .req r5 * @r2: base address of second SDRAM Controller or 0 if not present * @r3: pm information */ +/* at91_pm_suspend_in_sram must be 8-byte aligned per the requirements of fncpy() */ + .align 3 ENTRY(at91_pm_suspend_in_sram) /* Save registers on stack */ stmfd sp!, {r4 - r12, lr} From f706ac7622cb87d7e3bba4704d896e2e88fd3dad Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 21 Aug 2015 14:38:38 +0200 Subject: [PATCH 026/201] ARM: dts: Add vbus regulator to USB2 phy nodes on exynos3250, exynos4210 and exynos4412 boards commit 4ae9a4c66cdcb8b5d4e4d904846f1b450dbcabb4 upstream. Exynos USB2 PHY driver now supports VBUS regulator, so add it to all boards which have it available. This also fixes commit 7eec1266751b ("ARM: dts: Add Maxim 77693 PMIC to exynos4412-trats2"), which added new regulators to Trats2 board, but without linking them to the consumers. Signed-off-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Fixes: 7eec1266751b ("ARM: dts: Add Maxim 77693 PMIC to exynos4412-trats2") Signed-off-by: Kukjin Kim Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/exynos3250-monk.dts | 1 + arch/arm/boot/dts/exynos3250-rinato.dts | 1 + arch/arm/boot/dts/exynos4210-trats.dts | 2 +- arch/arm/boot/dts/exynos4210-universal_c210.dts | 2 +- arch/arm/boot/dts/exynos4412-trats2.dts | 1 + 5 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/exynos3250-monk.dts b/arch/arm/boot/dts/exynos3250-monk.dts index 540a0adf2be6d..35b39d2255d3f 100644 --- a/arch/arm/boot/dts/exynos3250-monk.dts +++ b/arch/arm/boot/dts/exynos3250-monk.dts @@ -161,6 +161,7 @@ }; &exynos_usbphy { + vbus-supply = <&safeout_reg>; status = "okay"; }; diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts index 41a5fafb9aa93..23623cd3ebd93 100644 --- a/arch/arm/boot/dts/exynos3250-rinato.dts +++ b/arch/arm/boot/dts/exynos3250-rinato.dts @@ -153,6 +153,7 @@ &exynos_usbphy { status = "okay"; + vbus-supply = <&safeout_reg>; }; &hsotg { diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts index ba34886f8b65b..01d38f2145b97 100644 --- a/arch/arm/boot/dts/exynos4210-trats.dts +++ b/arch/arm/boot/dts/exynos4210-trats.dts @@ -251,6 +251,7 @@ &exynos_usbphy { status = "okay"; + vbus-supply = <&safe1_sreg>; }; &fimd { @@ -448,7 +449,6 @@ safe1_sreg: ESAFEOUT1 { regulator-name = "SAFEOUT1"; - regulator-always-on; }; safe2_sreg: ESAFEOUT2 { diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts index eb379526e2342..2c04297825fe9 100644 --- a/arch/arm/boot/dts/exynos4210-universal_c210.dts +++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts @@ -248,6 +248,7 @@ &exynos_usbphy { status = "okay"; + vbus-supply = <&safeout1_reg>; }; &fimd { @@ -486,7 +487,6 @@ safeout1_reg: ESAFEOUT1 { regulator-name = "SAFEOUT1"; - regulator-always-on; }; safeout2_reg: ESAFEOUT2 { diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts index 2a1ebb76ebe00..50a5e8a85283c 100644 --- a/arch/arm/boot/dts/exynos4412-trats2.dts +++ b/arch/arm/boot/dts/exynos4412-trats2.dts @@ -391,6 +391,7 @@ }; &exynos_usbphy { + vbus-supply = <&esafeout1_reg>; status = "okay"; }; From 5a089fcb4663cffbd0fac3460272c92ccd04ee31 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 16 Oct 2015 12:32:32 -0700 Subject: [PATCH 027/201] ARM: dts: Fix WLAN regression on omap5-uevm commit 0efc898a9bea7a2e8e583c6efab0e19dc7093078 upstream. Commit 99f84cae43df ("ARM: dts: add wl12xx/wl18xx bindings") added device tree bindings for the TI WLAN SDIO on many omap variants. I recall wondering how come omap5-uevm did not have the WLAN added and this issue has been bugging me for a while now, and I finally tracked it down to a bad pinmux regression, and a missing deferred probe handling for the 32k clock from palmas that's requested by twl6040. Basically 392adaf796b9 ("ARM: dts: omap5-evm: Add mcspi data") added pin muxing for mcspi4 that conflicts with the onboard WLAN. While some omap5-uevm don't have WLAN populated, the pins are not reused for other devices. And as the SDIO bus should be probed, let's try to enable WLAN by default. Let's fix the regression and add the WLAN configuration as done for the other boards in 99f84cae43df ("ARM: dts: add wl12xx/wl18xx bindings"). And let's use the new MMC pwrseq for the 32k clock as suggested by Javier Martinez Canillas . Note that without a related deferred probe fix for twl6040, the 32k clock is not initialized if palmas-clk is a module and twl6040 is built-in. Let's also use the generic "non-removable" instead of the legacy "ti,non-removable" property while at it. And finally, note that omap5 seems to require WAKEUP_EN for the WLAN GPIO interrupt. Fixes: 392adaf796b9 ("ARM: dts: omap5-evm: Add mcspi data") Cc: Sourav Poddar Signed-off-by: Tony Lindgren Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/omap5-uevm.dts | 66 ++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 3cb030f9d2c4d..c85761212d90d 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -31,6 +31,24 @@ regulator-max-microvolt = <3000000>; }; + mmc3_pwrseq: sdhci0_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clk32kgaudio>; + clock-names = "ext_clock"; + }; + + vmmcsdio_fixed: fixedregulator-mmcsdio { + compatible = "regulator-fixed"; + regulator-name = "vmmcsdio_fixed"; + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + gpio = <&gpio5 12 GPIO_ACTIVE_HIGH>; /* gpio140 WLAN_EN */ + enable-active-high; + startup-delay-us = <70000>; + pinctrl-names = "default"; + pinctrl-0 = <&wlan_pins>; + }; + /* HS USB Host PHY on PORT 2 */ hsusb2_phy: hsusb2_phy { compatible = "usb-nop-xceiv"; @@ -197,12 +215,20 @@ >; }; - mcspi4_pins: pinmux_mcspi4_pins { + mmc3_pins: pinmux_mmc3_pins { + pinctrl-single,pins = < + OMAP5_IOPAD(0x01a4, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_clk */ + OMAP5_IOPAD(0x01a6, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_cmd */ + OMAP5_IOPAD(0x01a8, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data0 */ + OMAP5_IOPAD(0x01aa, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data1 */ + OMAP5_IOPAD(0x01ac, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data2 */ + OMAP5_IOPAD(0x01ae, PIN_INPUT_PULLUP | MUX_MODE0) /* wlsdio_data3 */ + >; + }; + + wlan_pins: pinmux_wlan_pins { pinctrl-single,pins = < - 0x164 (PIN_INPUT | MUX_MODE1) /* mcspi4_clk */ - 0x168 (PIN_INPUT | MUX_MODE1) /* mcspi4_simo */ - 0x16a (PIN_INPUT | MUX_MODE1) /* mcspi4_somi */ - 0x16c (PIN_INPUT | MUX_MODE1) /* mcspi4_cs0 */ + OMAP5_IOPAD(0x1bc, PIN_OUTPUT | MUX_MODE6) /* mcspi1_clk.gpio5_140 */ >; }; @@ -276,6 +302,12 @@ 0x1A (PIN_OUTPUT | MUX_MODE0) /* fref_clk1_out, USB hub clk */ >; }; + + wlcore_irq_pin: pinmux_wlcore_irq_pin { + pinctrl-single,pins = < + OMAP5_IOPAD(0x040, WAKEUP_EN | PIN_INPUT_PULLUP | MUX_MODE6) /* llia_wakereqin.gpio1_wk14 */ + >; + }; }; &mmc1 { @@ -290,8 +322,25 @@ }; &mmc3 { + vmmc-supply = <&vmmcsdio_fixed>; + mmc-pwrseq = <&mmc3_pwrseq>; bus-width = <4>; - ti,non-removable; + non-removable; + cap-power-off-card; + pinctrl-names = "default"; + pinctrl-0 = <&mmc3_pins &wlcore_irq_pin>; + interrupts-extended = <&gic GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH + &omap5_pmx_core 0x168>; + + #address-cells = <1>; + #size-cells = <0>; + wlcore: wlcore@2 { + compatible = "ti,wl1271"; + reg = <2>; + interrupt-parent = <&gpio1>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; /* gpio 14 */ + ref-clock-frequency = <26000000>; + }; }; &mmc4 { @@ -598,11 +647,6 @@ pinctrl-0 = <&mcspi3_pins>; }; -&mcspi4 { - pinctrl-names = "default"; - pinctrl-0 = <&mcspi4_pins>; -}; - &uart1 { pinctrl-names = "default"; pinctrl-0 = <&uart1_pins>; From 2c45565f18b2b076669d133805ce8ec313208f73 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Sat, 10 Oct 2015 22:48:56 +0800 Subject: [PATCH 028/201] ARM: dts: sun6i: hummingbird: Fix VDD-CPU and VDD-GPU regulator names commit 976d84fce6aa1e5bf92b8d06d69014ac45fd5fad upstream. The VDD-CPU and VDD-GPU regulators were incorrectly swapped. Fixes: bab03561224ba ("ARM: dts: sun6i: hummingbird: Add AXP221 regulator nodes") Signed-off-by: Chen-Yu Tsai Signed-off-by: Maxime Ripard Signed-off-by: Greg Kroah-Hartman --- arch/arm/boot/dts/sun6i-a31-hummingbird.dts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts index d0cfadac0691d..18f26ca4e3755 100644 --- a/arch/arm/boot/dts/sun6i-a31-hummingbird.dts +++ b/arch/arm/boot/dts/sun6i-a31-hummingbird.dts @@ -184,18 +184,18 @@ regulator-name = "vcc-3v0"; }; - vdd_cpu: dcdc2 { + vdd_gpu: dcdc2 { regulator-always-on; regulator-min-microvolt = <700000>; regulator-max-microvolt = <1320000>; - regulator-name = "vdd-cpu"; + regulator-name = "vdd-gpu"; }; - vdd_gpu: dcdc3 { + vdd_cpu: dcdc3 { regulator-always-on; regulator-min-microvolt = <700000>; regulator-max-microvolt = <1320000>; - regulator-name = "vdd-gpu"; + regulator-name = "vdd-cpu"; }; vdd_sys_dll: dcdc4 { From aaa46adc35b604f6445b795b14ed9dfb79ce9c38 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 12 Oct 2015 15:46:08 +0200 Subject: [PATCH 029/201] ARM: pxa: remove incorrect __init annotation on pxa27x_set_pwrmode commit 54c09889bff6d99c8733eed4a26c9391b177c88b upstream. The z2 machine calls pxa27x_set_pwrmode() in order to power off the machine, but this function gets discarded early at boot because it is marked __init, as pointed out by kbuild: WARNING: vmlinux.o(.text+0x145c4): Section mismatch in reference from the function z2_power_off() to the function .init.text:pxa27x_set_pwrmode() The function z2_power_off() references the function __init pxa27x_set_pwrmode(). This is often because z2_power_off lacks a __init annotation or the annotation of pxa27x_set_pwrmode is wrong. This removes the __init section modifier to fix rebooting and the build error. Signed-off-by: Arnd Bergmann Fixes: ba4a90a6d86a ("ARM: pxa/z2: fix building error of pxa27x_cpu_suspend() no longer available") Signed-off-by: Robert Jarzmik Signed-off-by: Greg Kroah-Hartman --- arch/arm/mach-pxa/include/mach/pxa27x.h | 2 +- arch/arm/mach-pxa/pxa27x.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-pxa/include/mach/pxa27x.h b/arch/arm/mach-pxa/include/mach/pxa27x.h index 599b925a657c4..1a4291936c582 100644 --- a/arch/arm/mach-pxa/include/mach/pxa27x.h +++ b/arch/arm/mach-pxa/include/mach/pxa27x.h @@ -19,7 +19,7 @@ #define ARB_CORE_PARK (1<<24) /* Be parked with core when idle */ #define ARB_LOCK_FLAG (1<<23) /* Only Locking masters gain access to the bus */ -extern int __init pxa27x_set_pwrmode(unsigned int mode); +extern int pxa27x_set_pwrmode(unsigned int mode); extern void pxa27x_cpu_pm_enter(suspend_state_t state); #endif /* __MACH_PXA27x_H */ diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c index 221260d5d1092..ffc4240285577 100644 --- a/arch/arm/mach-pxa/pxa27x.c +++ b/arch/arm/mach-pxa/pxa27x.c @@ -84,7 +84,7 @@ EXPORT_SYMBOL_GPL(pxa27x_configure_ac97reset); */ static unsigned int pwrmode = PWRMODE_SLEEP; -int __init pxa27x_set_pwrmode(unsigned int mode) +int pxa27x_set_pwrmode(unsigned int mode) { switch (mode) { case PWRMODE_SLEEP: From 9473e288c36140f5b2e0cedc9b8fbcc7443838b9 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 25 Oct 2015 23:21:42 +0100 Subject: [PATCH 030/201] MIPS: lantiq: add clk_round_rate() commit 4e7d30dba493b60a80e9b590add1b4402265cc83 upstream. This adds a basic implementation of clk_round_rate() The clk_round_rate() function is called by multiple drivers and subsystems now and the lantiq clk driver is supposed to export this, but doesn't do so, this causes linking problems like this one: ERROR: "clk_round_rate" [drivers/media/v4l2-core/videodev.ko] undefined! Signed-off-by: Hauke Mehrtens Acked-by: John Crispin Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/11358/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/lantiq/clk.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/arch/mips/lantiq/clk.c b/arch/mips/lantiq/clk.c index 3fc2e6d70c779..a0706fd4ce0a0 100644 --- a/arch/mips/lantiq/clk.c +++ b/arch/mips/lantiq/clk.c @@ -99,6 +99,23 @@ int clk_set_rate(struct clk *clk, unsigned long rate) } EXPORT_SYMBOL(clk_set_rate); +long clk_round_rate(struct clk *clk, unsigned long rate) +{ + if (unlikely(!clk_good(clk))) + return 0; + if (clk->rates && *clk->rates) { + unsigned long *r = clk->rates; + + while (*r && (*r != rate)) + r++; + if (!*r) { + return clk->rate; + } + } + return rate; +} +EXPORT_SYMBOL(clk_round_rate); + int clk_enable(struct clk *clk) { if (unlikely(!clk_good(clk))) From c6f3f16cccadfb898160bd9d2cfdb58574db4123 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 6 Oct 2015 15:12:05 +0100 Subject: [PATCH 031/201] MIPS: CDMM: Add builtin_mips_cdmm_driver() macro commit 1b4a5ddb127caf125e14551ebd334be1acf21805 upstream. Add helper macro builtin_mips_cdmm_driver() for builtin CDMM drivers that don't do anything special in init and have no exit. The module_mips_cdmm_driver() helper isn't really appropriate for drivers that can't be built as a module. Signed-off-by: James Hogan Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/11264/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/include/asm/cdmm.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/mips/include/asm/cdmm.h b/arch/mips/include/asm/cdmm.h index bece2064cc8cb..c06dbf8ba9370 100644 --- a/arch/mips/include/asm/cdmm.h +++ b/arch/mips/include/asm/cdmm.h @@ -84,6 +84,17 @@ void mips_cdmm_driver_unregister(struct mips_cdmm_driver *); module_driver(__mips_cdmm_driver, mips_cdmm_driver_register, \ mips_cdmm_driver_unregister) +/* + * builtin_mips_cdmm_driver() - Helper macro for drivers that don't do anything + * special in init and have no exit. This eliminates some boilerplate. Each + * driver may only use this macro once, and calling it replaces device_initcall + * (or in some cases, the legacy __initcall). This is meant to be a direct + * parallel of module_mips_cdmm_driver() above but without the __exit stuff that + * is not used for builtin cases. + */ +#define builtin_mips_cdmm_driver(__mips_cdmm_driver) \ + builtin_driver(__mips_cdmm_driver, mips_cdmm_driver_register) + /* drivers/tty/mips_ejtag_fdc.c */ #ifdef CONFIG_MIPS_EJTAG_FDC_EARLYCON From e3301b7ef826632aa792add0f6d97ed577bc2ea9 Mon Sep 17 00:00:00 2001 From: Alban Bedel Date: Tue, 17 Nov 2015 09:40:07 +0100 Subject: [PATCH 032/201] MIPS: ath79: Fix the DDR control initialization on ar71xx and ar934x commit 5011a7e808c9fec643d752c5a495a48f27268a48 upstream. The DDR control initialization needs to know the SoC type, however ath79_detect_sys_type() was called after ath79_ddr_ctrl_init(). Reverse the order to fix the DDR control initialization on ar71xx and ar934x. Signed-off-by: Alban Bedel Cc: Felix Fietkau Cc: Qais Yousef Cc: Andrew Bresticker Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/11500/ Signed-off-by: Ralf Baechle Signed-off-by: Greg Kroah-Hartman --- arch/mips/ath79/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index 1ba21204ebe02..9a00137035798 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -216,9 +216,9 @@ void __init plat_mem_setup(void) AR71XX_RESET_SIZE); ath79_pll_base = ioremap_nocache(AR71XX_PLL_BASE, AR71XX_PLL_SIZE); + ath79_detect_sys_type(); ath79_ddr_ctrl_init(); - ath79_detect_sys_type(); if (mips_machtype != ATH79_MACH_GENERIC_OF) detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX); From d169209a986eeb1313a5142e716fa0fc5ba37ad3 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:18 +0000 Subject: [PATCH 033/201] MIPS: KVM: Fix ASID restoration logic commit 002374f371bd02df864cce1fe85d90dc5b292837 upstream. ASID restoration on guest resume should determine the guest execution mode based on the guest Status register rather than bit 30 of the guest PC. Fix the two places in locore.S that do this, loading the guest status from the cop0 area. Note, this assembly is specific to the trap & emulate implementation of KVM, so it doesn't need to check the supervisor bit as that mode is not implemented in the guest. Fixes: b680f70fc111 ("KVM/MIPS32: Entry point for trampolining to...") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/locore.S | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S index c567240386a0f..d1ee95a7f7dd8 100644 --- a/arch/mips/kvm/locore.S +++ b/arch/mips/kvm/locore.S @@ -165,9 +165,11 @@ FEXPORT(__kvm_mips_vcpu_run) FEXPORT(__kvm_mips_load_asid) /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: @@ -482,9 +484,11 @@ __kvm_mips_return_to_guest: mtc0 t0, CP0_EPC /* Set the ASID for the Guest Kernel */ - INT_SLL t0, t0, 1 /* with kseg0 @ 0x40000000, kernel */ - /* addresses shift to 0x80000000 */ - bltz t0, 1f /* If kernel */ + PTR_L t0, VCPU_COP0(k1) + LONG_L t0, COP0_STATUS(t0) + andi t0, KSU_USER | ST0_ERL | ST0_EXL + xori t0, KSU_USER + bnez t0, 1f /* If kernel */ INT_ADDIU t1, k1, VCPU_GUEST_KERNEL_ASID /* (BD) */ INT_ADDIU t1, k1, VCPU_GUEST_USER_ASID /* else user */ 1: From 58e3361f1425c81b3b6800b190a525d8b876df28 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:19 +0000 Subject: [PATCH 034/201] MIPS: KVM: Fix CACHE immediate offset sign extension commit c5c2a3b998f1ff5a586f9d37e154070b8d550d17 upstream. The immediate field of the CACHE instruction is signed, so ensure that it gets sign extended by casting it to an int16_t rather than just masking the low 16 bits. Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index d5fa3eaf39a10..41b1b090f56f6 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1581,7 +1581,7 @@ enum emulation_result kvm_mips_emulate_cache(uint32_t inst, uint32_t *opc, base = (inst >> 21) & 0x1f; op_inst = (inst >> 16) & 0x1f; - offset = inst & 0xffff; + offset = (int16_t)inst; cache = (inst >> 16) & 0x3; op = (inst >> 18) & 0x7; From 40b295b30ed21e0e8f5bd3d3208323dd4d53fa8d Mon Sep 17 00:00:00 2001 From: James Hogan Date: Wed, 11 Nov 2015 14:21:20 +0000 Subject: [PATCH 035/201] MIPS: KVM: Uninit VCPU in vcpu_create error path commit 585bb8f9a5e592f2ce7abbe5ed3112d5438d2754 upstream. If either of the memory allocations in kvm_arch_vcpu_create() fail, the vcpu which has been allocated and kvm_vcpu_init'd doesn't get uninit'd in the error handling path. Add a call to kvm_vcpu_uninit() to fix this. Fixes: 669e846e6c4e ("KVM/MIPS32: MIPS arch specific APIs for KVM") Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Gleb Natapov Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/mips/kvm/mips.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 49ff3bfc007e5..b9b803facdbf7 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -279,7 +279,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (!gebase) { err = -ENOMEM; - goto out_free_cpu; + goto out_uninit_cpu; } kvm_debug("Allocated %d bytes for KVM Exception Handlers @ %p\n", ALIGN(size, PAGE_SIZE), gebase); @@ -343,6 +343,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) out_free_gebase: kfree(gebase); +out_uninit_cpu: + kvm_vcpu_uninit(vcpu); + out_free_cpu: kfree(vcpu); From 2bb00c908676bd240f8a8d0d19e477874c85f124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Thu, 8 Oct 2015 20:23:33 +0200 Subject: [PATCH 036/201] kvm: x86: set KVM_REQ_EVENT when updating IRR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit c77f3fab441c3e466b4c3601a475fc31ce156b06 upstream. After moving PIR to IRR, the interrupt needs to be delivered manually. Reported-by: Paolo Bonzini Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/lapic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8d9013c5e1eef..ae4483a3e1a72 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -348,6 +348,8 @@ void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) struct kvm_lapic *apic = vcpu->arch.apic; __kvm_apic_update_irr(pir, apic->regs); + + kvm_make_request(KVM_REQ_EVENT, vcpu); } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); From ff3a8601b8a4be9932c714b59392900ecb10bc84 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Oct 2015 11:30:19 +0200 Subject: [PATCH 037/201] kvm: x86: zero EFER on INIT commit 5690891bcec5fcfda38da974ffa5488e36a59811 upstream. Not zeroing EFER means that a 32-bit firmware cannot enter paging mode without clearing EFER.LME first (which it should not know about). Yang Zhang from Intel confirmed that the manual is wrong and EFER is cleared to zero on INIT. Fixes: d28bc9dd25ce023270d2e039e7c98d38ecbf7758 Cc: Yang Z Zhang Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/svm.c | 11 +++++------ arch/x86/kvm/vmx.c | 3 +-- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 2f9ed1ff06326..5b342a031478a 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1086,7 +1086,7 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) return target_tsc - tsc; } -static void init_vmcb(struct vcpu_svm *svm, bool init_event) +static void init_vmcb(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; @@ -1157,8 +1157,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); - if (!init_event) - svm_set_efer(&svm->vcpu, 0); + svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(&svm->vcpu, 2); save->rip = 0x0000fff0; @@ -1212,7 +1211,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) if (kvm_vcpu_is_reset_bsp(&svm->vcpu)) svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP; } - init_vmcb(svm, init_event); + init_vmcb(svm); kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy); kvm_register_write(vcpu, VCPU_REGS_RDX, eax); @@ -1268,7 +1267,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id) clear_page(svm->vmcb); svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT; svm->asid_generation = 0; - init_vmcb(svm, false); + init_vmcb(svm); svm_init_osvw(&svm->vcpu); @@ -1890,7 +1889,7 @@ static int shutdown_interception(struct vcpu_svm *svm) * so reinitialize it. */ clear_page(svm->vmcb); - init_vmcb(svm, false); + init_vmcb(svm); kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; return 0; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6a8bc64566abd..ceb051496eed4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4771,8 +4771,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx_set_cr0(vcpu, cr0); /* enter rmode */ vmx->vcpu.arch.cr0 = cr0; vmx_set_cr4(vcpu, 0); - if (!init_event) - vmx_set_efer(vcpu, 0); + vmx_set_efer(vcpu, 0); vmx_fpu_activate(vcpu); update_exception_bitmap(vcpu); From bead1229f821de2ec81b5d361e1750a8e564d248 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 30 Oct 2015 16:36:24 +0100 Subject: [PATCH 038/201] KVM: x86: add read_phys to x86_emulate_ops MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 7a036a6f670f63b32c5ee126425f9109271ca13f upstream. We want to read the physical memory when emulating RSM. X86EMUL_IO_NEEDED is returned on all errors for consistency with other helpers. Signed-off-by: Radim Krčmář Tested-by: Laszlo Ersek Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/kvm_emulate.h | 10 ++++++++++ arch/x86/kvm/x86.c | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e16466ec473cb..e9cd7befcb76a 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -111,6 +111,16 @@ struct x86_emulate_ops { unsigned int bytes, struct x86_exception *fault); + /* + * read_phys: Read bytes of standard (non-emulated/special) memory. + * Used for descriptor reading. + * @addr: [IN ] Physical address from which to read. + * @val: [OUT] Value read from memory. + * @bytes: [IN ] Number of bytes to read from memory. + */ + int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, + void *val, unsigned int bytes); + /* * write_std: Write bytes of standard (non-emulated/special) memory. * Used for descriptor writing. diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9a9a198303219..ac10269bc25ed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4059,6 +4059,15 @@ static int kvm_read_guest_virt_system(struct x86_emulate_ctxt *ctxt, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, exception); } +static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, + unsigned long addr, void *val, unsigned int bytes) +{ + struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); + int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); + + return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; +} + int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, gva_t addr, void *val, unsigned int bytes, @@ -4794,6 +4803,7 @@ static const struct x86_emulate_ops emulate_ops = { .write_gpr = emulator_write_gpr, .read_std = kvm_read_guest_virt_system, .write_std = kvm_write_guest_virt_system, + .read_phys = kvm_read_guest_phys_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, From b6b8b23bd73762dbb662b5a5a17d1c86f708fa01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Fri, 30 Oct 2015 16:36:25 +0100 Subject: [PATCH 039/201] KVM: x86: handle SMBASE as physical address in RSM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit f40606b147dd5b4678cedc877a71deb520ca507e upstream. GET_SMSTATE depends on real mode to ensure that smbase+offset is treated as a physical address, which has already caused a bug after shuffling the code. Enforce physical addressing. Signed-off-by: Radim Krčmář Reported-by: Laszlo Ersek Tested-by: Laszlo Ersek Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 9da95b9daf8de..b60fed56671b8 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2272,8 +2272,8 @@ static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) #define GET_SMSTATE(type, smbase, offset) \ ({ \ type __val; \ - int r = ctxt->ops->read_std(ctxt, smbase + offset, &__val, \ - sizeof(__val), NULL); \ + int r = ctxt->ops->read_phys(ctxt, smbase + offset, &__val, \ + sizeof(__val)); \ if (r != X86EMUL_CONTINUE) \ return X86EMUL_UNHANDLEABLE; \ __val; \ @@ -2484,8 +2484,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) /* * Get back to real mode, to prepare a safe state in which to load - * CR0/CR3/CR4/EFER. Also this will ensure that addresses passed - * to read_std/write_std are not virtual. + * CR0/CR3/CR4/EFER. * * CR4.PCIDE must be zero, because it is a 64-bit mode only feature. */ From 15978cabaaf026f578661f069e18344dfd487cad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 3 Nov 2015 13:43:05 +0100 Subject: [PATCH 040/201] KVM: x86: allow RSM from 64-bit mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 89651a3decbe03754f304a0b248f27eeb9a37937 upstream. The SDM says that exiting system management mode from 64-bit mode is invalid, but that would be too good to be true. But actually, most of the code is already there to support exiting from compat mode (EFER.LME=1, EFER.LMA=0). Getting all the way from 64-bit mode to real mode only requires clearing CS.L and CR4.PCIDE. Fixes: 660a5d517aaab9187f93854425c4c63f4a09195c Tested-by: Laszlo Ersek Cc: Radim Krčmář Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/emulate.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index b60fed56671b8..1505587d06e97 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2484,16 +2484,36 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) /* * Get back to real mode, to prepare a safe state in which to load - * CR0/CR3/CR4/EFER. - * - * CR4.PCIDE must be zero, because it is a 64-bit mode only feature. + * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU + * supports long mode. */ + cr4 = ctxt->ops->get_cr(ctxt, 4); + if (emulator_has_longmode(ctxt)) { + struct desc_struct cs_desc; + + /* Zero CR4.PCIDE before CR0.PG. */ + if (cr4 & X86_CR4_PCIDE) { + ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); + cr4 &= ~X86_CR4_PCIDE; + } + + /* A 32-bit code segment is required to clear EFER.LMA. */ + memset(&cs_desc, 0, sizeof(cs_desc)); + cs_desc.type = 0xb; + cs_desc.s = cs_desc.g = cs_desc.p = 1; + ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); + } + + /* For the 64-bit case, this will clear EFER.LMA. */ cr0 = ctxt->ops->get_cr(ctxt, 0); if (cr0 & X86_CR0_PE) ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - cr4 = ctxt->ops->get_cr(ctxt, 4); + + /* Now clear CR4.PAE (which must be done before clearing EFER.LME). */ if (cr4 & X86_CR4_PAE) ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); + + /* And finally go back to 32-bit mode. */ efer = 0; ctxt->ops->set_msr(ctxt, MSR_EFER, efer); @@ -4454,7 +4474,7 @@ static const struct opcode twobyte_table[256] = { F(DstMem | SrcReg | Src2CL | ModRM, em_shld), N, N, /* 0xA8 - 0xAF */ I(Stack | Src2GS, em_push_sreg), I(Stack | Src2GS, em_pop_sreg), - II(No64 | EmulateOnUD | ImplicitOps, em_rsm, rsm), + II(EmulateOnUD | ImplicitOps, em_rsm, rsm), F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_bts), F(DstMem | SrcReg | Src2ImmByte | ModRM, em_shrd), F(DstMem | SrcReg | Src2CL | ModRM, em_shrd), From 1e143e741de2129c896e49eed917b8ff1d6c65bf Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 4 Nov 2015 12:54:41 +0100 Subject: [PATCH 041/201] KVM: x86: obey KVM_X86_QUIRK_CD_NW_CLEARED in kvm_set_cr0() commit 879ae1880449c88db11c1ebdaedc2da79b2fe73f upstream. Commit b18d5431acc7 ("KVM: x86: fix CR0.CD virtualization") was technically correct, but it broke OVMF guests by slowing down various parts of the firmware. Commit fb279950ba02 ("KVM: vmx: obey KVM_QUIRK_CD_NW_CLEARED") quirked the first function modified by b18d5431acc7, vmx_get_mt_mask(), for OVMF's sake. This restored the speed of the OVMF code that runs before PlatformPei (including the memory intensive LZMA decompression in SEC). This patch extends the quirk to the second function modified by b18d5431acc7, kvm_set_cr0(). It eliminates the intrusive slowdown that hits the EFI_MP_SERVICES_PROTOCOL implementation of edk2's UefiCpuPkg/CpuDxe -- which is built into OVMF --, when CpuDxe starts up all APs at once for initialization, in order to count them. We also carry over the kvm_arch_has_noncoherent_dma() sub-condition from the other half of the original commit b18d5431acc7. Fixes: b18d5431acc7a2fd22767925f3a6f597aa4bd29e Cc: Jordan Justen Cc: Alex Williamson Reviewed-by: Xiao Guangrong Tested-by: Janusz Mocek Signed-off-by: Laszlo Ersek # Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac10269bc25ed..43609af032833 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -622,7 +622,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if ((cr0 ^ old_cr0) & update_bits) kvm_mmu_reset_context(vcpu); - if ((cr0 ^ old_cr0) & X86_CR0_CD) + if (((cr0 ^ old_cr0) & X86_CR0_CD) && + kvm_arch_has_noncoherent_dma(vcpu->kvm) && + !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_CD_NW_CLEARED)) kvm_zap_gfn_range(vcpu->kvm, 0, ~0ULL); return 0; From 971cdbf756b5e29427ca8d94e0df3a25a66a7ed8 Mon Sep 17 00:00:00 2001 From: Eric Northup Date: Tue, 3 Nov 2015 18:03:53 +0100 Subject: [PATCH 042/201] KVM: x86: work around infinite loop in microcode when #AC is delivered commit 54a20552e1eae07aa240fa370a0293e006b5faed upstream. It was found that a guest can DoS a host by triggering an infinite stream of "alignment check" (#AC) exceptions. This causes the microcode to enter an infinite loop where the core never receives another interrupt. The host kernel panics pretty quickly due to the effects (CVE-2015-5307). Signed-off-by: Eric Northup Signed-off-by: Paolo Bonzini Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/svm.h | 1 + arch/x86/kvm/svm.c | 8 ++++++++ arch/x86/kvm/vmx.c | 5 ++++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/uapi/asm/svm.h b/arch/x86/include/uapi/asm/svm.h index b5d7640abc5d6..8a4add8e46393 100644 --- a/arch/x86/include/uapi/asm/svm.h +++ b/arch/x86/include/uapi/asm/svm.h @@ -100,6 +100,7 @@ { SVM_EXIT_EXCP_BASE + UD_VECTOR, "UD excp" }, \ { SVM_EXIT_EXCP_BASE + PF_VECTOR, "PF excp" }, \ { SVM_EXIT_EXCP_BASE + NM_VECTOR, "NM excp" }, \ + { SVM_EXIT_EXCP_BASE + AC_VECTOR, "AC excp" }, \ { SVM_EXIT_EXCP_BASE + MC_VECTOR, "MC excp" }, \ { SVM_EXIT_INTR, "interrupt" }, \ { SVM_EXIT_NMI, "nmi" }, \ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 5b342a031478a..d7f89387ba0c9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1107,6 +1107,7 @@ static void init_vmcb(struct vcpu_svm *svm) set_exception_intercept(svm, PF_VECTOR); set_exception_intercept(svm, UD_VECTOR); set_exception_intercept(svm, MC_VECTOR); + set_exception_intercept(svm, AC_VECTOR); set_intercept(svm, INTERCEPT_INTR); set_intercept(svm, INTERCEPT_NMI); @@ -1795,6 +1796,12 @@ static int ud_interception(struct vcpu_svm *svm) return 1; } +static int ac_interception(struct vcpu_svm *svm) +{ + kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0); + return 1; +} + static void svm_fpu_activate(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3370,6 +3377,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, + [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, [SVM_EXIT_NMI] = nmi_interception, [SVM_EXIT_SMI] = nop_on_interception, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ceb051496eed4..343d3692dd65d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1567,7 +1567,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) u32 eb; eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << NM_VECTOR) | (1u << DB_VECTOR); + (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -5103,6 +5103,9 @@ static int handle_exception(struct kvm_vcpu *vcpu) return handle_rmode_exception(vcpu, ex_no, error_code); switch (ex_no) { + case AC_VECTOR: + kvm_queue_exception_e(vcpu, AC_VECTOR, error_code); + return 1; case DB_VECTOR: dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & From 268ad525ea7faf13437b29f564e681e885d07b60 Mon Sep 17 00:00:00 2001 From: Krzysztof Mazur Date: Fri, 6 Nov 2015 14:18:36 +0100 Subject: [PATCH 043/201] x86/setup: Fix low identity map for >= 2GB kernel range commit 68accac392d859d24adcf1be3a90e41f978bd54c upstream. The commit f5f3497cad8c extended the low identity mapping. However, if the kernel uses more than 2 GB (VMSPLIT_2G_OPT or VMSPLIT_1G memory split), the normal memory mapping is overwritten by the low identity mapping causing a crash. To avoid overwritting, limit the low identity map to cover only memory before kernel range (PAGE_OFFSET). Fixes: f5f3497cad8c "x86/setup: Extend low identity map to cover whole kernel range Signed-off-by: Krzysztof Mazur Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Laszlo Ersek Cc: Matt Fleming Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/1446815916-22105-1-git-send-email-krzysiek@podlesie.net Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3cccbfc5f777..37c8ea8e75ffd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1180,7 +1180,7 @@ void __init setup_arch(char **cmdline_p) */ clone_pgd_range(initial_page_table, swapper_pg_dir + KERNEL_PGD_BOUNDARY, - KERNEL_PGD_PTRS); + min(KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); #endif tboot_probe(); From abb92fbf9750341af0425f638d361d7c06b36c49 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 3 Nov 2015 10:40:14 +0100 Subject: [PATCH 044/201] x86/irq: Probe for PIC presence before allocating descs for legacy IRQs commit 8c058b0b9c34d8c8d7912880956543769323e2d8 upstream. Commit d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") brought a regression for Hyper-V Gen2 instances. These instances don't have i8259 legacy PIC but they use legacy IRQs for serial port, rtc, and acpi. With this commit included we end up with these IRQs not initialized. Earlier, there was a special workaround for legacy IRQs in mp_map_pin_to_irq() doing mp_irqdomain_map() without looking at nr_legacy_irqs() and now we fail in __irq_domain_alloc_irqs() when irq_domain_alloc_descs() returns -EEXIST. The essence of the issue seems to be that early_irq_init() calls arch_probe_nr_irqs() to figure out the number of legacy IRQs before we probe for i8259 and gets 16. Later when init_8259A() is called we switch to NULL legacy PIC and nr_legacy_irqs() starts to return 0 but we already have 16 descs allocated. Solve the issue by separating i8259 probe from init and calling it in arch_probe_nr_irqs() before we actually use nr_legacy_irqs() information. Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") Signed-off-by: Vitaly Kuznetsov Cc: Jiang Liu Cc: K. Y. Srinivasan Link: http://lkml.kernel.org/r/1446543614-3621-1-git-send-email-vkuznets@redhat.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/i8259.h | 1 + arch/x86/kernel/apic/vector.c | 6 +++++- arch/x86/kernel/i8259.c | 29 +++++++++++++++++++++-------- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index ccffa53750a89..39bcefc20de73 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -60,6 +60,7 @@ struct legacy_pic { void (*mask_all)(void); void (*restore_mask)(void); void (*init)(int auto_eoi); + int (*probe)(void); int (*irq_pending)(unsigned int irq); void (*make_irq)(unsigned int irq); }; diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 836d11b92811c..861bc59c8f256 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -361,7 +361,11 @@ int __init arch_probe_nr_irqs(void) if (nr < nr_irqs) nr_irqs = nr; - return nr_legacy_irqs(); + /* + * We don't know if PIC is present at this point so we need to do + * probe() to get the right number of legacy IRQs. + */ + return legacy_pic->probe(); } #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index 16cb827a5b277..be22f5a2192e3 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -295,16 +295,11 @@ static void unmask_8259A(void) raw_spin_unlock_irqrestore(&i8259A_lock, flags); } -static void init_8259A(int auto_eoi) +static int probe_8259A(void) { unsigned long flags; unsigned char probe_val = ~(1 << PIC_CASCADE_IR); unsigned char new_val; - - i8259A_auto_eoi = auto_eoi; - - raw_spin_lock_irqsave(&i8259A_lock, flags); - /* * Check to see if we have a PIC. * Mask all except the cascade and read @@ -312,16 +307,28 @@ static void init_8259A(int auto_eoi) * have a PIC, we will read 0xff as opposed to the * value we wrote. */ + raw_spin_lock_irqsave(&i8259A_lock, flags); + outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ outb(probe_val, PIC_MASTER_IMR); new_val = inb(PIC_MASTER_IMR); if (new_val != probe_val) { printk(KERN_INFO "Using NULL legacy PIC\n"); legacy_pic = &null_legacy_pic; - raw_spin_unlock_irqrestore(&i8259A_lock, flags); - return; } + raw_spin_unlock_irqrestore(&i8259A_lock, flags); + return nr_legacy_irqs(); +} + +static void init_8259A(int auto_eoi) +{ + unsigned long flags; + + i8259A_auto_eoi = auto_eoi; + + raw_spin_lock_irqsave(&i8259A_lock, flags); + outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ /* @@ -379,6 +386,10 @@ static int legacy_pic_irq_pending_noop(unsigned int irq) { return 0; } +static int legacy_pic_probe(void) +{ + return 0; +} struct legacy_pic null_legacy_pic = { .nr_legacy_irqs = 0, @@ -388,6 +399,7 @@ struct legacy_pic null_legacy_pic = { .mask_all = legacy_pic_noop, .restore_mask = legacy_pic_noop, .init = legacy_pic_int_noop, + .probe = legacy_pic_probe, .irq_pending = legacy_pic_irq_pending_noop, .make_irq = legacy_pic_uint_noop, }; @@ -400,6 +412,7 @@ struct legacy_pic default_legacy_pic = { .mask_all = mask_8259A, .restore_mask = unmask_8259A, .init = init_8259A, + .probe = probe_8259A, .irq_pending = i8259A_irq_pending, .make_irq = make_8259A_irq, }; From 58c9f67c37c1591544c06581d820f690d740ef45 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 5 Nov 2015 16:57:56 +0100 Subject: [PATCH 045/201] x86/cpu: Call verify_cpu() after having entered long mode too commit 04633df0c43d710e5f696b06539c100898678235 upstream. When we get loaded by a 64-bit bootloader, kernel entry point is startup_64 in head_64.S. We don't trust any and all bootloaders because some will fiddle with CPU configuration so we go ahead and massage each CPU into sanity again. For example, some dell BIOSes have this XD disable feature which set IA32_MISC_ENABLE[34] and disable NX. This might be some dumb workaround for other OSes but Linux sure doesn't need it. A similar thing is present in the Surface 3 firmware - see https://bugzilla.kernel.org/show_bug.cgi?id=106051 - which sets this bit only on the BSP: # rdmsr -a 0x1a0 400850089 850089 850089 850089 I know, right?! There's not even an off switch in there. So fix all those cases by sanitizing the 64-bit entry point too. For that, make verify_cpu() callable in 64-bit mode also. Requested-and-debugged-by: "H. Peter Anvin" Reported-and-tested-by: Bastien Nocera Signed-off-by: Borislav Petkov Cc: Matt Fleming Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1446739076-21303-1-git-send-email-bp@alien8.de Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/head_64.S | 8 ++++++++ arch/x86/kernel/verify_cpu.S | 12 +++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 1d40ca8a73f27..ffdc0e8603902 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -65,6 +65,9 @@ startup_64: * tables and then reload them. */ + /* Sanitize CPU configuration */ + call verify_cpu + /* * Compute the delta between the address I am compiled to run at and the * address I am actually running at. @@ -174,6 +177,9 @@ ENTRY(secondary_startup_64) * after the boot processor executes this code. */ + /* Sanitize CPU configuration */ + call verify_cpu + movq $(init_level4_pgt - __START_KERNEL_map), %rax 1: @@ -288,6 +294,8 @@ ENTRY(secondary_startup_64) pushq %rax # target address in negative space lretq +#include "verify_cpu.S" + #ifdef CONFIG_HOTPLUG_CPU /* * Boot CPU0 entry point. It's called from play_dead(). Everything has been set diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S index b9242bacbe59a..4cf401f581e7e 100644 --- a/arch/x86/kernel/verify_cpu.S +++ b/arch/x86/kernel/verify_cpu.S @@ -34,10 +34,11 @@ #include verify_cpu: - pushfl # Save caller passed flags - pushl $0 # Kill any dangerous flags - popfl + pushf # Save caller passed flags + push $0 # Kill any dangerous flags + popf +#ifndef __x86_64__ pushfl # standard way to check for cpuid popl %eax movl %eax,%ebx @@ -48,6 +49,7 @@ verify_cpu: popl %eax cmpl %eax,%ebx jz verify_cpu_no_longmode # cpu has no cpuid +#endif movl $0x0,%eax # See if cpuid 1 is implemented cpuid @@ -130,10 +132,10 @@ verify_cpu_sse_test: jmp verify_cpu_sse_test # try again verify_cpu_no_longmode: - popfl # Restore caller passed flags + popf # Restore caller passed flags movl $1,%eax ret verify_cpu_sse_ok: - popfl # Restore caller passed flags + popf # Restore caller passed flags xorl %eax, %eax ret From 905095d7916c3fab9ea86f0eb55e4a6b65713f5e Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Wed, 3 Jun 2015 10:31:14 +0100 Subject: [PATCH 046/201] x86/cpu: Fix SMAP check in PVOPS environments commit 581b7f158fe0383b492acd1ce3fb4e99d4e57808 upstream. There appears to be no formal statement of what pv_irq_ops.save_fl() is supposed to return precisely. Native returns the full flags, while lguest and Xen only return the Interrupt Flag, and both have comments by the implementations stating that only the Interrupt Flag is looked at. This may have been true when initially implemented, but no longer is. To make matters worse, the Xen PVOP leaves the upper bits undefined, making the BUG_ON() undefined behaviour. Experimentally, this now trips for 32bit PV guests on Broadwell hardware. The BUG_ON() is consistent for an individual build, but not consistent for all builds. It has also been a sitting timebomb since SMAP support was introduced. Use native_save_fl() instead, which will obtain an accurate view of the AC flag. Signed-off-by: Andrew Cooper Reviewed-by: David Vrabel Tested-by: Rusty Russell Cc: Rusty Russell Cc: Konrad Rzeszutek Wilk Cc: Boris Ostrovsky Cc: Cc: Xen-devel Link: http://lkml.kernel.org/r/1433323874-6927-1-git-send-email-andrew.cooper3@citrix.com Signed-off-by: Thomas Gleixner Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/cpu/common.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index de22ea7ff82f9..1a292573ddf79 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -273,10 +273,9 @@ __setup("nosmap", setup_disable_smap); static __always_inline void setup_smap(struct cpuinfo_x86 *c) { - unsigned long eflags; + unsigned long eflags = native_save_fl(); /* This should have been cleared long ago */ - raw_local_save_flags(eflags); BUG_ON(eflags & X86_EFLAGS_AC); if (cpu_has(c, X86_FEATURE_SMAP)) { From 2062c50207c0a0624d459d56356f5aaf51323eb2 Mon Sep 17 00:00:00 2001 From: Huaitong Han Date: Fri, 6 Nov 2015 17:00:23 +0800 Subject: [PATCH 047/201] x86/fpu: Fix get_xsave_addr() behavior under virtualization commit a05917b6ba9dc9a95fc42bdcbe3a875e8ad83935 upstream. KVM uses the get_xsave_addr() function in a different fashion from the native kernel, in that the 'xsave' parameter belongs to guest vcpu, not the currently running task. But 'xsave' is replaced with current task's (host) xsave structure, so get_xsave_addr() will incorrectly return the bad xsave address to KVM. Fix it so that the passed in 'xsave' address is used - as intended originally. Signed-off-by: Huaitong Han Reviewed-by: Dave Hansen Cc: Andy Lutomirski Cc: Paolo Bonzini Cc: Borislav Petkov Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Thomas Gleixner Cc: dave.hansen@intel.com Link: http://lkml.kernel.org/r/1446800423-21622-1-git-send-email-huaitong.han@intel.com [ Tidied up the changelog. ] Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/xstate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 62fc001c7846d..2c4ac072a7023 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -402,7 +402,6 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature) if (!boot_cpu_has(X86_FEATURE_XSAVE)) return NULL; - xsave = ¤t->thread.fpu.state.xsave; /* * We should not ever be requesting features that we * have not enabled. Remember that pcntxt_mask is From 44a06ce07b02abb8a0b5c03e055dae3613fce390 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 10 Nov 2015 16:23:54 -0800 Subject: [PATCH 048/201] x86/fpu: Fix 32-bit signal frame handling commit ab6b52947545a5355154f64f449f97af9d05845f upstream. (This should have gone to LKML originally. Sorry for the extra noise, folks on the cc.) Background: Signal frames on x86 have two formats: 1. For 32-bit executables (whether on a real 32-bit kernel or under 32-bit emulation on a 64-bit kernel) we have a 'fpregset_t' that includes the "FSAVE" registers. 2. For 64-bit executables (on 64-bit kernels obviously), the 'fpregset_t' is smaller and does not contain the "FSAVE" state. When creating the signal frame, we have to be aware of whether we are running a 32 or 64-bit executable so we create the correct format signal frame. Problem: save_xstate_epilog() uses 'fx_sw_reserved_ia32' whenever it is called for a 32-bit executable. This is for real 32-bit and ia32 emulation. But, fpu__init_prepare_fx_sw_frame() only initializes 'fx_sw_reserved_ia32' when emulation is enabled, *NOT* for real 32-bit kernels. This leads to really wierd situations where 32-bit programs lose their extended state when returning from a signal handler. The kernel copies the uninitialized (zero) 'fx_sw_reserved_ia32' out to userspace in save_xstate_epilog(). But when returning from the signal, the kernel errors out in check_for_xstate() when it does not see FP_XSTATE_MAGIC1 present (because it was zeroed). This leads to the FPU/XSAVE state being initialized. For MPX, this leads to the most permissive state and means we silently lose bounds violations. I think this would also mean that we could lose *ANY* FPU/SSE/AVX state. I'm not sure why no one has spotted this bug. I believe this was broken by: 72a671ced66d ("x86, fpu: Unify signal handling code paths for x86 and x86_64 kernels") way back in 2012. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@sr71.net Cc: fenghua.yu@intel.com Cc: yu-cheng.yu@intel.com Link: http://lkml.kernel.org/r/20151111002354.A0799571@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/kernel/fpu/signal.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 50ec9af1bd518..6545e6ddbfb1b 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -385,20 +385,19 @@ fpu__alloc_mathframe(unsigned long sp, int ia32_frame, */ void fpu__init_prepare_fx_sw_frame(void) { - int fsave_header_size = sizeof(struct fregs_state); int size = xstate_size + FP_XSTATE_MAGIC2_SIZE; - if (config_enabled(CONFIG_X86_32)) - size += fsave_header_size; - fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; fx_sw_reserved.xfeatures = xfeatures_mask; fx_sw_reserved.xstate_size = xstate_size; - if (config_enabled(CONFIG_IA32_EMULATION)) { + if (config_enabled(CONFIG_IA32_EMULATION) || + config_enabled(CONFIG_X86_32)) { + int fsave_header_size = sizeof(struct fregs_state); + fx_sw_reserved_ia32 = fx_sw_reserved; - fx_sw_reserved_ia32.extended_size += fsave_header_size; + fx_sw_reserved_ia32.extended_size = size + fsave_header_size; } } From 10ec42fc4ed17919a154c3f6448cb3db4bbeed6d Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 11 Nov 2015 10:19:31 -0800 Subject: [PATCH 049/201] x86/mpx: Do proper get_user() when running 32-bit binaries on 64-bit kernels commit 46561c3959d6307d22139c24cd0bf196162e5681 upstream. When you call get_user(foo, bar), you effectively do a copy_from_user(&foo, bar, sizeof(*bar)); Note that the sizeof() is implicit. When we reach out to userspace to try to zap an entire "bounds table" we need to go read a "bounds directory entry" in order to locate the table's address. The size of a "directory entry" depends on the binary being run and is always the size of a pointer. But, when we have a 64-bit kernel and a 32-bit application, the directory entry is still only 32-bits long, but we fetch it with a 64-bit pointer which makes get_user() does a 64-bit fetch. Reading 4 extra bytes isn't harmful, unless we are at the end of and run off the table. It might also cause the zero page to get faulted in unnecessarily even if you are not at the end. Fix it up by doing a special 32-bit get_user() via a cast when we have 32-bit userspace. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20151111181931.3ACF6822@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mpx.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 134948b0926f5..e9d04ff749d15 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -584,6 +584,29 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, return bt_addr; } +/* + * We only want to do a 4-byte get_user() on 32-bit. Otherwise, + * we might run off the end of the bounds table if we are on + * a 64-bit kernel and try to get 8 bytes. + */ +int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, + long __user *bd_entry_ptr) +{ + u32 bd_entry_32; + int ret; + + if (is_64bit_mm(mm)) + return get_user(*bd_entry_ret, bd_entry_ptr); + + /* + * Note that get_user() uses the type of the *pointer* to + * establish the size of the get, not the destination. + */ + ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr); + *bd_entry_ret = bd_entry_32; + return ret; +} + /* * Get the base of bounds tables pointed by specific bounds * directory entry. @@ -604,7 +627,7 @@ static int get_bt_addr(struct mm_struct *mm, int need_write = 0; pagefault_disable(); - ret = get_user(bd_entry, bd_entry_ptr); + ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr); pagefault_enable(); if (!ret) break; From 7631e903a6bd10e1eba646393991d36501382511 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Wed, 11 Nov 2015 10:19:34 -0800 Subject: [PATCH 050/201] x86/mpx: Fix 32-bit address space calculation commit f3119b830264d89d216bfb378ab65065dffa02d9 upstream. I received a bug report that running 32-bit MPX binaries on 64-bit kernels was broken. I traced it down to this little code snippet. We were switching our "number of bounds directory entries" calculation correctly. But, we didn't switch the other side of the calculation: the virtual space size. This meant that we were calculating an absurd size for bd_entry_virt_space() on 32-bit because we used the 64-bit virt_space. This was _also_ broken for 32-bit kernels running on 64-bit hardware since boot_cpu_data.x86_virt_bits=48 even when running in 32-bit mode. Correct that and properly handle all 3 possible cases: 1. 32-bit binary on 64-bit kernel 2. 64-bit binary on 64-bit kernel 3. 32-bit binary on 32-bit kernel This manifested in having bounds tables not properly unmapped. It "leaked" memory but had no functional impact otherwise. Signed-off-by: Dave Hansen Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20151111181934.FA7FAC34@viggo.jf.intel.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- arch/x86/mm/mpx.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index e9d04ff749d15..71fc79a58a15a 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -722,11 +722,23 @@ static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm, */ static inline unsigned long bd_entry_virt_space(struct mm_struct *mm) { - unsigned long long virt_space = (1ULL << boot_cpu_data.x86_virt_bits); - if (is_64bit_mm(mm)) - return virt_space / MPX_BD_NR_ENTRIES_64; - else - return virt_space / MPX_BD_NR_ENTRIES_32; + unsigned long long virt_space; + unsigned long long GB = (1ULL << 30); + + /* + * This covers 32-bit emulation as well as 32-bit kernels + * running on 64-bit harware. + */ + if (!is_64bit_mm(mm)) + return (4ULL * GB) / MPX_BD_NR_ENTRIES_32; + + /* + * 'x86_virt_bits' returns what the hardware is capable + * of, and returns the full >32-bit adddress space when + * running 32-bit kernels on 64-bit hardware. + */ + virt_space = (1ULL << boot_cpu_data.x86_virt_bits); + return virt_space / MPX_BD_NR_ENTRIES_64; } /* From 328a2865479c18995902c9be3ff754cd4a91845d Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Sun, 25 Oct 2015 10:59:38 +0200 Subject: [PATCH 051/201] mac80211: Fix local deauth while associating commit a64cba3c5330704a034bd3179270b8d04daf6987 upstream. Local request to deauthenticate wasn't handled while associating, thus the association could continue even when the user space required to disconnect. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cd7e55e08a238..af86975b35daa 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5028,6 +5028,25 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, return 0; } + if (ifmgd->assoc_data && + ether_addr_equal(ifmgd->assoc_data->bss->bssid, req->bssid)) { + sdata_info(sdata, + "aborting association with %pM by local choice (Reason: %u=%s)\n", + req->bssid, req->reason_code, + ieee80211_get_reason_code_string(req->reason_code)); + + drv_mgd_prepare_tx(sdata->local, sdata); + ieee80211_send_deauth_disassoc(sdata, req->bssid, + IEEE80211_STYPE_DEAUTH, + req->reason_code, tx, + frame_buf); + ieee80211_destroy_assoc_data(sdata, false); + ieee80211_report_disconnect(sdata, frame_buf, + sizeof(frame_buf), true, + req->reason_code); + return 0; + } + if (ifmgd->associated && ether_addr_equal(ifmgd->associated->bssid, req->bssid)) { sdata_info(sdata, From 85eeb5c5ad3d97ea3e59221b4237881b3a5d5731 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Aug 2015 10:52:53 +0200 Subject: [PATCH 052/201] mac80211: fix driver RSSI event calculations commit 8ec6d97871f37e4743678ea4a455bd59580aa0f4 upstream. The ifmgd->ave_beacon_signal value cannot be taken as is for comparisons, it must be divided by since it's represented like that for better accuracy of the EWMA calculations. This would lead to invalid driver RSSI events. Fix the used value. Fixes: 615f7b9bb1f8 ("mac80211: add driver RSSI threshold events") Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index af86975b35daa..d011bc5391975 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -3391,7 +3391,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (ifmgd->rssi_min_thold != ifmgd->rssi_max_thold && ifmgd->count_beacon_signal >= IEEE80211_SIGNAL_AVE_MIN_COUNT) { - int sig = ifmgd->ave_beacon_signal; + int sig = ifmgd->ave_beacon_signal / 16; int last_sig = ifmgd->last_ave_beacon_signal; struct ieee80211_event event = { .type = RSSI_EVENT, From db96a3cfca19fd71caab2e88bc2e455da52d654e Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Sun, 25 Oct 2015 10:59:41 +0200 Subject: [PATCH 053/201] mac80211: allow null chandef in tracing commit 254d3dfe445f94a764e399ca12e04365ac9413ed upstream. In TDLS channel-switch operations the chandef can sometimes be NULL. Avoid an oops in the trace code for these cases and just print a chandef full of zeros. Fixes: a7a6bdd0670fe ("mac80211: introduce TDLS channel switch ops") Signed-off-by: Arik Nemtsov Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/trace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 6f14591d8ca9e..0b13bfa6f32fe 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -33,11 +33,11 @@ __field(u32, chan_width) \ __field(u32, center_freq1) \ __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c)->chan ? (c)->chan->center_freq : 0; \ - __entry->chan_width = (c)->width; \ - __entry->center_freq1 = (c)->center_freq1; \ - __entry->center_freq2 = (c)->center_freq2; +#define CHANDEF_ASSIGN(c) \ + __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ + __entry->chan_width = (c) ? (c)->width : 0; \ + __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; #define CHANDEF_PR_FMT " control:%d MHz width:%d center: %d/%d MHz" #define CHANDEF_PR_ARG __entry->control_freq, __entry->chan_width, \ __entry->center_freq1, __entry->center_freq2 From 901b9f45117c6de411ed6ed8a1e08304a1bfe869 Mon Sep 17 00:00:00 2001 From: "Janusz.Dziedzic@tieto.com" Date: Tue, 27 Oct 2015 08:35:11 +0100 Subject: [PATCH 054/201] mac80211: fix divide by zero when NOA update commit 519ee6918b91abdc4bc9720deae17599a109eb40 upstream. In case of one shot NOA the interval can be 0, catch that instead of potentially (depending on the driver) crashing like this: divide error: 0000 [#1] SMP [...] Call Trace: [] ieee80211_extend_absent_time+0x6c/0xb0 [mac80211] [] ieee80211_update_p2p_noa+0xb7/0xe0 [mac80211] [] ath9k_p2p_ps_timer+0x170/0x190 [ath9k] [] ath_gen_timer_isr+0xc8/0xf0 [ath9k_hw] [] ath9k_tasklet+0x296/0x2f0 [ath9k] [] tasklet_action+0xe5/0xf0 [...] Signed-off-by: Janusz Dziedzic Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/mac80211/util.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 1104421bc5255..cd90ece807721 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2951,6 +2951,13 @@ ieee80211_extend_noa_desc(struct ieee80211_noa_data *data, u32 tsf, int i) if (end > 0) return false; + /* One shot NOA */ + if (data->count[i] == 1) + return false; + + if (data->desc[i].interval == 0) + return false; + /* End time is in the past, check for repetitions */ skip = DIV_ROUND_UP(-end, data->desc[i].interval); if (data->count[i] < 255) { From 526b4f4d1e1f3e16e0bd185181c23e5ff841a20e Mon Sep 17 00:00:00 2001 From: Ola Olsson Date: Thu, 29 Oct 2015 07:04:58 +0100 Subject: [PATCH 055/201] nl80211: Fix potential memory leak from parse_acl_data commit 4baf6bea37247e59f1971e8009d13aeda95edba2 upstream. If parse_acl_data succeeds but the subsequent parsing of smps attributes fails, there will be a memory leak due to early returns. Fix that by moving the ACL parsing later. Fixes: 18998c381b19b ("cfg80211: allow requesting SMPS mode on ap start") Signed-off-by: Ola Olsson Signed-off-by: Johannes Berg Signed-off-by: Greg Kroah-Hartman --- net/wireless/nl80211.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5d8748b4c8a2d..6a1040daa9b9e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3409,12 +3409,6 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; - if (info->attrs[NL80211_ATTR_ACL_POLICY]) { - params.acl = parse_acl_data(&rdev->wiphy, info); - if (IS_ERR(params.acl)) - return PTR_ERR(params.acl); - } - if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -3438,6 +3432,12 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.smps_mode = NL80211_SMPS_OFF; } + if (info->attrs[NL80211_ATTR_ACL_POLICY]) { + params.acl = parse_acl_data(&rdev->wiphy, info); + if (IS_ERR(params.acl)) + return PTR_ERR(params.acl); + } + wdev_lock(wdev); err = rdev_start_ap(rdev, dev, ¶ms); if (!err) { From 306105dcdc8f3abb1f19fbed6643bf67e9611f77 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:19 +0100 Subject: [PATCH 056/201] NFC: st-nci: Fix incorrect spi buffer size commit a1269dd116319335db6d73013a31c038486c813e upstream. When sending data over SPI, the maximum expected length is the maximum nci packet payload + data header size + the frame head room (1 for the ndlc header) + the frame trail room (0). Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- drivers/nfc/st-nci/spi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nfc/st-nci/spi.c b/drivers/nfc/st-nci/spi.c index 598a58c4d6d13..887d308c2ec80 100644 --- a/drivers/nfc/st-nci/spi.c +++ b/drivers/nfc/st-nci/spi.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "ndlc.h" @@ -94,7 +95,8 @@ static int st_nci_spi_write(void *phy_id, struct sk_buff *skb) struct st_nci_spi_phy *phy = phy_id; struct spi_device *dev = phy->spi_dev; struct sk_buff *skb_rx; - u8 buf[ST_NCI_SPI_MAX_SIZE]; + u8 buf[ST_NCI_SPI_MAX_SIZE + NCI_DATA_HDR_SIZE + + ST_NCI_FRAME_HEADROOM + ST_NCI_FRAME_TAILROOM]; struct spi_transfer spi_xfer = { .tx_buf = skb->data, .rx_buf = buf, From fa885c63ce9659910f55b9335d61752e723156e6 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:20 +0100 Subject: [PATCH 057/201] NFC: nci: Fix incorrect data chaining when sending data commit 500c4ef02277eaadbfe20537f963b6221f6ac007 upstream. When sending HCI data over NCI, cmd information should be present only on the first packet. Each packet shall be specifically allocated and sent to the NCI layer. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 609f92283d1b7..321889ee21078 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -146,18 +146,18 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, if (!conn_info) return -EPROTO; - skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + i = 0; + skb = nci_skb_alloc(ndev, conn_info->max_pkt_payload_len + NCI_DATA_HDR_SIZE, GFP_KERNEL); if (!skb) return -ENOMEM; - skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + skb_reserve(skb, NCI_DATA_HDR_SIZE + 2); *skb_push(skb, 1) = data_type; - i = 0; - len = conn_info->max_pkt_payload_len; - do { + len = conn_info->max_pkt_payload_len; + /* If last packet add NCI_HFP_NO_CHAINING */ if (i + conn_info->max_pkt_payload_len - (skb->len + 1) >= data_len) { @@ -177,9 +177,15 @@ static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, return r; i += len; + if (i < data_len) { - skb_trim(skb, 0); - skb_pull(skb, len); + skb = nci_skb_alloc(ndev, + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, NCI_DATA_HDR_SIZE + 1); } } while (i < data_len); From 37e8081d25e7b4ea1dde6ff19e847ca880ae8442 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:21 +0100 Subject: [PATCH 058/201] NFC: nci: Fix improper management of HCI return code commit d8cd37ed2fc871c66b4c79c59f651dc2cdf7091c upstream. When sending HCI data over NCI, HCI return code is part of the NCI data. In order to get correctly the HCI return code, we assume the NCI communication is successful and extract the return code for the nci_hci functions return code. This is done because nci_to_errno does not match hci return code value. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 66 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 321889ee21078..b07092f4111bd 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -101,6 +101,20 @@ struct nci_hcp_packet { #define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) #define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) +static int nci_hci_result_to_errno(u8 result) +{ + switch (result) { + case NCI_HCI_ANY_OK: + return 0; + case NCI_HCI_ANY_E_REG_PAR_UNKNOWN: + return -EOPNOTSUPP; + case NCI_HCI_ANY_E_TIMEOUT: + return -ETIME; + default: + return -1; + } +} + /* HCI core */ static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) { @@ -218,7 +232,8 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, const u8 *param, size_t param_len, struct sk_buff **skb) { - struct nci_conn_info *conn_info; + struct nci_hcp_message *message; + struct nci_conn_info *conn_info; struct nci_data data; int r; u8 pipe = ndev->hci_dev->gate2pipe[gate]; @@ -238,9 +253,15 @@ int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - - if (r == NCI_STATUS_OK && skb) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } @@ -334,9 +355,6 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, struct nci_conn_info *conn_info; u8 status = result; - if (result != NCI_HCI_ANY_OK) - goto exit; - conn_info = ndev->hci_dev->conn_info; if (!conn_info) { status = NCI_STATUS_REJECTED; @@ -346,7 +364,7 @@ static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, conn_info->rx_skb = skb; exit: - nci_req_complete(ndev, status); + nci_req_complete(ndev, NCI_STATUS_OK); } /* Receive hcp message for pipe, with type and cmd. @@ -401,7 +419,7 @@ void nci_hci_data_received_cb(void *context, { struct nci_dev *ndev = (struct nci_dev *)context; struct nci_hcp_packet *packet; - u8 pipe, type, instruction; + u8 pipe, type; struct sk_buff *hcp_skb; struct sk_buff *frag_skb; int msg_len; @@ -440,7 +458,7 @@ void nci_hci_data_received_cb(void *context, *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { - msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); } @@ -458,11 +476,10 @@ void nci_hci_data_received_cb(void *context, packet = (struct nci_hcp_packet *)hcp_skb->data; type = NCI_HCP_MSG_GET_TYPE(packet->message.header); if (type == NCI_HCI_HCP_RESPONSE) { - pipe = packet->header; - instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); - skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + - NCI_HCI_HCP_MESSAGE_HEADER_LEN); - nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, + NCI_STATUS_OK, hcp_skb); } else { skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); schedule_work(&ndev->hci_dev->msg_rx_work); @@ -494,6 +511,7 @@ EXPORT_SYMBOL(nci_hci_open_pipe); int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, const u8 *param, size_t param_len) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -526,6 +544,12 @@ int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + } kfree(tmp); return r; @@ -535,6 +559,7 @@ EXPORT_SYMBOL(nci_hci_set_param); int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, struct sk_buff **skb) { + struct nci_hcp_message *message; struct nci_conn_info *conn_info; struct nci_data data; int r; @@ -559,8 +584,15 @@ int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, msecs_to_jiffies(NCI_DATA_TIMEOUT)); - if (r == NCI_STATUS_OK) - *skb = conn_info->rx_skb; + if (r == NCI_STATUS_OK) { + message = (struct nci_hcp_message *)conn_info->rx_skb->data; + r = nci_hci_result_to_errno( + NCI_HCP_MSG_GET_CMD(message->header)); + skb_pull(conn_info->rx_skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + if (!r && skb) + *skb = conn_info->rx_skb; + } return r; } From 7fc3efcbf181df8ba61753887654633389a1fb37 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:22 +0100 Subject: [PATCH 059/201] NFC: nci: extract pipe value using NCI_HCP_MSG_GET_PIPE commit e65917b6d54f8b47d8293ea96adfa604fd46cf0d upstream. When receiving data in nci_hci_msg_rx_work, extract pipe value using NCI_HCP_MSG_GET_PIPE macro. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz Signed-off-by: Greg Kroah-Hartman --- net/nfc/nci/hci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index b07092f4111bd..30b09f04c142d 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -402,7 +402,7 @@ static void nci_hci_msg_rx_work(struct work_struct *work) u8 pipe, type, instruction; while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { - pipe = skb->data[0]; + pipe = NCI_HCP_MSG_GET_PIPE(skb->data[0]); skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); message = (struct nci_hcp_message *)skb->data; type = NCI_HCP_MSG_GET_TYPE(message->header); @@ -439,7 +439,7 @@ void nci_hci_data_received_cb(void *context, /* it's the last fragment. Does it need re-aggregation? */ if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { - pipe = packet->header & NCI_HCI_FRAGMENT; + pipe = NCI_HCP_MSG_GET_PIPE(packet->header); skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); msg_len = 0; From 66c4d636733dbc7d9aa3ed0191e9d02ba836d676 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Wed, 21 Oct 2015 19:55:32 +0300 Subject: [PATCH 060/201] iwlwifi: pcie: fix (again) prepare card flow commit 03a19cbb91994212be72ce15ac3406fa9f8ba079 upstream. The hardware bug in the commit mentioned below forces us not to re-enable the clock gating in the Host Cluster. The impact on the power consumption is minimal and it allows the WAKE_ME interrupt to propagate. Fixes: c9fdec9f3970 ("iwlwifi: pcie: fix prepare card flow") Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/trans.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/trans.c b/drivers/net/wireless/iwlwifi/pcie/trans.c index 6ba7d300b08f3..90283453073c1 100644 --- a/drivers/net/wireless/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/iwlwifi/pcie/trans.c @@ -592,10 +592,8 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) do { ret = iwl_pcie_set_hw_ready(trans); - if (ret >= 0) { - ret = 0; - goto out; - } + if (ret >= 0) + return 0; usleep_range(200, 1000); t += 200; @@ -605,10 +603,6 @@ static int iwl_pcie_prepare_card_hw(struct iwl_trans *trans) IWL_ERR(trans, "Couldn't prepare the card\n"); -out: - iwl_clear_bit(trans, CSR_DBG_LINK_PWR_MGMT_REG, - CSR_RESET_LINK_PWR_MGMT_DISABLED); - return ret; } From c592b07ed26abb4235c29620495a006acdc8bac6 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Wed, 28 Oct 2015 12:32:20 +0200 Subject: [PATCH 061/201] iwlwifi: Add new PCI IDs for the 8260 series commit 4ab75944c4b324c1f5f01dbd4c4d122d2b9da187 upstream. Add some new PCI IDs for the 8260 series which were missing. The following sub-system IDs were added: 0x0130, 0x1130, 0x0132, 0x1132, 0x1150, 0x8110, 0x9110, 0x8130, 0x9130, 0x8132, 0x9132, 0x8150, 0x9150, 0x0044, 0x0930 Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/iwlwifi/pcie/drv.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/iwlwifi/pcie/drv.c b/drivers/net/wireless/iwlwifi/pcie/drv.c index 644b58bc5226c..639761fb2bfb2 100644 --- a/drivers/net/wireless/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/iwlwifi/pcie/drv.c @@ -423,14 +423,21 @@ static const struct pci_device_id iwl_hw_card_ids[] = { /* 8000 Series */ {IWL_PCI_DEVICE(0x24F3, 0x0010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0110, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x01F0, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0012, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1012, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0250, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x1050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0150, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x1150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x0030, iwl8260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x24F4, 0x1130, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x1030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC010, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xC110, iwl8260_2ac_cfg)}, @@ -438,18 +445,28 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0xC050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0xD050, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9010, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9110, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x8030, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F4, 0x9030, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9130, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8132, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9132, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x8050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x8150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x9050, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x9150, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0004, iwl8260_2n_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0044, iwl8260_2n_cfg)}, {IWL_PCI_DEVICE(0x24F5, 0x0010, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F6, 0x0030, iwl4165_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0810, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0910, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0850, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0950, iwl8260_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, #endif /* CONFIG_IWLMVM */ {0} From bf14478a37d1f22b04025714f88b5e18edd57b31 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Fri, 25 Sep 2015 18:09:35 +0200 Subject: [PATCH 062/201] net: mvneta: Fix CPU_MAP registers initialisation commit 2502d0ef272da7058ef303b849a2c8dc324c2e2e upstream. The CPU_MAP register is duplicated for each CPUs at different addresses, each instance being at a different address. However, the code so far was using CONFIG_NR_CPUS to initialise the CPU_MAP registers for each registers, while the SoCs embed at most 4 CPUs. This is especially an issue with multi_v7_defconfig, where CONFIG_NR_CPUS is currently set to 16, resulting in writes to registers that are not CPU_MAP. Fixes: c5aff18204da ("net: mvneta: driver for Marvell Armada 370/XP network unit") Signed-off-by: Maxime Ripard Signed-off-by: Gregory CLEMENT Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 514df76fc70f3..8c6f307457d3b 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -949,7 +949,7 @@ static void mvneta_defaults_set(struct mvneta_port *pp) /* Set CPU queue access map - all CPUs have access to all RX * queues and to all TX queues */ - for (cpu = 0; cpu < CONFIG_NR_CPUS; cpu++) + for_each_present_cpu(cpu) mvreg_write(pp, MVNETA_CPU_MAP(cpu), (MVNETA_CPU_RXQ_ACCESS_ALL_MASK | MVNETA_CPU_TXQ_ACCESS_ALL_MASK)); From ed42098a90ca511bd74a55a29db6f0f8bd193d2c Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 30 Nov 2015 13:27:44 +0100 Subject: [PATCH 063/201] net: mvneta: fix error path for building skb commit 26c17a179f3f64f92de6e837c14279a6431a7ab6 upstream. In the actual RX processing, there is same error path for both descriptor ring refilling and building skb fails. This is not correct, because after successful refill, the ring is already updated with newly allocated buffer. Then, in case of build_skb() fail, hitherto code left the original buffer unmapped. This patch fixes above situation by swapping error check of skb build with DMA-unmap of original buffer. Signed-off-by: Marcin Wojtas Acked-by: Simon Guinot Fixes a84e32894191 ("net: mvneta: fix refilling for Rx DMA buffers") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/marvell/mvneta.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 8c6f307457d3b..0e49244981d6e 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -1533,12 +1533,16 @@ static int mvneta_rx(struct mvneta_port *pp, int rx_todo, } skb = build_skb(data, pp->frag_size > PAGE_SIZE ? 0 : pp->frag_size); - if (!skb) - goto err_drop_frame; + /* After refill old buffer has to be unmapped regardless + * the skb is successfully built or not. + */ dma_unmap_single(dev->dev.parent, phys_addr, MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE); + if (!skb) + goto err_drop_frame; + rcvd_pkts++; rcvd_bytes += rx_bytes; From 40a76e5410358a5193bd6643032b6046e27807be Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 30 Sep 2015 15:59:17 +0200 Subject: [PATCH 064/201] fs/proc, core/debug: Don't expose absolute kernel addresses via wchan commit b2f73922d119686323f14fbbe46587f863852328 upstream. So the /proc/PID/stat 'wchan' field (the 30th field, which contains the absolute kernel address of the kernel function a task is blocked in) leaks absolute kernel addresses to unprivileged user-space: seq_put_decimal_ull(m, ' ', wchan); The absolute address might also leak via /proc/PID/wchan as well, if KALLSYMS is turned off or if the symbol lookup fails for some reason: static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long wchan; char symname[KSYM_NAME_LEN]; wchan = get_wchan(task); if (lookup_symbol_name(wchan, symname) < 0) { if (!ptrace_may_access(task, PTRACE_MODE_READ)) return 0; seq_printf(m, "%lu", wchan); } else { seq_printf(m, "%s", symname); } return 0; } This isn't ideal, because for example it trivially leaks the KASLR offset to any local attacker: fomalhaut:~> printf "%016lx\n" $(cat /proc/$$/stat | cut -d' ' -f35) ffffffff8123b380 Most real-life uses of wchan are symbolic: ps -eo pid:10,tid:10,wchan:30,comm and procps uses /proc/PID/wchan, not the absolute address in /proc/PID/stat: triton:~/tip> strace -f ps -eo pid:10,tid:10,wchan:30,comm 2>&1 | grep wchan | tail -1 open("/proc/30833/wchan", O_RDONLY) = 6 There's one compatibility quirk here: procps relies on whether the absolute value is non-zero - and we can provide that functionality by outputing "0" or "1" depending on whether the task is blocked (whether there's a wchan address). These days there appears to be very little legitimate reason user-space would be interested in the absolute address. The absolute address is mostly historic: from the days when we didn't have kallsyms and user-space procps had to do the decoding itself via the System.map. So this patch sets all numeric output to "0" or "1" and keeps only symbolic output, in /proc/PID/wchan. ( The absolute sleep address can generally still be profiled via perf, by tasks with sufficient privileges. ) Reviewed-by: Thomas Gleixner Acked-by: Kees Cook Acked-by: Linus Torvalds Cc: Al Viro Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Kostya Serebryany Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Sasha Levin Cc: kasan-dev Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/20150930135917.GA3285@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Greg Kroah-Hartman --- Documentation/filesystems/proc.txt | 5 +++-- fs/proc/array.c | 16 ++++++++++++++-- fs/proc/base.c | 9 +++------ 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index d411ca63c8b6c..3a9d65c912e78 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -140,7 +140,8 @@ Table 1-1: Process specific entries in /proc stat Process status statm Process memory status information status Process status in human readable form - wchan If CONFIG_KALLSYMS is set, a pre-decoded wchan + wchan Present with CONFIG_KALLSYMS=y: it shows the kernel function + symbol the task is blocked in - or "0" if not blocked. pagemap Page table stack Report full stack trace, enable via CONFIG_STACKTRACE smaps a extension based on maps, showing the memory consumption of @@ -310,7 +311,7 @@ Table 1-4: Contents of the stat files (as of 2.6.30-rc7) blocked bitmap of blocked signals sigign bitmap of ignored signals sigcatch bitmap of caught signals - wchan address where process went to sleep + 0 (place holder, used to be the wchan address, use /proc/PID/wchan instead) 0 (place holder) 0 (place holder) exit_signal signal to send to parent thread on exit diff --git a/fs/proc/array.c b/fs/proc/array.c index f60f0121e3319..eed2050db9be9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -375,7 +375,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task, int whole) { - unsigned long vsize, eip, esp, wchan = ~0UL; + unsigned long vsize, eip, esp, wchan = 0; int priority, nice; int tty_pgrp = -1, tty_nr = 0; sigset_t sigign, sigcatch; @@ -507,7 +507,19 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, ' ', task->blocked.sig[0] & 0x7fffffffUL); seq_put_decimal_ull(m, ' ', sigign.sig[0] & 0x7fffffffUL); seq_put_decimal_ull(m, ' ', sigcatch.sig[0] & 0x7fffffffUL); - seq_put_decimal_ull(m, ' ', wchan); + + /* + * We used to output the absolute kernel address, but that's an + * information leak - so instead we show a 0/1 flag here, to signal + * to user-space whether there's a wchan field in /proc/PID/wchan. + * + * This works with older implementations of procps as well. + */ + if (wchan) + seq_puts(m, " 1"); + else + seq_puts(m, " 0"); + seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ull(m, ' ', 0); seq_put_decimal_ll(m, ' ', task->exit_signal); diff --git a/fs/proc/base.c b/fs/proc/base.c index b25eee4cead53..29595af328669 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -430,13 +430,10 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, wchan = get_wchan(task); - if (lookup_symbol_name(wchan, symname) < 0) { - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - return 0; - seq_printf(m, "%lu", wchan); - } else { + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) seq_printf(m, "%s", symname); - } + else + seq_putc(m, '0'); return 0; } From e6a981502ce51fed9f4aa123bfec8adb4de09c0a Mon Sep 17 00:00:00 2001 From: Simran Rai Date: Mon, 19 Oct 2015 15:27:19 -0700 Subject: [PATCH 065/201] clk: iproc: Fix PLL output frequency calculation commit 63243a4da7d0dfa19dcacd0a529782eeb2f86f92 upstream. This patch affects the clocks that use fractional ndivider in their PLL output frequency calculation. Instead of 2^20 divide factor, the clock's ndiv integer shift was used. Fixed the bug by replacing ndiv integer shift with 2^20 factor. Signed-off-by: Simran Rai Signed-off-by: Ray Jui Reviewed-by: Scott Branden Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Michael Turquette Signed-off-by: Greg Kroah-Hartman --- drivers/clk/bcm/clk-iproc-pll.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index 2dda4e8295a91..d679ab8696532 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -345,8 +345,8 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, struct iproc_pll *pll = clk->pll; const struct iproc_pll_ctrl *ctrl = pll->ctrl; u32 val; - u64 ndiv; - unsigned int ndiv_int, ndiv_frac, pdiv; + u64 ndiv, ndiv_int, ndiv_frac; + unsigned int pdiv; if (parent_rate == 0) return 0; @@ -366,22 +366,19 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, val = readl(pll->pll_base + ctrl->ndiv_int.offset); ndiv_int = (val >> ctrl->ndiv_int.shift) & bit_mask(ctrl->ndiv_int.width); - ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift; + ndiv = ndiv_int << 20; if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) { val = readl(pll->pll_base + ctrl->ndiv_frac.offset); ndiv_frac = (val >> ctrl->ndiv_frac.shift) & bit_mask(ctrl->ndiv_frac.width); - - if (ndiv_frac != 0) - ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) | - ndiv_frac; + ndiv += ndiv_frac; } val = readl(pll->pll_base + ctrl->pdiv.offset); pdiv = (val >> ctrl->pdiv.shift) & bit_mask(ctrl->pdiv.width); - clk->rate = (ndiv * parent_rate) >> ctrl->ndiv_int.shift; + clk->rate = (ndiv * parent_rate) >> 20; if (pdiv == 0) clk->rate *= 2; From b14a6617539431dadedec6d7dc023a87490dbb62 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 23 Oct 2015 11:36:01 +0200 Subject: [PATCH 066/201] clk: versatile-icst: fix memory leak commit 7bdccef34fc67d3fce6778a018601dd41e43c5ce upstream. A static code checker found a memory leak in the Versatile ICST code. Fix it. Fixes: a183da637c52 "clk: versatile: respect parent rate in ICST clock" Reported-by: Stephen Boyd Signed-off-by: Linus Walleij Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- drivers/clk/versatile/clk-icst.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/clk/versatile/clk-icst.c b/drivers/clk/versatile/clk-icst.c index a3893ea2199da..08c5ee9768799 100644 --- a/drivers/clk/versatile/clk-icst.c +++ b/drivers/clk/versatile/clk-icst.c @@ -157,8 +157,10 @@ struct clk *icst_clk_register(struct device *dev, icst->lockreg = base + desc->lock_offset; clk = clk_register(dev, &icst->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + kfree(pclone); kfree(icst); + } return clk; } From 8cb5ad246f79e4703e010e0d0e0211fb95576401 Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Fri, 18 Sep 2015 09:29:04 -0700 Subject: [PATCH 067/201] mfd: twl6040: Fix deferred probe handling for clk32k commit 75c08f17ec87c2d742487bb87408d6feebc526bd upstream. Commit 68bab8662f49 ("mfd: twl6040: Optional clk32k clock handling") added clock handling for the 32k clock from palmas-clk. However, that patch did not consider a typical situation where twl6040 is built-in, and palmas-clk is a loadable module like we have in omap2plus_defconfig. If palmas-clk is not loaded before twl6040 probes, we will get a "clk32k is not handled" warning during booting. This means that any drivers relying on this clock will mysteriously fail, including omap5-uevm WLAN and audio. Note that for WLAN, we probably should also eventually get the clk32kgaudio for MMC3 directly as that's shared between audio and WLAN SDIO at least for omap5-uevm. It seems the WLAN chip cannot get it as otherwise MMC3 won't get properly probed. Fixes: 68bab8662f49 ("mfd: twl6040: Optional clk32k clock handling") Signed-off-by: Tony Lindgren Reviewed-by: Felipe Balbi Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman --- drivers/mfd/twl6040.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/mfd/twl6040.c b/drivers/mfd/twl6040.c index a151ee2eed2a4..08a693cd38cc4 100644 --- a/drivers/mfd/twl6040.c +++ b/drivers/mfd/twl6040.c @@ -647,6 +647,8 @@ static int twl6040_probe(struct i2c_client *client, twl6040->clk32k = devm_clk_get(&client->dev, "clk32k"); if (IS_ERR(twl6040->clk32k)) { + if (PTR_ERR(twl6040->clk32k) == -EPROBE_DEFER) + return -EPROBE_DEFER; dev_info(&client->dev, "clk32k is not handled\n"); twl6040->clk32k = NULL; } From a251acf26f718499da7db0aa8b73ee671c749028 Mon Sep 17 00:00:00 2001 From: Aniket Nagarnaik Date: Fri, 18 Sep 2015 06:32:09 -0700 Subject: [PATCH 068/201] mwifiex: fix NULL pointer dereference during hidden SSID scan commit 17e524b1b60f4390d24a51d9524d1648cf5d1447 upstream. This NULL pointer dereference is observed during suspend resume stress test. All pending commands are cancelled when system goes into suspend state. There a corner case in which host may receive response for last scan command after this and try to trigger extra active scan for hidden SSIDs. The issue is fixed by adding a NULL check to skip that extra scan. Fixes: 2375fa2b36feaf34 (mwifiex: fix unable to connect hidden SSID..) Signed-off-by: Aniket Nagarnaik Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mwifiex/scan.c b/drivers/net/wireless/mwifiex/scan.c index 5847863a2d6be..278ec477fd90b 100644 --- a/drivers/net/wireless/mwifiex/scan.c +++ b/drivers/net/wireless/mwifiex/scan.c @@ -1889,7 +1889,7 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) u8 id = 0; struct mwifiex_user_scan_cfg *user_scan_cfg; - if (adapter->active_scan_triggered) { + if (adapter->active_scan_triggered || !priv->scan_request) { adapter->active_scan_triggered = false; return 0; } From 0c7b4e0d36288a4695a56fd89dbf4c9e00ca9a0d Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 18 Sep 2015 06:32:10 -0700 Subject: [PATCH 069/201] mwifiex: avoid memsetting PCIe event buffer commit 14d9c11c91a606fed65eaae2455423a23bb4ae59 upstream. Preallocated PCIe buffer is being reused for all PCIe interface events. Physical address of the buffer is shared with firmware so that it can perform DMA on it. As event length is specified in the header, there should not be a problem if the buffer gets overwritten. We will save some cycles by avoiding memset everytime while submitting the buffer to firmware. Fixes: 2728cecdc7d6bf3d21(mwifiex: corrections in PCIe event skb) Signed-off-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/pcie.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/mwifiex/pcie.c b/drivers/net/wireless/mwifiex/pcie.c index 408b684607165..21192b6f9c64f 100644 --- a/drivers/net/wireless/mwifiex/pcie.c +++ b/drivers/net/wireless/mwifiex/pcie.c @@ -1815,7 +1815,6 @@ static int mwifiex_pcie_event_complete(struct mwifiex_adapter *adapter, if (!card->evt_buf_list[rdptr]) { skb_push(skb, INTF_HEADER_LEN); skb_put(skb, MAX_EVENT_SIZE - skb->len); - memset(skb->data, 0, MAX_EVENT_SIZE); if (mwifiex_map_pci_memory(adapter, skb, MAX_EVENT_SIZE, PCI_DMA_FROMDEVICE)) From c1c2cc1b5f3e83fed0049c2d59f9ef38787763fc Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 21 Sep 2015 19:19:53 +0300 Subject: [PATCH 070/201] mwifiex: fix mwifiex_rdeeprom_read() commit 1f9c6e1bc1ba5f8a10fcd6e99d170954d7c6d382 upstream. There were several bugs here. 1) The done label was in the wrong place so we didn't copy any information out when there was no command given. 2) We were using PAGE_SIZE as the size of the buffer instead of "PAGE_SIZE - pos". 3) snprintf() returns the number of characters that would have been printed if there were enough space. If there was not enough space (and we had fixed the memory corruption bug #2) then it would result in an information leak when we do simple_read_from_buffer(). I've changed it to use scnprintf() instead. I also removed the initialization at the start of the function, because I thought it made the code a little more clear. Fixes: 5e6e3a92b9a4 ('wireless: mwifiex: initial commit for Marvell mwifiex driver') Signed-off-by: Dan Carpenter Acked-by: Amitkumar Karwar Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/mwifiex/debugfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/wireless/mwifiex/debugfs.c b/drivers/net/wireless/mwifiex/debugfs.c index 5a0636d43a1b9..5583856fc5c41 100644 --- a/drivers/net/wireless/mwifiex/debugfs.c +++ b/drivers/net/wireless/mwifiex/debugfs.c @@ -731,7 +731,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, (struct mwifiex_private *) file->private_data; unsigned long addr = get_zeroed_page(GFP_KERNEL); char *buf = (char *) addr; - int pos = 0, ret = 0, i; + int pos, ret, i; u8 value[MAX_EEPROM_DATA]; if (!buf) @@ -739,7 +739,7 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, if (saved_offset == -1) { /* No command has been given */ - pos += snprintf(buf, PAGE_SIZE, "0"); + pos = snprintf(buf, PAGE_SIZE, "0"); goto done; } @@ -748,17 +748,17 @@ mwifiex_rdeeprom_read(struct file *file, char __user *ubuf, (u16) saved_bytes, value); if (ret) { ret = -EINVAL; - goto done; + goto out_free; } - pos += snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes); + pos = snprintf(buf, PAGE_SIZE, "%d %d ", saved_offset, saved_bytes); for (i = 0; i < saved_bytes; i++) - pos += snprintf(buf + strlen(buf), PAGE_SIZE, "%d ", value[i]); - - ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos); + pos += scnprintf(buf + pos, PAGE_SIZE - pos, "%d ", value[i]); done: + ret = simple_read_from_buffer(ubuf, count, ppos, buf, pos); +out_free: free_page(addr); return ret; } From 17b9d97eec72418d235453aef32a87e2b39f29ee Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Sun, 18 Oct 2015 22:14:48 -0500 Subject: [PATCH 071/201] staging: rtl8712: Add device ID for Sitecom WLA2100 commit 1e6e63283691a2a9048a35d9c6c59cf0abd342e4 upstream. This adds the USB ID for the Sitecom WLA2100. The Windows 10 inf file was checked to verify that the addition is correct. Reported-by: Frans van de Wiel Signed-off-by: Larry Finger Cc: Frans van de Wiel Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/usb_intf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/rtl8712/usb_intf.c b/drivers/staging/rtl8712/usb_intf.c index f8b5b332e7c3d..943a0e2045329 100644 --- a/drivers/staging/rtl8712/usb_intf.c +++ b/drivers/staging/rtl8712/usb_intf.c @@ -144,6 +144,7 @@ static struct usb_device_id rtl871x_usb_id_tbl[] = { {USB_DEVICE(0x0DF6, 0x0058)}, {USB_DEVICE(0x0DF6, 0x0049)}, {USB_DEVICE(0x0DF6, 0x004C)}, + {USB_DEVICE(0x0DF6, 0x006C)}, {USB_DEVICE(0x0DF6, 0x0064)}, /* Skyworth */ {USB_DEVICE(0x14b2, 0x3300)}, From de2ebbd167835c1c627bd24a1b6d846e0dead8f5 Mon Sep 17 00:00:00 2001 From: David Herrmann Date: Mon, 7 Sep 2015 12:05:41 +0200 Subject: [PATCH 072/201] Bluetooth: hidp: fix device disconnect on idle timeout commit 660f0fc07d21114549c1862e67e78b1cf0c90c29 upstream. The HIDP specs define an idle-timeout which automatically disconnects a device. This has always been implemented in the HIDP layer and forced a synchronous shutdown of the hidp-scheduler. This works just fine, but lacks a forced disconnect on the underlying l2cap channels. This has been broken since: commit 5205185d461d5902325e457ca80bd421127b7308 Author: David Herrmann Date: Sat Apr 6 20:28:47 2013 +0200 Bluetooth: hidp: remove old session-management The old session-management always forced an l2cap error on the ctrl/intr channels when shutting down. The new session-management skips this, as we don't want to enforce channel policy on the caller. In other words, if user-space removes an HIDP device, the underlying channels (which are *owned* and *referenced* by user-space) are still left active. User-space needs to call shutdown(2) or close(2) to release them. Unfortunately, this does not work with idle-timeouts. There is no way to signal user-space that the HIDP layer has been stopped. The API simply does not support any event-passing except for poll(2). Hence, we restore old behavior and force EUNATCH on the sockets if the HIDP layer is disconnected due to idle-timeouts (behavior of explicit disconnects remains unmodified). User-space can still call getsockopt(..., SO_ERROR, ...) ..to retrieve the EUNATCH error and clear sk_err. Hence, the channels can still be re-used (which nobody does so far, though). Therefore, the API still supports the new behavior, but with this patch it's also compatible to the old implicit channel shutdown. Reported-by: Mark Haun Reported-by: Luiz Augusto von Dentz Signed-off-by: David Herrmann Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hidp/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index f1a117f8cad22..0bec4588c3c8c 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -401,6 +401,20 @@ static void hidp_idle_timeout(unsigned long arg) { struct hidp_session *session = (struct hidp_session *) arg; + /* The HIDP user-space API only contains calls to add and remove + * devices. There is no way to forward events of any kind. Therefore, + * we have to forcefully disconnect a device on idle-timeouts. This is + * unfortunate and weird API design, but it is spec-compliant and + * required for backwards-compatibility. Hence, on idle-timeout, we + * signal driver-detach events, so poll() will be woken up with an + * error-condition on both sockets. + */ + + session->intr_sock->sk->sk_err = EUNATCH; + session->ctrl_sock->sk->sk_err = EUNATCH; + wake_up_interruptible(sk_sleep(session->intr_sock->sk)); + wake_up_interruptible(sk_sleep(session->ctrl_sock->sk)); + hidp_session_terminate(session); } From f499c55b9eb89486f0b62fc24824bbe5415303de Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Mon, 5 Oct 2015 19:29:33 +0300 Subject: [PATCH 073/201] Bluetooth: ath3k: Add new AR3012 0930:021c id commit cd355ff071cd37e7197eccf9216770b2b29369f7 upstream. This adapter works with the existing linux-firmware. T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=02 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0930 ProdID=021c Rev=00.01 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1502781 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index e527a3e13939c..60c15f8bc09d0 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -93,6 +93,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x04CA, 0x300f) }, { USB_DEVICE(0x04CA, 0x3010) }, { USB_DEVICE(0x0930, 0x0219) }, + { USB_DEVICE(0x0930, 0x021c) }, { USB_DEVICE(0x0930, 0x0220) }, { USB_DEVICE(0x0930, 0x0227) }, { USB_DEVICE(0x0b05, 0x17d0) }, @@ -153,6 +154,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index b6aceaf82aa88..6291b5d7f4318 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -195,6 +195,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x04ca, 0x300f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0930, 0x021c), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0b05, 0x17d0), .driver_info = BTUSB_ATH3012 }, From 65fe0308fde6307efe411850b3907930792a9492 Mon Sep 17 00:00:00 2001 From: Dmitry Tunin Date: Fri, 16 Oct 2015 11:45:26 +0300 Subject: [PATCH 074/201] Bluetooth: ath3k: Add support of AR3012 0cf3:817b device commit 18e0afab8ce3f1230ce3fef52b2e73374fd9c0e7 upstream. T: Bus=04 Lev=02 Prnt=02 Port=04 Cnt=01 Dev#= 3 Spd=12 MxCh= 0 D: Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs= 1 P: Vendor=0cf3 ProdID=817b Rev=00.02 C: #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA I: If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb I: If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb BugLink: https://bugs.launchpad.net/bugs/1506615 Signed-off-by: Dmitry Tunin Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- drivers/bluetooth/ath3k.c | 2 ++ drivers/bluetooth/btusb.c | 1 + 2 files changed, 3 insertions(+) diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c index 60c15f8bc09d0..fa893c3ec4087 100644 --- a/drivers/bluetooth/ath3k.c +++ b/drivers/bluetooth/ath3k.c @@ -105,6 +105,7 @@ static const struct usb_device_id ath3k_table[] = { { USB_DEVICE(0x0CF3, 0x311F) }, { USB_DEVICE(0x0cf3, 0x3121) }, { USB_DEVICE(0x0CF3, 0x817a) }, + { USB_DEVICE(0x0CF3, 0x817b) }, { USB_DEVICE(0x0cf3, 0xe003) }, { USB_DEVICE(0x0CF3, 0xE004) }, { USB_DEVICE(0x0CF3, 0xE005) }, @@ -166,6 +167,7 @@ static const struct usb_device_id ath3k_blist_tbl[] = { { USB_DEVICE(0x0cf3, 0x311F), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0CF3, 0x817a), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0CF3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe006), .driver_info = BTUSB_ATH3012 }, diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 6291b5d7f4318..3d0d643c8bff4 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -207,6 +207,7 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0cf3, 0x311f), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x3121), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0x817a), .driver_info = BTUSB_ATH3012 }, + { USB_DEVICE(0x0cf3, 0x817b), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe003), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe004), .driver_info = BTUSB_ATH3012 }, { USB_DEVICE(0x0cf3, 0xe005), .driver_info = BTUSB_ATH3012 }, From 099208a1cb10bb4eee2622dc3ffe5497f4d6cf9f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 19 Oct 2015 10:51:47 +0300 Subject: [PATCH 075/201] Bluetooth: Fix removing connection parameters when unpairing commit a6ad2a6b9cc1d9d791aee5462cfb8528f366f1d4 upstream. The commit 89cbb0638e9b7 introduced support for deferred connection parameter removal when unpairing by removing them only once an existing connection gets disconnected. However, it failed to address the scenario when we're *not* connected and do an unpair operation. What makes things worse is that most user space BlueZ versions will first issue a disconnect request and only then unpair, meaning the buggy code will be triggered every time. This effectively causes the kernel to resume scanning and reconnect to a device for which we've removed all keys and GATT database information. This patch fixes the issue by adding the missing call to the hci_conn_params_del() function to a branch which handles the case of no existing connection. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/mgmt.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index c4fe2fee753fc..72c9376ec03cf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3090,6 +3090,11 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, } else { u8 addr_type; + if (cp->addr.type == BDADDR_LE_PUBLIC) + addr_type = ADDR_LE_DEV_PUBLIC; + else + addr_type = ADDR_LE_DEV_RANDOM; + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, &cp->addr.bdaddr); if (conn) { @@ -3105,13 +3110,10 @@ static int unpair_device(struct sock *sk, struct hci_dev *hdev, void *data, */ if (!cp->disconnect) conn = NULL; + } else { + hci_conn_params_del(hdev, &cp->addr.bdaddr, addr_type); } - if (cp->addr.type == BDADDR_LE_PUBLIC) - addr_type = ADDR_LE_DEV_PUBLIC; - else - addr_type = ADDR_LE_DEV_RANDOM; - hci_remove_irk(hdev, &cp->addr.bdaddr, addr_type); err = hci_remove_ltk(hdev, &cp->addr.bdaddr, addr_type); From 25f47b043dd719e7637063f74569cd94796ccdf1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Wed, 21 Oct 2015 15:21:31 +0300 Subject: [PATCH 076/201] Bluetooth: Fix missing hdev locking for LE scan cleanup commit 8ce783dc5ea3af3a213ac9b4d9d2ccfeeb9c9058 upstream. The hci_conn objects don't have a dedicated lock themselves but rely on the caller to hold the hci_dev lock for most types of access. The hci_conn_timeout() function has so far sent certain HCI commands based on the hci_conn state which has been possible without holding the hci_dev lock. The recent changes to do LE scanning before connect attempts added even more operations to hci_conn and hci_dev from hci_conn_timeout, thereby exposing potential race conditions with the hci_dev and hci_conn states. As an example of such a race, here there's a timeout but an l2cap_sock_connect() call manages to race with the cleanup routine: [Oct21 08:14] l2cap_chan_timeout: chan ee4b12c0 state BT_CONNECT [ +0.000004] l2cap_chan_close: chan ee4b12c0 state BT_CONNECT [ +0.000002] l2cap_chan_del: chan ee4b12c0, conn f3141580, err 111, state BT_CONNECT [ +0.000002] l2cap_sock_teardown_cb: chan ee4b12c0 state BT_CONNECT [ +0.000005] l2cap_chan_put: chan ee4b12c0 orig refcnt 4 [ +0.000010] hci_conn_drop: hcon f53d56e0 orig refcnt 1 [ +0.000013] l2cap_chan_put: chan ee4b12c0 orig refcnt 3 [ +0.000063] hci_conn_timeout: hcon f53d56e0 state BT_CONNECT [ +0.000049] hci_conn_params_del: addr ee:0d:30:09:53:1f (type 1) [ +0.000002] hci_chan_list_flush: hcon f53d56e0 [ +0.000001] hci_chan_del: hci0 hcon f53d56e0 chan f4e7ccc0 [ +0.004528] l2cap_sock_create: sock e708fc00 [ +0.000023] l2cap_chan_create: chan ee4b1770 [ +0.000001] l2cap_chan_hold: chan ee4b1770 orig refcnt 1 [ +0.000002] l2cap_sock_init: sk ee4b3390 [ +0.000029] l2cap_sock_bind: sk ee4b3390 [ +0.000010] l2cap_sock_setsockopt: sk ee4b3390 [ +0.000037] l2cap_sock_connect: sk ee4b3390 [ +0.000002] l2cap_chan_connect: 00:02:72:d9:e5:8b -> ee:0d:30:09:53:1f (type 2) psm 0x00 [ +0.000002] hci_get_route: 00:02:72:d9:e5:8b -> ee:0d:30:09:53:1f [ +0.000001] hci_dev_hold: hci0 orig refcnt 8 [ +0.000003] hci_conn_hold: hcon f53d56e0 orig refcnt 0 Above the l2cap_chan_connect() shouldn't have been able to reach the hci_conn f53d56e0 anymore but since hci_conn_timeout didn't do proper locking that's not the case. The end result is a reference to hci_conn that's not in the conn_hash list, resulting in list corruption when trying to remove it later: [Oct21 08:15] l2cap_chan_timeout: chan ee4b1770 state BT_CONNECT [ +0.000004] l2cap_chan_close: chan ee4b1770 state BT_CONNECT [ +0.000003] l2cap_chan_del: chan ee4b1770, conn f3141580, err 111, state BT_CONNECT [ +0.000001] l2cap_sock_teardown_cb: chan ee4b1770 state BT_CONNECT [ +0.000005] l2cap_chan_put: chan ee4b1770 orig refcnt 4 [ +0.000002] hci_conn_drop: hcon f53d56e0 orig refcnt 1 [ +0.000015] l2cap_chan_put: chan ee4b1770 orig refcnt 3 [ +0.000038] hci_conn_timeout: hcon f53d56e0 state BT_CONNECT [ +0.000003] hci_chan_list_flush: hcon f53d56e0 [ +0.000002] hci_conn_hash_del: hci0 hcon f53d56e0 [ +0.000001] ------------[ cut here ]------------ [ +0.000461] WARNING: CPU: 0 PID: 1782 at lib/list_debug.c:56 __list_del_entry+0x3f/0x71() [ +0.000839] list_del corruption, f53d56e0->prev is LIST_POISON2 (00000200) The necessary fix is unfortunately more complicated than just adding hci_dev_lock/unlock calls to the hci_conn_timeout() call path. Particularly, the hci_conn_del() API, which expects the hci_dev lock to be held, performs a cancel_delayed_work_sync(&hcon->disc_work) which would lead to a deadlock if the hci_conn_timeout() call path tries to acquire the same lock. This patch solves the problem by deferring the cleanup work to a separate work callback. To protect against the hci_dev or hci_conn going away meanwhile temporary references are taken with the help of hci_dev_hold() and hci_conn_get(). Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Greg Kroah-Hartman --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 52 ++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 9e1a59e01fa2f..544a0201abdb4 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -469,6 +469,7 @@ struct hci_conn { struct delayed_work auto_accept_work; struct delayed_work idle_work; struct delayed_work le_conn_timeout; + struct work_struct le_scan_cleanup; struct device dev; struct dentry *debugfs; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2dda439c8cb83..ec4836f243bc1 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -137,18 +137,51 @@ static void hci_conn_cleanup(struct hci_conn *conn) hci_conn_put(conn); } -/* This function requires the caller holds hdev->lock */ -static void hci_connect_le_scan_remove(struct hci_conn *conn) +static void le_scan_cleanup(struct work_struct *work) { - hci_connect_le_scan_cleanup(conn); + struct hci_conn *conn = container_of(work, struct hci_conn, + le_scan_cleanup); + struct hci_dev *hdev = conn->hdev; + struct hci_conn *c = NULL; - /* We can't call hci_conn_del here since that would deadlock - * with trying to call cancel_delayed_work_sync(&conn->disc_work). - * Instead, call just hci_conn_cleanup() which contains the bare - * minimum cleanup operations needed for a connection in this - * state. + BT_DBG("%s hcon %p", hdev->name, conn); + + hci_dev_lock(hdev); + + /* Check that the hci_conn is still around */ + rcu_read_lock(); + list_for_each_entry_rcu(c, &hdev->conn_hash.list, list) { + if (c == conn) + break; + } + rcu_read_unlock(); + + if (c == conn) { + hci_connect_le_scan_cleanup(conn); + hci_conn_cleanup(conn); + } + + hci_dev_unlock(hdev); + hci_dev_put(hdev); + hci_conn_put(conn); +} + +static void hci_connect_le_scan_remove(struct hci_conn *conn) +{ + BT_DBG("%s hcon %p", conn->hdev->name, conn); + + /* We can't call hci_conn_del/hci_conn_cleanup here since that + * could deadlock with another hci_conn_del() call that's holding + * hci_dev_lock and doing cancel_delayed_work_sync(&conn->disc_work). + * Instead, grab temporary extra references to the hci_dev and + * hci_conn and perform the necessary cleanup in a separate work + * callback. */ - hci_conn_cleanup(conn); + + hci_dev_hold(conn->hdev); + hci_conn_get(conn); + + schedule_work(&conn->le_scan_cleanup); } static void hci_acl_create_connection(struct hci_conn *conn) @@ -580,6 +613,7 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst, INIT_DELAYED_WORK(&conn->auto_accept_work, hci_conn_auto_accept); INIT_DELAYED_WORK(&conn->idle_work, hci_conn_idle); INIT_DELAYED_WORK(&conn->le_conn_timeout, le_conn_timeout); + INIT_WORK(&conn->le_scan_cleanup, le_scan_cleanup); atomic_set(&conn->refcnt, 0); From 8d66c7371044032029efd1aecfc9a3b3e2125a9d Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 30 Oct 2015 13:48:19 +0100 Subject: [PATCH 077/201] can: Use correct type in sizeof() in nla_put() commit 562b103a21974c2f9cd67514d110f918bb3e1796 upstream. The sizeof() is invoked on an incorrect variable, likely due to some copy-paste error, and this might result in memory corruption. Fix this. Signed-off-by: Marek Vasut Cc: Wolfgang Grandegger Cc: netdev@vger.kernel.org Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index aede704605c66..141c2a42d7eda 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -915,7 +915,7 @@ static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put(skb, IFLA_CAN_BITTIMING_CONST, sizeof(*priv->bittiming_const), priv->bittiming_const)) || - nla_put(skb, IFLA_CAN_CLOCK, sizeof(cm), &priv->clock) || + nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) || nla_put_u32(skb, IFLA_CAN_STATE, state) || nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || From bf804ca6bcbb73b7c6ab2328f4baadd0e1f1f2d8 Mon Sep 17 00:00:00 2001 From: Mirza Krak Date: Tue, 10 Nov 2015 14:59:34 +0100 Subject: [PATCH 078/201] can: sja1000: clear interrupts on start commit 7cecd9ab80f43972c056dc068338f7bcc407b71c upstream. According to SJA1000 data sheet error-warning (EI) interrupt is not cleared by setting the controller in to reset-mode. Then if we have the following case: - system is suspended (echo mem > /sys/power/state) and SJA1000 is left in operating state - A bus error condition occurs which activates EI interrupt, system is still suspended which means EI interrupt will be not be handled nor cleared. If the above two events occur, on resume there is no way to return the SJA1000 to operating state, except to cycle power to it. By simply reading the IR register on start we will clear any previous conditions that could be present. Signed-off-by: Mirza Krak Reported-by: Christian Magnusson Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman --- drivers/net/can/sja1000/sja1000.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 7b92e911a6168..f10834be48a5c 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -218,6 +218,9 @@ static void sja1000_start(struct net_device *dev) priv->write_reg(priv, SJA1000_RXERR, 0x0); priv->read_reg(priv, SJA1000_ECC); + /* clear interrupt flags */ + priv->read_reg(priv, SJA1000_IR); + /* leave reset mode */ set_normal_mode(dev); } From 000fb8a2db323fa064992bde94c5df46ab661895 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 22 Oct 2015 15:41:52 +0100 Subject: [PATCH 079/201] arm64: Fix compat register mappings commit 5accd17d0eb523350c9ef754d655e379c9bb93b3 upstream. For reasons not entirely apparent, but now enshrined in history, the architectural mapping of AArch32 banked registers to AArch64 registers actually orders SP_ and LR_ backwards compared to the intuitive r13/r14 order, for all modes except FIQ. Fix the compat__ macros accordingly, in the hope of avoiding subtle bugs with KVM and AArch32 guests. Signed-off-by: Robin Murphy Acked-by: Will Deacon Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/include/asm/ptrace.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index 536274ed292ea..e9e5467e0bf45 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -83,14 +83,14 @@ #define compat_sp regs[13] #define compat_lr regs[14] #define compat_sp_hyp regs[15] -#define compat_sp_irq regs[16] -#define compat_lr_irq regs[17] -#define compat_sp_svc regs[18] -#define compat_lr_svc regs[19] -#define compat_sp_abt regs[20] -#define compat_lr_abt regs[21] -#define compat_sp_und regs[22] -#define compat_lr_und regs[23] +#define compat_lr_irq regs[16] +#define compat_sp_irq regs[17] +#define compat_lr_svc regs[18] +#define compat_sp_svc regs[19] +#define compat_lr_abt regs[20] +#define compat_sp_abt regs[21] +#define compat_lr_und regs[22] +#define compat_sp_und regs[23] #define compat_r8_fiq regs[24] #define compat_r9_fiq regs[25] #define compat_r10_fiq regs[26] From ea9130ac6927fc4fe26cab6aa25d9e4172c38cd5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 26 Oct 2015 21:42:33 +0000 Subject: [PATCH 080/201] arm64: page-align sections for DEBUG_RODATA commit cb083816ab5ac3d10a9417527f07fc5962cc3808 upstream. A kernel built with DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA doesn't have .text aligned to a page boundary, though fixup_executable works at page-granularity thanks to its use of create_mapping. If .text is not page-aligned, the first page it exists in may be marked non-executable, leading to failures when an attempt is made to execute code in said page. This patch upgrades ALIGN_DEBUG_RO and ALIGN_DEBUG_RO_MIN to force page alignment for DEBUG_RO_DATA && !CONFIG_DEBUG_ALIGN_RODATA kernels, ensuring that all sections with specific RWX permission requirements are mapped with the correct permissions. Signed-off-by: Mark Rutland Reported-by: Jeremy Linton Reviewed-by: Laura Abbott Acked-by: Ard Biesheuvel Cc: Suzuki Poulose Cc: Will Deacon Fixes: da141706aea52c1a ("arm64: add better page protections to arm64") Signed-off-by: Catalin Marinas Signed-off-by: Greg Kroah-Hartman --- arch/arm64/kernel/vmlinux.lds.S | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 98073332e2d05..4d77757b5894a 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -60,9 +60,12 @@ PECOFF_FILE_ALIGNMENT = 0x200; #define PECOFF_EDATA_PADDING #endif -#ifdef CONFIG_DEBUG_ALIGN_RODATA +#if defined(CONFIG_DEBUG_ALIGN_RODATA) #define ALIGN_DEBUG_RO . = ALIGN(1< Date: Tue, 20 Oct 2015 17:25:09 +0900 Subject: [PATCH 081/201] pinctrl: uniphier: set input-enable before pin-muxing commit bac7f4c1bf5e7c6ccd5bb71edc015b26c77f7460 upstream. While IECTRL is disabled, input signals are pulled-down internally. If pin-muxing is set up first, glitch signals (Low to High transition) might be input to hardware blocks. Bad case scenario: [1] The hardware block is already running before pinctrl is handled. (the reset is de-asserted by default or by a firmware, for example) [2] The pin-muxing is set up. The input signals to hardware block are pulled-down by the chip-internal biasing. [3] The pins are input-enabled. The signals from the board reach the hardware block. Actually, one invalid character is input to the UART blocks for such SoCs as PH1-LD4, PH1-sLD8, where UART devices start to run at the power on reset. To avoid such problems, pins should be input-enabled before muxing. Fixes: 6e9088920258 ("pinctrl: UniPhier: add UniPhier pinctrl core support") Signed-off-by: Masahiro Yamada Reported-by: Dai Okamura Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/uniphier/pinctrl-uniphier-core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c index 918f3b643f1b0..589872cc8adbb 100644 --- a/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c +++ b/drivers/pinctrl/uniphier/pinctrl-uniphier-core.c @@ -539,6 +539,12 @@ static int uniphier_pmx_set_one_mux(struct pinctrl_dev *pctldev, unsigned pin, unsigned reg, reg_end, shift, mask; int ret; + /* some pins need input-enabling */ + ret = uniphier_conf_pin_input_enable(pctldev, + &pctldev->desc->pins[pin], 1); + if (ret) + return ret; + reg = UNIPHIER_PINCTRL_PINMUX_BASE + pin * mux_bits / 32 * reg_stride; reg_end = reg + reg_stride; shift = pin * mux_bits % 32; @@ -563,9 +569,7 @@ static int uniphier_pmx_set_one_mux(struct pinctrl_dev *pctldev, unsigned pin, return ret; } - /* some pins need input-enabling */ - return uniphier_conf_pin_input_enable(pctldev, - &pctldev->desc->pins[pin], 1); + return 0; } static int uniphier_pmx_set_mux(struct pinctrl_dev *pctldev, From 0e9637a2b1d42761dddeaad4dcf3c09cb34e5db5 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 11 Oct 2015 17:39:31 +0200 Subject: [PATCH 082/201] pinctrl: qcom: ssbi: fix compilation with DEBUG_FS=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 11091fb0a1227d569d09353e1ce1f88694a033dc upstream. The DEBUG_FS=n #defines for the dbg_show functions were missed when renaming the driver from msm_ to pm8xxx_, causing it to break the build when DEBUG_FS isn't enabled: CC [M] drivers/pinctrl/qcom/pinctrl-ssbi-gpio.o drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c:597:14: error: ‘pm8xxx_gpio_dbg_show’ undeclared here (not in a function) .dbg_show = pm8xxx_gpio_dbg_show, Fix this by renaming them correctly. Fixes: b4c45fe974bc ("pinctrl: qcom: ssbi: Family A gpio & mpp drivers") Signed-off-by: Jonas Gorski Reviewed-by: Bjorn Andersson Signed-off-by: Linus Walleij Signed-off-by: Greg Kroah-Hartman --- drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c | 2 +- drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c index e1a3721bc8e58..d809c9eaa3231 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-gpio.c @@ -584,7 +584,7 @@ static void pm8xxx_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip) } #else -#define msm_gpio_dbg_show NULL +#define pm8xxx_gpio_dbg_show NULL #endif static struct gpio_chip pm8xxx_gpio_template = { diff --git a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c index 6652b8d7f707a..8982027de8e8b 100644 --- a/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c +++ b/drivers/pinctrl/qcom/pinctrl-ssbi-mpp.c @@ -639,7 +639,7 @@ static void pm8xxx_mpp_dbg_show(struct seq_file *s, struct gpio_chip *chip) } #else -#define msm_mpp_dbg_show NULL +#define pm8xxx_mpp_dbg_show NULL #endif static struct gpio_chip pm8xxx_mpp_template = { From 92f503d0b0491f90c196a1ed3dda601c4016b1a3 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 9 Sep 2015 11:34:37 +0300 Subject: [PATCH 083/201] ath10k: add ATH10K_FW_FEATURE_RAW_MODE_SUPPORT to ath10k_core_fw_feature_str[] commit 5af82fa66a7ee8dfc29fadb487a02e2ef14ea965 upstream. This was missed in the original commit adding the flag and ath10k only printed "bit10": ath10k_pci 0000:02:00.0: qca988x hw2.0 (0x4100016c, 0x043202ff) fw 10.2.4.70.6-2 api 3 htt-ver 2.1 wmi-op 5 htt-op 2 cal otp max-sta 128 raw 0 hwcrypto 1 features no-p2p,bit10 Also add a build test to avoid this happening again. Fixes: ccec9038c721 ("ath10k: enable raw encap mode and software crypto engine") Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/core.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index b87b98617073b..9e3d89d064453 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -142,12 +142,17 @@ static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_IGNORE_OTP_RESULT] = "ignore-otp", [ATH10K_FW_FEATURE_NO_NWIFI_DECAP_4ADDR_PADDING] = "no-4addr-pad", [ATH10K_FW_FEATURE_SUPPORTS_SKIP_CLOCK_INIT] = "skip-clock-init", + [ATH10K_FW_FEATURE_RAW_MODE_SUPPORT] = "raw-mode", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, size_t buf_len, enum ath10k_fw_features feat) { + /* make sure that ath10k_core_fw_feature_str[] gets updated */ + BUILD_BUG_ON(ARRAY_SIZE(ath10k_core_fw_feature_str) != + ATH10K_FW_FEATURE_COUNT); + if (feat >= ARRAY_SIZE(ath10k_core_fw_feature_str) || WARN_ON(!ath10k_core_fw_feature_str[feat])) { return scnprintf(buf, buf_len, "bit%d", feat); From 3a0ff96194c14f7915e571553a2328e477da8c97 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 6 Oct 2015 15:19:34 +0300 Subject: [PATCH 084/201] ath10k: use station's current operating mode from assoc request commit 72f8cef5d1155209561b01e092ce1a04ad50c4cb upstream. The current number of spatial streams used by the client is advertised as a separate IE in assoc request. Use this information to set the NSS operating mode. Fixes: 45c9abc059fa ("ath10k: implement more versatile set_bitrate_mask"). Signed-off-by: Vivek Natarajan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 64674c955d447..ae90a2cdab9b2 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2083,7 +2083,8 @@ static void ath10k_peer_assoc_h_ht(struct ath10k *ar, enum ieee80211_band band; const u8 *ht_mcs_mask; const u16 *vht_mcs_mask; - int i, n, max_nss; + int i, n; + u8 max_nss; u32 stbc; lockdep_assert_held(&ar->conf_mutex); @@ -2168,7 +2169,7 @@ static void ath10k_peer_assoc_h_ht(struct ath10k *ar, arg->peer_ht_rates.rates[i] = i; } else { arg->peer_ht_rates.num_rates = n; - arg->peer_num_spatial_streams = max_nss; + arg->peer_num_spatial_streams = min(sta->rx_nss, max_nss); } ath10k_dbg(ar, ATH10K_DBG_MAC, "mac ht peer %pM mcs cnt %d nss %d\n", From 97f367a3193fa5d4e85a469a3d862396c5484c3e Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Tue, 3 Nov 2015 11:51:33 +0530 Subject: [PATCH 085/201] ath10k: fix invalid NSS for 4x4 devices commit f680f70adbeab28b35f849016b964dd645db6237 upstream. The number of spatial streams that are derived from chain mask for 4x4 devices is using wrong bitmask and conditional check. This is affecting downlink throughput for QCA99x0 devices. Earlier cfg_tx_chainmask is not filled by default until user configured it and so get_nss_from_chainmask never be called. This issue is exposed by recent commit 166de3f1895d ("ath10k: remove supported chain mask"). By default maximum supported chain mask is filled in cfg_tx_chainmask. Fixes: 5572a95b4b ("ath10k: apply chainmask settings to vdev on creation") Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo Signed-off-by: Greg Kroah-Hartman --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index ae90a2cdab9b2..e1d32de8b1110 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -4056,7 +4056,7 @@ static int ath10k_config(struct ieee80211_hw *hw, u32 changed) static u32 get_nss_from_chainmask(u16 chain_mask) { - if ((chain_mask & 0x15) == 0x15) + if ((chain_mask & 0xf) == 0xf) return 4; else if ((chain_mask & 0x7) == 0x7) return 3; From c2ec9e3a2cfe513efcd08d0492295124a9677ae2 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 27 Oct 2015 13:13:38 +0100 Subject: [PATCH 086/201] s390/kernel: fix ptrace peek/poke for floating point registers commit 55a423b6f105fa323168f15f4bb67f23b21da44e upstream. git commit 155e839a814834a3b4b31e729f4716e59d3d2dd4 "s390/kernel: dynamically allocate FP register save area" introduced a regression in regard to ptrace. If the vector register extension is not present or unused the ptrace peek of a floating pointer register return incorrect data and the ptrace poke to a floating pointer register overwrites the task structure starting at task->thread.fpu.fprs. Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/kernel/ptrace.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 8b1c8e33f184a..d319f36f7d1d6 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -244,7 +244,7 @@ static unsigned long __peek_user(struct task_struct *child, addr_t addr) ((addr_t) child->thread.fpu.vxrs + 2*offset); else tmp = *(addr_t *) - ((addr_t) &child->thread.fpu.fprs + offset); + ((addr_t) child->thread.fpu.fprs + offset); } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -388,7 +388,7 @@ static int __poke_user(struct task_struct *child, addr_t addr, addr_t data) child->thread.fpu.vxrs + 2*offset) = data; else *(addr_t *)((addr_t) - &child->thread.fpu.fprs + offset) = data; + child->thread.fpu.fprs + offset) = data; } else if (addr < (addr_t) (&dummy->regs.per_info + 1)) { /* @@ -622,7 +622,7 @@ static u32 __peek_user_compat(struct task_struct *child, addr_t addr) ((addr_t) child->thread.fpu.vxrs + 2*offset); else tmp = *(__u32 *) - ((addr_t) &child->thread.fpu.fprs + offset); + ((addr_t) child->thread.fpu.fprs + offset); } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* @@ -747,7 +747,7 @@ static int __poke_user_compat(struct task_struct *child, child->thread.fpu.vxrs + 2*offset) = tmp; else *(__u32 *)((addr_t) - &child->thread.fpu.fprs + offset) = tmp; + child->thread.fpu.fprs + offset) = tmp; } else if (addr < (addr_t) (&dummy32->regs.per_info + 1)) { /* From 46af83a483b3ddcd4bfff4889b2f8330806bfd7b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 9 Oct 2015 11:07:06 +0200 Subject: [PATCH 087/201] s390/pci: reshuffle struct used to write debug data commit 7cc8944e13c73374b6f33b39ca24c0891c87b077 upstream. zpci_err_insn writes stale stack content to the debugfs. Ensure that the struct in zpci_err_insn is ordered in a way that we don't have uninitialized holes in it. In addition to that add the packed attribute. Fixes: 3d8258e (s390/pci: move debug messages to debugfs) Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky Signed-off-by: Greg Kroah-Hartman --- arch/s390/pci/pci_insn.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c index dcc2634ccbe29..10ca15dcab11f 100644 --- a/arch/s390/pci/pci_insn.c +++ b/arch/s390/pci/pci_insn.c @@ -16,11 +16,11 @@ static inline void zpci_err_insn(u8 cc, u8 status, u64 req, u64 offset) { struct { - u8 cc; - u8 status; u64 req; u64 offset; - } data = {cc, status, req, offset}; + u8 cc; + u8 status; + } __packed data = {req, offset, cc, status}; zpci_err_hex(&data, sizeof(data)); } From 0c590b83b0308375b0d5e34fe6c9756d007184b4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 26 Oct 2015 08:41:29 +0100 Subject: [PATCH 088/201] KVM: s390: SCA must not cross page boundaries commit c5c2c393468576bad6d10b2b5fefff8cd25df3f4 upstream. We seemed to have missed a few corner cases in commit f6c137ff00a4 ("KVM: s390: randomize sca address"). The SCA has a maximum size of 2112 bytes. By setting the sca_offset to some unlucky numbers, we exceed the page. 0x7c0 (1984) -> Fits exactly 0x7d0 (2000) -> 16 bytes out 0x7e0 (2016) -> 32 bytes out 0x7f0 (2032) -> 48 bytes out One VCPU entry is 32 bytes long. For the last two cases, we actually write data to the other page. 1. The address of the VCPU. 2. Injection/delivery/clearing of SIGP externall calls via SIGP IF. Especially the 2. happens regularly. So this could produce two problems: 1. The guest losing/getting external calls. 2. Random memory overwrites in the host. So this problem happens on every 127 + 128 created VM with 64 VCPUs. Acked-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0a67c40eece9b..2275413625dbc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1120,7 +1120,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.sca) goto out_err; spin_lock(&kvm_lock); - sca_offset = (sca_offset + 16) & 0x7f0; + sca_offset += 16; + if (sca_offset + sizeof(struct sca_block) > PAGE_SIZE) + sca_offset = 0; kvm->arch.sca = (struct sca_block *) ((char *) kvm->arch.sca + sca_offset); spin_unlock(&kvm_lock); From 752f0d74a7a63723e0baca91fdd9cb55f012911a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:03:50 +0100 Subject: [PATCH 089/201] KVM: Provide function for VCPU lookup by id commit db27a7a37aa0b1f8b373f8b0fb72a2ccaafb85b7 upstream. Let's provide a function to lookup a VCPU by id. Reviewed-by: Christian Borntraeger Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger [split patch from refactoring patch] Signed-off-by: Greg Kroah-Hartman --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e7259..e480f9fbdd629 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -442,6 +442,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) (vcpup = kvm_get_vcpu(kvm, idx)) != NULL; \ idx++) +static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) + if (vcpu->vcpu_id == id) + return vcpu; + return NULL; +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ From 0c1396192a918b5d5879f8bdd59b97340e39818c Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:06:06 +0100 Subject: [PATCH 090/201] KVM: s390: fix wrong lookup of VCPUs by array index commit 152e9f65d66f0a3891efc3869440becc0e7ff53f upstream. For now, VCPUs were always created sequentially with incrementing VCPU ids. Therefore, the index in the VCPUs array matched the id. As sequential creation might change with cpu hotplug, let's use the correct lookup function to find a VCPU by id, not array index. Let's also use kvm_lookup_vcpu() for validation of the sending VCPU on external call injection. Reviewed-by: Christian Borntraeger Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 3 +-- arch/s390/kvm/sigp.c | 8 ++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 5c2c169395c3c..bb2468a8e7795 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1057,8 +1057,7 @@ static int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq) src_id, 0); /* sending vcpu invalid */ - if (src_id >= KVM_MAX_VCPUS || - kvm_get_vcpu(vcpu->kvm, src_id) == NULL) + if (kvm_get_vcpu_by_id(vcpu->kvm, src_id) == NULL) return -EINVAL; if (sclp.has_sigpif) diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index da690b69f9fe1..77c22d685c7a1 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -291,12 +291,8 @@ static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code, u16 cpu_addr, u32 parameter, u64 *status_reg) { int rc; - struct kvm_vcpu *dst_vcpu; + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); - if (cpu_addr >= KVM_MAX_VCPUS) - return SIGP_CC_NOT_OPERATIONAL; - - dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); if (!dst_vcpu) return SIGP_CC_NOT_OPERATIONAL; @@ -478,7 +474,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu) trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr); if (order_code == SIGP_EXTERNAL_CALL) { - dest_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr); BUG_ON(dest_vcpu == NULL); kvm_s390_vcpu_wakeup(dest_vcpu); From 705e4e3c2cd5b193ebf83e762fb303be7b256cea Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 5 Nov 2015 09:38:15 +0100 Subject: [PATCH 091/201] KVM: s390: avoid memory overwrites on emergency signal injection commit b85de33a1a3433487b6a721cfdce25ec8673e622 upstream. Commit 383d0b050106 ("KVM: s390: handle pending local interrupts via bitmap") introduced a possible memory overwrite from user space. User space could pass an invalid emergency signal code (sending VCPU) and therefore exceed the bitmap. Let's take care of this case and check that the id is in the valid range. Reviewed-by: Dominik Dingel Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/interrupt.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bb2468a8e7795..08de785c9e639 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -1136,6 +1136,10 @@ static int __inject_sigp_emergency(struct kvm_vcpu *vcpu, trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY, irq->u.emerg.code, 0); + /* sending vcpu invalid */ + if (kvm_get_vcpu_by_id(vcpu->kvm, irq->u.emerg.code) == NULL) + return -EINVAL; + set_bit(irq->u.emerg.code, li->sigp_emerg_pending); set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs); atomic_or(CPUSTAT_EXT_INT, li->cpuflags); From 5d19d22c7ba11df61e175dc289ef1058270b00ae Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 6 Nov 2015 12:08:48 +0100 Subject: [PATCH 092/201] KVM: s390: enable SIMD only when no VCPUs were created commit 5967c17b118a2bd1dd1d554cc4eee16233e52bec upstream. We should never allow to enable/disable any facilities for the guest when other VCPUs were already created. kvm_arch_vcpu_(load|put) relies on SIMD not changing during runtime. If somebody would create and run VCPUs and then decides to enable SIMD, undefined behaviour could be possible (e.g. vector save area not being set up). Acked-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: David Hildenbrand Signed-off-by: Christian Borntraeger Signed-off-by: Greg Kroah-Hartman --- arch/s390/kvm/kvm-s390.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 2275413625dbc..1290af83417a3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -342,12 +342,16 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS: - if (MACHINE_HAS_VX) { + mutex_lock(&kvm->lock); + if (atomic_read(&kvm->online_vcpus)) { + r = -EBUSY; + } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac->mask, 129); set_kvm_facility(kvm->arch.model.fac->list, 129); r = 0; } else r = -EINVAL; + mutex_unlock(&kvm->lock); VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s", r ? "(not available)" : "(success)"); break; From 580c9bb0647dd6d75e28141942a0fd8a0d8962a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Mon, 31 Aug 2015 19:48:28 +0300 Subject: [PATCH 093/201] Revert "usb: dwc3: gadget: drop unnecessary loop when cleaning up TRBs" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit d115d7050a0d2c4967532f18c9cb522fea6b7280 upstream. This reverts commit 8f2c9544aba636134303105ecb164190a39dece4. As it breaks g_ether on my Baytrail FFRD8 device. Everything starts out fine, but after a bit of data has been transferred it just stops flowing. Note that I do get a bunch of these "NOHZ: local_softirq_pending 08" when booting the machine, but I'm not really sure if they're related to this problem. Cc: Felipe Balbi Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Ville Syrjälä Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 1e8bdf8178113..11784de01ca88 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1872,27 +1872,32 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, unsigned int i; int ret; - req = next_request(&dep->req_queued); - if (!req) { - WARN_ON_ONCE(1); - return 1; - } - i = 0; do { - slot = req->start_slot + i; - if ((slot == DWC3_TRB_NUM - 1) && + req = next_request(&dep->req_queued); + if (!req) { + WARN_ON_ONCE(1); + return 1; + } + i = 0; + do { + slot = req->start_slot + i; + if ((slot == DWC3_TRB_NUM - 1) && usb_endpoint_xfer_isoc(dep->endpoint.desc)) - slot++; - slot %= DWC3_TRB_NUM; - trb = &dep->trb_pool[slot]; + slot++; + slot %= DWC3_TRB_NUM; + trb = &dep->trb_pool[slot]; + + ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, + event, status); + if (ret) + break; + } while (++i < req->request.num_mapped_sgs); + + dwc3_gadget_giveback(dep, req, status); - ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, - event, status); if (ret) break; - } while (++i < req->request.num_mapped_sgs); - - dwc3_gadget_giveback(dep, req, status); + } while (1); if (usb_endpoint_xfer_isoc(dep->endpoint.desc) && list_empty(&dep->req_queued)) { From 5d8a3e3b16366682be5ef5d3f6b24ae39bef5b78 Mon Sep 17 00:00:00 2001 From: Mian Yousaf Kaukab Date: Mon, 19 Oct 2015 16:25:15 +0200 Subject: [PATCH 094/201] usb: gadget: net2280: restore ep_cfg after defect7374 workaround commit 81e9d14a53eb1abfbe6ac828a87a2deb4702b5f1 upstream. Defect 7374 workaround enables all GPEP as endpoint 0. Restore endpoint number when defect 7374 workaround is disabled. Otherwise, check to match USB endpoint number to hardware endpoint number in net2280_enable() fails. Reported-by: Paul Jones Signed-off-by: Mian Yousaf Kaukab Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/net2280.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index cf0ed42f5591c..6706aef907f4b 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1913,7 +1913,7 @@ static void defect7374_disable_data_eps(struct net2280 *dev) for (i = 1; i < 5; i++) { ep = &dev->ep[i]; - writel(0, &ep->cfg->ep_cfg); + writel(i, &ep->cfg->ep_cfg); } /* CSROUT, CSRIN, PCIOUT, PCIIN, STATIN, RCIN */ From f95102dd298de57c5c27b65271f4359dd24c30b2 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Mon, 16 Nov 2015 19:22:08 +0100 Subject: [PATCH 095/201] usb: gadget: atmel_usba_udc: Expose correct device speed commit d134c48d889ddceadf4c990e6f3df16b816ed5d4 upstream. Following changes that appeared in lk 4.0.0, the gadget udc driver for some ARM based Atmel SoCs (e.g. at91sam9x5 and sama5d3 families) incorrectly deduced full-speed USB link speed even when the hardware had negotiated a high-speed link. The fix is to make sure that the UDPHS Interrupt Enable Register value does not mask the SPEED bit in the Interrupt Status Register. For a mass storage gadget this problem lead to failures when the host had a USB 3 port with the xhci_hcd driver. If the host was a USB 2 port using the ehci_hcd driver then the mass storage gadget worked (but probably at a lower speed than it should have). Signed-off-by: Douglas Gilbert Reviewed-by: Boris Brezillon Fixes: 9870d895ad87 ("usb: atmel_usba_udc: Mask status with enabled irqs") Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/gadget/udc/atmel_usba_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/atmel_usba_udc.c b/drivers/usb/gadget/udc/atmel_usba_udc.c index f0f2b066ac083..f92f5aff0dd5e 100644 --- a/drivers/usb/gadget/udc/atmel_usba_udc.c +++ b/drivers/usb/gadget/udc/atmel_usba_udc.c @@ -1633,7 +1633,7 @@ static irqreturn_t usba_udc_irq(int irq, void *devid) spin_lock(&udc->lock); int_enb = usba_int_enb_get(udc); - status = usba_readl(udc, INT_STA) & int_enb; + status = usba_readl(udc, INT_STA) & (int_enb | USBA_HIGH_SPEED); DBG(DBG_INT, "irq, status=%#08x\n", status); if (status & USBA_DET_SUSPEND) { From aaa9636572ef79ac6cd1088a0f32653ae242bca7 Mon Sep 17 00:00:00 2001 From: Ben McCauley Date: Mon, 16 Nov 2015 10:47:24 -0600 Subject: [PATCH 096/201] usb: dwc3: gadget: let us set lower max_speed commit b9e51b2b1fda19143f48d182ed7a2943f21e1ae4 upstream. In some SoCs, dwc3 is implemented as a USB2.0 only core, meaning that it can't ever achieve SuperSpeed. Currect driver always sets gadget.max_speed to USB_SPEED_SUPER unconditionally. This can causes issues to some Host stacks where the host will issue a GetBOS() request and we will reply with a BOS containing Superspeed Capability Descriptor. At least Windows seems to be upset by this fact and prints a warning that we should connect $this device to another port. [ balbi@ti.com : rewrote entire commit, including source code comment to make a lot clearer what the problem is ] Signed-off-by: Ben McCauley Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/gadget.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 11784de01ca88..9c38e4b0a2622 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2723,11 +2723,33 @@ int dwc3_gadget_init(struct dwc3 *dwc) } dwc->gadget.ops = &dwc3_gadget_ops; - dwc->gadget.max_speed = USB_SPEED_SUPER; dwc->gadget.speed = USB_SPEED_UNKNOWN; dwc->gadget.sg_supported = true; dwc->gadget.name = "dwc3-gadget"; + /* + * FIXME We might be setting max_speed to revision < DWC3_REVISION_220A) + dwc3_trace(trace_dwc3_gadget, + "Changing max_speed on rev %08x\n", + dwc->revision); + + dwc->gadget.max_speed = dwc->maximum_speed; + /* * Per databook, DWC3 needs buffer size to be aligned to MaxPacketSize * on ep out. From c0cd28e68d5862fbf92c36b18cf28e3457bc5053 Mon Sep 17 00:00:00 2001 From: Li Jun Date: Fri, 12 Dec 2014 09:11:42 +0800 Subject: [PATCH 097/201] usb: chipidea: otg: gadget module load and unload support commit 85da852df66e5e0d3aba761b0fece7c958ff0685 upstream. This patch is to support load and unload gadget driver in full OTG mode. Signed-off-by: Li Jun Signed-off-by: Peter Chen Tested-by: Jiada Wang Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/udc.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c index 8223fe73ea859..391a1225b0ba3 100644 --- a/drivers/usb/chipidea/udc.c +++ b/drivers/usb/chipidea/udc.c @@ -1751,6 +1751,22 @@ static int ci_udc_start(struct usb_gadget *gadget, return retval; } +static void ci_udc_stop_for_otg_fsm(struct ci_hdrc *ci) +{ + if (!ci_otg_is_fsm_mode(ci)) + return; + + mutex_lock(&ci->fsm.lock); + if (ci->fsm.otg->state == OTG_STATE_A_PERIPHERAL) { + ci->fsm.a_bidl_adis_tmout = 1; + ci_hdrc_otg_fsm_start(ci); + } else if (ci->fsm.otg->state == OTG_STATE_B_PERIPHERAL) { + ci->fsm.protocol = PROTO_UNDEF; + ci->fsm.otg->state = OTG_STATE_UNDEFINED; + } + mutex_unlock(&ci->fsm.lock); +} + /** * ci_udc_stop: unregister a gadget driver */ @@ -1775,6 +1791,7 @@ static int ci_udc_stop(struct usb_gadget *gadget) ci->driver = NULL; spin_unlock_irqrestore(&ci->lock, flags); + ci_udc_stop_for_otg_fsm(ci); return 0; } From 6ef8be0f8815bb27a8cb9183ce0180442407a73d Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 7 Aug 2015 11:04:14 -0700 Subject: [PATCH 098/201] usb: dwc3: pci: Add the Synopsys HAPS AXI Product ID commit 41adc59caece02aa2e988a0e8f9fe8e6f426f82e upstream. This ID is for the Synopsys DWC_usb3 core with AXI interface on PCIe HAPS platform. This core has the debug registers mapped at a separate BAR in order to support enhanced hibernation. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index f62617999f3c7..361b7322548f4 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -27,6 +27,7 @@ #include "platform_data.h" #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd +#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce #define PCI_DEVICE_ID_INTEL_BYT 0x0f37 #define PCI_DEVICE_ID_INTEL_MRFLD 0x119e #define PCI_DEVICE_ID_INTEL_BSW 0x22B7 @@ -178,6 +179,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3), }, + { + PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, + PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI), + }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, From 044668df6617ff9302975d6ea16fa53ef17f0149 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 7 Aug 2015 11:47:25 -0700 Subject: [PATCH 099/201] usb: dwc3: pci: Add the PCI Product ID for Synopsys USB 3.1 commit e8095a25364a30216ad40dbe8893ed5c3c235949 upstream. This adds the PCI product ID for the Synopsys USB 3.1 IP core (DWC_usb31) on a HAPS-based PCI development platform. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 361b7322548f4..db67ed20f6255 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -28,6 +28,7 @@ #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 0xabcd #define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI 0xabce +#define PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31 0xabcf #define PCI_DEVICE_ID_INTEL_BYT 0x0f37 #define PCI_DEVICE_ID_INTEL_MRFLD 0x119e #define PCI_DEVICE_ID_INTEL_BSW 0x22B7 @@ -183,6 +184,10 @@ static const struct pci_device_id dwc3_pci_id_table[] = { PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI), }, + { + PCI_DEVICE(PCI_VENDOR_ID_SYNOPSYS, + PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31), + }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BSW), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BYT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MRFLD), }, From ab6fc45ff2c2e9183d81901d20224b4d37820570 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 4 Sep 2015 19:15:10 -0700 Subject: [PATCH 100/201] usb: dwc3: Support Synopsys USB 3.1 IP commit 690fb3718a70c66004342f6f5e2e8a5f95b977db upstream. This patch allows the dwc3 driver to run on the new Synopsys USB 3.1 IP core, albeit in USB 3.0 mode only. The Synopsys USB 3.1 IP (DWC_usb31) retains mostly the same register interface and programming model as the existing USB 3.0 controller IP (DWC_usb3). However the GSNPSID and version numbers are different. Add checking for the new ID to pass driver probe. Also, since the DWC_usb31 version number is lower in value than the full GSNPSID of the DWC_usb3 IP, we set the high bit to identify DWC_usb31 and to ensure the values are higher. Finally, add a documentation note about the revision numbering scheme. Any future revision checks (for STARS, workarounds, and new features) should take into consideration how it applies to both the 3.1/3.0 IP. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/core.c | 10 ++++++++-- drivers/usb/dwc3/core.h | 18 ++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 064123e445664..386ea764d8b2c 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -507,12 +507,18 @@ static int dwc3_core_init(struct dwc3 *dwc) reg = dwc3_readl(dwc->regs, DWC3_GSNPSID); /* This should read as U3 followed by revision number */ - if ((reg & DWC3_GSNPSID_MASK) != 0x55330000) { + if ((reg & DWC3_GSNPSID_MASK) == 0x55330000) { + /* Detected DWC_usb3 IP */ + dwc->revision = reg; + } else if ((reg & DWC3_GSNPSID_MASK) == 0x33310000) { + /* Detected DWC_usb31 IP */ + dwc->revision = dwc3_readl(dwc->regs, DWC3_VER_NUMBER); + dwc->revision |= DWC3_REVISION_IS_DWC31; + } else { dev_err(dwc->dev, "this is not a DesignWare USB3 DRD Core\n"); ret = -ENODEV; goto err0; } - dwc->revision = reg; /* * Write Linux Version Code to our GUID register so it's easy to figure diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 0447788845856..1841dd025ea7a 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -108,6 +108,9 @@ #define DWC3_GPRTBIMAP_FS0 0xc188 #define DWC3_GPRTBIMAP_FS1 0xc18c +#define DWC3_VER_NUMBER 0xc1a0 +#define DWC3_VER_TYPE 0xc1a4 + #define DWC3_GUSB2PHYCFG(n) (0xc200 + (n * 0x04)) #define DWC3_GUSB2I2CCTL(n) (0xc240 + (n * 0x04)) @@ -766,6 +769,14 @@ struct dwc3 { u32 num_event_buffers; u32 u1u2; u32 maximum_speed; + + /* + * All 3.1 IP version constants are greater than the 3.0 IP + * version constants. This works for most version checks in + * dwc3. However, in the future, this may not apply as + * features may be developed on newer versions of the 3.0 IP + * that are not in the 3.1 IP. + */ u32 revision; #define DWC3_REVISION_173A 0x5533173a @@ -788,6 +799,13 @@ struct dwc3 { #define DWC3_REVISION_270A 0x5533270a #define DWC3_REVISION_280A 0x5533280a +/* + * NOTICE: we're using bit 31 as a "is usb 3.1" flag. This is really + * just so dwc31 revisions are always larger than dwc3. + */ +#define DWC3_REVISION_IS_DWC31 0x80000000 +#define DWC3_USB31_REVISION_110A (0x3131302a | DWC3_REVISION_IS_USB31) + enum dwc3_ep0_next ep0_next_event; enum dwc3_ep0_state ep0state; enum dwc3_link_state link_state; From a78bd89f08fdaf04c83ce4b4bc2a6cfd90acc15c Mon Sep 17 00:00:00 2001 From: John Youn Date: Sat, 26 Sep 2015 00:11:15 -0700 Subject: [PATCH 101/201] usb: dwc3: pci: Add platform data for Synopsys HAPS commit bb7f3d6d323a56b9c3b3e727380d1395a7f10107 upstream. Add platform data and set usb3_lpm_capable and has_lpm_erratum. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index db67ed20f6255..0a3ccfc502c89 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -108,6 +108,21 @@ static int dwc3_pci_quirks(struct pci_dev *pdev) } } + if (pdev->vendor == PCI_VENDOR_ID_SYNOPSYS && + (pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3 || + pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB3_AXI || + pdev->device == PCI_DEVICE_ID_SYNOPSYS_HAPSUSB31)) { + + struct dwc3_platform_data pdata; + + memset(&pdata, 0, sizeof(pdata)); + pdata.usb3_lpm_capable = true; + pdata.has_lpm_erratum = true; + + return platform_device_add_data(pci_get_drvdata(pdev), &pdata, + sizeof(pdata)); + } + return 0; } From 78e46d1ff1d6932628e5ca6bd1648596f9901588 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 2 Oct 2015 20:30:57 -0700 Subject: [PATCH 102/201] usb: dwc3: Add dis_enblslpm_quirk commit ec791d149bca4511e7d3a6a92bb3b030c5a443f9 upstream. Add a quirk to clear the GUSB2PHYCFG.ENBLSLPM bit, which controls whether the PHY receives the suspend signal from the controller. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/usb/dwc3.txt | 2 ++ drivers/usb/dwc3/core.c | 6 ++++++ drivers/usb/dwc3/core.h | 4 ++++ drivers/usb/dwc3/platform_data.h | 1 + 4 files changed, 13 insertions(+) diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt b/Documentation/devicetree/bindings/usb/dwc3.txt index 0815eac5b1851..e12f3448846a2 100644 --- a/Documentation/devicetree/bindings/usb/dwc3.txt +++ b/Documentation/devicetree/bindings/usb/dwc3.txt @@ -35,6 +35,8 @@ Optional properties: LTSSM during USB3 Compliance mode. - snps,dis_u3_susphy_quirk: when set core will disable USB3 suspend phy. - snps,dis_u2_susphy_quirk: when set core will disable USB2 suspend phy. + - snps,dis_enblslpm_quirk: when set clears the enblslpm in GUSB2PHYCFG, + disabling the suspend signal to the PHY. - snps,is-utmi-l1-suspend: true when DWC3 asserts output signal utmi_l1_suspend_n, false when asserts utmi_sleep_n - snps,hird-threshold: HIRD threshold diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c index 386ea764d8b2c..51156ec91c78c 100644 --- a/drivers/usb/dwc3/core.c +++ b/drivers/usb/dwc3/core.c @@ -488,6 +488,9 @@ static int dwc3_phy_setup(struct dwc3 *dwc) if (dwc->dis_u2_susphy_quirk) reg &= ~DWC3_GUSB2PHYCFG_SUSPHY; + if (dwc->dis_enblslpm_quirk) + reg &= ~DWC3_GUSB2PHYCFG_ENBLSLPM; + dwc3_writel(dwc->regs, DWC3_GUSB2PHYCFG(0), reg); return 0; @@ -885,6 +888,8 @@ static int dwc3_probe(struct platform_device *pdev) "snps,dis_u3_susphy_quirk"); dwc->dis_u2_susphy_quirk = of_property_read_bool(node, "snps,dis_u2_susphy_quirk"); + dwc->dis_enblslpm_quirk = device_property_read_bool(dev, + "snps,dis_enblslpm_quirk"); dwc->tx_de_emphasis_quirk = of_property_read_bool(node, "snps,tx_de_emphasis_quirk"); @@ -915,6 +920,7 @@ static int dwc3_probe(struct platform_device *pdev) dwc->rx_detect_poll_quirk = pdata->rx_detect_poll_quirk; dwc->dis_u3_susphy_quirk = pdata->dis_u3_susphy_quirk; dwc->dis_u2_susphy_quirk = pdata->dis_u2_susphy_quirk; + dwc->dis_enblslpm_quirk = pdata->dis_enblslpm_quirk; dwc->tx_de_emphasis_quirk = pdata->tx_de_emphasis_quirk; if (pdata->tx_de_emphasis) diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h index 1841dd025ea7a..6e53ce9ce3207 100644 --- a/drivers/usb/dwc3/core.h +++ b/drivers/usb/dwc3/core.h @@ -178,6 +178,7 @@ #define DWC3_GUSB2PHYCFG_PHYSOFTRST (1 << 31) #define DWC3_GUSB2PHYCFG_SUSPHY (1 << 6) #define DWC3_GUSB2PHYCFG_ULPI_UTMI (1 << 4) +#define DWC3_GUSB2PHYCFG_ENBLSLPM (1 << 8) /* Global USB2 PHY Vendor Control Register */ #define DWC3_GUSB2PHYACC_NEWREGREQ (1 << 25) @@ -715,6 +716,8 @@ struct dwc3_scratchpad_array { * @rx_detect_poll_quirk: set if we enable rx_detect to polling lfps quirk * @dis_u3_susphy_quirk: set if we disable usb3 suspend phy * @dis_u2_susphy_quirk: set if we disable usb2 suspend phy + * @dis_enblslpm_quirk: set if we clear enblslpm in GUSB2PHYCFG, + * disabling the suspend signal to the PHY. * @tx_de_emphasis_quirk: set if we enable Tx de-emphasis quirk * @tx_de_emphasis: Tx de-emphasis value * 0 - -6dB de-emphasis @@ -859,6 +862,7 @@ struct dwc3 { unsigned rx_detect_poll_quirk:1; unsigned dis_u3_susphy_quirk:1; unsigned dis_u2_susphy_quirk:1; + unsigned dis_enblslpm_quirk:1; unsigned tx_de_emphasis_quirk:1; unsigned tx_de_emphasis:2; diff --git a/drivers/usb/dwc3/platform_data.h b/drivers/usb/dwc3/platform_data.h index d3614ecbb9ca2..db29380022604 100644 --- a/drivers/usb/dwc3/platform_data.h +++ b/drivers/usb/dwc3/platform_data.h @@ -42,6 +42,7 @@ struct dwc3_platform_data { unsigned rx_detect_poll_quirk:1; unsigned dis_u3_susphy_quirk:1; unsigned dis_u2_susphy_quirk:1; + unsigned dis_enblslpm_quirk:1; unsigned tx_de_emphasis_quirk:1; unsigned tx_de_emphasis:2; From 61d93448a08b9d0c194f75269d940a7038a2fab8 Mon Sep 17 00:00:00 2001 From: John Youn Date: Fri, 2 Oct 2015 20:32:17 -0700 Subject: [PATCH 103/201] usb: dwc3: pci: Set enblslpm quirk for Synopsys platforms commit 94218ee31ba56fb3a8625978b393124ad660408e upstream. Certain Synopsys prototyping PHY boards are not able to meet timings constraints for LPM. This allows the PHY to meet those timings by leaving the PHY clock running during suspend. Signed-off-by: John Youn Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/dwc3/dwc3-pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 0a3ccfc502c89..2cd33cb07ed96 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -118,6 +118,7 @@ static int dwc3_pci_quirks(struct pci_dev *pdev) memset(&pdata, 0, sizeof(pdata)); pdata.usb3_lpm_capable = true; pdata.has_lpm_erratum = true; + pdata.dis_enblslpm_quirk = true; return platform_device_add_data(pci_get_drvdata(pdev), &pdata, sizeof(pdata)); From a46143c2744f7a35554b030295f928b4efdceb33 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 16 Sep 2015 09:40:51 +0800 Subject: [PATCH 104/201] usb: chipidea: imx: refine clock operations to adapt for all platforms commit ae3e57ae26cdcc85728bb566f999bcb9a7cc6954 upstream. Some i.mx platforms need three clocks to let controller work, but others only need one, refine clock operation to adapt for all platforms, it fixes a regression found at i.mx27. Signed-off-by: Peter Chen Tested-by: Fabio Estevam Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/ci_hdrc_imx.c | 131 +++++++++++++++++++++++++---- 1 file changed, 113 insertions(+), 18 deletions(-) diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c index dcc50c878159e..ad53aed9b2806 100644 --- a/drivers/usb/chipidea/ci_hdrc_imx.c +++ b/drivers/usb/chipidea/ci_hdrc_imx.c @@ -73,6 +73,12 @@ struct ci_hdrc_imx_data { struct imx_usbmisc_data *usbmisc_data; bool supports_runtime_pm; bool in_lpm; + /* SoC before i.mx6 (except imx23/imx28) needs three clks */ + bool need_three_clks; + struct clk *clk_ipg; + struct clk *clk_ahb; + struct clk *clk_per; + /* --------------------------------- */ }; /* Common functions shared by usbmisc drivers */ @@ -124,6 +130,102 @@ static struct imx_usbmisc_data *usbmisc_get_init_data(struct device *dev) } /* End of common functions shared by usbmisc drivers*/ +static int imx_get_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + data->clk_ipg = devm_clk_get(dev, "ipg"); + if (IS_ERR(data->clk_ipg)) { + /* If the platform only needs one clocks */ + data->clk = devm_clk_get(dev, NULL); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + dev_err(dev, + "Failed to get clks, err=%ld,%ld\n", + PTR_ERR(data->clk), PTR_ERR(data->clk_ipg)); + return ret; + } + return ret; + } + + data->clk_ahb = devm_clk_get(dev, "ahb"); + if (IS_ERR(data->clk_ahb)) { + ret = PTR_ERR(data->clk_ahb); + dev_err(dev, + "Failed to get ahb clock, err=%d\n", ret); + return ret; + } + + data->clk_per = devm_clk_get(dev, "per"); + if (IS_ERR(data->clk_per)) { + ret = PTR_ERR(data->clk_per); + dev_err(dev, + "Failed to get per clock, err=%d\n", ret); + return ret; + } + + data->need_three_clks = true; + return ret; +} + +static int imx_prepare_enable_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + int ret = 0; + + if (data->need_three_clks) { + ret = clk_prepare_enable(data->clk_ipg); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ipg clk, err=%d\n", + ret); + return ret; + } + + ret = clk_prepare_enable(data->clk_ahb); + if (ret) { + dev_err(dev, + "Failed to prepare/enable ahb clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + + ret = clk_prepare_enable(data->clk_per); + if (ret) { + dev_err(dev, + "Failed to prepare/enable per clk, err=%d\n", + ret); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + return ret; + } + } else { + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(dev, + "Failed to prepare/enable clk, err=%d\n", + ret); + return ret; + } + } + + return ret; +} + +static void imx_disable_unprepare_clks(struct device *dev) +{ + struct ci_hdrc_imx_data *data = dev_get_drvdata(dev); + + if (data->need_three_clks) { + clk_disable_unprepare(data->clk_per); + clk_disable_unprepare(data->clk_ahb); + clk_disable_unprepare(data->clk_ipg); + } else { + clk_disable_unprepare(data->clk); + } +} static int ci_hdrc_imx_probe(struct platform_device *pdev) { @@ -142,23 +244,18 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) if (!data) return -ENOMEM; + platform_set_drvdata(pdev, data); data->usbmisc_data = usbmisc_get_init_data(&pdev->dev); if (IS_ERR(data->usbmisc_data)) return PTR_ERR(data->usbmisc_data); - data->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(data->clk)) { - dev_err(&pdev->dev, - "Failed to get clock, err=%ld\n", PTR_ERR(data->clk)); - return PTR_ERR(data->clk); - } + ret = imx_get_clks(&pdev->dev); + if (ret) + return ret; - ret = clk_prepare_enable(data->clk); - if (ret) { - dev_err(&pdev->dev, - "Failed to prepare or enable clock, err=%d\n", ret); + ret = imx_prepare_enable_clks(&pdev->dev); + if (ret) return ret; - } data->phy = devm_usb_get_phy_by_phandle(&pdev->dev, "fsl,usbphy", 0); if (IS_ERR(data->phy)) { @@ -201,8 +298,6 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) goto disable_device; } - platform_set_drvdata(pdev, data); - if (data->supports_runtime_pm) { pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); @@ -215,7 +310,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev) disable_device: ci_hdrc_remove_device(data->ci_pdev); err_clk: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return ret; } @@ -229,7 +324,7 @@ static int ci_hdrc_imx_remove(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); } ci_hdrc_remove_device(data->ci_pdev); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(&pdev->dev); return 0; } @@ -241,7 +336,7 @@ static int imx_controller_suspend(struct device *dev) dev_dbg(dev, "at %s\n", __func__); - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); data->in_lpm = true; return 0; @@ -259,7 +354,7 @@ static int imx_controller_resume(struct device *dev) return 0; } - ret = clk_prepare_enable(data->clk); + ret = imx_prepare_enable_clks(dev); if (ret) return ret; @@ -274,7 +369,7 @@ static int imx_controller_resume(struct device *dev) return 0; clk_disable: - clk_disable_unprepare(data->clk); + imx_disable_unprepare_clks(dev); return ret; } From 0330a1fbe51c995b5e75c3ea1440b2e4ab68ee8c Mon Sep 17 00:00:00 2001 From: Li Jun Date: Tue, 13 Oct 2015 18:23:31 +0800 Subject: [PATCH 105/201] usb: chipidea: debug: disable usb irq while role switch commit 251b3c8b57481bcecd3f753108e36e7389ce12ac upstream. Since the ci->role will be set after the host role start is complete, there will be nobody cared irq during start host if usb irq enabled. This error can be reproduced on i.mx6 sololite EVK board by: 1. disable otg id irq(IDIE) and disable all real otg properties of usbotg1 in dts. 2. boot up the board with ID cable and usb device connected. 3. echo gadget > /sys/kernel/debug/ci_hdrc.0/role 4. echo host > /sys/kernel/debug/ci_hdrc.0/role 5. irq 212: nobody cared. Signed-off-by: Li Jun Signed-off-by: Peter Chen Signed-off-by: Greg Kroah-Hartman --- drivers/usb/chipidea/debug.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/chipidea/debug.c b/drivers/usb/chipidea/debug.c index 080b7be3daf03..58c8485a0715a 100644 --- a/drivers/usb/chipidea/debug.c +++ b/drivers/usb/chipidea/debug.c @@ -322,8 +322,10 @@ static ssize_t ci_role_write(struct file *file, const char __user *ubuf, return -EINVAL; pm_runtime_get_sync(ci->dev); + disable_irq(ci->irq); ci_role_stop(ci); ret = ci_role_start(ci, role); + enable_irq(ci->irq); pm_runtime_put_sync(ci->dev); return ret ? ret : count; From 5a2fb41430f27c6f6c02246aea1c7d7bfad92cd3 Mon Sep 17 00:00:00 2001 From: Jurgen Kramer Date: Mon, 9 Nov 2015 12:13:55 +0100 Subject: [PATCH 106/201] ALSA: usb: Add native DSD support for Aune X1S commit 16771c7c704769c5f3d70c024630b6e5b3eafa67 upstream. This patch adds native DSD support for the Aune X1S 32BIT/384 DSD DAC Signed-off-by: Jurgen Kramer Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 00ebc0ca008e0..528fa6e48293f 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -1271,6 +1271,7 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip, case USB_ID(0x20b1, 0x000a): /* Gustard DAC-X20U */ case USB_ID(0x20b1, 0x2009): /* DIYINHK DSD DXD 384kHz USB to I2S/DSD */ case USB_ID(0x20b1, 0x2023): /* JLsounds I2SoverUSB */ + case USB_ID(0x20b1, 0x3023): /* Aune X1S 32BIT/384 DSD DAC */ if (fp->altsetting == 3) return SNDRV_PCM_FMTBIT_DSD_U32_BE; break; From 1207663d5b08f9e1de552ccdf19b014eeb284b28 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 23 Aug 2015 15:01:08 +0200 Subject: [PATCH 107/201] usb: ehci-orion: fix probe for !GENERIC_PHY commit db1319e166c5e872c4be54eac4e47454133708cf upstream. Commit d445913ce0ab7f ("usb: ehci-orion: add optional PHY support") added support for optional phys, but devm_phy_optional_get returns -ENOSYS if GENERIC_PHY is not enabled. This causes probe failures, even when there are no phys specified: [ 1.443365] orion-ehci f1058000.usb: init f1058000.usb fail, -38 [ 1.449403] orion-ehci: probe of f1058000.usb failed with error -38 Similar to dwc3, treat -ENOSYS as no phy. Fixes: d445913ce0ab7f ("usb: ehci-orion: add optional PHY support") Signed-off-by: Jonas Gorski Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/ehci-orion.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/ehci-orion.c b/drivers/usb/host/ehci-orion.c index bfcbb9aa8816b..ee8d5faa01947 100644 --- a/drivers/usb/host/ehci-orion.c +++ b/drivers/usb/host/ehci-orion.c @@ -224,7 +224,8 @@ static int ehci_orion_drv_probe(struct platform_device *pdev) priv->phy = devm_phy_optional_get(&pdev->dev, "usb"); if (IS_ERR(priv->phy)) { err = PTR_ERR(priv->phy); - goto err_phy_get; + if (err != -ENOSYS) + goto err_phy_get; } else { err = phy_init(priv->phy); if (err) From a0046cfd74f0d5a9556e20dc21393bf58e8bed7f Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 2 Nov 2015 10:27:00 +0100 Subject: [PATCH 108/201] usblp: do not set TASK_INTERRUPTIBLE before lock commit 19cd80a214821f4b558560ebd76bfb2c38b4f3d8 upstream. It is not permitted to set task state before lock. usblp_wwait sets the state to TASK_INTERRUPTIBLE and calls mutex_lock_interruptible. Upon return from that function, the state will be TASK_RUNNING again. This is clearly a bug and a warning is generated with LOCKDEP too: WARNING: CPU: 1 PID: 5109 at kernel/sched/core.c:7404 __might_sleep+0x7d/0x90() do not call blocking ops when !TASK_RUNNING; state=1 set at [] usblp_wwait+0xa0/0x310 [usblp] Modules linked in: ... CPU: 1 PID: 5109 Comm: captmon Tainted: G W 4.2.5-0.gef2823b-default #1 Hardware name: LENOVO 23252SG/23252SG, BIOS G2ET33WW (1.13 ) 07/24/2012 ffffffff81a4edce ffff880236ec7ba8 ffffffff81716651 0000000000000000 ffff880236ec7bf8 ffff880236ec7be8 ffffffff8106e146 0000000000000282 ffffffff81a50119 000000000000028b 0000000000000000 ffff8802dab7c508 Call Trace: ... [] warn_slowpath_fmt+0x46/0x50 [] __might_sleep+0x7d/0x90 [] mutex_lock_interruptible_nested+0x2f/0x4b0 [] usblp_wwait+0xcc/0x310 [usblp] [] usblp_write+0x72/0x350 [usblp] [] __vfs_write+0x28/0xf0 ... Commit 7f477358e2384c54b190cc3b6ce28277050a041b (usblp: Implement the ENOSPC convention) moved the set prior locking. So move it back after the lock. Signed-off-by: Jiri Slaby Fixes: 7f477358e2 ("usblp: Implement the ENOSPC convention") Acked-By: Pete Zaitcev Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/usblp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 433bbc34a8a48..071964c7847f1 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -884,11 +884,11 @@ static int usblp_wwait(struct usblp *usblp, int nonblock) add_wait_queue(&usblp->wwait, &waita); for (;;) { - set_current_state(TASK_INTERRUPTIBLE); if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } + set_current_state(TASK_INTERRUPTIBLE); rc = usblp_wtest(usblp, nonblock); mutex_unlock(&usblp->mut); if (rc <= 0) From dbfee9e41918f4c8adb5f27db959c81bb19567a4 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Tue, 27 Oct 2015 23:26:33 +0200 Subject: [PATCH 109/201] usb: phy: omap-otg: fix uninitialized pointer commit 2c2025b41aeff57963f9ae2dd909fea704c625ab upstream. otg_dev->extcon was referenced before otg_dev was initialized. Fix. Fixes: a2fd2423240f ("usb: phy: omap-otg: Replace deprecated API of extcon") Reviewed-by: Chanwoo Choi Signed-off-by: Aaro Koskinen Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/phy/phy-omap-otg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c index 1270906ccb958..c4bf2de6d14ec 100644 --- a/drivers/usb/phy/phy-omap-otg.c +++ b/drivers/usb/phy/phy-omap-otg.c @@ -105,7 +105,6 @@ static int omap_otg_probe(struct platform_device *pdev) extcon = extcon_get_extcon_dev(config->extcon); if (!extcon) return -EPROBE_DEFER; - otg_dev->extcon = extcon; otg_dev = devm_kzalloc(&pdev->dev, sizeof(*otg_dev), GFP_KERNEL); if (!otg_dev) @@ -115,6 +114,7 @@ static int omap_otg_probe(struct platform_device *pdev) if (IS_ERR(otg_dev->base)) return PTR_ERR(otg_dev->base); + otg_dev->extcon = extcon; otg_dev->id_nb.notifier_call = omap_otg_id_notifier; otg_dev->vbus_nb.notifier_call = omap_otg_vbus_notifier; From 6763f60ff3f0909f6e11ece3eb9634791427acec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Petr=20=C5=A0tetiar?= Date: Tue, 3 Nov 2015 11:25:28 +0100 Subject: [PATCH 110/201] USB: qcserial: Add support for Quectel EC20 Mini PCIe module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 9d5b5ed796d7afd7e8d2ac4b4fb77c6a49463f4b upstream. It seems like this device has same vendor and product IDs as G2K devices, but it has different number of interfaces(4 vs 5) and also different interface layout which makes it currently unusable: usbcore: registered new interface driver qcserial usbserial: USB Serial support registered for Qualcomm USB modem usb 2-1.2: unknown number of interfaces: 5 lsusb output: Bus 002 Device 003: ID 05c6:9215 Qualcomm, Inc. Acer Gobi 2000 Wireless Device Descriptor: bLength 18 bDescriptorType 1 bcdUSB 2.00 bDeviceClass 0 (Defined at Interface level) bDeviceSubClass 0 bDeviceProtocol 0 bMaxPacketSize0 64 idVendor 0x05c6 Qualcomm, Inc. idProduct 0x9215 Acer Gobi 2000 Wireless Modem bcdDevice 2.32 iManufacturer 1 Quectel iProduct 2 Quectel LTE Module iSerial 0 bNumConfigurations 1 Configuration Descriptor: bLength 9 bDescriptorType 2 wTotalLength 209 bNumInterfaces 5 bConfigurationValue 1 iConfiguration 0 bmAttributes 0xa0 (Bus Powered) Remote Wakeup MaxPower 500mA Signed-off-by: Petr Štetiar [johan: rename define and add comment ] Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index f49d262e926ba..1d7a2a48552c8 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -22,6 +22,8 @@ #define DRIVER_AUTHOR "Qualcomm Inc" #define DRIVER_DESC "Qualcomm USB Serial driver" +#define QUECTEL_EC20_PID 0x9215 + /* standard device layouts supported by this driver */ enum qcserial_layouts { QCSERIAL_G2K = 0, /* Gobi 2000 */ @@ -169,6 +171,38 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); +static int handle_quectel_ec20(struct device *dev, int ifnum) +{ + int altsetting = 0; + + /* + * Quectel EC20 Mini PCIe LTE module layout: + * 0: DM/DIAG (use libqcdm from ModemManager for communication) + * 1: NMEA + * 2: AT-capable modem port + * 3: Modem interface + * 4: NDIS + */ + switch (ifnum) { + case 0: + dev_dbg(dev, "Quectel EC20 DM/DIAG interface found\n"); + break; + case 1: + dev_dbg(dev, "Quectel EC20 NMEA GPS interface found\n"); + break; + case 2: + case 3: + dev_dbg(dev, "Quectel EC20 Modem port found\n"); + break; + case 4: + /* Don't claim the QMI/net interface */ + altsetting = -1; + break; + } + + return altsetting; +} + static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) { struct usb_host_interface *intf = serial->interface->cur_altsetting; @@ -237,6 +271,12 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) altsetting = -1; break; case QCSERIAL_G2K: + /* handle non-standard layouts */ + if (nintf == 5 && id->idProduct == QUECTEL_EC20_PID) { + altsetting = handle_quectel_ec20(dev, ifnum); + goto done; + } + /* * Gobi 2K+ USB layout: * 0: QMI/net From 7413ab071feeb656dcd4b01b8b1b4b8d51313300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Mon, 16 Nov 2015 13:15:46 +0100 Subject: [PATCH 111/201] USB: qcserial: Fix support for HP lt4112 LTE/HSPA+ Gobi 4G Modem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 59536da34513c594af2a6fd35ba65ea45b6960a1 upstream. The DEVICE_HWI type was added under the faulty assumption that Huawei devices based on Qualcomm chipsets and firmware use the static USB interface numbering known from Gobi devices. But this model does not apply to Huawei devices like the HP branded lt4112 (Huawei me906e). Huawei firmwares will dynamically assign interface numbers. Functions are renumbered when the firmware is reconfigured. Fix by changing the DEVICE_HWI type to use a simplified version of Huawei's subclass + protocol scheme: Blacklisting known network interface combinations and assuming the rest are serial. Reported-and-tested-by: Muri Nicanor Tested-by: Martin Hauke Fixes: e7181d005e84 ("USB: qcserial: Add support for HP lt4112 LTE/HSPA+ Gobi 4G Modem") Signed-off-by: Bjørn Mork Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/qcserial.c | 54 ++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/drivers/usb/serial/qcserial.c b/drivers/usb/serial/qcserial.c index 1d7a2a48552c8..514fa91cf74e4 100644 --- a/drivers/usb/serial/qcserial.c +++ b/drivers/usb/serial/qcserial.c @@ -212,6 +212,10 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) __u8 ifnum; int altsetting = -1; + /* we only support vendor specific functions */ + if (intf->desc.bInterfaceClass != USB_CLASS_VENDOR_SPEC) + goto done; + nintf = serial->dev->actconfig->desc.bNumInterfaces; dev_dbg(dev, "Num Interfaces = %d\n", nintf); ifnum = intf->desc.bInterfaceNumber; @@ -337,29 +341,39 @@ static int qcprobe(struct usb_serial *serial, const struct usb_device_id *id) break; case QCSERIAL_HWI: /* - * Huawei layout: - * 0: AT-capable modem port - * 1: DM/DIAG - * 2: AT-capable modem port - * 3: CCID-compatible PCSC interface - * 4: QMI/net - * 5: NMEA + * Huawei devices map functions by subclass + protocol + * instead of interface numbers. The protocol identify + * a specific function, while the subclass indicate a + * specific firmware source + * + * This is a blacklist of functions known to be + * non-serial. The rest are assumed to be serial and + * will be handled by this driver */ - switch (ifnum) { - case 0: - case 2: - dev_dbg(dev, "Modem port found\n"); - break; - case 1: - dev_dbg(dev, "DM/DIAG interface found\n"); - break; - case 5: - dev_dbg(dev, "NMEA GPS interface found\n"); - break; - default: - /* don't claim any unsupported interface */ + switch (intf->desc.bInterfaceProtocol) { + /* QMI combined (qmi_wwan) */ + case 0x07: + case 0x37: + case 0x67: + /* QMI data (qmi_wwan) */ + case 0x08: + case 0x38: + case 0x68: + /* QMI control (qmi_wwan) */ + case 0x09: + case 0x39: + case 0x69: + /* NCM like (huawei_cdc_ncm) */ + case 0x16: + case 0x46: + case 0x76: altsetting = -1; break; + default: + dev_dbg(dev, "Huawei type serial port found (%02x/%02x/%02x)\n", + intf->desc.bInterfaceClass, + intf->desc.bInterfaceSubClass, + intf->desc.bInterfaceProtocol); } break; default: From b56ffb4c0719b374fbbcb99695fe7b0eea0bd96c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Fri, 23 Oct 2015 09:53:50 +0200 Subject: [PATCH 112/201] usb: musb: core: fix order of arguments to ulpi write callback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 705e63d2b29c8bbf091119084544d353bda70393 upstream. There is a bit of a mess in the order of arguments to the ulpi write callback. There is int ulpi_write(struct ulpi *ulpi, u8 addr, u8 val) in drivers/usb/common/ulpi.c; struct usb_phy_io_ops { ... int (*write)(struct usb_phy *x, u32 val, u32 reg); } in include/linux/usb/phy.h. The callback registered by the musb driver has to comply to the latter, but up to now had "offset" first which effectively made the function broken for correct users. So flip the order and while at it also switch to the parameter names of struct usb_phy_io_ops's write. Fixes: ffb865b1e460 ("usb: musb: add ulpi access operations") Signed-off-by: Uwe Kleine-König Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/usb/musb/musb_core.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 4a518ff123106..2c624a10748d9 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -132,7 +132,7 @@ static inline struct musb *dev_to_musb(struct device *dev) /*-------------------------------------------------------------------------*/ #ifndef CONFIG_BLACKFIN -static int musb_ulpi_read(struct usb_phy *phy, u32 offset) +static int musb_ulpi_read(struct usb_phy *phy, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -151,7 +151,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) * ULPICarKitControlDisableUTMI after clearing POWER_SUSPENDM. */ - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ | MUSB_ULPI_RDN_WR); @@ -176,7 +176,7 @@ static int musb_ulpi_read(struct usb_phy *phy, u32 offset) return ret; } -static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) +static int musb_ulpi_write(struct usb_phy *phy, u32 val, u32 reg) { void __iomem *addr = phy->io_priv; int i = 0; @@ -191,8 +191,8 @@ static int musb_ulpi_write(struct usb_phy *phy, u32 offset, u32 data) power &= ~MUSB_POWER_SUSPENDM; musb_writeb(addr, MUSB_POWER, power); - musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)offset); - musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)data); + musb_writeb(addr, MUSB_ULPI_REG_ADDR, (u8)reg); + musb_writeb(addr, MUSB_ULPI_REG_DATA, (u8)val); musb_writeb(addr, MUSB_ULPI_REG_CONTROL, MUSB_ULPI_REG_REQ); while (!(musb_readb(addr, MUSB_ULPI_REG_CONTROL) From b56df8407426969354832e5cdbe2001d14e61a13 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sat, 14 Nov 2015 16:49:30 +0000 Subject: [PATCH 113/201] USB: ti_usb_3410_5052: Add Honeywell HGI80 ID commit 1bcb49e663f88bccee35b8688e6a3da2bea31fd4 upstream. The Honeywell HGI80 is a wireless interface to the evohome connected thermostat. It uses a TI 3410 USB-serial port. Signed-off-by: David Woodhouse Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/ti_usb_3410_5052.c | 2 ++ drivers/usb/serial/ti_usb_3410_5052.h | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c index e9da41d9fe7fc..2694df2f4559b 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.c +++ b/drivers/usb/serial/ti_usb_3410_5052.c @@ -159,6 +159,7 @@ static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; @@ -191,6 +192,7 @@ static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, + { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { } /* terminator */ }; diff --git a/drivers/usb/serial/ti_usb_3410_5052.h b/drivers/usb/serial/ti_usb_3410_5052.h index 4a2423e84d558..98f35c656c02d 100644 --- a/drivers/usb/serial/ti_usb_3410_5052.h +++ b/drivers/usb/serial/ti_usb_3410_5052.h @@ -56,6 +56,10 @@ #define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID #define ABBOTT_STRIP_PORT_ID 0x3420 +/* Honeywell vendor and product IDs */ +#define HONEYWELL_VENDOR_ID 0x10ac +#define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ + /* Commands */ #define TI_GET_VERSION 0x01 #define TI_GET_PORT_STATUS 0x02 From 0399c7ca984120bbb3f215617e94c82003319e6b Mon Sep 17 00:00:00 2001 From: Aleksander Morgado Date: Wed, 11 Nov 2015 19:51:40 +0100 Subject: [PATCH 114/201] USB: serial: option: add support for Novatel MiFi USB620L MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit e07af133c3e2716db25e3e1e1d9f10c2088e9c1a upstream. Also known as Verizon U620L. The device is modeswitched from 1410:9020 to 1410:9022 by selecting the 4th USB configuration: $ sudo usb_modeswitch –v 0x1410 –p 0x9020 –u 4 This configuration provides a ECM interface as well as TTYs ('Enterprise Mode' according to the U620 Linux integration guide). Signed-off-by: Aleksander Morgado Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 6956c4f622161..82ae6afdd4d66 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -162,6 +162,7 @@ static void option_instat_callback(struct urb *urb); #define NOVATELWIRELESS_PRODUCT_HSPA_EMBEDDED_HIGHSPEED 0x9001 #define NOVATELWIRELESS_PRODUCT_E362 0x9010 #define NOVATELWIRELESS_PRODUCT_E371 0x9011 +#define NOVATELWIRELESS_PRODUCT_U620L 0x9022 #define NOVATELWIRELESS_PRODUCT_G2 0xA010 #define NOVATELWIRELESS_PRODUCT_MC551 0xB001 @@ -1060,6 +1061,7 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_MC551, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E362, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_E371, 0xff, 0xff, 0xff) }, + { USB_DEVICE_AND_INTERFACE_INFO(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_U620L, 0xff, 0x00, 0x00) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01) }, { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_H01A) }, From c88860cd6ec20ca7d518273f1bf01f1c0f3b2d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Nov 2015 21:12:33 +0100 Subject: [PATCH 115/201] USB: option: add XS Stick W100-2 from 4G Systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 638148e20c7f8f6e95017fdc13bce8549a6925e0 upstream. Thomas reports " 4gsystems sells two total different LTE-surfsticks under the same name. .. The newer version of XS Stick W100 is from "omega" .. Under windows the driver switches to the same ID, and uses MI03\6 for network and MI01\6 for modem. .. echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b01 Rev=02.32 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Now all important things are there: wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at) There is also ttyUSB0, but it is not usable, at least not for at. The device works well with qmi and ModemManager-NetworkManager. " Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: Greg Kroah-Hartman --- drivers/usb/serial/option.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 82ae6afdd4d66..e945b5195258f 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -358,6 +358,7 @@ static void option_instat_callback(struct urb *urb); /* This is the 4G XS Stick W14 a.k.a. Mobilcom Debitel Surf-Stick * * It seems to contain a Qualcomm QSC6240/6290 chipset */ #define FOUR_G_SYSTEMS_PRODUCT_W14 0x9603 +#define FOUR_G_SYSTEMS_PRODUCT_W100 0x9b01 /* iBall 3.5G connect wireless modem */ #define IBALL_3_5G_CONNECT 0x9605 @@ -523,6 +524,11 @@ static const struct option_blacklist_info four_g_w14_blacklist = { .sendsetup = BIT(0) | BIT(1), }; +static const struct option_blacklist_info four_g_w100_blacklist = { + .sendsetup = BIT(1) | BIT(2), + .reserved = BIT(3), +}; + static const struct option_blacklist_info alcatel_x200_blacklist = { .sendsetup = BIT(0) | BIT(1), .reserved = BIT(4), @@ -1655,6 +1661,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W14), .driver_info = (kernel_ulong_t)&four_g_w14_blacklist }, + { USB_DEVICE(LONGCHEER_VENDOR_ID, FOUR_G_SYSTEMS_PRODUCT_W100), + .driver_info = (kernel_ulong_t)&four_g_w100_blacklist + }, { USB_DEVICE_INTERFACE_CLASS(LONGCHEER_VENDOR_ID, SPEEDUP_PRODUCT_SU9800, 0xff) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, ZOOM_PRODUCT_4597) }, { USB_DEVICE(LONGCHEER_VENDOR_ID, IBALL_3_5G_CONNECT) }, From 72655363eb6e000aef420780655f11f1a4c1c2d9 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 18 Nov 2015 10:48:21 +0200 Subject: [PATCH 116/201] usb: xhci: fix checking ep busy for CFC commit 42df7215facf27be8d53e657dd4a12d4ebad0a44 upstream. Function ep_ring_is_processing() checks the dequeue pointer in endpoint context to know whether an endpoint is busy with processing TRBs. This is not correct since dequeue pointer field in an endpoint context is only valid when the endpoint is in Halted or Stopped states. This buggy code causes audio noise when playing sound with USB headset connected to host controllers which support CFC (one of xhci 1.1 features). This patch should exist in stable kernel since v4.3. Reported-and-tested-by: YD Tseng Signed-off-by: Lu Baolu Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 97ffe39972735..fe9e2d3a223c1 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3914,28 +3914,6 @@ static int xhci_queue_isoc_tx(struct xhci_hcd *xhci, gfp_t mem_flags, return ret; } -static int ep_ring_is_processing(struct xhci_hcd *xhci, - int slot_id, unsigned int ep_index) -{ - struct xhci_virt_device *xdev; - struct xhci_ring *ep_ring; - struct xhci_ep_ctx *ep_ctx; - struct xhci_virt_ep *xep; - dma_addr_t hw_deq; - - xdev = xhci->devs[slot_id]; - xep = &xhci->devs[slot_id]->eps[ep_index]; - ep_ring = xep->ring; - ep_ctx = xhci_get_ep_ctx(xhci, xdev->out_ctx, ep_index); - - if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) != EP_STATE_RUNNING) - return 0; - - hw_deq = le64_to_cpu(ep_ctx->deq) & ~EP_CTX_CYCLE_MASK; - return (hw_deq != - xhci_trb_virt_to_dma(ep_ring->enq_seg, ep_ring->enqueue)); -} - /* * Check transfer ring to guarantee there is enough room for the urb. * Update ISO URB start_frame and interval. @@ -4001,10 +3979,12 @@ int xhci_queue_isoc_tx_prepare(struct xhci_hcd *xhci, gfp_t mem_flags, } /* Calculate the start frame and put it in urb->start_frame. */ - if (HCC_CFC(xhci->hcc_params) && - ep_ring_is_processing(xhci, slot_id, ep_index)) { - urb->start_frame = xep->next_frame_id; - goto skip_start_over; + if (HCC_CFC(xhci->hcc_params) && !list_empty(&ep_ring->td_list)) { + if ((le32_to_cpu(ep_ctx->ep_info) & EP_STATE_MASK) == + EP_STATE_RUNNING) { + urb->start_frame = xep->next_frame_id; + goto skip_start_over; + } } start_frame = readl(&xhci->run_regs->microframe_index); From da3e8b3f94a8f2c511dba4192ea994b6abcdaa59 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 15 Nov 2015 22:37:44 +0100 Subject: [PATCH 117/201] ALSA: usb-audio: add packet size quirk for the Medeli DD305 commit 98d362becb6621bebdda7ed0eac7ad7ec6c37898 upstream. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 417ebb11cf489..4bed82c70256b 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -1341,6 +1341,7 @@ static int snd_usbmidi_out_endpoint_create(struct snd_usb_midi *umidi, * Various chips declare a packet size larger than 4 bytes, but * do not actually work with larger packets: */ + case USB_ID(0x0a67, 0x5011): /* Medeli DD305 */ case USB_ID(0x0a92, 0x1020): /* ESI M4U */ case USB_ID(0x1430, 0x474b): /* RedOctane GH MIDI INTERFACE */ case USB_ID(0x15ca, 0x0101): /* Textech USB Midi Cable */ From a90b6640f8dfea2ab42c9cce3fc1d45bb169ca65 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 15 Nov 2015 22:38:29 +0100 Subject: [PATCH 118/201] ALSA: usb-audio: prevent CH345 multiport output SysEx corruption commit 1ca8b201309d842642f221db7f02f71c0af5be2d upstream. The CH345 USB MIDI chip has two output ports. However, they are multiplexed through one pin, and the number of ports cannot be reduced even for hardware that implements only one connector, so for those devices, data sent to either port ends up on the same hardware output. This becomes a problem when both ports are used at the same time, as longer MIDI commands (such as SysEx messages) are likely to be interrupted by messages from the other port, and thus to get lost. It would not be possible for the driver to detect how many ports the device actually has, except that in practice, _all_ devices built with the CH345 have only one port. So we can just ignore the device's descriptors, and hardcode one output port. Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 3 +++ sound/usb/quirks-table.h | 11 +++++++++++ sound/usb/quirks.c | 1 + sound/usb/usbaudio.h | 1 + 4 files changed, 16 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 4bed82c70256b..5010aadb2534f 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -2374,6 +2374,9 @@ int snd_usbmidi_create(struct snd_card *card, if (err < 0) break; + err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); + break; + case QUIRK_MIDI_CH345: err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index e4756651a52c8..ecc2a4ea014d2 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -2820,6 +2820,17 @@ YAMAHA_DEVICE(0x7010, "UB99"), .idProduct = 0x1020, }, +/* QinHeng devices */ +{ + USB_DEVICE(0x1a86, 0x752d), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .vendor_name = "QinHeng", + .product_name = "CH345", + .ifnum = 1, + .type = QUIRK_MIDI_CH345 + } +}, + /* KeithMcMillen Stringport */ { USB_DEVICE(0x1f38, 0x0001), diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index 528fa6e48293f..eef9b8e4b9498 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -535,6 +535,7 @@ int snd_usb_create_quirk(struct snd_usb_audio *chip, [QUIRK_MIDI_CME] = create_any_midi_quirk, [QUIRK_MIDI_AKAI] = create_any_midi_quirk, [QUIRK_MIDI_FTDI] = create_any_midi_quirk, + [QUIRK_MIDI_CH345] = create_any_midi_quirk, [QUIRK_AUDIO_STANDARD_INTERFACE] = create_standard_audio_quirk, [QUIRK_AUDIO_FIXED_ENDPOINT] = create_fixed_stream_quirk, [QUIRK_AUDIO_EDIROL_UAXX] = create_uaxx_quirk, diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 33a176437e2e4..4b4327b45606c 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -94,6 +94,7 @@ enum quirk_type { QUIRK_MIDI_AKAI, QUIRK_MIDI_US122L, QUIRK_MIDI_FTDI, + QUIRK_MIDI_CH345, QUIRK_AUDIO_STANDARD_INTERFACE, QUIRK_AUDIO_FIXED_ENDPOINT, QUIRK_AUDIO_EDIROL_UAXX, From 7327b7fff4e17cfb785b4bc0d7754dd908d6930c Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 15 Nov 2015 22:39:08 +0100 Subject: [PATCH 119/201] ALSA: usb-audio: work around CH345 input SysEx corruption commit a91e627e3f0ed820b11d86cdc04df38f65f33a70 upstream. One of the many faults of the QinHeng CH345 USB MIDI interface chip is that it does not handle received SysEx messages correctly -- every second event packet has a wrong code index number, which is the one from the last seen message, instead of 4. For example, the two messages "FE F0 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E F7" result in the following event packets: correct: CH345: 0F FE 00 00 0F FE 00 00 04 F0 01 02 04 F0 01 02 04 03 04 05 0F 03 04 05 04 06 07 08 04 06 07 08 04 09 0A 0B 0F 09 0A 0B 04 0C 0D 0E 04 0C 0D 0E 05 F7 00 00 05 F7 00 00 A class-compliant driver must interpret an event packet with CIN 15 as having a single data byte, so the other two bytes would be ignored. The message received by the host would then be missing two bytes out of six; in this example, "F0 01 02 03 06 07 08 09 0C 0D 0E F7". These corrupted SysEx event packages contain only data bytes, while the CH345 uses event packets with a correct CIN value only for messages with a status byte, so it is possible to distinguish between these two cases by checking for the presence of this status byte. (Other bugs in the CH345's input handling, such as the corruption resulting from running status, cannot be worked around.) Signed-off-by: Clemens Ladisch Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/usb/midi.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sound/usb/midi.c b/sound/usb/midi.c index 5010aadb2534f..bec63e0d26056 100644 --- a/sound/usb/midi.c +++ b/sound/usb/midi.c @@ -174,6 +174,8 @@ struct snd_usb_midi_in_endpoint { u8 running_status_length; } ports[0x10]; u8 seen_f5; + bool in_sysex; + u8 last_cin; u8 error_resubmit; int current_port; }; @@ -467,6 +469,39 @@ static void snd_usbmidi_maudio_broken_running_status_input( } } +/* + * QinHeng CH345 is buggy: every second packet inside a SysEx has not CIN 4 + * but the previously seen CIN, but still with three data bytes. + */ +static void ch345_broken_sysex_input(struct snd_usb_midi_in_endpoint *ep, + uint8_t *buffer, int buffer_length) +{ + unsigned int i, cin, length; + + for (i = 0; i + 3 < buffer_length; i += 4) { + if (buffer[i] == 0 && i > 0) + break; + cin = buffer[i] & 0x0f; + if (ep->in_sysex && + cin == ep->last_cin && + (buffer[i + 1 + (cin == 0x6)] & 0x80) == 0) + cin = 0x4; +#if 0 + if (buffer[i + 1] == 0x90) { + /* + * Either a corrupted running status or a real note-on + * message; impossible to detect reliably. + */ + } +#endif + length = snd_usbmidi_cin_length[cin]; + snd_usbmidi_input_data(ep, 0, &buffer[i + 1], length); + ep->in_sysex = cin == 0x4; + if (!ep->in_sysex) + ep->last_cin = cin; + } +} + /* * CME protocol: like the standard protocol, but SysEx commands are sent as a * single USB packet preceded by a 0x0F byte. @@ -660,6 +695,12 @@ static struct usb_protocol_ops snd_usbmidi_cme_ops = { .output_packet = snd_usbmidi_output_standard_packet, }; +static struct usb_protocol_ops snd_usbmidi_ch345_broken_sysex_ops = { + .input = ch345_broken_sysex_input, + .output = snd_usbmidi_standard_output, + .output_packet = snd_usbmidi_output_standard_packet, +}; + /* * AKAI MPD16 protocol: * @@ -2377,6 +2418,7 @@ int snd_usbmidi_create(struct snd_card *card, err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; case QUIRK_MIDI_CH345: + umidi->usb_protocol_ops = &snd_usbmidi_ch345_broken_sysex_ops; err = snd_usbmidi_detect_per_port_endpoints(umidi, endpoints); break; default: From 9f3de4e4d2150fe75a465fbc15b5c733ba7db5f2 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 6 Oct 2015 15:12:06 +0100 Subject: [PATCH 120/201] ttyFDC: Fix build problems due to use of module_{init,exit} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 3e8137a185240fa6da0ff91cd9c604716371903b upstream. Commit 0fd972a7d91d (module: relocate module_init from init.h to module.h) broke the build of ttyFDC driver due to that driver's (mis)use of module_mips_cdmm_driver() without first including module.h, for example: In file included from ./arch/mips/include/asm/cdmm.h +11 :0, from drivers/tty/mips_ejtag_fdc.c +34 : include/linux/device.h +1295 :1: warning: data definition has no type or storage class ./arch/mips/include/asm/cdmm.h +84 :2: note: in expansion of macro ‘module_driver’ drivers/tty/mips_ejtag_fdc.c +1157 :1: note: in expansion of macro ‘module_mips_cdmm_driver’ include/linux/device.h +1295 :1: error: type defaults to ‘int’ in declaration of ‘module_init’ [-Werror=implicit-int] ./arch/mips/include/asm/cdmm.h +84 :2: note: in expansion of macro ‘module_driver’ drivers/tty/mips_ejtag_fdc.c +1157 :1: note: in expansion of macro ‘module_mips_cdmm_driver’ drivers/tty/mips_ejtag_fdc.c +1157 :1: warning: parameter names (without types) in function declaration Instead of just adding the module.h include, switch to using the new builtin_mips_cdmm_driver() helper macro and drop the remove callback, since it isn't needed. If module support is added later, the code can always be resurrected. Signed-off-by: James Hogan Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Paul Gortmaker Cc: linux-mips@linux-mips.org Signed-off-by: Greg Kroah-Hartman --- drivers/tty/mips_ejtag_fdc.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/drivers/tty/mips_ejtag_fdc.c b/drivers/tty/mips_ejtag_fdc.c index a8c8cfd52a23b..57ff5d3157a65 100644 --- a/drivers/tty/mips_ejtag_fdc.c +++ b/drivers/tty/mips_ejtag_fdc.c @@ -1048,38 +1048,6 @@ static int mips_ejtag_fdc_tty_probe(struct mips_cdmm_device *dev) return ret; } -static int mips_ejtag_fdc_tty_remove(struct mips_cdmm_device *dev) -{ - struct mips_ejtag_fdc_tty *priv = mips_cdmm_get_drvdata(dev); - struct mips_ejtag_fdc_tty_port *dport; - int nport; - unsigned int cfg; - - if (priv->irq >= 0) { - raw_spin_lock_irq(&priv->lock); - cfg = mips_ejtag_fdc_read(priv, REG_FDCFG); - /* Disable interrupts */ - cfg &= ~(REG_FDCFG_TXINTTHRES | REG_FDCFG_RXINTTHRES); - cfg |= REG_FDCFG_TXINTTHRES_DISABLED; - cfg |= REG_FDCFG_RXINTTHRES_DISABLED; - mips_ejtag_fdc_write(priv, REG_FDCFG, cfg); - raw_spin_unlock_irq(&priv->lock); - } else { - priv->removing = true; - del_timer_sync(&priv->poll_timer); - } - kthread_stop(priv->thread); - if (dev->cpu == 0) - mips_ejtag_fdc_con.tty_drv = NULL; - tty_unregister_driver(priv->driver); - for (nport = 0; nport < NUM_TTY_CHANNELS; nport++) { - dport = &priv->ports[nport]; - tty_port_destroy(&dport->port); - } - put_tty_driver(priv->driver); - return 0; -} - static int mips_ejtag_fdc_tty_cpu_down(struct mips_cdmm_device *dev) { struct mips_ejtag_fdc_tty *priv = mips_cdmm_get_drvdata(dev); @@ -1152,12 +1120,11 @@ static struct mips_cdmm_driver mips_ejtag_fdc_tty_driver = { .name = "mips_ejtag_fdc", }, .probe = mips_ejtag_fdc_tty_probe, - .remove = mips_ejtag_fdc_tty_remove, .cpu_down = mips_ejtag_fdc_tty_cpu_down, .cpu_up = mips_ejtag_fdc_tty_cpu_up, .id_table = mips_ejtag_fdc_tty_ids, }; -module_mips_cdmm_driver(mips_ejtag_fdc_tty_driver); +builtin_mips_cdmm_driver(mips_ejtag_fdc_tty_driver); static int __init mips_ejtag_fdc_init_console(void) { From 45de0e37d23272442238afc0be3450494e4c853a Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: [PATCH 121/201] tty: audit: Fix audit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 upstream. The data to audit/record is in the 'from' buffer (ie., the input read buffer). Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman Signed-off-by: Greg Kroah-Hartman --- drivers/tty/n_tty.c | 2 +- drivers/tty/tty_audit.c | 2 +- include/linux/tty.h | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index b09023b071696..a0285da0244cf 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -169,7 +169,7 @@ static inline int tty_copy_to_user(struct tty_struct *tty, { struct n_tty_data *ldata = tty->disc_data; - tty_audit_add_data(tty, to, n, ldata->icanon); + tty_audit_add_data(tty, from, n, ldata->icanon); return copy_to_user(to, from, n); } diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c index 90ca082935f63..3d245cd3d8e62 100644 --- a/drivers/tty/tty_audit.c +++ b/drivers/tty/tty_audit.c @@ -265,7 +265,7 @@ static struct tty_audit_buf *tty_audit_buf_get(struct tty_struct *tty, * * Audit @data of @size from @tty, if necessary. */ -void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon) { struct tty_audit_buf *buf; diff --git a/include/linux/tty.h b/include/linux/tty.h index d072ded416786..9bddda029b459 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -605,7 +605,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -613,8 +613,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) From 2be56f43e40a65249132d153b14b8c33cb635d4d Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Wed, 11 Nov 2015 08:03:54 -0500 Subject: [PATCH 122/201] tty: Fix tty_send_xchar() lock order inversion commit ee0c1a65cf95230d5eb3d9de94fd2ead9a428c67 upstream. The correct lock order is atomic_write_lock => termios_rwsem, as established by tty_write() => n_tty_write(). Fixes: c274f6ef1c666 ("tty: Hold termios_rwsem for tcflow(TCIxxx)") Reported-and-Tested-by: Dmitry Vyukov Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_io.c | 4 ++++ drivers/tty/tty_ioctl.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 2eefaa6e3e3a4..f435977de740d 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1282,18 +1282,22 @@ int tty_send_xchar(struct tty_struct *tty, char ch) int was_stopped = tty->stopped; if (tty->ops->send_xchar) { + down_read(&tty->termios_rwsem); tty->ops->send_xchar(tty, ch); + up_read(&tty->termios_rwsem); return 0; } if (tty_write_lock(tty, 0) < 0) return -ERESTARTSYS; + down_read(&tty->termios_rwsem); if (was_stopped) start_tty(tty); tty->ops->write(tty, &ch, 1); if (was_stopped) stop_tty(tty); + up_read(&tty->termios_rwsem); tty_write_unlock(tty); return 0; } diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 9c5aebfe7053c..1445dd39aa622 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -1147,16 +1147,12 @@ int n_tty_ioctl_helper(struct tty_struct *tty, struct file *file, spin_unlock_irq(&tty->flow_lock); break; case TCIOFF: - down_read(&tty->termios_rwsem); if (STOP_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, STOP_CHAR(tty)); - up_read(&tty->termios_rwsem); break; case TCION: - down_read(&tty->termios_rwsem); if (START_CHAR(tty) != __DISABLED_CHAR) retval = tty_send_xchar(tty, START_CHAR(tty)); - up_read(&tty->termios_rwsem); break; default: return -EINVAL; From 7588e627c8891e53e86c600d312cef20acef81e5 Mon Sep 17 00:00:00 2001 From: Rajmohan Mani Date: Wed, 18 Nov 2015 10:48:20 +0200 Subject: [PATCH 123/201] xhci: Workaround to get Intel xHCI reset working more reliably commit a5964396190d0c40dd549c23848c282fffa5d1f2 upstream. Existing Intel xHCI controllers require a delay of 1 mS, after setting the CMD_RESET bit in command register, before accessing any HC registers. This allows the HC to complete the reset operation and be ready for HC register access. Without this delay, the subsequent HC register access, may result in a system hang, very rarely. Verified CherryView / Braswell platforms go through over 5000 warm reboot cycles (which was not possible without this patch), without any xHCI reset hang. Signed-off-by: Rajmohan Mani Tested-by: Joe Lawrence Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 9957bd96d4bc3..385f9f5d6715f 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -175,6 +175,16 @@ int xhci_reset(struct xhci_hcd *xhci) command |= CMD_RESET; writel(command, &xhci->op_regs->command); + /* Existing Intel xHCI controllers require a delay of 1 mS, + * after setting the CMD_RESET bit, and before accessing any + * HC registers. This allows the HC to complete the + * reset operation and be ready for HC register access. + * Without this delay, the subsequent HC register access, + * may result in a system hang very rarely. + */ + if (xhci->quirks & XHCI_INTEL_HOST) + udelay(1000); + ret = xhci_handshake(&xhci->op_regs->command, CMD_RESET, 0, 10 * 1000 * 1000); if (ret) From b2cb7afbb1693195b3171e85b10c0aa42ff37990 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sun, 27 Sep 2015 16:45:01 -0400 Subject: [PATCH 124/201] staging/lustre: use jiffies for lp_last_query times commit 9f088dba3cc267ea11ec0da318cd0175575b5f9b upstream. The recently introduced lnet_peer_set_alive() function uses get_seconds() to read the current time into a shared variable, but all other uses of that variable compare it to jiffies values. This changes the current use to jiffies as well for consistency. Signed-off-by: Arnd Bergmann Fixes: af3fa7c71bf ("staging/lustre/lnet: peer aliveness status and NI status") Cc: Liang Zhen Cc: James Simmons Cc: Isaac Huang Signed-off-by: Oleg Drokin Signed-off-by: Greg Kroah-Hartman --- drivers/staging/lustre/include/linux/lnet/lib-lnet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h index a9c9a077c77d3..bc3d907fd20fd 100644 --- a/drivers/staging/lustre/include/linux/lnet/lib-lnet.h +++ b/drivers/staging/lustre/include/linux/lnet/lib-lnet.h @@ -680,7 +680,7 @@ void lnet_debug_peer(lnet_nid_t nid); static inline void lnet_peer_set_alive(lnet_peer_t *lp) { - lp->lp_last_alive = lp->lp_last_query = get_seconds(); + lp->lp_last_alive = lp->lp_last_query = jiffies; if (!lp->lp_alive) lnet_notify_locked(lp, 0, 1, lp->lp_last_alive); } From a8adab1a510bbb18be8cf2e35276dc933db814cc Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 20 Nov 2015 11:25:04 -0500 Subject: [PATCH 125/201] xen/events: Always allocate legacy interrupts on PV guests commit b4ff8389ed14b849354b59ce9b360bdefcdbf99c upstream. After commit 8c058b0b9c34 ("x86/irq: Probe for PIC presence before allocating descs for legacy IRQs") early_irq_init() will no longer preallocate descriptors for legacy interrupts if PIC does not exist, which is the case for Xen PV guests. Therefore we may need to allocate those descriptors ourselves. Signed-off-by: Boris Ostrovsky Suggested-by: Thomas Gleixner Signed-off-by: David Vrabel Cc: Vitaly Kuznetsov Signed-off-by: Greg Kroah-Hartman --- arch/arm/include/asm/irq.h | 5 +++++ arch/arm64/include/asm/irq.h | 5 +++++ drivers/xen/events/events_base.c | 5 +++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h index be1d07d59ee97..1bd9510de1b9c 100644 --- a/arch/arm/include/asm/irq.h +++ b/arch/arm/include/asm/irq.h @@ -40,6 +40,11 @@ extern void arch_trigger_all_cpu_backtrace(bool); #define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x) #endif +static inline int nr_legacy_irqs(void) +{ + return NR_IRQS_LEGACY; +} + #endif #endif diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h index bbb251b14746c..8b9bf54105b3e 100644 --- a/arch/arm64/include/asm/irq.h +++ b/arch/arm64/include/asm/irq.h @@ -21,4 +21,9 @@ static inline void acpi_irq_init(void) } #define acpi_irq_init acpi_irq_init +static inline int nr_legacy_irqs(void) +{ + return 0; +} + #endif diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index 6cd5e65c4aff0..fb236239972f7 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #endif @@ -420,7 +421,7 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi) return xen_allocate_irq_dynamic(); /* Legacy IRQ descriptors are already allocated by the arch. */ - if (gsi < NR_IRQS_LEGACY) + if (gsi < nr_legacy_irqs()) irq = gsi; else irq = irq_alloc_desc_at(gsi, -1); @@ -446,7 +447,7 @@ static void xen_free_irq(unsigned irq) kfree(info); /* Legacy IRQ descriptors are managed by the arch. */ - if (irq < NR_IRQS_LEGACY) + if (irq < nr_legacy_irqs()) return; irq_free_desc(irq); From 5f8b2364ca1fc71022ae7e33b6c517133b8267ea Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 9 Dec 2015 14:40:26 -0500 Subject: [PATCH 126/201] Linux 4.3.1 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index d5b37391195f8..266eeacc14904 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 3 -SUBLEVEL = 0 +SUBLEVEL = 1 EXTRAVERSION = NAME = Blurry Fish Butt From 6b8b7d4fb9d71db1d78d29ce39323e198049e2c3 Mon Sep 17 00:00:00 2001 From: sudip Date: Thu, 17 Sep 2015 13:12:51 +0530 Subject: [PATCH 127/201] crypto: asymmetric_keys - remove always false comparison commit 4dd17c9c8a30c8d8cd1c9d4b94f08aca4b038d3e upstream. hour, min and sec are unsigned int and they can never be less than zero. Signed-off-by: Sudip Mukherjee Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/x509_cert_parser.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index af71878dc15bf..3000ea3b66874 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -546,9 +546,9 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, if (year < 1970 || mon < 1 || mon > 12 || day < 1 || day > mon_len || - hour < 0 || hour > 23 || - min < 0 || min > 59 || - sec < 0 || sec > 59) + hour > 23 || + min > 59 || + sec > 59) goto invalid_time; *_t = mktime64(year, mon, day, hour, min, sec); From 14a837d297366fed10bc5f6f2bbecf9400e84e15 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 12 Nov 2015 09:36:40 +0000 Subject: [PATCH 128/201] X.509: Fix the time validation [ver #2] commit cc25b994acfbc901429da682d0f73c190e960206 upstream. This fixes CVE-2015-5327. It affects kernels from 4.3-rc1 onwards. Fix the X.509 time validation to use month number-1 when looking up the number of days in that month. Also put the month number validation before doing the lookup so as not to risk overrunning the array. This can be tested by doing the following: cat < Signed-off-by: David Howells Tested-by: Mimi Zohar Acked-by: David Woodhouse Signed-off-by: James Morris Signed-off-by: Greg Kroah-Hartman --- crypto/asymmetric_keys/x509_cert_parser.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/crypto/asymmetric_keys/x509_cert_parser.c b/crypto/asymmetric_keys/x509_cert_parser.c index 3000ea3b66874..021d39c0ba75a 100644 --- a/crypto/asymmetric_keys/x509_cert_parser.c +++ b/crypto/asymmetric_keys/x509_cert_parser.c @@ -531,7 +531,11 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, if (*p != 'Z') goto unsupported_time; - mon_len = month_lengths[mon]; + if (year < 1970 || + mon < 1 || mon > 12) + goto invalid_time; + + mon_len = month_lengths[mon - 1]; if (mon == 2) { if (year % 4 == 0) { mon_len = 29; @@ -543,14 +547,12 @@ int x509_decode_time(time64_t *_t, size_t hdrlen, } } - if (year < 1970 || - mon < 1 || mon > 12 || - day < 1 || day > mon_len || + if (day < 1 || day > mon_len || hour > 23 || min > 59 || sec > 59) goto invalid_time; - + *_t = mktime64(year, mon, day, hour, min, sec); return 0; From 14fd7c710c1f9f31ec4d36413c3066092c71aa3d Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 10 Dec 2015 18:11:40 -0500 Subject: [PATCH 129/201] Linux 4.3.2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 266eeacc14904..1a4953b3e10ff 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 3 -SUBLEVEL = 1 +SUBLEVEL = 2 EXTRAVERSION = NAME = Blurry Fish Butt From bfa97da9146ac6b41f9b749e7dfd0921eeff9170 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Wed, 21 Oct 2015 14:04:47 +0100 Subject: [PATCH 130/201] certs: add .gitignore to stop git nagging about x509_certificate_list commit 48dbc164b40dd9195dea8cd966e394819e420b64 upstream. Currently we see this in "git status" if we build in the source dir: Untracked files: (use "git add ..." to include in what will be committed) certs/x509_certificate_list It looks like it used to live in kernel/ so we squash that .gitignore entry at the same time. I didn't bother to dig through git history to see when it moved, since it is just a minor annoyance at most. Cc: David Woodhouse Cc: keyrings@linux-nfs.org Signed-off-by: Paul Gortmaker Signed-off-by: David Howells Signed-off-by: Greg Kroah-Hartman --- certs/.gitignore | 4 ++++ kernel/.gitignore | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 certs/.gitignore diff --git a/certs/.gitignore b/certs/.gitignore new file mode 100644 index 0000000000000..f51aea4a71ec8 --- /dev/null +++ b/certs/.gitignore @@ -0,0 +1,4 @@ +# +# Generated files +# +x509_certificate_list diff --git a/kernel/.gitignore b/kernel/.gitignore index 790d83c7d1607..b3097bde4e9cb 100644 --- a/kernel/.gitignore +++ b/kernel/.gitignore @@ -5,4 +5,3 @@ config_data.h config_data.gz timeconst.h hz.bc -x509_certificate_list From 808e146b0ae32eb615e048c3b87016fcad3dd500 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?fran=C3=A7ois=20romieu?= Date: Wed, 11 Nov 2015 23:35:18 +0100 Subject: [PATCH 131/201] r8169: fix kasan reported skb use-after-free. [ Upstream commit 39174291d8e8acfd1113214a943263aaa03c57c8 ] Signed-off-by: Francois Romieu Reported-by: Dave Jones Fixes: d7d2d89d4b0af ("r8169: Add software counter for multicast packages") Acked-by: Eric Dumazet Acked-by: Corinna Vinschen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/realtek/r8169.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index b4f21232019a9..79ef799f88ab1 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -7429,15 +7429,15 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget rtl8169_rx_vlan_tag(desc, skb); + if (skb->pkt_type == PACKET_MULTICAST) + dev->stats.multicast++; + napi_gro_receive(&tp->napi, skb); u64_stats_update_begin(&tp->rx_stats.syncp); tp->rx_stats.packets++; tp->rx_stats.bytes += pkt_size; u64_stats_update_end(&tp->rx_stats.syncp); - - if (skb->pkt_type == PACKET_MULTICAST) - dev->stats.multicast++; } release_descriptor: desc->opts2 = 0; From 382c8e80e40bcadb90ba1a156f522f7b5a22109f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 10 Nov 2015 16:23:15 +0100 Subject: [PATCH 132/201] af-unix: fix use-after-free with concurrent readers while splicing [ Upstream commit 73ed5d25dce0354ea381d6dc93005c3085fae03d ] During splicing an af-unix socket to a pipe we have to drop all af-unix socket locks. While doing so we allow another reader to enter unix_stream_read_generic which can read, copy and finally free another skb. If exactly this skb is just in process of being spliced we get a use-after-free report by kasan. First, we must make sure to not have a free while the skb is used during the splice operation. We simply increment its use counter before unlocking the reader lock. Stream sockets have the nice characteristic that we don't care about zero length writes and they never reach the peer socket's queue. That said, we can take the UNIXCB.consumed field as the indicator if the skb was already freed from the socket's receive queue. If the skb was fully consumed after we locked the reader side again we know it has been dropped by a second reader. We indicate a short read to user space and abort the current splice operation. This bug has been found with syzkaller (http://github.com/google/syzkaller) by Dmitry Vyukov. Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets") Reported-by: Dmitry Vyukov Cc: Dmitry Vyukov Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 94f658235fb49..a5afe414a2d1d 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -440,6 +440,7 @@ static void unix_release_sock(struct sock *sk, int embrion) if (state == TCP_LISTEN) unix_release_sock(skb->sk, 1); /* passed fds are erased in the kfree_skb hook */ + UNIXCB(skb).consumed = skb->len; kfree_skb(skb); } @@ -2071,6 +2072,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) do { int chunk; + bool drop_skb; struct sk_buff *skb, *last; unix_state_lock(sk); @@ -2151,7 +2153,11 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) } chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); + skb_get(skb); chunk = state->recv_actor(skb, skip, chunk, state); + drop_skb = !unix_skb_len(skb); + /* skb is only safe to use if !drop_skb */ + consume_skb(skb); if (chunk < 0) { if (copied == 0) copied = -EFAULT; @@ -2160,6 +2166,18 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) copied += chunk; size -= chunk; + if (drop_skb) { + /* the skb was touched by a concurrent reader; + * we should not expect anything from this skb + * anymore and assume it invalid - we can be + * sure it was dropped from the socket queue + * + * let's report a short read + */ + err = 0; + break; + } + /* Mark read part of skb as used */ if (!(flags & MSG_PEEK)) { UNIXCB(skb).consumed += chunk; From a78f42950a48ed4f4721b29eb228d709ac91af9b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 16 Nov 2015 16:25:56 +0100 Subject: [PATCH 133/201] af_unix: don't append consumed skbs to sk_receive_queue [ Upstream commit 8844f97238ca6c1ca92a5d6c69f53efd361a266f ] In case multiple writes to a unix stream socket race we could end up in a situation where we pre-allocate a new skb for use in unix_stream_sendpage but have to free it again in the locked section because another skb has been appended meanwhile, which we must use. Accidentally we didn't clear the pointer after consuming it and so we touched freed memory while appending it to the sk_receive_queue. So, clear the pointer after consuming the skb. This bug has been found with syzkaller (http://github.com/google/syzkaller) by Dmitry Vyukov. Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support") Reported-by: Dmitry Vyukov Cc: Dmitry Vyukov Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index a5afe414a2d1d..3e2ca39f3b68c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1799,6 +1799,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, * this - does no harm */ consume_skb(newskb); + newskb = NULL; } if (skb_append_pagefrags(skb, page, offset, size)) { From 26c04dfb4cfa10e6ed8cafd63313fef2be865fa0 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 17 Nov 2015 15:10:59 +0100 Subject: [PATCH 134/201] af_unix: take receive queue lock while appending new skb [ Upstream commit a3a116e04cc6a94d595ead4e956ab1bc1d2f4746 ] While possibly in future we don't necessarily need to use sk_buff_head.lock this is a rather larger change, as it affects the af_unix fd garbage collector, diag and socket cleanups. This is too much for a stable patch. For the time being grab sk_buff_head.lock without disabling bh and irqs, so don't use locked skb_queue_tail. Fixes: 869e7c62486e ("net: af_unix: implement stream sendpage support") Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Reported-by: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3e2ca39f3b68c..42ab2cc1f05d8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1812,8 +1812,11 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, skb->truesize += size; atomic_add(size, &sk->sk_wmem_alloc); - if (newskb) + if (newskb) { + spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, newskb); + spin_unlock(&other->sk_receive_queue.lock); + } unix_state_unlock(other); mutex_unlock(&unix_sk(other)->readlock); From 58a6a46a036ce81a2a8ecaa6fc1537c894349e3f Mon Sep 17 00:00:00 2001 From: Rainer Weikusat Date: Fri, 20 Nov 2015 22:07:23 +0000 Subject: [PATCH 135/201] unix: avoid use-after-free in ep_remove_wait_queue [ Upstream commit 7d267278a9ece963d77eefec61630223fce08c6c ] Rainer Weikusat writes: An AF_UNIX datagram socket being the client in an n:1 association with some server socket is only allowed to send messages to the server if the receive queue of this socket contains at most sk_max_ack_backlog datagrams. This implies that prospective writers might be forced to go to sleep despite none of the message presently enqueued on the server receive queue were sent by them. In order to ensure that these will be woken up once space becomes again available, the present unix_dgram_poll routine does a second sock_poll_wait call with the peer_wait wait queue of the server socket as queue argument (unix_dgram_recvmsg does a wake up on this queue after a datagram was received). This is inherently problematic because the server socket is only guaranteed to remain alive for as long as the client still holds a reference to it. In case the connection is dissolved via connect or by the dead peer detection logic in unix_dgram_sendmsg, the server socket may be freed despite "the polling mechanism" (in particular, epoll) still has a pointer to the corresponding peer_wait queue. There's no way to forcibly deregister a wait queue with epoll. Based on an idea by Jason Baron, the patch below changes the code such that a wait_queue_t belonging to the client socket is enqueued on the peer_wait queue of the server whenever the peer receive queue full condition is detected by either a sendmsg or a poll. A wake up on the peer queue is then relayed to the ordinary wait queue of the client socket via wake function. The connection to the peer wait queue is again dissolved if either a wake up is about to be relayed or the client socket reconnects or a dead peer is detected or the client socket is itself closed. This enables removing the second sock_poll_wait from unix_dgram_poll, thus avoiding the use-after-free, while still ensuring that no blocked writer sleeps forever. Signed-off-by: Rainer Weikusat Fixes: ec0d215f9420 ("af_unix: fix 'poll for write'/connected DGRAM sockets") Reviewed-by: Jason Baron Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/af_unix.h | 1 + net/unix/af_unix.c | 183 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 165 insertions(+), 19 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index b36d837c701ec..2a91a0561a478 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -62,6 +62,7 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; + wait_queue_t peer_wake; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 42ab2cc1f05d8..153b2f2bbbb8b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -326,6 +326,118 @@ static struct sock *unix_find_socket_byinode(struct inode *i) return s; } +/* Support code for asymmetrically connected dgram sockets + * + * If a datagram socket is connected to a socket not itself connected + * to the first socket (eg, /dev/log), clients may only enqueue more + * messages if the present receive queue of the server socket is not + * "too large". This means there's a second writeability condition + * poll and sendmsg need to test. The dgram recv code will do a wake + * up on the peer_wait wait queue of a socket upon reception of a + * datagram which needs to be propagated to sleeping would-be writers + * since these might not have sent anything so far. This can't be + * accomplished via poll_wait because the lifetime of the server + * socket might be less than that of its clients if these break their + * association with it or if the server socket is closed while clients + * are still connected to it and there's no way to inform "a polling + * implementation" that it should let go of a certain wait queue + * + * In order to propagate a wake up, a wait_queue_t of the client + * socket is enqueued on the peer_wait queue of the server socket + * whose wake function does a wake_up on the ordinary client socket + * wait queue. This connection is established whenever a write (or + * poll for write) hit the flow control condition and broken when the + * association to the server socket is dissolved or after a wake up + * was relayed. + */ + +static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags, + void *key) +{ + struct unix_sock *u; + wait_queue_head_t *u_sleep; + + u = container_of(q, struct unix_sock, peer_wake); + + __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait, + q); + u->peer_wake.private = NULL; + + /* relaying can only happen while the wq still exists */ + u_sleep = sk_sleep(&u->sk); + if (u_sleep) + wake_up_interruptible_poll(u_sleep, key); + + return 0; +} + +static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other) +{ + struct unix_sock *u, *u_other; + int rc; + + u = unix_sk(sk); + u_other = unix_sk(other); + rc = 0; + spin_lock(&u_other->peer_wait.lock); + + if (!u->peer_wake.private) { + u->peer_wake.private = other; + __add_wait_queue(&u_other->peer_wait, &u->peer_wake); + + rc = 1; + } + + spin_unlock(&u_other->peer_wait.lock); + return rc; +} + +static void unix_dgram_peer_wake_disconnect(struct sock *sk, + struct sock *other) +{ + struct unix_sock *u, *u_other; + + u = unix_sk(sk); + u_other = unix_sk(other); + spin_lock(&u_other->peer_wait.lock); + + if (u->peer_wake.private == other) { + __remove_wait_queue(&u_other->peer_wait, &u->peer_wake); + u->peer_wake.private = NULL; + } + + spin_unlock(&u_other->peer_wait.lock); +} + +static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk, + struct sock *other) +{ + unix_dgram_peer_wake_disconnect(sk, other); + wake_up_interruptible_poll(sk_sleep(sk), + POLLOUT | + POLLWRNORM | + POLLWRBAND); +} + +/* preconditions: + * - unix_peer(sk) == other + * - association is stable + */ +static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other) +{ + int connected; + + connected = unix_dgram_peer_wake_connect(sk, other); + + if (unix_recvq_full(other)) + return 1; + + if (connected) + unix_dgram_peer_wake_disconnect(sk, other); + + return 0; +} + static inline int unix_writable(struct sock *sk) { return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf; @@ -430,6 +542,8 @@ static void unix_release_sock(struct sock *sk, int embrion) skpair->sk_state_change(skpair); sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); } + + unix_dgram_peer_wake_disconnect(sk, skpair); sock_put(skpair); /* It may now die */ unix_peer(sk) = NULL; } @@ -665,6 +779,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) INIT_LIST_HEAD(&u->link); mutex_init(&u->readlock); /* single task reading lock */ init_waitqueue_head(&u->peer_wait); + init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1032,6 +1147,8 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr, if (unix_peer(sk)) { struct sock *old_peer = unix_peer(sk); unix_peer(sk) = other; + unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer); + unix_state_double_unlock(sk, other); if (other != old_peer) @@ -1471,6 +1588,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct scm_cookie scm; int max_level; int data_len = 0; + int sk_locked; wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); @@ -1549,12 +1667,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_free; } + sk_locked = 0; unix_state_lock(other); +restart_locked: err = -EPERM; if (!unix_may_send(sk, other)) goto out_unlock; - if (sock_flag(other, SOCK_DEAD)) { + if (unlikely(sock_flag(other, SOCK_DEAD))) { /* * Check with 1003.1g - what should * datagram error @@ -1562,10 +1682,14 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, unix_state_unlock(other); sock_put(other); + if (!sk_locked) + unix_state_lock(sk); + err = 0; - unix_state_lock(sk); if (unix_peer(sk) == other) { unix_peer(sk) = NULL; + unix_dgram_peer_wake_disconnect_wakeup(sk, other); + unix_state_unlock(sk); unix_dgram_disconnected(sk, other); @@ -1591,21 +1715,38 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, goto out_unlock; } - if (unix_peer(other) != sk && unix_recvq_full(other)) { - if (!timeo) { - err = -EAGAIN; - goto out_unlock; + if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) { + if (timeo) { + timeo = unix_wait_for_peer(other, timeo); + + err = sock_intr_errno(timeo); + if (signal_pending(current)) + goto out_free; + + goto restart; } - timeo = unix_wait_for_peer(other, timeo); + if (!sk_locked) { + unix_state_unlock(other); + unix_state_double_lock(sk, other); + } - err = sock_intr_errno(timeo); - if (signal_pending(current)) - goto out_free; + if (unix_peer(sk) != other || + unix_dgram_peer_wake_me(sk, other)) { + err = -EAGAIN; + sk_locked = 1; + goto out_unlock; + } - goto restart; + if (!sk_locked) { + sk_locked = 1; + goto restart_locked; + } } + if (unlikely(sk_locked)) + unix_state_unlock(sk); + if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); @@ -1619,6 +1760,8 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, return len; out_unlock: + if (sk_locked) + unix_state_unlock(sk); unix_state_unlock(other); out_free: kfree_skb(skb); @@ -2475,14 +2618,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock, return mask; writable = unix_writable(sk); - other = unix_peer_get(sk); - if (other) { - if (unix_peer(other) != sk) { - sock_poll_wait(file, &unix_sk(other)->peer_wait, wait); - if (unix_recvq_full(other)) - writable = 0; - } - sock_put(other); + if (writable) { + unix_state_lock(sk); + + other = unix_peer(sk); + if (other && unix_peer(other) != sk && + unix_recvq_full(other) && + unix_dgram_peer_wake_me(sk, other)) + writable = 0; + + unix_state_unlock(sk); } if (writable) From 343f4f8c013a6e41ba51da7f00c3e5c95197fa59 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 26 Nov 2015 12:08:18 +0100 Subject: [PATCH 136/201] af-unix: passcred support for sendpage [ Upstream commit 9490f886b192964796285907d777ff00fba1fa0f ] sendpage did not care about credentials at all. This could lead to situations in which because of fd passing between processes we could append data to skbs with different scm data. It is illegal to splice those skbs together. Instead we have to allocate a new skb and if requested fill out the scm details. Fixes: 869e7c62486ec ("net: af_unix: implement stream sendpage support") Reported-by: Al Viro Cc: Al Viro Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/unix/af_unix.c | 79 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 153b2f2bbbb8b..128b0982c96b3 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1550,6 +1550,14 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } +static bool unix_passcred_enabled(const struct socket *sock, + const struct sock *other) +{ + return test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags); +} + /* * Some apps rely on write() giving SCM_CREDENTIALS * We include credentials if source or destination socket @@ -1560,14 +1568,41 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, { if (UNIXCB(skb).pid) return; - if (test_bit(SOCK_PASSCRED, &sock->flags) || - !other->sk_socket || - test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + if (unix_passcred_enabled(sock, other)) { UNIXCB(skb).pid = get_pid(task_tgid(current)); current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid); } } +static int maybe_init_creds(struct scm_cookie *scm, + struct socket *socket, + const struct sock *other) +{ + int err; + struct msghdr msg = { .msg_controllen = 0 }; + + err = scm_send(socket, &msg, scm, false); + if (err) + return err; + + if (unix_passcred_enabled(socket, other)) { + scm->pid = get_pid(task_tgid(current)); + current_uid_gid(&scm->creds.uid, &scm->creds.gid); + } + return err; +} + +static bool unix_skb_scm_eq(struct sk_buff *skb, + struct scm_cookie *scm) +{ + const struct unix_skb_parms *u = &UNIXCB(skb); + + return u->pid == scm->pid && + uid_eq(u->uid, scm->creds.uid) && + gid_eq(u->gid, scm->creds.gid) && + unix_secdata_eq(scm, skb); +} + /* * Send AF_UNIX data. */ @@ -1883,8 +1918,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, int offset, size_t size, int flags) { - int err = 0; - bool send_sigpipe = true; + int err; + bool send_sigpipe = false; + bool init_scm = true; + struct scm_cookie scm; struct sock *other, *sk = socket->sk; struct sk_buff *skb, *newskb = NULL, *tail = NULL; @@ -1902,7 +1939,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, &err, 0); if (!newskb) - return err; + goto err; } /* we must acquire readlock as we modify already present @@ -1911,12 +1948,12 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, err = mutex_lock_interruptible(&unix_sk(other)->readlock); if (err) { err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; - send_sigpipe = false; goto err; } if (sk->sk_shutdown & SEND_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_unlock; } @@ -1925,17 +1962,27 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, if (sock_flag(other, SOCK_DEAD) || other->sk_shutdown & RCV_SHUTDOWN) { err = -EPIPE; + send_sigpipe = true; goto err_state_unlock; } + if (init_scm) { + err = maybe_init_creds(&scm, socket, other); + if (err) + goto err_state_unlock; + init_scm = false; + } + skb = skb_peek_tail(&other->sk_receive_queue); if (tail && tail == skb) { skb = newskb; - } else if (!skb) { - if (newskb) + } else if (!skb || !unix_skb_scm_eq(skb, &scm)) { + if (newskb) { skb = newskb; - else + } else { + tail = skb; goto alloc_skb; + } } else if (newskb) { /* this is fast path, we don't necessarily need to * call to kfree_skb even though with newskb == NULL @@ -1956,6 +2003,9 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, atomic_add(size, &sk->sk_wmem_alloc); if (newskb) { + err = unix_scm_to_skb(&scm, skb, false); + if (err) + goto err_state_unlock; spin_lock(&other->sk_receive_queue.lock); __skb_queue_tail(&other->sk_receive_queue, newskb); spin_unlock(&other->sk_receive_queue.lock); @@ -1965,7 +2015,7 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, mutex_unlock(&unix_sk(other)->readlock); other->sk_data_ready(other); - + scm_destroy(&scm); return size; err_state_unlock: @@ -1976,6 +2026,8 @@ static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, kfree_skb(newskb); if (send_sigpipe && !(flags & MSG_NOSIGNAL)) send_sig(SIGPIPE, current, 0); + if (!init_scm) + scm_destroy(&scm); return err; } @@ -2279,10 +2331,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state) if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != scm.pid) || - !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || - !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || - !unix_secdata_eq(&scm, skb)) + if (!unix_skb_scm_eq(skb, &scm)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ From fdaac6a92778b95ad41e65b6f9a88b454899d8d4 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 11 Nov 2015 11:51:06 -0800 Subject: [PATCH 137/201] ipv6: Avoid creating RTF_CACHE from a rt that is not managed by fib6 tree [ Upstream commit 0d3f6d297bfb7af24d0508460fdb3d1ec4903fa3 ] The original bug report: https://bugzilla.redhat.com/show_bug.cgi?id=1272571 The setup has a IPv4 GRE tunnel running in a IPSec. The bug happens when ndisc starts sending router solicitation at the gre interface. The simplified oops stack is like: __lock_acquire+0x1b2/0x1c30 lock_acquire+0xb9/0x140 _raw_write_lock_bh+0x3f/0x50 __ip6_ins_rt+0x2e/0x60 ip6_ins_rt+0x49/0x50 ~~~~~~~~ __ip6_rt_update_pmtu.part.54+0x145/0x250 ip6_rt_update_pmtu+0x2e/0x40 ~~~~~~~~ ip_tunnel_xmit+0x1f1/0xf40 __gre_xmit+0x7a/0x90 ipgre_xmit+0x15a/0x220 dev_hard_start_xmit+0x2bd/0x480 __dev_queue_xmit+0x696/0x730 dev_queue_xmit+0x10/0x20 neigh_direct_output+0x11/0x20 ip6_finish_output2+0x21f/0x770 ip6_finish_output+0xa7/0x1d0 ip6_output+0x56/0x190 ~~~~~~~~ ndisc_send_skb+0x1d9/0x400 ndisc_send_rs+0x88/0xc0 ~~~~~~~~ The rt passed to ip6_rt_update_pmtu() is created by icmp6_dst_alloc() and it is not managed by the fib6 tree, so its rt6i_table == NULL. When __ip6_rt_update_pmtu() creates a RTF_CACHE clone, the newly created clone also has rt6i_table == NULL and it causes the ip6_ins_rt() oops. During pmtu update, we only want to create a RTF_CACHE clone from a rt which is currently managed (or owned) by the fib6 tree. It means either rt->rt6i_node != NULL or rt is a RTF_PCPU clone. It is worth to note that rt6i_table may not be NULL even it is not (yet) managed by the fib6 tree (e.g. addrconf_dst_alloc()). Hence, rt6i_node is a better check instead of rt6i_table. Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu") Signed-off-by: Martin KaFai Lau Reported-by: Chris Siebenmann Cc: Chris Siebenmann Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 946880ad48acd..711ec7a46ec52 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1340,6 +1340,12 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); } +static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) +{ + return !(rt->rt6i_flags & RTF_CACHE) && + (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); +} + static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct ipv6hdr *iph, u32 mtu) { @@ -1353,7 +1359,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (mtu >= dst_mtu(dst)) return; - if (rt6->rt6i_flags & RTF_CACHE) { + if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); } else { const struct in6_addr *daddr, *saddr; From dd91a7e65f8008ac989d61131670949feec08d23 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 11 Nov 2015 11:51:07 -0800 Subject: [PATCH 138/201] ipv6: Check expire on DST_NOCACHE route [ Upstream commit 5973fb1e245086071bf71994c8b54d99526ded03 ] Since the expires of the DST_NOCACHE rt can be set during the ip6_rt_update_pmtu(), we also need to consider the expires value when doing ip6_dst_check(). This patches creates __rt6_check_expired() to only check the expire value (if one exists) of the current rt. In rt6_dst_from_check(), it adds __rt6_check_expired() as one of the condition check. Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/route.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 711ec7a46ec52..ea892c19a1ba1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -403,6 +403,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, } } +static bool __rt6_check_expired(const struct rt6_info *rt) +{ + if (rt->rt6i_flags & RTF_EXPIRES) + return time_after(jiffies, rt->dst.expires); + else + return false; +} + static bool rt6_check_expired(const struct rt6_info *rt) { if (rt->rt6i_flags & RTF_EXPIRES) { @@ -1270,7 +1278,8 @@ static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) { - if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + if (!__rt6_check_expired(rt) && + rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && rt6_check((struct rt6_info *)(rt->dst.from), cookie)) return &rt->dst; else From 4ab43ae83f90543998b1640fb2cb556ce1a604c0 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 11 Nov 2015 11:51:08 -0800 Subject: [PATCH 139/201] ipv6: Check rt->dst.from for the DST_NOCACHE route [ Upstrem commit 02bcf4e082e4dc634409a6a6cb7def8806d6e5e6 ] All DST_NOCACHE rt6_info used to have rt->dst.from set to its parent. After commit 8e3d5be73681 ("ipv6: Avoid double dst_free"), DST_NOCACHE is also set to rt6_info which does not have a parent (i.e. rt->dst.from is NULL). This patch catches the rt->dst.from == NULL case. Fixes: 8e3d5be73681 ("ipv6: Avoid double dst_free") Signed-off-by: Martin KaFai Lau Cc: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_fib.h | 3 ++- net/ipv6/route.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index aaf9700fc9e5f..fb961a576abe4 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -167,7 +167,8 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) static inline u32 rt6_get_cookie(const struct rt6_info *rt) { - if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE)) + if (rt->rt6i_flags & RTF_PCPU || + (unlikely(rt->dst.flags & DST_NOCACHE) && rt->dst.from)) rt = (struct rt6_info *)(rt->dst.from); return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ea892c19a1ba1..d3773269d6e04 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1299,7 +1299,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt6_dst_from_metrics_check(rt); - if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) + if (rt->rt6i_flags & RTF_PCPU || + (unlikely(dst->flags & DST_NOCACHE) && rt->dst.from)) return rt6_dst_from_check(rt, cookie); else return rt6_check(rt, cookie); From ea9b07516f19f4c6fc14697cb6c70bb9b74e3bfc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 27 Nov 2015 18:17:05 +0100 Subject: [PATCH 140/201] Revert "ipv6: ndisc: inherit metadata dst when creating ndisc requests" [ Upstream commit 304d888b29cf96f1dd53511ee686499cd8cdf249 ] This reverts commit ab450605b35caa768ca33e86db9403229bf42be4. In IPv6, we cannot inherit the dst of the original dst. ndisc packets are IPv6 packets and may take another route than the original packet. This patch breaks the following scenario: a packet comes from eth0 and is forwarded through vxlan1. The encapsulated packet triggers an NS which cannot be sent because of the wrong route. CC: Jiri Benc CC: Thomas Graf Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ndisc.h | 3 +-- net/ipv6/addrconf.c | 2 +- net/ipv6/ndisc.c | 10 +++------- net/ipv6/route.c | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/include/net/ndisc.h b/include/net/ndisc.h index aba5695fadb00..b3a7751251b4c 100644 --- a/include/net/ndisc.h +++ b/include/net/ndisc.h @@ -182,8 +182,7 @@ int ndisc_rcv(struct sk_buff *skb); void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb); + const struct in6_addr *daddr, const struct in6_addr *saddr); void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dd00828863a0c..3939dd290c448 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3628,7 +3628,7 @@ static void addrconf_dad_work(struct work_struct *w) /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); - ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any, NULL); + ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); rtnl_unlock(); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 64a71354b069d..9ad46cd7930d5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -553,8 +553,7 @@ static void ndisc_send_unsol_na(struct net_device *dev) void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, const struct in6_addr *solicit, - const struct in6_addr *daddr, const struct in6_addr *saddr, - struct sk_buff *oskb) + const struct in6_addr *daddr, const struct in6_addr *saddr) { struct sk_buff *skb; struct in6_addr addr_buf; @@ -590,9 +589,6 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr); - if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE) && oskb) - skb_dst_copy(skb, oskb); - ndisc_send_skb(skb, daddr, saddr); } @@ -679,12 +675,12 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } - ndisc_send_ns(dev, neigh, target, target, saddr, skb); + ndisc_send_ns(dev, neigh, target, target, saddr); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(dev, NULL, target, &mcaddr, saddr, skb); + ndisc_send_ns(dev, NULL, target, &mcaddr, saddr); } } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d3773269d6e04..fd0e6746d0cfc 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -546,7 +546,7 @@ static void rt6_probe_deferred(struct work_struct *w) container_of(w, struct __rt6_probe_work, work); addrconf_addr_solict_mult(&work->target, &mcaddr); - ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL, NULL); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); dev_put(work->dev); kfree(work); } From 55eabd378b2e07bc2b677f07fd142365be68cb2b Mon Sep 17 00:00:00 2001 From: Kamal Mostafa Date: Wed, 11 Nov 2015 14:24:27 -0800 Subject: [PATCH 141/201] tools/net: Use include/uapi with __EXPORTED_HEADERS__ [ Upstream commit d7475de58575c904818efa369c82e88c6648ce2e ] Use the local uapi headers to keep in sync with "recently" added #define's (e.g. SKF_AD_VLAN_TPID). Refactored CFLAGS, and bpf_asm doesn't need -I. Fixes: 3f356385e8a4 ("filter: bpf_asm: add minimal bpf asm tool") Signed-off-by: Kamal Mostafa Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- tools/net/Makefile | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/net/Makefile b/tools/net/Makefile index ee577ea03ba50..ddf8880106524 100644 --- a/tools/net/Makefile +++ b/tools/net/Makefile @@ -4,6 +4,9 @@ CC = gcc LEX = flex YACC = bison +CFLAGS += -Wall -O2 +CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include + %.yacc.c: %.y $(YACC) -o $@ -d $< @@ -12,15 +15,13 @@ YACC = bison all : bpf_jit_disasm bpf_dbg bpf_asm -bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm' +bpf_jit_disasm : CFLAGS += -DPACKAGE='bpf_jit_disasm' bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl bpf_jit_disasm : bpf_jit_disasm.o -bpf_dbg : CFLAGS = -Wall -O2 bpf_dbg : LDLIBS = -lreadline bpf_dbg : bpf_dbg.o -bpf_asm : CFLAGS = -Wall -O2 -I. bpf_asm : LDLIBS = bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o bpf_exp.lex.o : bpf_exp.yacc.c From 1a43c235781426c2b4368bffb6478c379c4529aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:40 +0100 Subject: [PATCH 142/201] packet: do skb_probe_transport_header when we actually have data [ Upstream commit efdfa2f7848f64517008136fb41f53c4a1faf93a ] In tpacket_fill_skb() commit c1aad275b029 ("packet: set transport header before doing xmit") and later on 40893fd0fd4e ("net: switch to use skb_probe_transport_header()") was probing for a transport header on the skb from a ring buffer slot, but at a time, where the skb has _not even_ been filled with data yet. So that call into the flow dissector is pretty useless. Lets do it after we've set up the skb frags. Fixes: c1aad275b029 ("packet: set transport header before doing xmit") Reported-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 27b2898f275c7..be038c91cf994 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2368,8 +2368,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, 0); if (unlikely(po->tp_tx_has_off)) { int off_min, off_max, off; off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); @@ -2449,6 +2447,9 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, len = ((to_write > len_max) ? len_max : to_write); } + if (!packet_use_direct_xmit(po)) + skb_probe_transport_header(skb, 0); + return tp_len; } From 6b4a14f4265267d8d1ff6b2465e0be19da0c256a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:41 +0100 Subject: [PATCH 143/201] packet: always probe for transport header [ Upstream commit 8fd6c80d9dd938ca338c70698533a7e304752846 ] We concluded that the skb_probe_transport_header() should better be called unconditionally. Avoiding the call into the flow dissector has also not really much to do with the direct xmit mode. While it seems that only virtio_net code makes use of GSO from non RX/TX ring packet socket paths, we should probe for a transport header nevertheless before they hit devices. Reference: http://thread.gmane.org/gmane.linux.network/386173/ Signed-off-by: Daniel Borkmann Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index be038c91cf994..3059f518f298c 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2447,8 +2447,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, len = ((to_write > len_max) ? len_max : to_write); } - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, 0); + skb_probe_transport_header(skb, 0); return tp_len; } @@ -2800,8 +2799,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) len += vnet_hdr_len; } - if (!packet_use_direct_xmit(po)) - skb_probe_transport_header(skb, reserve); + skb_probe_transport_header(skb, reserve); + if (unlikely(extra_len == 4)) skb->no_fcs = 1; From bbf14f8bb1041b5c11b9ccf998a075a03f04de3d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:42 +0100 Subject: [PATCH 144/201] packet: only allow extra vlan len on ethernet devices [ Upstream commit 3c70c132488794e2489ab045559b0ce0afcf17de ] Packet sockets can be used by various net devices and are not really restricted to ARPHRD_ETHER device types. However, when currently checking for the extra 4 bytes that can be transmitted in VLAN case, our assumption is that we generally probe on ARPHRD_ETHER devices. Therefore, before looking into Ethernet header, check the device type first. This also fixes the issue where non-ARPHRD_ETHER devices could have no dev->hard_header_len in TX_RING SOCK_RAW case, and thus the check would test unfilled linear part of the skb (instead of non-linear). Fixes: 57f89bfa2140 ("network: Allow af_packet to transmit +4 bytes for VLAN packets.") Fixes: 52f1454f629f ("packet: allow to transmit +4 byte in TX_RING slot for VLAN case") Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 60 ++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 3059f518f298c..6e3cd2fe392ae 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1741,6 +1741,20 @@ static void fanout_release(struct sock *sk) kfree_rcu(po->rollover, rcu); } +static bool packet_extra_vlan_len_allowed(const struct net_device *dev, + struct sk_buff *skb) +{ + /* Earlier code assumed this would be a VLAN pkt, double-check + * this now that we have the actual packet in hand. We can only + * do this check on Ethernet devices. + */ + if (unlikely(dev->type != ARPHRD_ETHER)) + return false; + + skb_reset_mac_header(skb); + return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); +} + static const struct proto_ops packet_ops; static const struct proto_ops packet_ops_spkt; @@ -1902,18 +1916,10 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, goto retry; } - if (len > (dev->mtu + dev->hard_header_len + extra_len)) { - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. - */ - struct ethhdr *ehdr; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) { - err = -EMSGSIZE; - goto out_unlock; - } + if (len > (dev->mtu + dev->hard_header_len + extra_len) && + !packet_extra_vlan_len_allowed(dev, skb)) { + err = -EMSGSIZE; + goto out_unlock; } skb->protocol = proto; @@ -2525,18 +2531,10 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); if (likely(tp_len >= 0) && - tp_len > dev->mtu + dev->hard_header_len) { - struct ethhdr *ehdr; - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. - */ + tp_len > dev->mtu + dev->hard_header_len && + !packet_extra_vlan_len_allowed(dev, skb)) + tp_len = -EMSGSIZE; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) - tp_len = -EMSGSIZE; - } if (unlikely(tp_len < 0)) { if (po->tp_loss) { __packet_set_status(po, ph, @@ -2757,18 +2755,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); - if (!gso_type && (len > dev->mtu + reserve + extra_len)) { - /* Earlier code assumed this would be a VLAN pkt, - * double-check this now that we have the actual - * packet in hand. - */ - struct ethhdr *ehdr; - skb_reset_mac_header(skb); - ehdr = eth_hdr(skb); - if (ehdr->h_proto != htons(ETH_P_8021Q)) { - err = -EMSGSIZE; - goto out_free; - } + if (!gso_type && (len > dev->mtu + reserve + extra_len) && + !packet_extra_vlan_len_allowed(dev, skb)) { + err = -EMSGSIZE; + goto out_free; } skb->protocol = proto; From 263c8f423ade0e7bd72ea82e36da3a458fac1e30 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:43 +0100 Subject: [PATCH 145/201] packet: infer protocol from ethernet header if unset [ Upstream commit c72219b75fde768efccf7666342282fab7f9e4e7 ] In case no struct sockaddr_ll has been passed to packet socket's sendmsg() when doing a TX_RING flush run, then skb->protocol is set to po->num instead, which is the protocol passed via socket(2)/bind(2). Applications only xmitting can go the path of allocating the socket as socket(PF_PACKET, , 0) and do a bind(2) on the TX_RING with sll_protocol of 0. That way, register_prot_hook() is neither called on creation nor on bind time, which saves cycles when there's no interest in capturing anyway. That leaves us however with po->num 0 instead and therefore the TX_RING flush run sets skb->protocol to 0 as well. Eric reported that this leads to problems when using tools like trafgen over bonding device. I.e. the bonding's hash function could invoke the kernel's flow dissector, which depends on skb->protocol being properly set. In the current situation, all the traffic is then directed to a single slave. Fix it up by inferring skb->protocol from the Ethernet header when not set and we have ARPHRD_ETHER device type. This is only done in case of SOCK_RAW and where we have a dev->hard_header_len length. In case of ARPHRD_ETHER devices, this is guaranteed to cover ETH_HLEN, and therefore being accessed on the skb after the skb_store_bits(). Reported-by: Eric Dumazet Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 6e3cd2fe392ae..45d419656309d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2338,6 +2338,15 @@ static bool ll_header_truncated(const struct net_device *dev, int len) return false; } +static void tpacket_set_protocol(const struct net_device *dev, + struct sk_buff *skb) +{ + if (dev->type == ARPHRD_ETHER) { + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + } +} + static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, void *frame, struct net_device *dev, int size_max, __be16 proto, unsigned char *addr, int hlen) @@ -2419,6 +2428,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, dev->hard_header_len); if (unlikely(err)) return err; + if (!skb->protocol) + tpacket_set_protocol(dev, skb); data += dev->hard_header_len; to_write -= dev->hard_header_len; From 377204f209fd51a24a359873ad44a97e4b5b72c8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 11 Nov 2015 23:25:44 +0100 Subject: [PATCH 146/201] packet: fix tpacket_snd max frame len [ Upstream commit 5cfb4c8d05b4409c4044cb9c05b19705c1d9818b ] Since it's introduction in commit 69e3c75f4d54 ("net: TX_RING and packet mmap"), TX_RING could be used from SOCK_DGRAM and SOCK_RAW side. When used with SOCK_DGRAM only, the size_max > dev->mtu + reserve check should have reserve as 0, but currently, this is unconditionally set (in it's original form as dev->hard_header_len). I think this is not correct since tpacket_fill_skb() would then take dev->mtu and dev->hard_header_len into account for SOCK_DGRAM, the extra VLAN_HLEN could be possible in both cases. Presumably, the reserve code was copied from packet_snd(), but later on missed the check. Make it similar as we have it in packet_snd(). Fixes: 69e3c75f4d54 ("net: TX_RING and packet mmap") Signed-off-by: Daniel Borkmann Acked-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/packet/af_packet.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 45d419656309d..4695a36eeca35 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2510,12 +2510,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) if (unlikely(!(dev->flags & IFF_UP))) goto out_put; - reserve = dev->hard_header_len + VLAN_HLEN; + if (po->sk.sk_socket->type == SOCK_RAW) + reserve = dev->hard_header_len; size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); - if (size_max > dev->mtu + reserve) - size_max = dev->mtu + reserve; + if (size_max > dev->mtu + reserve + VLAN_HLEN) + size_max = dev->mtu + reserve + VLAN_HLEN; do { ph = packet_current_frame(po, &po->tx_ring, @@ -2542,7 +2543,7 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); if (likely(tp_len >= 0) && - tp_len > dev->mtu + dev->hard_header_len && + tp_len > dev->mtu + reserve && !packet_extra_vlan_len_allowed(dev, skb)) tp_len = -EMSGSIZE; From 176cec782335b95d7830bce4a7e6a1fe0b644594 Mon Sep 17 00:00:00 2001 From: lucien Date: Thu, 12 Nov 2015 13:07:07 +0800 Subject: [PATCH 147/201] sctp: translate host order to network order when setting a hmacid [ Upstream commit ed5a377d87dc4c87fb3e1f7f698cba38cd893103 ] now sctp auth cannot work well when setting a hmacid manually, which is caused by that we didn't use the network order for hmacid, so fix it by adding the transformation in sctp_auth_ep_set_hmacs. even we set hmacid with the network order in userspace, it still can't work, because of this condition in sctp_auth_ep_set_hmacs(): if (id > SCTP_AUTH_HMAC_ID_MAX) return -EOPNOTSUPP; so this wasn't working before and thus it won't break compatibility. Fixes: 65b07e5d0d09 ("[SCTP]: API updates to suport SCTP-AUTH extensions.") Signed-off-by: Xin Long Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Acked-by: Vlad Yasevich Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/auth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 4f15b7d730e13..1543e39f47c33 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -809,8 +809,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep, if (!has_sha1) return -EINVAL; - memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0], - hmacs->shmac_num_idents * sizeof(__u16)); + for (i = 0; i < hmacs->shmac_num_idents; i++) + ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]); ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) + hmacs->shmac_num_idents * sizeof(__u16)); return 0; From 713a813ed2fab144e436a9a3e64445a441d9846d Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 12 Nov 2015 19:35:26 +0200 Subject: [PATCH 148/201] net/mlx5e: Added self loopback prevention [ Upstream commit 66189961e986e53ae39822898fc2ce88f44c61bb ] Prevent outgoing multicast frames from looping back to the RX queue. By introducing new HW capability self_lb_en_modifiable, which indicates the support to modify self_lb_en bit in modify_tir command. When this capability is set we can prevent TIRs from sending back loopback multicast traffic to their own RQs, by "refreshing TIRs" with modify_tir command, on every time new channels (SQs/RQs) are created at device open. This is needed since TIRs are static and only allocated once on driver load, and the loopback decision is under their responsibility. Fixes issues of the kind: "IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!" The issue is seen since the IPv6 solicitations multicast messages are loopedback and the network stack thinks they are coming from another host. Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 56 ++++++++++++++++++- include/linux/mlx5/mlx5_ifc.h | 24 ++++---- 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 59874d666cfff..443632df20101 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1332,6 +1332,42 @@ static int mlx5e_modify_tir_lro(struct mlx5e_priv *priv, int tt) return err; } +static int mlx5e_refresh_tir_self_loopback_enable(struct mlx5_core_dev *mdev, + u32 tirn) +{ + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + err = mlx5_core_modify_tir(mdev, tirn, in, inlen); + + kvfree(in); + + return err; +} + +static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) +{ + int err; + int i; + + for (i = 0; i < MLX5E_NUM_TT; i++) { + err = mlx5e_refresh_tir_self_loopback_enable(priv->mdev, + priv->tirn[i]); + if (err) + return err; + } + + return 0; +} + static int mlx5e_set_dev_port_mtu(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -1367,13 +1403,20 @@ int mlx5e_open_locked(struct net_device *netdev) err = mlx5e_set_dev_port_mtu(netdev); if (err) - return err; + goto err_clear_state_opened_flag; err = mlx5e_open_channels(priv); if (err) { netdev_err(netdev, "%s: mlx5e_open_channels failed, %d\n", __func__, err); - return err; + goto err_clear_state_opened_flag; + } + + err = mlx5e_refresh_tirs_self_loopback_enable(priv); + if (err) { + netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", + __func__, err); + goto err_close_channels; } mlx5e_update_carrier(priv); @@ -1382,6 +1425,12 @@ int mlx5e_open_locked(struct net_device *netdev) schedule_delayed_work(&priv->update_stats_work, 0); return 0; + +err_close_channels: + mlx5e_close_channels(priv); +err_clear_state_opened_flag: + clear_bit(MLX5E_STATE_OPENED, &priv->state); + return err; } static int mlx5e_open(struct net_device *netdev) @@ -1899,6 +1948,9 @@ static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) "Not creating net device, some required device capabilities are missing\n"); return -ENOTSUPP; } + if (!MLX5_CAP_ETH(mdev, self_lb_en_modifiable)) + mlx5_core_warn(mdev, "Self loop back prevention is not supported\n"); + return 0; } diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd2097455a2e3..1565324eb620c 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x6]; + u8 reserved_0[0x3]; + u8 self_lb_en_modifiable[0x1]; + u8 reserved_1[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_1[0x4]; + u8 reserved_2[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_2[0x3]; + u8 reserved_3[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_3[0x2]; + u8 reserved_4[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_4[0x20]; + u8 reserved_5[0x20]; - u8 reserved_5[0x10]; + u8 reserved_6[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_6[0x120]; + u8 reserved_7[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_7[0x600]; + u8 reserved_8[0x600]; }; struct mlx5_ifc_roce_cap_bits { @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_1[0x1b]; + u8 self_lb_en[0x1]; + u8 reserved_2[0x3]; u8 lro[0x1]; }; From 289defb1e093535624376d7098c5364f8804fdc7 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 12 Nov 2015 19:35:29 +0200 Subject: [PATCH 149/201] net/mlx4_core: Fix sleeping while holding spinlock at rem_slave_counters [ Upstream commit f5adbfee72282bb1f456d52b04adacd4fe6ac502 ] When cleaning slave's counter resources, we hold a spinlock that protects the slave's counters list. As part of the clean, we call __mlx4_clear_if_stat which calls mlx4_alloc_cmd_mailbox which is a sleepable function. In order to fix this issue, hold the spinlock, and copy all counter indices into a temporary array, and release the spinlock. Afterwards, iterate over this array and free every counter. Repeat this scenario until the original list is empty (a new counter might have been added while releasing the counters from the temporary array). Fixes: b72ca7e96acf ("net/mlx4_core: Reset counters data when freed") Reported-by: Moni Shoua Tested-by: Moni Shoua Signed-off-by: Jack Morgenstein Signed-off-by: Eran Ben Elisha Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- .../ethernet/mellanox/mlx4/resource_tracker.c | 39 +++++++++++++------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 731423ca575da..8bead97373abd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4934,26 +4934,41 @@ static void rem_slave_counters(struct mlx4_dev *dev, int slave) struct res_counter *counter; struct res_counter *tmp; int err; - int index; + int *counters_arr = NULL; + int i, j; err = move_all_busy(dev, slave, RES_COUNTER); if (err) mlx4_warn(dev, "rem_slave_counters: Could not move all counters - too busy for slave %d\n", slave); - spin_lock_irq(mlx4_tlock(dev)); - list_for_each_entry_safe(counter, tmp, counter_list, com.list) { - if (counter->com.owner == slave) { - index = counter->com.res_id; - rb_erase(&counter->com.node, - &tracker->res_tree[RES_COUNTER]); - list_del(&counter->com.list); - kfree(counter); - __mlx4_counter_free(dev, index); + counters_arr = kmalloc_array(dev->caps.max_counters, + sizeof(*counters_arr), GFP_KERNEL); + if (!counters_arr) + return; + + do { + i = 0; + j = 0; + spin_lock_irq(mlx4_tlock(dev)); + list_for_each_entry_safe(counter, tmp, counter_list, com.list) { + if (counter->com.owner == slave) { + counters_arr[i++] = counter->com.res_id; + rb_erase(&counter->com.node, + &tracker->res_tree[RES_COUNTER]); + list_del(&counter->com.list); + kfree(counter); + } + } + spin_unlock_irq(mlx4_tlock(dev)); + + while (j < i) { + __mlx4_counter_free(dev, counters_arr[j++]); mlx4_release_resource(dev, slave, RES_COUNTER, 1, 0); } - } - spin_unlock_irq(mlx4_tlock(dev)); + } while (i); + + kfree(counters_arr); } static void rem_slave_xrcdns(struct mlx4_dev *dev, int slave) From 5c81f50e7fe60ed1d0820820df63f9193c6d5d0b Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 12 Nov 2015 17:35:58 +0100 Subject: [PATCH 150/201] ip_tunnel: disable preemption when updating per-cpu tstats [ Upstream commit b4fe85f9c9146f60457e9512fb6055e69e6a7a65 ] Drivers like vxlan use the recently introduced udp_tunnel_xmit_skb/udp_tunnel6_xmit_skb APIs. udp_tunnel6_xmit_skb makes use of ip6tunnel_xmit, and ip6tunnel_xmit, after sending the packet, updates the struct stats using the usual u64_stats_update_begin/end calls on this_cpu_ptr(dev->tstats). udp_tunnel_xmit_skb makes use of iptunnel_xmit, which doesn't touch tstats, so drivers like vxlan, immediately after, call iptunnel_xmit_stats, which does the same thing - calls u64_stats_update_begin/end on this_cpu_ptr(dev->tstats). While vxlan is probably fine (I don't know?), calling a similar function from, say, an unbound workqueue, on a fully preemptable kernel causes real issues: [ 188.434537] BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u8:0/6 [ 188.435579] caller is debug_smp_processor_id+0x17/0x20 [ 188.435583] CPU: 0 PID: 6 Comm: kworker/u8:0 Not tainted 4.2.6 #2 [ 188.435607] Call Trace: [ 188.435611] [] dump_stack+0x4f/0x7b [ 188.435615] [] check_preemption_disabled+0x19d/0x1c0 [ 188.435619] [] debug_smp_processor_id+0x17/0x20 The solution would be to protect the whole this_cpu_ptr(dev->tstats)/u64_stats_update_begin/end blocks with disabling preemption and then reenabling it. Signed-off-by: Jason A. Donenfeld Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ip6_tunnel.h | 3 ++- include/net/ip_tunnels.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index fa915fa0f703d..d49a8f8fae459 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -90,11 +90,12 @@ static inline void ip6tunnel_xmit(struct sock *sk, struct sk_buff *skb, err = ip6_local_out_sk(sk, skb); if (net_xmit_eval(err) == 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else { stats->tx_errors++; stats->tx_aborted_errors++; diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index f6dafec9102c5..62a750a6a8f8c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -287,12 +287,13 @@ static inline void iptunnel_xmit_stats(int err, struct pcpu_sw_netstats __percpu *stats) { if (err > 0) { - struct pcpu_sw_netstats *tstats = this_cpu_ptr(stats); + struct pcpu_sw_netstats *tstats = get_cpu_ptr(stats); u64_stats_update_begin(&tstats->syncp); tstats->tx_bytes += err; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); + put_cpu_ptr(tstats); } else if (err < 0) { err_stats->tx_errors++; err_stats->tx_aborted_errors++; From 5eceaad8488b24f98ada6d5dfd5a6ec2df875d07 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 16 Nov 2015 10:52:48 +0100 Subject: [PATCH 151/201] net: switchdev: fix return code of fdb_dump stub [ Upstream commit 24cb7055a3066634a0f3fa0cd6a4780652905d35 ] rtnl_fdb_dump always expects an index to be returned by the ndo_fdb_dump op, but when CONFIG_NET_SWITCHDEV is off, it returns an error. Fix that by returning the given unmodified idx. A similar fix was 0890cf6cb6ab ("switchdev: fix return value of switchdev_port_fdb_dump in case of error") but for the CONFIG_NET_SWITCHDEV=y case. Fixes: 45d4122ca7cd ("switchdev: add support for fdb add/del/dump via switchdev_port_obj ops.") Signed-off-by: Dragos Tatulea Acked-by: Jiri Pirko Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 319baab3b48ef..731c40e34bf25 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -272,7 +272,7 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct net_device *filter_dev, int idx) { - return -EOPNOTSUPP; + return idx; } static inline void switchdev_port_fwd_mark_set(struct net_device *dev, From ce2ceca37c846767abceee497f8adbb3328a8dd9 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Mon, 16 Nov 2015 17:51:34 +0300 Subject: [PATCH 152/201] net: thunder: Check for driver data in nicvf_remove() [ Upstream commit 7750130d93decff06120df0d8ea024ff8a038a21 ] In some cases the crash is caused by nicvf_remove() being called from outside. For example, if we try to feed the device to vfio after the probe has failed for some reason. So, move the check to better place. Signed-off-by: Pavel Fedin Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index a9377727c11c3..7f709cbdcd87d 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -1583,8 +1583,14 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) static void nicvf_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); - struct nicvf *nic = netdev_priv(netdev); - struct net_device *pnetdev = nic->pnicvf->netdev; + struct nicvf *nic; + struct net_device *pnetdev; + + if (!netdev) + return; + + nic = netdev_priv(netdev); + pnetdev = nic->pnicvf->netdev; /* Check if this Qset is assigned to different VF. * If yes, clean primary and all secondary Qsets. From 3f7a8d274e209d0e5dba464618a78c37ec00e899 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Mon, 16 Nov 2015 13:09:10 -0500 Subject: [PATCH 153/201] snmp: Remove duplicate OUTMCAST stat increment [ Upstream commit 41033f029e393a64e81966cbe34d66c6cf8a2e7e ] the OUTMCAST stat is double incremented, getting bumped once in the mcast code itself, and again in the common ip output path. Remove the mcast bump, as its not needed Validated by the reporter, with good results Signed-off-by: Neil Horman Reported-by: Claus Jensen CC: Claus Jensen CC: David Miller Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/mcast.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 083b2927fc67a..41e3b5ee8d0b6 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1651,7 +1651,6 @@ static void mld_sendpack(struct sk_buff *skb) if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); } else { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); } @@ -2014,7 +2013,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len); } else IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); From d603fa768b63905317e9cf2cfb647ddff03d9715 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 18 Nov 2015 16:40:19 +0100 Subject: [PATCH 154/201] net/ip6_tunnel: fix dst leak [ Upstream commit 206b49500df558dbc15d8836b09f6397ec5ed8bb ] the commit cdf3464e6c6b ("ipv6: Fix dst_entry refcnt bugs in ip6_tunnel") introduced percpu storage for ip6_tunnel dst cache, but while clearing such cache it used raw_cpu_ptr to walk the per cpu entries, so cached dst on non current cpu are not actually reset. This patch replaces raw_cpu_ptr with per_cpu_ptr, properly cleaning such storage. Fixes: cdf3464e6c6b ("ipv6: Fix dst_entry refcnt bugs in ip6_tunnel") Signed-off-by: Paolo Abeni Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6_tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index eabffbb89795d..137fca42aaa6b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -177,7 +177,7 @@ void ip6_tnl_dst_reset(struct ip6_tnl *t) int i; for_each_possible_cpu(i) - ip6_tnl_per_cpu_dst_set(raw_cpu_ptr(t->dst_cache), NULL); + ip6_tnl_per_cpu_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); } EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); From e8afa3cd3e6dfe713e0d7f8ca8062aea50a14725 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Nov 2015 21:13:07 +0100 Subject: [PATCH 155/201] net: qmi_wwan: add XS Stick W100-2 from 4G Systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit 68242a5a1e2edce39b069385cbafb82304eac0f1 ] Thomas reports " 4gsystems sells two total different LTE-surfsticks under the same name. .. The newer version of XS Stick W100 is from "omega" .. Under windows the driver switches to the same ID, and uses MI03\6 for network and MI01\6 for modem. .. echo "1c9e 9b01" > /sys/bus/usb/drivers/qmi_wwan/new_id echo "1c9e 9b01" > /sys/bus/usb-serial/drivers/option1/new_id T: Bus=01 Lev=01 Prnt=01 Port=03 Cnt=01 Dev#= 4 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=1c9e ProdID=9b01 Rev=02.32 S: Manufacturer=USB Modem S: Product=USB Modem S: SerialNumber= C: #Ifs= 5 Cfg#= 1 Atr=80 MxPwr=500mA I: If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=option I: If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=qmi_wwan I: If#= 4 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=usb-storage Now all important things are there: wwp0s29f7u2i3 (net), ttyUSB2 (at), cdc-wdm0 (qmi), ttyUSB1 (at) There is also ttyUSB0, but it is not usable, at least not for at. The device works well with qmi and ModemManager-NetworkManager. " Reported-by: Thomas Schäfer Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 2a7c1be23c4f2..66e0853d1680d 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -775,6 +775,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x2357, 0x9000, 4)}, /* TP-LINK MA260 */ {QMI_FIXED_INTF(0x1bc7, 0x1200, 5)}, /* Telit LE920 */ {QMI_FIXED_INTF(0x1bc7, 0x1201, 2)}, /* Telit LE920 */ + {QMI_FIXED_INTF(0x1c9e, 0x9b01, 3)}, /* XS Stick W100-2 from 4G Systems */ {QMI_FIXED_INTF(0x0b3c, 0xc000, 4)}, /* Olivetti Olicard 100 */ {QMI_FIXED_INTF(0x0b3c, 0xc001, 4)}, /* Olivetti Olicard 120 */ {QMI_FIXED_INTF(0x0b3c, 0xc002, 4)}, /* Olivetti Olicard 140 */ From 4ef5bc914bbd40811ebc0514d84175d303fb8b4f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 12:40:13 -0800 Subject: [PATCH 156/201] tcp: md5: fix lockdep annotation [ Upstream commit 1b8e6a01e19f001e9f93b39c32387961c91ed3cc ] When a passive TCP is created, we eventually call tcp_md5_do_add() with sk pointing to the child. It is not owner by the user yet (we will add this socket into listener accept queue a bit later anyway) But we do own the spinlock, so amend the lockdep annotation to avoid following splat : [ 8451.090932] net/ipv4/tcp_ipv4.c:923 suspicious rcu_dereference_protected() usage! [ 8451.090932] [ 8451.090932] other info that might help us debug this: [ 8451.090932] [ 8451.090934] [ 8451.090934] rcu_scheduler_active = 1, debug_locks = 1 [ 8451.090936] 3 locks held by socket_sockopt_/214795: [ 8451.090936] #0: (rcu_read_lock){.+.+..}, at: [] __netif_receive_skb_core+0x151/0xe90 [ 8451.090947] #1: (rcu_read_lock){.+.+..}, at: [] ip_local_deliver_finish+0x43/0x2b0 [ 8451.090952] #2: (slock-AF_INET){+.-...}, at: [] sk_clone_lock+0x1c5/0x500 [ 8451.090958] [ 8451.090958] stack backtrace: [ 8451.090960] CPU: 7 PID: 214795 Comm: socket_sockopt_ [ 8451.091215] Call Trace: [ 8451.091216] [] dump_stack+0x55/0x76 [ 8451.091229] [] lockdep_rcu_suspicious+0xeb/0x110 [ 8451.091235] [] tcp_md5_do_add+0x1bf/0x1e0 [ 8451.091239] [] tcp_v4_syn_recv_sock+0x1f1/0x4c0 [ 8451.091242] [] ? tcp_v4_md5_hash_skb+0x167/0x190 [ 8451.091246] [] tcp_check_req+0x3c8/0x500 [ 8451.091249] [] ? tcp_v4_inbound_md5_hash+0x11e/0x190 [ 8451.091253] [] tcp_v4_rcv+0x3c0/0x9f0 [ 8451.091256] [] ? ip_local_deliver_finish+0x43/0x2b0 [ 8451.091260] [] ip_local_deliver_finish+0xb6/0x2b0 [ 8451.091263] [] ? ip_local_deliver_finish+0x43/0x2b0 [ 8451.091267] [] ip_local_deliver+0x48/0x80 [ 8451.091270] [] ip_rcv_finish+0x160/0x700 [ 8451.091273] [] ip_rcv+0x29e/0x3d0 [ 8451.091277] [] __netif_receive_skb_core+0xb47/0xe90 Fixes: a8afca0329988 ("tcp: md5: protects md5sig_info with RCU") Signed-off-by: Eric Dumazet Reported-by: Willem de Bruijn Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_ipv4.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 93898e093d4e6..a7739c83aa84a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -922,7 +922,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } md5sig = rcu_dereference_protected(tp->md5sig_info, - sock_owned_by_user(sk)); + sock_owned_by_user(sk) || + lockdep_is_held(&sk->sk_lock.slock)); if (!md5sig) { md5sig = kmalloc(sizeof(*md5sig), gfp); if (!md5sig) From 6f5aa0f4aa406332521bde78e2b6290886f3961a Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 18 Nov 2015 18:17:30 -0800 Subject: [PATCH 157/201] tcp: disable Fast Open on timeouts after handshake [ Upstream commit 0e45f4da5981895e885dd72fe912a3f8e32bae73 ] Some middle-boxes black-hole the data after the Fast Open handshake (https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf). The exact reason is unknown. The work-around is to disable Fast Open temporarily after multiple recurring timeouts with few or no data delivered in the established state. Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Reported-by: Christoph Paasch Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_timer.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7149ebc820c7d..04f0a052b524d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -176,6 +176,18 @@ static int tcp_write_timeout(struct sock *sk) syn_set = true; } else { if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { + /* Some middle-boxes may black-hole Fast Open _after_ + * the handshake. Therefore we conservatively disable + * Fast Open on this path on recurring timeouts with + * few or zero bytes acked after Fast Open. + */ + if (tp->syn_data_acked && + tp->bytes_acked <= tp->rx_opt.mss_clamp) { + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); + if (icsk->icsk_retransmits == sysctl_tcp_retries1) + NET_INC_STATS_BH(sock_net(sk), + LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } /* Black hole detection */ tcp_mtu_probing(icsk, sk); From 11617ee108ae5bccaf3064d2304b8608874c85f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Nov 2015 21:03:33 -0800 Subject: [PATCH 158/201] tcp: fix potential huge kmalloc() calls in TCP_REPAIR [ Upstream commit 5d4c9bfbabdb1d497f21afd81501e5c54b0c85d9 ] tcp_send_rcvq() is used for re-injecting data into tcp receive queue. Problems : - No check against size is performed, allowed user to fool kernel in attempting very large memory allocations, eventually triggering OOM when memory is fragmented. - In case of fault during the copy we do not return correct errno. Lets use alloc_skb_with_frags() to cook optimal skbs. Fixes: 292e8d8c8538 ("tcp: Move rcvq sending to tcp_input.c") Fixes: c0e88ff0f256 ("tcp: Repair socket queues") Signed-off-by: Eric Dumazet Cc: Pavel Emelyanov Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a8f515bb19c4b..cc6bd43f7f89f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4457,19 +4457,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) { struct sk_buff *skb; + int err = -ENOMEM; + int data_len = 0; bool fragstolen; if (size == 0) return 0; - skb = alloc_skb(size, sk->sk_allocation); + if (size > PAGE_SIZE) { + int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS); + + data_len = npages << PAGE_SHIFT; + size = data_len + (size & ~PAGE_MASK); + } + skb = alloc_skb_with_frags(size - data_len, data_len, + PAGE_ALLOC_COSTLY_ORDER, + &err, sk->sk_allocation); if (!skb) goto err; + skb_put(skb, size - data_len); + skb->data_len = data_len; + skb->len = size; + if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size); + if (err) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; @@ -4485,7 +4500,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) err_free: kfree_skb(skb); err: - return -ENOMEM; + return err; + } static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) From b2d8d14ba2c37e3ff2d1267d4a32a32365859b8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Nov 2015 08:18:14 -0800 Subject: [PATCH 159/201] tcp: initialize tp->copied_seq in case of cross SYN connection [ Upstream commit 142a2e7ece8d8ac0e818eb2c91f99ca894730e2a ] Dmitry provided a syzkaller (http://github.com/google/syzkaller) generated program that triggers the WARNING at net/ipv4/tcp.c:1729 in tcp_recvmsg() : WARN_ON(tp->copied_seq != tp->rcv_nxt && !(flags & (MSG_PEEK | MSG_TRUNC))); His program is specifically attempting a Cross SYN TCP exchange, that we support (for the pleasure of hackers ?), but it looks we lack proper tcp->copied_seq initialization. Thanks again Dmitry for your report and testings. Signed-off-by: Eric Dumazet Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/tcp_input.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cc6bd43f7f89f..0a2b61dbcd4e8 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5659,6 +5659,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, } tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tp->copied_seq = tp->rcv_nxt; tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; /* RFC1323: The window in SYN & SYN/ACK segments is From 609d60122e815cdad20f887de8d2d60d989ef2cb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 20 Nov 2015 00:11:56 +0100 Subject: [PATCH 160/201] net, scm: fix PaX detected msg_controllen overflow in scm_detach_fds [ Upstream commit 6900317f5eff0a7070c5936e5383f589e0de7a09 ] David and HacKurx reported a following/similar size overflow triggered in a grsecurity kernel, thanks to PaX's gcc size overflow plugin: (Already fixed in later grsecurity versions by Brad and PaX Team.) [ 1002.296137] PAX: size overflow detected in function scm_detach_fds net/core/scm.c:314 cicus.202_127 min, count: 4, decl: msg_controllen; num: 0; context: msghdr; [ 1002.296145] CPU: 0 PID: 3685 Comm: scm_rights_recv Not tainted 4.2.3-grsec+ #7 [ 1002.296149] Hardware name: Apple Inc. MacBookAir5,1/Mac-66F35F19FE2A0D05, [...] [ 1002.296153] ffffffff81c27366 0000000000000000 ffffffff81c27375 ffffc90007843aa8 [ 1002.296162] ffffffff818129ba 0000000000000000 ffffffff81c27366 ffffc90007843ad8 [ 1002.296169] ffffffff8121f838 fffffffffffffffc fffffffffffffffc ffffc90007843e60 [ 1002.296176] Call Trace: [ 1002.296190] [] dump_stack+0x45/0x57 [ 1002.296200] [] report_size_overflow+0x38/0x60 [ 1002.296209] [] scm_detach_fds+0x2ce/0x300 [ 1002.296220] [] unix_stream_read_generic+0x609/0x930 [ 1002.296228] [] unix_stream_recvmsg+0x4f/0x60 [ 1002.296236] [] ? unix_set_peek_off+0x50/0x50 [ 1002.296243] [] sock_recvmsg+0x47/0x60 [ 1002.296248] [] ___sys_recvmsg+0xe2/0x1e0 [ 1002.296257] [] __sys_recvmsg+0x46/0x80 [ 1002.296263] [] SyS_recvmsg+0x2c/0x40 [ 1002.296271] [] entry_SYSCALL_64_fastpath+0x12/0x85 Further investigation showed that this can happen when an *odd* number of fds are being passed over AF_UNIX sockets. In these cases CMSG_LEN(i * sizeof(int)) and CMSG_SPACE(i * sizeof(int)), where i is the number of successfully passed fds, differ by 4 bytes due to the extra CMSG_ALIGN() padding in CMSG_SPACE() to an 8 byte boundary on 64 bit. The padding is used to align subsequent cmsg headers in the control buffer. When the control buffer passed in from the receiver side *lacks* these 4 bytes (e.g. due to buggy/wrong API usage), then msg->msg_controllen will overflow in scm_detach_fds(): int cmlen = CMSG_LEN(i * sizeof(int)); <--- cmlen w/o tail-padding err = put_user(SOL_SOCKET, &cm->cmsg_level); if (!err) err = put_user(SCM_RIGHTS, &cm->cmsg_type); if (!err) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i * sizeof(int)); <--- cmlen w/ 4 byte extra tail-padding msg->msg_control += cmlen; msg->msg_controllen -= cmlen; <--- iff no tail-padding space here ... } ... wrap-around F.e. it will wrap to a length of 18446744073709551612 bytes in case the receiver passed in msg->msg_controllen of 20 bytes, and the sender properly transferred 1 fd to the receiver, so that its CMSG_LEN results in 20 bytes and CMSG_SPACE in 24 bytes. In case of MSG_CMSG_COMPAT (scm_detach_fds_compat()), I haven't seen an issue in my tests as alignment seems always on 4 byte boundary. Same should be in case of native 32 bit, where we end up with 4 byte boundaries as well. In practice, passing msg->msg_controllen of 20 to recvmsg() while receiving a single fd would mean that on successful return, msg->msg_controllen is being set by the kernel to 24 bytes instead, thus more than the input buffer advertised. It could f.e. become an issue if such application later on zeroes or copies the control buffer based on the returned msg->msg_controllen elsewhere. Maximum number of fds we can send is a hard upper limit SCM_MAX_FD (253). Going over the code, it seems like msg->msg_controllen is not being read after scm_detach_fds() in scm_recv() anymore by the kernel, good! Relevant recvmsg() handler are unix_dgram_recvmsg() (unix_seqpacket_recvmsg()) and unix_stream_recvmsg(). Both return back to their recvmsg() caller, and ___sys_recvmsg() places the updated length, that is, new msg_control - old msg_control pointer into msg->msg_controllen (hence the 24 bytes seen in the example). Long time ago, Wei Yongjun fixed something related in commit 1ac70e7ad24a ("[NET]: Fix function put_cmsg() which may cause usr application memory overflow"). RFC3542, section 20.2. says: The fields shown as "XX" are possible padding, between the cmsghdr structure and the data, and between the data and the next cmsghdr structure, if required by the implementation. While sending an application may or may not include padding at the end of last ancillary data in msg_controllen and implementations must accept both as valid. On receiving a portable application must provide space for padding at the end of the last ancillary data as implementations may copy out the padding at the end of the control message buffer and include it in the received msg_controllen. When recvmsg() is called if msg_controllen is too small for all the ancillary data items including any trailing padding after the last item an implementation may set MSG_CTRUNC. Since we didn't place MSG_CTRUNC for already quite a long time, just do the same as in 1ac70e7ad24a to avoid an overflow. Btw, even man-page author got this wrong :/ See db939c9b26e9 ("cmsg.3: Fix error in SCM_RIGHTS code sample"). Some people must have copied this (?), thus it got triggered in the wild (reported several times during boot by David and HacKurx). No Fixes tag this time as pre 2002 (that is, pre history tree). Reported-by: David Sterba Reported-by: HacKurx Cc: PaX Team Cc: Emese Revfy Cc: Brad Spengler Cc: Wei Yongjun Cc: Eric Dumazet Reviewed-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/scm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/scm.c b/net/core/scm.c index 3b6899b7d810d..8a1741b14302b 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -305,6 +305,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm) err = put_user(cmlen, &cm->cmsg_len); if (!err) { cmlen = CMSG_SPACE(i*sizeof(int)); + if (msg->msg_controllen < cmlen) + cmlen = msg->msg_controllen; msg->msg_control += cmlen; msg->msg_controllen -= cmlen; } From 47f706657d85e1fbfb2e5ed8c7b90ff880bd4d6e Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Nov 2015 13:54:19 +0100 Subject: [PATCH 161/201] net: ipmr: fix static mfc/dev leaks on table destruction [ Upstream commit 0e615e9601a15efeeb8942cf7cd4dadba0c8c5a7 ] When destroying an mrt table the static mfc entries and the static devices are kept, which leads to devices that can never be destroyed (because of refcnt taken) and leaked memory, for example: unreferenced object 0xffff880034c144c0 (size 192): comm "mfc-broken", pid 4777, jiffies 4320349055 (age 46001.964s) hex dump (first 32 bytes): 98 53 f0 34 00 88 ff ff 98 53 f0 34 00 88 ff ff .S.4.....S.4.... ef 0a 0a 14 01 02 03 04 00 00 00 00 01 00 00 00 ................ backtrace: [] kmemleak_alloc+0x4e/0xb0 [] kmem_cache_alloc+0x190/0x300 [] ip_mroute_setsockopt+0x5cb/0x910 [] do_ip_setsockopt.isra.11+0x105/0xff0 [] ip_setsockopt+0x30/0xa0 [] raw_setsockopt+0x33/0x90 [] sock_common_setsockopt+0x14/0x20 [] SyS_setsockopt+0x71/0xc0 [] entry_SYSCALL_64_fastpath+0x16/0x7a [] 0xffffffffffffffff Make sure that everything is cleaned on netns destruction. Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/ipmr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8e8203d5c520a..ef7e2c4342cbb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -134,7 +134,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); -static void mroute_clean_tables(struct mr_table *mrt); +static void mroute_clean_tables(struct mr_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -350,7 +350,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1208,7 +1208,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table *mrt) +static void mroute_clean_tables(struct mr_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1217,8 +1217,9 @@ static void mroute_clean_tables(struct mr_table *mrt) /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif_table[i].flags & VIFF_STATIC)) - vif_delete(mrt, i, 0, &list); + if (!all && (mrt->vif_table[i].flags & VIFF_STATIC)) + continue; + vif_delete(mrt, i, 0, &list); } unregister_netdevice_many(&list); @@ -1226,7 +1227,7 @@ static void mroute_clean_tables(struct mr_table *mrt) for (i = 0; i < MFC_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; list_del_rcu(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); @@ -1261,7 +1262,7 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); } } rtnl_unlock(); From 4eb5e5c531d96b2397ad2b02fc5a496307b2e143 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Fri, 20 Nov 2015 13:54:20 +0100 Subject: [PATCH 162/201] net: ip6mr: fix static mfc/dev leaks on table destruction [ Upstream commit 4c6980462f32b4f282c5d8e5f7ea8070e2937725 ] Similar to ipv4, when destroying an mrt table the static mfc entries and the static devices are kept, which leads to devices that can never be destroyed (because of refcnt taken) and leaked memory. Make sure that everything is cleaned up on netns destruction. Fixes: 8229efdaef1e ("netns: ip6mr: enable namespace support in ipv6 multicast forwarding code") CC: Benjamin Thery Signed-off-by: Nikolay Aleksandrov Reviewed-by: Cong Wang Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv6/ip6mr.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0e004cc42a22b..35eee72ab4af9 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -118,7 +118,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, int cmd); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); -static void mroute_clean_tables(struct mr6_table *mrt); +static void mroute_clean_tables(struct mr6_table *mrt, bool all); static void ipmr_expire_process(unsigned long arg); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES @@ -334,7 +334,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) static void ip6mr_free_table(struct mr6_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, true); kfree(mrt); } @@ -1542,7 +1542,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr6_table *mrt) +static void mroute_clean_tables(struct mr6_table *mrt, bool all) { int i; LIST_HEAD(list); @@ -1552,8 +1552,9 @@ static void mroute_clean_tables(struct mr6_table *mrt) * Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { - if (!(mrt->vif6_table[i].flags & VIFF_STATIC)) - mif6_delete(mrt, i, &list); + if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC)) + continue; + mif6_delete(mrt, i, &list); } unregister_netdevice_many(&list); @@ -1562,7 +1563,7 @@ static void mroute_clean_tables(struct mr6_table *mrt) */ for (i = 0; i < MFC6_LINES; i++) { list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) { - if (c->mfc_flags & MFC_STATIC) + if (!all && (c->mfc_flags & MFC_STATIC)) continue; write_lock_bh(&mrt_lock); list_del(&c->list); @@ -1625,7 +1626,7 @@ int ip6mr_sk_done(struct sock *sk) net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); - mroute_clean_tables(mrt); + mroute_clean_tables(mrt, false); err = 0; break; } From b3abad339f8e268bb261e5844ab68b18a7797c29 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Sat, 21 Nov 2015 19:46:19 +0100 Subject: [PATCH 163/201] vrf: fix double free and memory corruption on register_netdevice failure [ Upstream commit 7f109f7cc37108cba7243bc832988525b0d85909 ] When vrf's ->newlink is called, if register_netdevice() fails then it does free_netdev(), but that's also done by rtnl_newlink() so a second free happens and memory gets corrupted, to reproduce execute the following line a couple of times (1 - 5 usually is enough): $ for i in `seq 1 5`; do ip link add vrf: type vrf table 1; done; This works because we fail in register_netdevice() because of the wrong name "vrf:". And here's a trace of one crash: [ 28.792157] ------------[ cut here ]------------ [ 28.792407] kernel BUG at fs/namei.c:246! [ 28.792608] invalid opcode: 0000 [#1] SMP [ 28.793240] Modules linked in: vrf nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace sunrpc crct10dif_pclmul crc32_pclmul crc32c_intel qxl drm_kms_helper ttm drm aesni_intel aes_x86_64 psmouse glue_helper lrw evdev gf128mul i2c_piix4 ablk_helper cryptd ppdev parport_pc parport serio_raw pcspkr virtio_balloon virtio_console i2c_core acpi_cpufreq button 9pnet_virtio 9p 9pnet fscache ipv6 autofs4 ext4 crc16 mbcache jbd2 virtio_blk virtio_net sg sr_mod cdrom ata_generic ehci_pci uhci_hcd ehci_hcd e1000 usbcore usb_common ata_piix libata virtio_pci virtio_ring virtio scsi_mod floppy [ 28.796016] CPU: 0 PID: 1148 Comm: ld-linux-x86-64 Not tainted 4.4.0-rc1+ #24 [ 28.796016] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.8.1-20150318_183358- 04/01/2014 [ 28.796016] task: ffff8800352561c0 ti: ffff88003592c000 task.ti: ffff88003592c000 [ 28.796016] RIP: 0010:[] [] putname+0x43/0x60 [ 28.796016] RSP: 0018:ffff88003592fe88 EFLAGS: 00010246 [ 28.796016] RAX: 0000000000000000 RBX: ffff8800352561c0 RCX: 0000000000000001 [ 28.796016] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88003784f000 [ 28.796016] RBP: ffff88003592ff08 R08: 0000000000000001 R09: 0000000000000000 [ 28.796016] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 [ 28.796016] R13: 000000000000047c R14: ffff88003784f000 R15: ffff8800358c4a00 [ 28.796016] FS: 0000000000000000(0000) GS:ffff88003fc00000(0000) knlGS:0000000000000000 [ 28.796016] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 28.796016] CR2: 00007ffd583bc2d9 CR3: 0000000035a99000 CR4: 00000000000406f0 [ 28.796016] Stack: [ 28.796016] ffffffff8121045d ffffffff812102d3 ffff8800352561c0 ffff880035a91660 [ 28.796016] ffff8800008a9880 0000000000000000 ffffffff81a49940 00ffffff81218684 [ 28.796016] ffff8800352561c0 000000000000047c 0000000000000000 ffff880035b36d80 [ 28.796016] Call Trace: [ 28.796016] [] ? do_execveat_common.isra.34+0x74d/0x930 [ 28.796016] [] ? do_execveat_common.isra.34+0x5c3/0x930 [ 28.796016] [] do_execve+0x2c/0x30 [ 28.796016] [] call_usermodehelper_exec_async+0xf0/0x140 [ 28.796016] [] ? umh_complete+0x40/0x40 [ 28.796016] [] ret_from_fork+0x3f/0x70 [ 28.796016] Code: 48 8d 47 1c 48 89 e5 53 48 8b 37 48 89 fb 48 39 c6 74 1a 48 8b 3d 7e e9 8f 00 e8 49 fa fc ff 48 89 df e8 f1 01 fd ff 5b 5d f3 c3 <0f> 0b 48 89 fe 48 8b 3d 61 e9 8f 00 e8 2c fa fc ff 5b 5d eb e9 [ 28.796016] RIP [] putname+0x43/0x60 [ 28.796016] RSP Fixes: 193125dbd8eb ("net: Introduce VRF device driver") Signed-off-by: Nikolay Aleksandrov Acked-by: David Ahern Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/vrf.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 488c6f50df736..c9e309cd9d82d 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -581,7 +581,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, { struct net_vrf *vrf = netdev_priv(dev); struct net_vrf_dev *vrf_ptr; - int err; if (!data || !data[IFLA_VRF_TABLE]) return -EINVAL; @@ -590,26 +589,16 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev, dev->priv_flags |= IFF_VRF_MASTER; - err = -ENOMEM; vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL); if (!vrf_ptr) - goto out_fail; + return -ENOMEM; vrf_ptr->ifindex = dev->ifindex; vrf_ptr->tb_id = vrf->tb_id; - err = register_netdevice(dev); - if (err < 0) - goto out_fail; - rcu_assign_pointer(dev->vrf_ptr, vrf_ptr); - return 0; - -out_fail: - kfree(vrf_ptr); - free_netdev(dev); - return err; + return register_netdev(dev); } static size_t vrf_nl_getsize(const struct net_device *dev) From 309ef6d135b8d7baeb06afdea48e611bb16c9ee0 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 22 Nov 2015 01:08:54 +0200 Subject: [PATCH 164/201] broadcom: fix PHY_ID_BCM5481 entry in the id table [ Upstream commit 3c25a860d17b7378822f35d8c9141db9507e3beb ] Commit fcb26ec5b18d ("broadcom: move all PHY_ID's to header") updated broadcom_tbl to use PHY_IDs, but incorrectly replaced 0x0143bca0 with PHY_ID_BCM5482 (making a duplicate entry, and completely omitting the original). Fix that. Fixes: fcb26ec5b18d ("broadcom: move all PHY_ID's to header") Signed-off-by: Aaro Koskinen Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- drivers/net/phy/broadcom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9c71295f2fefb..85e640440bd9b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -675,7 +675,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = { { PHY_ID_BCM5461, 0xfffffff0 }, { PHY_ID_BCM54616S, 0xfffffff0 }, { PHY_ID_BCM5464, 0xfffffff0 }, - { PHY_ID_BCM5482, 0xfffffff0 }, + { PHY_ID_BCM5481, 0xfffffff0 }, { PHY_ID_BCM5482, 0xfffffff0 }, { PHY_ID_BCM50610, 0xfffffff0 }, { PHY_ID_BCM50610M, 0xfffffff0 }, From 126bb499631ca8c61db5628bdaaa7294ae73d04f Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Tue, 24 Nov 2015 13:57:57 +0800 Subject: [PATCH 165/201] tipc: fix error handling of expanding buffer headroom [ Upstream commit 7098356baca723513e97ca0020df4e18bc353be3 ] Coverity says: *** CID 1338065: Error handling issues (CHECKED_RETURN) /net/tipc/udp_media.c: 162 in tipc_udp_send_msg() 156 struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; 157 struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; 158 struct sk_buff *clone; 159 struct rtable *rt; 160 161 if (skb_headroom(skb) < UDP_MIN_HEADROOM) >>> CID 1338065: Error handling issues (CHECKED_RETURN) >>> Calling "pskb_expand_head" without checking return value (as is done elsewhere 51 out of 56 +times). 162 pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); 163 164 clone = skb_clone(skb, GFP_ATOMIC); 165 skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); 166 ub = rcu_dereference_rtnl(b->media_ptr); 167 if (!ub) { When expanding buffer headroom over udp tunnel with pskb_expand_head(), it's unfortunate that we don't check its return value. As a result, if the function returns an error code due to the lack of memory, it may cause unpredictable consequence as we unconditionally consider that it's always successful. Fixes: e53567948f82 ("tipc: conditionally expand buffer headroom over udp tunnel") Reported-by: Cc: Stephen Hemminger Signed-off-by: Ying Xue Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/tipc/udp_media.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index cd7c5f131e722..86f2e7c44694a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -159,8 +159,11 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, struct sk_buff *clone; struct rtable *rt; - if (skb_headroom(skb) < UDP_MIN_HEADROOM) - pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto tx_error; + } clone = skb_clone(skb, GFP_ATOMIC); skb_set_inner_protocol(clone, htons(ETH_P_TIPC)); From edd6db9dee45cdfd7b76f23ea513f5bab816c0bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michal=20Kube=C4=8Dek?= Date: Tue, 24 Nov 2015 15:07:11 +0100 Subject: [PATCH 166/201] ipv6: distinguish frag queues by device for multicast and link-local packets [ Upstream commit 264640fc2c5f4f913db5c73fa3eb1ead2c45e9d7 ] If a fragmented multicast packet is received on an ethernet device which has an active macvlan on top of it, each fragment is duplicated and received both on the underlying device and the macvlan. If some fragments for macvlan are processed before the whole packet for the underlying device is reassembled, the "overlapping fragments" test in ip6_frag_queue() discards the whole fragment queue. To resolve this, add device ifindex to the search key and require it to match reassembling multicast packets and packets to link-local addresses. Note: similar patch has been already submitted by Yoshifuji Hideaki in http://patchwork.ozlabs.org/patch/220979/ but got lost and forgotten for some reason. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/ipv6.h | 1 + net/ipv6/netfilter/nf_conntrack_reasm.c | 5 +++-- net/ipv6/reassembly.c | 10 +++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 711cca428cc8c..1b6371745e451 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -490,6 +490,7 @@ struct ip6_create_arg { u32 user; const struct in6_addr *src; const struct in6_addr *dst; + int iif; u8 ecn; }; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c7196ad1d69f8..dc50143f50f2a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data) /* Creation primitives. */ static inline struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, struct in6_addr *src, - struct in6_addr *dst, u8 ecn) + struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id, arg.user = user; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; local_bh_disable(); @@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) fhdr = (struct frag_hdr *)skb_transport_header(clone); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index f1159bb76e0a5..04013a910ce5f 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a) return fq->id == arg->id && fq->user == arg->user && ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst); + ipv6_addr_equal(&fq->daddr, arg->dst) && + (arg->iif == fq->iif || + !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))); } EXPORT_SYMBOL(ip6_frag_match); @@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data) static struct frag_queue * fq_find(struct net *net, __be32 id, const struct in6_addr *src, - const struct in6_addr *dst, u8 ecn) + const struct in6_addr *dst, int iif, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src, arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.iif = iif; arg.ecn = ecn; hash = inet6_hash_frag(id, src, dst); @@ -551,7 +555,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) } fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_frag_ecn(hdr)); + skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr)); if (fq) { int ret; From 0eaa7b64f7c307249fc28f2a57ff20aa905910bb Mon Sep 17 00:00:00 2001 From: Quentin Casasnovas Date: Tue, 24 Nov 2015 17:13:21 -0500 Subject: [PATCH 167/201] RDS: fix race condition when sending a message on unbound socket [ Upstream commit 8c7188b23474cca017b3ef354c4a58456f68303a ] Sasha's found a NULL pointer dereference in the RDS connection code when sending a message to an apparently unbound socket. The problem is caused by the code checking if the socket is bound in rds_sendmsg(), which checks the rs_bound_addr field without taking a lock on the socket. This opens a race where rs_bound_addr is temporarily set but where the transport is not in rds_bind(), leading to a NULL pointer dereference when trying to dereference 'trans' in __rds_conn_create(). Vegard wrote a reproducer for this issue, so kindly ask him to share if you're interested. I cannot reproduce the NULL pointer dereference using Vegard's reproducer with this patch, whereas I could without. Complete earlier incomplete fix to CVE-2015-6937: 74e98eb08588 ("RDS: verify the underlying transport exists before creating a connection") Cc: David S. Miller Cc: stable@vger.kernel.org Reviewed-by: Vegard Nossum Reviewed-by: Sasha Levin Acked-by: Santosh Shilimkar Signed-off-by: Quentin Casasnovas Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/rds/connection.c | 6 ------ net/rds/send.c | 4 +++- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 49adeef8090ca..9b2de5e67d795 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -190,12 +190,6 @@ static struct rds_connection *__rds_conn_create(struct net *net, } } - if (trans == NULL) { - kmem_cache_free(rds_conn_slab, conn); - conn = ERR_PTR(-ENODEV); - goto out; - } - conn->c_trans = trans; ret = trans->conn_alloc(conn, gfp); diff --git a/net/rds/send.c b/net/rds/send.c index 4df61a515b83d..859de6f32521d 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -1009,11 +1009,13 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) release_sock(sk); } - /* racing with another thread binding seems ok here */ + lock_sock(sk); if (daddr == 0 || rs->rs_bound_addr == 0) { + release_sock(sk); ret = -ENOTCONN; /* XXX not a great errno */ goto out; } + release_sock(sk); if (payload_len > rds_sk_sndbuf(rs)) { ret = -EMSGSIZE; From 90d19ad685d03197418f5c8e970772192a032a87 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 30 Nov 2015 13:02:56 +0100 Subject: [PATCH 168/201] bpf, array: fix heap out-of-bounds access when updating elements [ Upstream commit fbca9d2d35c6ef1b323fae75cc9545005ba25097 ] During own review but also reported by Dmitry's syzkaller [1] it has been noticed that we trigger a heap out-of-bounds access on eBPF array maps when updating elements. This happens with each map whose map->value_size (specified during map creation time) is not multiple of 8 bytes. In array_map_alloc(), elem_size is round_up(attr->value_size, 8) and used to align array map slots for faster access. However, in function array_map_update_elem(), we update the element as ... memcpy(array->value + array->elem_size * index, value, array->elem_size); ... where we access 'value' out-of-bounds, since it was allocated from map_update_elem() from syscall side as kmalloc(map->value_size, GFP_USER) and later on copied through copy_from_user(value, uvalue, map->value_size). Thus, up to 7 bytes, we can access out-of-bounds. Same could happen from within an eBPF program, where in worst case we access beyond an eBPF program's designated stack. Since 1be7f75d1668 ("bpf: enable non-root eBPF programs") didn't hit an official release yet, it only affects priviledged users. In case of array_map_lookup_elem(), the verifier prevents eBPF programs from accessing beyond map->value_size through check_map_access(). Also from syscall side map_lookup_elem() only copies map->value_size back to user, so nothing could leak. [1] http://github.com/google/syzkaller Fixes: 28fbcfa08d8e ("bpf: add array type of eBPF maps") Reported-by: Dmitry Vyukov Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/arraymap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 29ace107f2365..7a0decf471100 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -104,7 +104,7 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value, /* all elements already exist */ return -EEXIST; - memcpy(array->value + array->elem_size * index, value, array->elem_size); + memcpy(array->value + array->elem_size * index, value, map->value_size); return 0; } From 645e3f33c73ad1153db0680b6833cf70d0d4dce3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Nov 2015 19:37:57 -0800 Subject: [PATCH 169/201] ipv6: add complete rcu protection around np->opt [ Upstream commit 45f6fad84cc305103b28d73482b344d7f5b76f39 ] This patch addresses multiple problems : UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions while socket is not locked : Other threads can change np->opt concurrently. Dmitry posted a syzkaller (http://github.com/google/syzkaller) program desmonstrating use-after-free. Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock() and dccp_v6_request_recv_sock() also need to use RCU protection to dereference np->opt once (before calling ipv6_dup_options()) This patch adds full RCU protection to np->opt Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/linux/ipv6.h | 2 +- include/net/ipv6.h | 21 +++++++++++++++++++- net/dccp/ipv6.c | 33 ++++++++++++++++++++------------ net/ipv6/af_inet6.c | 13 +++++++++---- net/ipv6/datagram.c | 4 +++- net/ipv6/exthdrs.c | 3 ++- net/ipv6/inet6_connection_sock.c | 11 ++++++++--- net/ipv6/ipv6_sockglue.c | 33 +++++++++++++++++++++----------- net/ipv6/raw.c | 8 ++++++-- net/ipv6/syncookies.c | 2 +- net/ipv6/tcp_ipv6.c | 28 ++++++++++++++++----------- net/ipv6/udp.c | 8 ++++++-- net/l2tp/l2tp_ip6.c | 8 ++++++-- 13 files changed, 122 insertions(+), 52 deletions(-) diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f1f32af6d9b96..3e4ff3f1d3140 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -227,7 +227,7 @@ struct ipv6_pinfo { struct ipv6_ac_socklist *ipv6_ac_list; struct ipv6_fl_socklist __rcu *ipv6_fl_list; - struct ipv6_txoptions *opt; + struct ipv6_txoptions __rcu *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; struct inet6_cork cork; diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1b6371745e451..b14e1581c4774 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -205,6 +205,7 @@ extern rwlock_t ip6_ra_lock; */ struct ipv6_txoptions { + atomic_t refcnt; /* Length of this structure */ int tot_len; @@ -217,7 +218,7 @@ struct ipv6_txoptions { struct ipv6_opt_hdr *dst0opt; struct ipv6_rt_hdr *srcrt; /* Routing Header */ struct ipv6_opt_hdr *dst1opt; - + struct rcu_head rcu; /* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */ }; @@ -252,6 +253,24 @@ struct ipv6_fl_socklist { struct rcu_head rcu; }; +static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) +{ + struct ipv6_txoptions *opt; + + rcu_read_lock(); + opt = rcu_dereference(np->opt); + if (opt && !atomic_inc_not_zero(&opt->refcnt)) + opt = NULL; + rcu_read_unlock(); + return opt; +} + +static inline void txopt_put(struct ipv6_txoptions *opt) +{ + if (opt && atomic_dec_and_test(&opt->refcnt)) + kfree_rcu(opt, rcu); +} + struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label); struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, struct ip6_flowlabel *fl, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5165571f397aa..a0490508d2135 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -202,7 +202,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -219,7 +221,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6.daddr = ireq->ir_v6_rmt_addr; - err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + rcu_read_lock(); + err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); + rcu_read_unlock(); err = net_xmit_eval(err); } @@ -415,6 +420,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *newnp, *np = inet6_sk(sk); + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct dccp6_sock *newdp6; struct sock *newsk; @@ -534,13 +540,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * Yes, keeping reference count would be much more clever, but we make * one more one thing there: reattach optmem to newsk. */ - if (np->opt != NULL) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt != NULL) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; dccp_sync_mss(newsk, dst_mtu(dst)); @@ -793,6 +801,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct dccp_sock *dp = dccp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -892,7 +901,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { @@ -912,9 +922,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; - if (np->opt != NULL) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; inet->inet_dport = usin->sin6_port; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 44bb66bde0e2d..38d66ddfb9375 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -428,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -659,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk) fl6.fl6_sport = inet->inet_sport; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - final_p = fl6_update_dst(&fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), + &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9aadd57808a51..a42a673aa5473 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -167,8 +167,10 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - opt = flowlabel ? flowlabel->opt : np->opt; + rcu_read_lock(); + opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt); final_p = fl6_update_dst(&fl6, opt, &final); + rcu_read_unlock(); dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index ce203b0402bea..ea7c4d64a00ad 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt) *((char **)&opt2->dst1opt) += dif; if (opt2->srcrt) *((char **)&opt2->srcrt) += dif; + atomic_set(&opt2->refcnt, 1); } return opt2; } @@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, return ERR_PTR(-ENOBUFS); memset(opt2, 0, tot_len); - + atomic_set(&opt2->refcnt, 1); opt2->tot_len = tot_len; p = (char *)(opt2 + 1); diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 6927f3fb5597f..9beed302eb36f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = IPPROTO_TCP; fl6->daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; @@ -207,7 +209,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->fl6_dport = inet->inet_dport; security_sk_classify_flow(sk, flowi6_to_flowi(fl6)); - final_p = fl6_update_dst(fl6, np->opt, &final); + rcu_read_lock(); + final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); + rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { @@ -240,7 +244,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; - res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt), + np->tclass); rcu_read_unlock(); return res; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 63e6956917c9c..4449ad1f81147 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -111,7 +111,8 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie); } } - opt = xchg(&inet6_sk(sk)->opt, opt); + opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt, + opt); sk_dst_reset(sk); return opt; @@ -231,9 +232,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg(&np->opt, NULL); - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + opt = xchg((__force struct ipv6_txoptions **)&np->opt, + NULL); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); @@ -403,7 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW)) break; - opt = ipv6_renew_options(sk, np->opt, optname, + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + opt = ipv6_renew_options(sk, opt, optname, (struct ipv6_opt_hdr __user *)optval, optlen); if (IS_ERR(opt)) { @@ -432,8 +437,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); sticky_done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } @@ -486,6 +493,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; memset(opt, 0, sizeof(*opt)); + atomic_set(&opt->refcnt, 1); opt->tot_len = sizeof(*opt) + optlen; retv = -EFAULT; if (copy_from_user(opt+1, optval, optlen)) @@ -502,8 +510,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; opt = ipv6_update_options(sk, opt); done: - if (opt) - sock_kfree_s(sk, opt, opt->tot_len); + if (opt) { + atomic_sub(opt->tot_len, &sk->sk_omem_alloc); + txopt_put(opt); + } break; } case IPV6_UNICAST_HOPS: @@ -1110,10 +1120,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_RTHDR: case IPV6_DSTOPTS: { + struct ipv6_txoptions *opt; lock_sock(sk); - len = ipv6_getsockopt_sticky(sk, np->opt, - optname, optval, len); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len); release_sock(sk); /* check if ipv6_getsockopt_sticky() returns err code */ if (len < 0) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index fdbada1569a37..fe977299551ee 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -732,6 +732,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; @@ -838,8 +839,10 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -905,6 +908,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; do_confirm: dst_confirm(dst); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 0909f4e0d53c2..f30bfdcdea541 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -225,7 +225,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = ireq->ir_v6_rmt_addr; - final_p = fl6_update_dst(&fl6, np->opt, &final); + final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = ireq->ir_mark; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 97d9314ea3611..9e9b77bd2d0a7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -120,6 +120,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; + struct ipv6_txoptions *opt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -235,7 +236,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; - final_p = fl6_update_dst(&fl6, np->opt, &final); + opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk)); + final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -263,9 +265,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; - if (np->opt) - icsk->icsk_ext_hdr_len = (np->opt->opt_flen + - np->opt->opt_nflen); + if (opt) + icsk->icsk_ext_hdr_len = opt->opt_flen + + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); @@ -461,7 +463,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); - err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); + err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt), + np->tclass); err = net_xmit_eval(err); } @@ -991,6 +994,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; + struct ipv6_txoptions *opt; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1126,13 +1130,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, but we make one more one thing there: reattach optmem to newsk. */ - if (np->opt) - newnp->opt = ipv6_dup_options(newsk, np->opt); - + opt = rcu_dereference(np->opt); + if (opt) { + opt = ipv6_dup_options(newsk, opt); + RCU_INIT_POINTER(newnp->opt, opt); + } inet_csk(newsk)->icsk_ext_hdr_len = 0; - if (newnp->opt) - inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + - newnp->opt->opt_flen); + if (opt) + inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 0aba654f5b91c..8379fc2f4b1d6 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1107,6 +1107,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; + struct ipv6_txoptions *opt_to_free = NULL; struct ip6_flowlabel *flowlabel = NULL; struct flowi6 fl6; struct dst_entry *dst; @@ -1260,8 +1261,10 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; connected = 0; } - if (!opt) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -1370,6 +1373,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) out: dst_release(dst); fl6_sock_release(flowlabel); + txopt_put(opt_to_free); if (!err) return len; /* diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index d1ded3777815e..0ce9da948ad7f 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -486,6 +486,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt_to_free = NULL; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = NULL; @@ -575,8 +576,10 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = NULL; } - if (opt == NULL) - opt = np->opt; + if (!opt) { + opt = txopt_get(np); + opt_to_free = opt; + } if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); @@ -631,6 +634,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) dst_release(dst); out: fl6_sock_release(flowlabel); + txopt_put(opt_to_free); return err < 0 ? err : len; From a9c680b65484ecb6d1d257f761b2f04e1d9e2d6e Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 1 Dec 2015 01:14:48 +0300 Subject: [PATCH 170/201] net/neighbour: fix crash at dumping device-agnostic proxy entries [ Upstream commit 6adc5fd6a142c6e2c80574c1db0c7c17dedaa42e ] Proxy entries could have null pointer to net-device. Signed-off-by: Konstantin Khlebnikov Fixes: 84920c1420e2 ("net: Allow ipv6 proxies and arp proxies be shown with iproute2") Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/core/neighbour.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2b515ba7e94f4..c169bba44e059 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2215,7 +2215,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, ndm->ndm_pad2 = 0; ndm->ndm_flags = pn->flags | NTF_PROXY; ndm->ndm_type = RTN_UNICAST; - ndm->ndm_ifindex = pn->dev->ifindex; + ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0; ndm->ndm_state = NUD_NONE; if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) @@ -2290,7 +2290,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, if (h > s_h) s_idx = 0; for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) { - if (dev_net(n->dev) != net) + if (pneigh_net(n) != net) continue; if (idx < s_idx) goto next; From 7a439c4a677039b91804b6779de18ddab6d6ebf9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 07:20:07 -0800 Subject: [PATCH 171/201] ipv6: sctp: implement sctp_v6_destroy_sock() [ Upstream commit 602dd62dfbda3e63a2d6a3cbde953ebe82bf5087 ] Dmitry Vyukov reported a memory leak using IPV6 SCTP sockets. We need to call inet6_destroy_sock() to properly release inet6 specific fields. Reported-by: Dmitry Vyukov Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/sctp/socket.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 17bef01b9aa3e..3ec88be0faec8 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -7375,6 +7375,13 @@ struct proto sctp_prot = { #if IS_ENABLED(CONFIG_IPV6) +#include +static void sctp_v6_destroy_sock(struct sock *sk) +{ + sctp_destroy_sock(sk); + inet6_destroy_sock(sk); +} + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -7384,7 +7391,7 @@ struct proto sctpv6_prot = { .accept = sctp_accept, .ioctl = sctp_ioctl, .init = sctp_init_sock, - .destroy = sctp_destroy_sock, + .destroy = sctp_v6_destroy_sock, .shutdown = sctp_shutdown, .setsockopt = sctp_setsockopt, .getsockopt = sctp_getsockopt, From bae56d7e3f815b339421306fc3b2e00107b56987 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 1 Dec 2015 18:33:36 +0100 Subject: [PATCH 172/201] openvswitch: fix hangup on vxlan/gre/geneve device deletion [ Upstream commit 13175303024c8f4cd09e51079a8fcbbe572111ec ] Each openvswitch tunnel vport (vxlan,gre,geneve) holds a reference to the underlying tunnel device, but never released it when such device is deleted. Deleting the underlying device via the ip tool cause the kernel to hangup in the netdev_wait_allrefs() loop. This commit ensure that on device unregistration dp_detach_port_notify() is called for all vports that hold the device reference, properly releasing it. Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device") Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport") Fixes: 6b001e682e90 ("openvswitch: Use Geneve device.") Signed-off-by: Paolo Abeni Acked-by: Flavio Leitner Acked-by: Pravin B Shelar Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/openvswitch/dp_notify.c | 2 +- net/openvswitch/vport-netdev.c | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index a7a80a6b77b0a..653d073bae453 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -58,7 +58,7 @@ void ovs_dp_notify_wq(struct work_struct *work) struct hlist_node *n; hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node) { - if (vport->ops->type != OVS_VPORT_TYPE_NETDEV) + if (vport->ops->type == OVS_VPORT_TYPE_INTERNAL) continue; if (!(vport->dev->priv_flags & IFF_OVS_DATAPATH)) diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index f7e8dcce7adae..ac14c488669c5 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -180,9 +180,13 @@ void ovs_netdev_tunnel_destroy(struct vport *vport) if (vport->dev->priv_flags & IFF_OVS_DATAPATH) ovs_netdev_detach_dev(vport); - /* Early release so we can unregister the device */ + /* We can be invoked by both explicit vport deletion and + * underlying netdev deregistration; delete the link only + * if it's not already shutting down. + */ + if (vport->dev->reg_state == NETREG_REGISTERED) + rtnl_delete_link(vport->dev); dev_put(vport->dev); - rtnl_delete_link(vport->dev); vport->dev = NULL; rtnl_unlock(); From 36204ef3c3a43e1dc015d2401068e4bfb45e1e09 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 1 Dec 2015 20:08:51 -0800 Subject: [PATCH 173/201] net_sched: fix qdisc_tree_decrease_qlen() races [ Upstream commit 4eaf3b84f2881c9c028f1d5e76c52ab575fe3a66 ] qdisc_tree_decrease_qlen() suffers from two problems on multiqueue devices. One problem is that it updates sch->q.qlen and sch->qstats.drops on the mq/mqprio root qdisc, while it should not : Daniele reported underflows errors : [ 681.774821] PAX: sch->q.qlen: 0 n: 1 [ 681.774825] PAX: size overflow detected in function qdisc_tree_decrease_qlen net/sched/sch_api.c:769 cicus.693_49 min, count: 72, decl: qlen; num: 0; context: sk_buff_head; [ 681.774954] CPU: 2 PID: 19 Comm: ksoftirqd/2 Tainted: G O 4.2.6.201511282239-1-grsec #1 [ 681.774955] Hardware name: ASUSTeK COMPUTER INC. X302LJ/X302LJ, BIOS X302LJ.202 03/05/2015 [ 681.774956] ffffffffa9a04863 0000000000000000 0000000000000000 ffffffffa990ff7c [ 681.774959] ffffc90000d3bc38 ffffffffa95d2810 0000000000000007 ffffffffa991002b [ 681.774960] ffffc90000d3bc68 ffffffffa91a44f4 0000000000000001 0000000000000001 [ 681.774962] Call Trace: [ 681.774967] [] dump_stack+0x4c/0x7f [ 681.774970] [] report_size_overflow+0x34/0x50 [ 681.774972] [] qdisc_tree_decrease_qlen+0x152/0x160 [ 681.774976] [] fq_codel_dequeue+0x7b1/0x820 [sch_fq_codel] [ 681.774978] [] ? qdisc_peek_dequeued+0xa0/0xa0 [sch_fq_codel] [ 681.774980] [] __qdisc_run+0x4d/0x1d0 [ 681.774983] [] net_tx_action+0xc2/0x160 [ 681.774985] [] __do_softirq+0xf1/0x200 [ 681.774987] [] run_ksoftirqd+0x1e/0x30 [ 681.774989] [] smpboot_thread_fn+0x150/0x260 [ 681.774991] [] ? sort_range+0x40/0x40 [ 681.774992] [] kthread+0xe4/0x100 [ 681.774994] [] ? kthread_worker_fn+0x170/0x170 [ 681.774995] [] ret_from_fork+0x3e/0x70 mq/mqprio have their own ways to report qlen/drops by folding stats on all their queues, with appropriate locking. A second problem is that qdisc_tree_decrease_qlen() calls qdisc_lookup() without proper locking : concurrent qdisc updates could corrupt the list that qdisc_match_from_root() parses to find a qdisc given its handle. Fix first problem adding a TCQ_F_NOPARENT qdisc flag that qdisc_tree_decrease_qlen() can use to abort its tree traversal, as soon as it meets a mq/mqprio qdisc children. Second problem can be fixed by RCU protection. Qdisc are already freed after RCU grace period, so qdisc_list_add() and qdisc_list_del() simply have to use appropriate rcu list variants. A future patch will add a per struct netdev_queue list anchor, so that qdisc_tree_decrease_qlen() can have more efficient lookups. Reported-by: Daniele Fucini Signed-off-by: Eric Dumazet Cc: Cong Wang Cc: Jamal Hadi Salim Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- include/net/sch_generic.h | 3 +++ net/sched/sch_api.c | 27 ++++++++++++++++++--------- net/sched/sch_generic.c | 2 +- net/sched/sch_mq.c | 4 ++-- net/sched/sch_mqprio.c | 4 ++-- 5 files changed, 26 insertions(+), 14 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 444faa89a55fd..f1ad8f8fd4f19 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -61,6 +61,9 @@ struct Qdisc { */ #define TCQ_F_WARN_NONWC (1 << 16) #define TCQ_F_CPUSTATS 0x20 /* run using percpu statistics */ +#define TCQ_F_NOPARENT 0x40 /* root of its hierarchy : + * qdisc_tree_decrease_qlen() should stop. + */ u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index f43c8f33f09ef..7ec667dd4ce1d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -253,7 +253,8 @@ int qdisc_set_default(const char *name) } /* We know handle. Find qdisc among all qdisc's attached to device - (root qdisc, all its children, children of children etc.) + * (root qdisc, all its children, children of children etc.) + * Note: caller either uses rtnl or rcu_read_lock() */ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) @@ -264,7 +265,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) root->handle == handle) return root; - list_for_each_entry(q, &root->list, list) { + list_for_each_entry_rcu(q, &root->list, list) { if (q->handle == handle) return q; } @@ -277,15 +278,18 @@ void qdisc_list_add(struct Qdisc *q) struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); - list_add_tail(&q->list, &root->list); + ASSERT_RTNL(); + list_add_tail_rcu(&q->list, &root->list); } } EXPORT_SYMBOL(qdisc_list_add); void qdisc_list_del(struct Qdisc *q) { - if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_del(&q->list); + if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { + ASSERT_RTNL(); + list_del_rcu(&q->list); + } } EXPORT_SYMBOL(qdisc_list_del); @@ -750,14 +754,18 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) if (n == 0) return; drops = max_t(int, n, 0); + rcu_read_lock(); while ((parentid = sch->parent)) { if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS)) - return; + break; + if (sch->flags & TCQ_F_NOPARENT) + break; + /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { - WARN_ON(parentid != TC_H_ROOT); - return; + WARN_ON_ONCE(parentid != TC_H_ROOT); + break; } cops = sch->ops->cl_ops; if (cops->qlen_notify) { @@ -768,6 +776,7 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) sch->q.qlen -= n; __qdisc_qstats_drop(sch, drops); } + rcu_read_unlock(); } EXPORT_SYMBOL(qdisc_tree_decrease_qlen); @@ -941,7 +950,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, } lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock); if (!netif_is_multiqueue(dev)) - sch->flags |= TCQ_F_ONETXQUEUE; + sch->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->handle = handle; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index cb5d4ad32946c..e82a1ad80aa52 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -737,7 +737,7 @@ static void attach_one_default_qdisc(struct net_device *dev, return; } if (!netif_is_multiqueue(dev)) - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index f3cbaecd283af..3e82f047caaf4 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -63,7 +63,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) if (qdisc == NULL) goto err; priv->qdiscs[ntx] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } sch->flags |= TCQ_F_MQROOT; @@ -156,7 +156,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); return 0; diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 3811a745452cf..ad70ecf57ce79 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -132,7 +132,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) goto err; } priv->qdiscs[i] = qdisc; - qdisc->flags |= TCQ_F_ONETXQUEUE; + qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; } /* If the mqprio options indicate that hardware should own @@ -209,7 +209,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, *old = dev_graft_qdisc(dev_queue, new); if (new) - new->flags |= TCQ_F_ONETXQUEUE; + new->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; if (dev->flags & IFF_UP) dev_activate(dev); From b855aa43eb1c47d6e10fd90df2f0fb8ab6423c12 Mon Sep 17 00:00:00 2001 From: Robin Ruede Date: Wed, 30 Sep 2015 21:23:33 +0200 Subject: [PATCH 174/201] btrfs: fix resending received snapshot with parent commit b96b1db039ebc584d03a9933b279e0d3e704c528 upstream. This fixes a regression introduced by 37b8d27d between v4.1 and v4.2. When a snapshot is received, its received_uuid is set to the original uuid of the subvolume. When that snapshot is then resent to a third filesystem, it's received_uuid is set to the second uuid instead of the original one. The same was true for the parent_uuid. This behaviour was partially changed in 37b8d27d, but in that patch only the parent_uuid was taken from the real original, not the uuid itself, causing the search for the parent to fail in the case below. This happens for example when trying to send a series of linked snapshots (e.g. created by snapper) from the backup file system back to the original one. The following commands reproduce the issue in v4.2.1 (no error in 4.1.6) # setup three test file systems for i in 1 2 3; do truncate -s 50M fs$i mkfs.btrfs fs$i mkdir $i mount fs$i $i done echo "content" > 1/testfile btrfs su snapshot -r 1/ 1/snap1 echo "changed content" > 1/testfile btrfs su snapshot -r 1/ 1/snap2 # works fine: btrfs send 1/snap1 | btrfs receive 2/ btrfs send -p 1/snap1 1/snap2 | btrfs receive 2/ # ERROR: could not find parent subvolume btrfs send 2/snap1 | btrfs receive 3/ btrfs send -p 2/snap1 2/snap2 | btrfs receive 3/ Signed-off-by: Robin Ruede Fixes: 37b8d27de5d0 ("Btrfs: use received_uuid of parent during send") Reviewed-by: Filipe Manana Tested-by: Ed Tomlinson Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/send.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index a739b825bdd36..23bb2e4b911b3 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2353,8 +2353,14 @@ static int send_subvol_begin(struct send_ctx *sctx) } TLV_PUT_STRING(sctx, BTRFS_SEND_A_PATH, name, namelen); - TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, - sctx->send_root->root_item.uuid); + + if (!btrfs_is_empty_uuid(sctx->send_root->root_item.received_uuid)) + TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, + sctx->send_root->root_item.received_uuid); + else + TLV_PUT_UUID(sctx, BTRFS_SEND_A_UUID, + sctx->send_root->root_item.uuid); + TLV_PUT_U64(sctx, BTRFS_SEND_A_CTRANSID, le64_to_cpu(sctx->send_root->root_item.ctransid)); if (parent_root) { From 8b15aa67a2a49cf332363186f1c8249bdf2059ca Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 13 Oct 2015 15:15:00 +0100 Subject: [PATCH 175/201] Btrfs: fix file corruption and data loss after cloning inline extents commit 8039d87d9e473aeb740d4fdbd59b9d2f89b2ced9 upstream. Currently the clone ioctl allows to clone an inline extent from one file to another that already has other (non-inlined) extents. This is a problem because btrfs is not designed to deal with files having inline and regular extents, if a file has an inline extent then it must be the only extent in the file and must start at file offset 0. Having a file with an inline extent followed by regular extents results in EIO errors when doing reads or writes against the first 4K of the file. Also, the clone ioctl allows one to lose data if the source file consists of a single inline extent, with a size of N bytes, and the destination file consists of a single inline extent with a size of M bytes, where we have M > N. In this case the clone operation removes the inline extent from the destination file and then copies the inline extent from the source file into the destination file - we lose the M - N bytes from the destination file, a read operation will get the value 0x00 for any bytes in the the range [N, M] (the destination inode's i_size remained as M, that's why we can read past N bytes). So fix this by not allowing such destructive operations to happen and return errno EOPNOTSUPP to user space. Currently the fstest btrfs/035 tests the data loss case but it totally ignores this - i.e. expects the operation to succeed and does not check the we got data loss. The following test case for fstests exercises all these cases that result in file corruption and data loss: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner _require_btrfs_fs_feature "no_holes" _require_btrfs_mkfs_feature "no-holes" rm -f $seqres.full test_cloning_inline_extents() { local mkfs_opts=$1 local mount_opts=$2 _scratch_mkfs $mkfs_opts >>$seqres.full 2>&1 _scratch_mount $mount_opts # File bar, the source for all the following clone operations, consists # of a single inline extent (50 bytes). $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 50" $SCRATCH_MNT/bar \ | _filter_xfs_io # Test cloning into a file with an extent (non-inlined) where the # destination offset overlaps that extent. It should not be possible to # clone the inline extent from file bar into this file. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0K 16K" $SCRATCH_MNT/foo \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo # Doing IO against any range in the first 4K of the file should work. # Due to a past clone ioctl bug which allowed cloning the inline extent, # these operations resulted in EIO errors. echo "File foo data after clone operation:" # All bytes should have the value 0xaa (clone operation failed and did # not modify our file). od -t x1 $SCRATCH_MNT/foo $XFS_IO_PROG -c "pwrite -S 0xcc 0 100" $SCRATCH_MNT/foo | _filter_xfs_io # Test cloning the inline extent against a file which has a hole in its # first 4K followed by a non-inlined extent. It should not be possible # as well to clone the inline extent from file bar into this file. $XFS_IO_PROG -f -c "pwrite -S 0xdd 4K 12K" $SCRATCH_MNT/foo2 \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo2 # Doing IO against any range in the first 4K of the file should work. # Due to a past clone ioctl bug which allowed cloning the inline extent, # these operations resulted in EIO errors. echo "File foo2 data after clone operation:" # All bytes should have the value 0x00 (clone operation failed and did # not modify our file). od -t x1 $SCRATCH_MNT/foo2 $XFS_IO_PROG -c "pwrite -S 0xee 0 90" $SCRATCH_MNT/foo2 | _filter_xfs_io # Test cloning the inline extent against a file which has a size of zero # but has a prealloc extent. It should not be possible as well to clone # the inline extent from file bar into this file. $XFS_IO_PROG -f -c "falloc -k 0 1M" $SCRATCH_MNT/foo3 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo3 # Doing IO against any range in the first 4K of the file should work. # Due to a past clone ioctl bug which allowed cloning the inline extent, # these operations resulted in EIO errors. echo "First 50 bytes of foo3 after clone operation:" # Should not be able to read any bytes, file has 0 bytes i_size (the # clone operation failed and did not modify our file). od -t x1 $SCRATCH_MNT/foo3 $XFS_IO_PROG -c "pwrite -S 0xff 0 90" $SCRATCH_MNT/foo3 | _filter_xfs_io # Test cloning the inline extent against a file which consists of a # single inline extent that has a size not greater than the size of # bar's inline extent (40 < 50). # It should be possible to do the extent cloning from bar to this file. $XFS_IO_PROG -f -c "pwrite -S 0x01 0 40" $SCRATCH_MNT/foo4 \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo4 # Doing IO against any range in the first 4K of the file should work. echo "File foo4 data after clone operation:" # Must match file bar's content. od -t x1 $SCRATCH_MNT/foo4 $XFS_IO_PROG -c "pwrite -S 0x02 0 90" $SCRATCH_MNT/foo4 | _filter_xfs_io # Test cloning the inline extent against a file which consists of a # single inline extent that has a size greater than the size of bar's # inline extent (60 > 50). # It should not be possible to clone the inline extent from file bar # into this file. $XFS_IO_PROG -f -c "pwrite -S 0x03 0 60" $SCRATCH_MNT/foo5 \ | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo5 # Reading the file should not fail. echo "File foo5 data after clone operation:" # Must have a size of 60 bytes, with all bytes having a value of 0x03 # (the clone operation failed and did not modify our file). od -t x1 $SCRATCH_MNT/foo5 # Test cloning the inline extent against a file which has no extents but # has a size greater than bar's inline extent (16K > 50). # It should not be possible to clone the inline extent from file bar # into this file. $XFS_IO_PROG -f -c "truncate 16K" $SCRATCH_MNT/foo6 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo6 # Reading the file should not fail. echo "File foo6 data after clone operation:" # Must have a size of 16K, with all bytes having a value of 0x00 (the # clone operation failed and did not modify our file). od -t x1 $SCRATCH_MNT/foo6 # Test cloning the inline extent against a file which has no extents but # has a size not greater than bar's inline extent (30 < 50). # It should be possible to clone the inline extent from file bar into # this file. $XFS_IO_PROG -f -c "truncate 30" $SCRATCH_MNT/foo7 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo7 # Reading the file should not fail. echo "File foo7 data after clone operation:" # Must have a size of 50 bytes, with all bytes having a value of 0xbb. od -t x1 $SCRATCH_MNT/foo7 # Test cloning the inline extent against a file which has a size not # greater than the size of bar's inline extent (20 < 50) but has # a prealloc extent that goes beyond the file's size. It should not be # possible to clone the inline extent from bar into this file. $XFS_IO_PROG -f -c "falloc -k 0 1M" \ -c "pwrite -S 0x88 0 20" \ $SCRATCH_MNT/foo8 | _filter_xfs_io $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/bar $SCRATCH_MNT/foo8 echo "File foo8 data after clone operation:" # Must have a size of 20 bytes, with all bytes having a value of 0x88 # (the clone operation did not modify our file). od -t x1 $SCRATCH_MNT/foo8 _scratch_unmount } echo -e "\nTesting without compression and without the no-holes feature...\n" test_cloning_inline_extents echo -e "\nTesting with compression and without the no-holes feature...\n" test_cloning_inline_extents "" "-o compress" echo -e "\nTesting without compression and with the no-holes feature...\n" test_cloning_inline_extents "-O no-holes" "" echo -e "\nTesting with compression and with the no-holes feature...\n" test_cloning_inline_extents "-O no-holes" "-o compress" status=0 exit Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/ioctl.c | 195 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 152 insertions(+), 43 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8d20f3b1cab0a..30dca32804eeb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3328,6 +3328,150 @@ static void clone_update_extent_map(struct inode *inode, &BTRFS_I(inode)->runtime_flags); } +/* + * Make sure we do not end up inserting an inline extent into a file that has + * already other (non-inline) extents. If a file has an inline extent it can + * not have any other extents and the (single) inline extent must start at the + * file offset 0. Failing to respect these rules will lead to file corruption, + * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc + * + * We can have extents that have been already written to disk or we can have + * dirty ranges still in delalloc, in which case the extent maps and items are + * created only when we run delalloc, and the delalloc ranges might fall outside + * the range we are currently locking in the inode's io tree. So we check the + * inode's i_size because of that (i_size updates are done while holding the + * i_mutex, which we are holding here). + * We also check to see if the inode has a size not greater than "datal" but has + * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are + * protected against such concurrent fallocate calls by the i_mutex). + * + * If the file has no extents but a size greater than datal, do not allow the + * copy because we would need turn the inline extent into a non-inline one (even + * with NO_HOLES enabled). If we find our destination inode only has one inline + * extent, just overwrite it with the source inline extent if its size is less + * than the source extent's size, or we could copy the source inline extent's + * data into the destination inode's inline extent if the later is greater then + * the former. + */ +static int clone_copy_inline_extent(struct inode *src, + struct inode *dst, + struct btrfs_trans_handle *trans, + struct btrfs_path *path, + struct btrfs_key *new_key, + const u64 drop_start, + const u64 datal, + const u64 skip, + const u64 size, + char *inline_data) +{ + struct btrfs_root *root = BTRFS_I(dst)->root; + const u64 aligned_end = ALIGN(new_key->offset + datal, + root->sectorsize); + int ret; + struct btrfs_key key; + + if (new_key->offset > 0) + return -EOPNOTSUPP; + + key.objectid = btrfs_ino(dst); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) { + return ret; + } else if (ret > 0) { + if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + goto copy_inline_extent; + } + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) { + ASSERT(key.offset > 0); + return -EOPNOTSUPP; + } + } else if (i_size_read(dst) <= datal) { + struct btrfs_file_extent_item *ei; + u64 ext_len; + + /* + * If the file size is <= datal, make sure there are no other + * extents following (can happen do to an fallocate call with + * the flag FALLOC_FL_KEEP_SIZE). + */ + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + /* + * If it's an inline extent, it can not have other extents + * following it. + */ + if (btrfs_file_extent_type(path->nodes[0], ei) == + BTRFS_FILE_EXTENT_INLINE) + goto copy_inline_extent; + + ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); + if (ext_len > aligned_end) + return -EOPNOTSUPP; + + ret = btrfs_next_item(root, path); + if (ret < 0) { + return ret; + } else if (ret == 0) { + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == btrfs_ino(dst) && + key.type == BTRFS_EXTENT_DATA_KEY) + return -EOPNOTSUPP; + } + } + +copy_inline_extent: + /* + * We have no extent items, or we have an extent at offset 0 which may + * or may not be inlined. All these cases are dealt the same way. + */ + if (i_size_read(dst) > datal) { + /* + * If the destination inode has an inline extent... + * This would require copying the data from the source inline + * extent into the beginning of the destination's inline extent. + * But this is really complex, both extents can be compressed + * or just one of them, which would require decompressing and + * re-compressing data (which could increase the new compressed + * size, not allowing the compressed data to fit anymore in an + * inline extent). + * So just don't support this case for now (it should be rare, + * we are not really saving space when cloning inline extents). + */ + return -EOPNOTSUPP; + } + + btrfs_release_path(path); + ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); + if (ret) + return ret; + ret = btrfs_insert_empty_item(trans, root, path, new_key, size); + if (ret) + return ret; + + if (skip) { + const u32 start = btrfs_file_extent_calc_inline_size(0); + + memmove(inline_data + start, inline_data + start + skip, datal); + } + + write_extent_buffer(path->nodes[0], inline_data, + btrfs_item_ptr_offset(path->nodes[0], + path->slots[0]), + size); + inode_add_bytes(dst, datal); + + return 0; +} + /** * btrfs_clone() - clone a range from inode file to another * @@ -3594,21 +3738,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, } else if (type == BTRFS_FILE_EXTENT_INLINE) { u64 skip = 0; u64 trim = 0; - u64 aligned_end = 0; - - /* - * Don't copy an inline extent into an offset - * greater than zero. Having an inline extent - * at such an offset results in chaos as btrfs - * isn't prepared for such cases. Just skip - * this case for the same reasons as commented - * at btrfs_ioctl_clone(). - */ - if (last_dest_end > 0) { - ret = -EOPNOTSUPP; - btrfs_end_transaction(trans, root); - goto out; - } if (off > key.offset) { skip = off - key.offset; @@ -3626,42 +3755,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode, size -= skip + trim; datal -= skip + trim; - aligned_end = ALIGN(new_key.offset + datal, - root->sectorsize); - ret = btrfs_drop_extents(trans, root, inode, - drop_start, - aligned_end, - 1); + ret = clone_copy_inline_extent(src, inode, + trans, path, + &new_key, + drop_start, + datal, + skip, size, buf); if (ret) { if (ret != -EOPNOTSUPP) btrfs_abort_transaction(trans, - root, ret); - btrfs_end_transaction(trans, root); - goto out; - } - - ret = btrfs_insert_empty_item(trans, root, path, - &new_key, size); - if (ret) { - btrfs_abort_transaction(trans, root, - ret); + root, + ret); btrfs_end_transaction(trans, root); goto out; } - - if (skip) { - u32 start = - btrfs_file_extent_calc_inline_size(0); - memmove(buf+start, buf+start+skip, - datal); - } - leaf = path->nodes[0]; slot = path->slots[0]; - write_extent_buffer(leaf, buf, - btrfs_item_ptr_offset(leaf, slot), - size); - inode_add_bytes(inode, datal); } /* If we have an implicit hole (NO_HOLES feature). */ From 90291b48b1d907425d8741861fff1dfe4cf7156f Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 16 Oct 2015 12:34:25 +0100 Subject: [PATCH 176/201] Btrfs: fix truncation of compressed and inlined extents commit 0305cd5f7fca85dae392b9ba85b116896eb7c1c7 upstream. When truncating a file to a smaller size which consists of an inline extent that is compressed, we did not discard (or made unusable) the data between the new file size and the old file size, wasting metadata space and allowing for the truncated data to be leaked and the data corruption/loss mentioned below. We were also not correctly decrementing the number of bytes used by the inode, we were setting it to zero, giving a wrong report for callers of the stat(2) syscall. The fsck tool also reported an error about a mismatch between the nbytes of the file versus the real space used by the file. Now because we weren't discarding the truncated region of the file, it was possible for a caller of the clone ioctl to actually read the data that was truncated, allowing for a security breach without requiring root access to the system, using only standard filesystem operations. The scenario is the following: 1) User A creates a file which consists of an inline and compressed extent with a size of 2000 bytes - the file is not accessible to any other users (no read, write or execution permission for anyone else); 2) The user truncates the file to a size of 1000 bytes; 3) User A makes the file world readable; 4) User B creates a file consisting of an inline extent of 2000 bytes; 5) User B issues a clone operation from user A's file into its own file (using a length argument of 0, clone the whole range); 6) User B now gets to see the 1000 bytes that user A truncated from its file before it made its file world readbale. User B also lost the bytes in the range [1000, 2000[ bytes from its own file, but that might be ok if his/her intention was reading stale data from user A that was never supposed to be public. Note that this contrasts with the case where we truncate a file from 2000 bytes to 1000 bytes and then truncate it back from 1000 to 2000 bytes. In this case reading any byte from the range [1000, 2000[ will return a value of 0x00, instead of the original data. This problem exists since the clone ioctl was added and happens both with and without my recent data loss and file corruption fixes for the clone ioctl (patch "Btrfs: fix file corruption and data loss after cloning inline extents"). So fix this by truncating the compressed inline extents as we do for the non-compressed case, which involves decompressing, if the data isn't already in the page cache, compressing the truncated version of the extent, writing the compressed content into the inline extent and then truncate it. The following test case for fstests reproduces the problem. In order for the test to pass both this fix and my previous fix for the clone ioctl that forbids cloning a smaller inline extent into a larger one, which is titled "Btrfs: fix file corruption and data loss after cloning inline extents", are needed. Without that other fix the test fails in a different way that does not leak the truncated data, instead part of destination file gets replaced with zeroes (because the destination file has a larger inline extent than the source). seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" tmp=/tmp/$$ status=1 # failure is the default! trap "_cleanup; exit \$status" 0 1 2 3 15 _cleanup() { rm -f $tmp.* } # get standard environment, filters and checks . ./common/rc . ./common/filter # real QA test starts here _need_to_be_root _supported_fs btrfs _supported_os Linux _require_scratch _require_cloner rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 _scratch_mount "-o compress" # Create our test files. File foo is going to be the source of a clone operation # and consists of a single inline extent with an uncompressed size of 512 bytes, # while file bar consists of a single inline extent with an uncompressed size of # 256 bytes. For our test's purpose, it's important that file bar has an inline # extent with a size smaller than foo's inline extent. $XFS_IO_PROG -f -c "pwrite -S 0xa1 0 128" \ -c "pwrite -S 0x2a 128 384" \ $SCRATCH_MNT/foo | _filter_xfs_io $XFS_IO_PROG -f -c "pwrite -S 0xbb 0 256" $SCRATCH_MNT/bar | _filter_xfs_io # Now durably persist all metadata and data. We do this to make sure that we get # on disk an inline extent with a size of 512 bytes for file foo. sync # Now truncate our file foo to a smaller size. Because it consists of a # compressed and inline extent, btrfs did not shrink the inline extent to the # new size (if the extent was not compressed, btrfs would shrink it to 128 # bytes), it only updates the inode's i_size to 128 bytes. $XFS_IO_PROG -c "truncate 128" $SCRATCH_MNT/foo # Now clone foo's inline extent into bar. # This clone operation should fail with errno EOPNOTSUPP because the source # file consists only of an inline extent and the file's size is smaller than # the inline extent of the destination (128 bytes < 256 bytes). However the # clone ioctl was not prepared to deal with a file that has a size smaller # than the size of its inline extent (something that happens only for compressed # inline extents), resulting in copying the full inline extent from the source # file into the destination file. # # Note that btrfs' clone operation for inline extents consists of removing the # inline extent from the destination inode and copy the inline extent from the # source inode into the destination inode, meaning that if the destination # inode's inline extent is larger (N bytes) than the source inode's inline # extent (M bytes), some bytes (N - M bytes) will be lost from the destination # file. Btrfs could copy the source inline extent's data into the destination's # inline extent so that we would not lose any data, but that's currently not # done due to the complexity that would be needed to deal with such cases # (specially when one or both extents are compressed), returning EOPNOTSUPP, as # it's normally not a very common case to clone very small files (only case # where we get inline extents) and copying inline extents does not save any # space (unlike for normal, non-inlined extents). $CLONER_PROG -s 0 -d 0 -l 0 $SCRATCH_MNT/foo $SCRATCH_MNT/bar # Now because the above clone operation used to succeed, and due to foo's inline # extent not being shinked by the truncate operation, our file bar got the whole # inline extent copied from foo, making us lose the last 128 bytes from bar # which got replaced by the bytes in range [128, 256[ from foo before foo was # truncated - in other words, data loss from bar and being able to read old and # stale data from foo that should not be possible to read anymore through normal # filesystem operations. Contrast with the case where we truncate a file from a # size N to a smaller size M, truncate it back to size N and then read the range # [M, N[, we should always get the value 0x00 for all the bytes in that range. # We expected the clone operation to fail with errno EOPNOTSUPP and therefore # not modify our file's bar data/metadata. So its content should be 256 bytes # long with all bytes having the value 0xbb. # # Without the btrfs bug fix, the clone operation succeeded and resulted in # leaking truncated data from foo, the bytes that belonged to its range # [128, 256[, and losing data from bar in that same range. So reading the # file gave us the following content: # # 0000000 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 a1 # * # 0000200 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a 2a # * # 0000400 echo "File bar's content after the clone operation:" od -t x1 $SCRATCH_MNT/bar # Also because the foo's inline extent was not shrunk by the truncate # operation, btrfs' fsck, which is run by the fstests framework everytime a # test completes, failed reporting the following error: # # root 5 inode 257 errors 400, nbytes wrong status=0 exit Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 82 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 68 insertions(+), 14 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 611b66d73e80b..1141090423e7e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4217,6 +4217,47 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, } +static int truncate_inline_extent(struct inode *inode, + struct btrfs_path *path, + struct btrfs_key *found_key, + const u64 item_end, + const u64 new_size) +{ + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + struct btrfs_file_extent_item *fi; + u32 size = (u32)(new_size - found_key->offset); + struct btrfs_root *root = BTRFS_I(inode)->root; + + fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); + + if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { + loff_t offset = new_size; + loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE); + + /* + * Zero out the remaining of the last page of our inline extent, + * instead of directly truncating our inline extent here - that + * would be much more complex (decompressing all the data, then + * compressing the truncated data, which might be bigger than + * the size of the inline extent, resize the extent, etc). + * We release the path because to get the page we might need to + * read the extent item from disk (data not in the page cache). + */ + btrfs_release_path(path); + return btrfs_truncate_page(inode, offset, page_end - offset, 0); + } + + btrfs_set_file_extent_ram_bytes(leaf, fi, size); + size = btrfs_file_extent_calc_inline_size(size); + btrfs_truncate_item(root, path, size, 1); + + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + inode_sub_bytes(inode, item_end + 1 - new_size); + + return 0; +} + /* * this can truncate away extent items, csum items and directory items. * It starts at a high offset and removes keys until it can't find @@ -4411,27 +4452,40 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, * special encodings */ if (!del_item && - btrfs_file_extent_compression(leaf, fi) == 0 && btrfs_file_extent_encryption(leaf, fi) == 0 && btrfs_file_extent_other_encoding(leaf, fi) == 0) { - u32 size = new_size - found_key.offset; - - if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) - inode_sub_bytes(inode, item_end + 1 - - new_size); /* - * update the ram bytes to properly reflect - * the new size of our item + * Need to release path in order to truncate a + * compressed extent. So delete any accumulated + * extent items so far. */ - btrfs_set_file_extent_ram_bytes(leaf, fi, size); - size = - btrfs_file_extent_calc_inline_size(size); - btrfs_truncate_item(root, path, size, 1); + if (btrfs_file_extent_compression(leaf, fi) != + BTRFS_COMPRESS_NONE && pending_del_nr) { + err = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + if (err) { + btrfs_abort_transaction(trans, + root, + err); + goto error; + } + pending_del_nr = 0; + } + + err = truncate_inline_extent(inode, path, + &found_key, + item_end, + new_size); + if (err) { + btrfs_abort_transaction(trans, + root, err); + goto error; + } } else if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { - inode_sub_bytes(inode, item_end + 1 - - found_key.offset); + inode_sub_bytes(inode, item_end + 1 - new_size); } } delete: From fcb184a168452e315676f306b054c908e8ff94ab Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 22 Oct 2015 09:47:34 +0100 Subject: [PATCH 177/201] Btrfs: fix regression when running delayed references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 2c3cf7d5f6105bb957df125dfce61d4483b8742d upstream. In the kernel 4.2 merge window we had a refactoring/rework of the delayed references implementation in order to fix certain problems with qgroups. However that rework introduced one more regression that leads to the following trace when running delayed references for metadata: [35908.064664] kernel BUG at fs/btrfs/extent-tree.c:1832! [35908.065201] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [35908.065201] Modules linked in: dm_flakey dm_mod btrfs crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc psmouse i2 [35908.065201] CPU: 14 PID: 15014 Comm: kworker/u32:9 Tainted: G W 4.3.0-rc5-btrfs-next-17+ #1 [35908.065201] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [35908.065201] Workqueue: btrfs-extent-refs btrfs_extent_refs_helper [btrfs] [35908.065201] task: ffff880114b7d780 ti: ffff88010c4c8000 task.ti: ffff88010c4c8000 [35908.065201] RIP: 0010:[] [] insert_inline_extent_backref+0x52/0xb1 [btrfs] [35908.065201] RSP: 0018:ffff88010c4cbb08 EFLAGS: 00010293 [35908.065201] RAX: 0000000000000000 RBX: ffff88008a661000 RCX: 0000000000000000 [35908.065201] RDX: ffffffffa04dd58f RSI: 0000000000000001 RDI: 0000000000000000 [35908.065201] RBP: ffff88010c4cbb40 R08: 0000000000001000 R09: ffff88010c4cb9f8 [35908.065201] R10: 0000000000000000 R11: 000000000000002c R12: 0000000000000000 [35908.065201] R13: ffff88020a74c578 R14: 0000000000000000 R15: 0000000000000000 [35908.065201] FS: 0000000000000000(0000) GS:ffff88023edc0000(0000) knlGS:0000000000000000 [35908.065201] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [35908.065201] CR2: 00000000015e8708 CR3: 0000000102185000 CR4: 00000000000006e0 [35908.065201] Stack: [35908.065201] ffff88010c4cbb18 0000000000000f37 ffff88020a74c578 ffff88015a408000 [35908.065201] ffff880154a44000 0000000000000000 0000000000000005 ffff88010c4cbbd8 [35908.065201] ffffffffa0492b9a 0000000000000005 0000000000000000 0000000000000000 [35908.065201] Call Trace: [35908.065201] [] __btrfs_inc_extent_ref+0x8b/0x208 [btrfs] [35908.065201] [] ? __btrfs_run_delayed_refs+0x4d4/0xd33 [btrfs] [35908.065201] [] __btrfs_run_delayed_refs+0xafa/0xd33 [btrfs] [35908.065201] [] ? join_transaction.isra.10+0x25/0x41f [btrfs] [35908.065201] [] ? join_transaction.isra.10+0xa8/0x41f [btrfs] [35908.065201] [] btrfs_run_delayed_refs+0x75/0x1dd [btrfs] [35908.065201] [] delayed_ref_async_start+0x3c/0x7b [btrfs] [35908.065201] [] normal_work_helper+0x14c/0x32a [btrfs] [35908.065201] [] btrfs_extent_refs_helper+0x12/0x14 [btrfs] [35908.065201] [] process_one_work+0x24a/0x4ac [35908.065201] [] worker_thread+0x206/0x2c2 [35908.065201] [] ? rescuer_thread+0x2cb/0x2cb [35908.065201] [] ? rescuer_thread+0x2cb/0x2cb [35908.065201] [] kthread+0xef/0xf7 [35908.065201] [] ? kthread_parkme+0x24/0x24 [35908.065201] [] ret_from_fork+0x3f/0x70 [35908.065201] [] ? kthread_parkme+0x24/0x24 [35908.065201] Code: 6a 01 41 56 41 54 ff 75 10 41 51 4d 89 c1 49 89 c8 48 8d 4d d0 e8 f6 f1 ff ff 48 83 c4 28 85 c0 75 2c 49 81 fc ff 00 00 00 77 02 <0f> 0b 4c 8b 45 30 8b 4d 28 45 31 [35908.065201] RIP [] insert_inline_extent_backref+0x52/0xb1 [btrfs] [35908.065201] RSP [35908.310885] ---[ end trace fe4299baf0666457 ]--- This happens because the new delayed references code no longer merges delayed references that have different sequence values. The following steps are an example sequence leading to this issue: 1) Transaction N starts, fs_info->tree_mod_seq has value 0; 2) Extent buffer (btree node) A is allocated, delayed reference Ref1 for bytenr A is created, with a value of 1 and a seq value of 0; 3) fs_info->tree_mod_seq is incremented to 1; 4) Extent buffer A is deleted through btrfs_del_items(), which calls btrfs_del_leaf(), which in turn calls btrfs_free_tree_block(). The later returns the metadata extent associated to extent buffer A to the free space cache (the range is not pinned), because the extent buffer was created in the current transaction (N) and writeback never happened for the extent buffer (flag BTRFS_HEADER_FLAG_WRITTEN not set in the extent buffer). This creates the delayed reference Ref2 for bytenr A, with a value of -1 and a seq value of 1; 5) Delayed reference Ref2 is not merged with Ref1 when we create it, because they have different sequence numbers (decided at add_delayed_ref_tail_merge()); 6) fs_info->tree_mod_seq is incremented to 2; 7) Some task attempts to allocate a new extent buffer (done at extent-tree.c:find_free_extent()), but due to heavy fragmentation and running low on metadata space the clustered allocation fails and we fall back to unclustered allocation, which finds the extent at offset A, so a new extent buffer at offset A is allocated. This creates delayed reference Ref3 for bytenr A, with a value of 1 and a seq value of 2; 8) Ref3 is not merged neither with Ref2 nor Ref1, again because they all have different seq values; 9) We start running the delayed references (__btrfs_run_delayed_refs()); 10) The delayed Ref1 is the first one being applied, which ends up creating an inline extent backref in the extent tree; 10) Next the delayed reference Ref3 is selected for execution, and not Ref2, because select_delayed_ref() always gives a preference for positive references (that have an action of BTRFS_ADD_DELAYED_REF); 11) When running Ref3 we encounter alreay the inline extent backref in the extent tree at insert_inline_extent_backref(), which makes us hit the following BUG_ON: BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); This is always true because owner corresponds to the level of the extent buffer/btree node in the btree. For the scenario described above we hit the BUG_ON because we never merge references that have different seq values. We used to do the merging before the 4.2 kernel, more specifically, before the commmits: c6fc24549960 ("btrfs: delayed-ref: Use list to replace the ref_root in ref_head.") c43d160fcd5e ("btrfs: delayed-ref: Cleanup the unneeded functions.") This issue became more exposed after the following change that was added to 4.2 as well: cffc3374e567 ("Btrfs: fix order by which delayed references are run") Which in turn fixed another regression by the two commits previously mentioned. So fix this by bringing back the delayed reference merge code, with the proper adaptations so that it operates against the new data structure (linked list vs old red black tree implementation). This issue was hit running fstest btrfs/063 in a loop. Several people have reported this issue in the mailing list when running on kernels 4.2+. Very special thanks to Stéphane Lesimple for helping debugging this issue and testing this fix on his multi terabyte filesystem (which took more than one day to balance alone, plus fsck, etc). Fixes: c6fc24549960 ("btrfs: delayed-ref: Use list to replace the ref_root in ref_head.") Reported-by: Peter Becker Reported-by: Stéphane Lesimple Tested-by: Stéphane Lesimple Reported-by: Malte Schröder Reported-by: Derek Dongray Reported-by: Erkki Seppala Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/delayed-ref.c | 113 +++++++++++++++++++++++++++++++++++++++++ fs/btrfs/extent-tree.c | 14 +++++ 2 files changed, 127 insertions(+) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ac3e81da6d4ed..4832943aaa5cd 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -197,6 +197,119 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, trans->delayed_ref_updates--; } +static bool merge_ref(struct btrfs_trans_handle *trans, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head, + struct btrfs_delayed_ref_node *ref, + u64 seq) +{ + struct btrfs_delayed_ref_node *next; + bool done = false; + + next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + while (!done && &next->list != &head->ref_list) { + int mod; + struct btrfs_delayed_ref_node *next2; + + next2 = list_next_entry(next, list); + + if (next == ref) + goto next; + + if (seq && next->seq >= seq) + goto next; + + if (next->type != ref->type || next->no_quota != ref->no_quota) + goto next; + + if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY) && + comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref), + btrfs_delayed_node_to_tree_ref(next), + ref->type)) + goto next; + if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY || + ref->type == BTRFS_SHARED_DATA_REF_KEY) && + comp_data_refs(btrfs_delayed_node_to_data_ref(ref), + btrfs_delayed_node_to_data_ref(next))) + goto next; + + if (ref->action == next->action) { + mod = next->ref_mod; + } else { + if (ref->ref_mod < next->ref_mod) { + swap(ref, next); + done = true; + } + mod = -next->ref_mod; + } + + drop_delayed_ref(trans, delayed_refs, head, next); + ref->ref_mod += mod; + if (ref->ref_mod == 0) { + drop_delayed_ref(trans, delayed_refs, head, ref); + done = true; + } else { + /* + * Can't have multiples of the same ref on a tree block. + */ + WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || + ref->type == BTRFS_SHARED_BLOCK_REF_KEY); + } +next: + next = next2; + } + + return done; +} + +void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head) +{ + struct btrfs_delayed_ref_node *ref; + u64 seq = 0; + + assert_spin_locked(&head->lock); + + if (list_empty(&head->ref_list)) + return; + + /* We don't have too many refs to merge for data. */ + if (head->is_data) + return; + + spin_lock(&fs_info->tree_mod_seq_lock); + if (!list_empty(&fs_info->tree_mod_seq_list)) { + struct seq_list *elem; + + elem = list_first_entry(&fs_info->tree_mod_seq_list, + struct seq_list, list); + seq = elem->seq; + } + spin_unlock(&fs_info->tree_mod_seq_lock); + + ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, + list); + while (&ref->list != &head->ref_list) { + if (seq && ref->seq >= seq) + goto next; + + if (merge_ref(trans, delayed_refs, head, ref, seq)) { + if (list_empty(&head->ref_list)) + break; + ref = list_first_entry(&head->ref_list, + struct btrfs_delayed_ref_node, + list); + continue; + } +next: + ref = list_next_entry(ref, list); + } +} + int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_root *delayed_refs, u64 seq) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 601d7d45d164a..00158bfd1771e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2433,7 +2433,21 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, } } + /* + * We need to try and merge add/drops of the same ref since we + * can run into issues with relocate dropping the implicit ref + * and then it being added back again before the drop can + * finish. If we merged anything we need to re-loop so we can + * get a good ref. + * Or we can get node references of the same type that weren't + * merged when created due to bumps in the tree mod seq, and + * we need to merge them to prevent adding an inline extent + * backref before dropping it (triggering a BUG_ON at + * insert_inline_extent_backref()). + */ spin_lock(&locked_ref->lock); + btrfs_merge_delayed_refs(trans, fs_info, delayed_refs, + locked_ref); /* * locked_ref is the head node, so we have to go one From 280bd68e66a0fe027ee186a01a9edd869700a295 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 6 Nov 2015 13:33:33 +0000 Subject: [PATCH 178/201] Btrfs: fix race leading to incorrect item deletion when dropping extents commit aeafbf8486c9e2bd53f5cc3c10c0b7fd7149d69c upstream. While running a stress test I got the following warning triggered: [191627.672810] ------------[ cut here ]------------ [191627.673949] WARNING: CPU: 8 PID: 8447 at fs/btrfs/file.c:779 __btrfs_drop_extents+0x391/0xa50 [btrfs]() (...) [191627.701485] Call Trace: [191627.702037] [] dump_stack+0x4f/0x7b [191627.702992] [] ? console_unlock+0x356/0x3a2 [191627.704091] [] warn_slowpath_common+0xa1/0xbb [191627.705380] [] ? __btrfs_drop_extents+0x391/0xa50 [btrfs] [191627.706637] [] warn_slowpath_null+0x1a/0x1c [191627.707789] [] __btrfs_drop_extents+0x391/0xa50 [btrfs] [191627.709155] [] ? cache_alloc_debugcheck_after.isra.32+0x171/0x1d0 [191627.712444] [] ? kmemleak_alloc_recursive.constprop.40+0x16/0x18 [191627.714162] [] insert_reserved_file_extent.constprop.40+0x83/0x24e [btrfs] [191627.715887] [] ? start_transaction+0x3bb/0x610 [btrfs] [191627.717287] [] btrfs_finish_ordered_io+0x273/0x4e2 [btrfs] [191627.728865] [] finish_ordered_fn+0x15/0x17 [btrfs] [191627.730045] [] normal_work_helper+0x14c/0x32c [btrfs] [191627.731256] [] btrfs_endio_write_helper+0x12/0x14 [btrfs] [191627.732661] [] process_one_work+0x24c/0x4ae [191627.733822] [] worker_thread+0x206/0x2c2 [191627.734857] [] ? process_scheduled_works+0x2f/0x2f [191627.736052] [] ? process_scheduled_works+0x2f/0x2f [191627.737349] [] kthread+0xef/0xf7 [191627.738267] [] ? time_hardirqs_on+0x15/0x28 [191627.739330] [] ? __kthread_parkme+0xad/0xad [191627.741976] [] ret_from_fork+0x42/0x70 [191627.743080] [] ? __kthread_parkme+0xad/0xad [191627.744206] ---[ end trace bbfddacb7aaada8d ]--- $ cat -n fs/btrfs/file.c 691 int __btrfs_drop_extents(struct btrfs_trans_handle *trans, (...) 758 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 759 if (key.objectid > ino || 760 key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) 761 break; 762 763 fi = btrfs_item_ptr(leaf, path->slots[0], 764 struct btrfs_file_extent_item); 765 extent_type = btrfs_file_extent_type(leaf, fi); 766 767 if (extent_type == BTRFS_FILE_EXTENT_REG || 768 extent_type == BTRFS_FILE_EXTENT_PREALLOC) { (...) 774 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { (...) 778 } else { 779 WARN_ON(1); 780 extent_end = search_start; 781 } (...) This happened because the item we were processing did not match a file extent item (its key type != BTRFS_EXTENT_DATA_KEY), and even on this case we cast the item to a struct btrfs_file_extent_item pointer and then find a type field value that does not match any of the expected values (BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]). This scenario happens due to a tiny time window where a race can happen as exemplified below. For example, consider the following scenario where we're using the NO_HOLES feature and we have the following two neighbour leafs: Leaf X (has N items) Leaf Y [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 EXTENT_DATA 8192), ... ] slot N - 2 slot N - 1 slot 0 Our inode 257 has an implicit hole in the range [0, 8K[ (implicit rather than explicit because NO_HOLES is enabled). Now if our inode has an ordered extent for the range [4K, 8K[ that is finishing, the following can happen: CPU 1 CPU 2 btrfs_finish_ordered_io() insert_reserved_file_extent() __btrfs_drop_extents() Searches for the key (257 EXTENT_DATA 4096) through btrfs_lookup_file_extent() Key not found and we get a path where path->nodes[0] == leaf X and path->slots[0] == N Because path->slots[0] is >= btrfs_header_nritems(leaf X), we call btrfs_next_leaf() btrfs_next_leaf() releases the path inserts key (257 INODE_REF 4096) at the end of leaf X, leaf X now has N + 1 keys, and the new key is at slot N btrfs_next_leaf() searches for key (257 INODE_REF 256), with path->keep_locks set to 1, because it was the last key it saw in leaf X finds it in leaf X again and notices it's no longer the last key of the leaf, so it returns 0 with path->nodes[0] == leaf X and path->slots[0] == N (which is now < btrfs_header_nritems(leaf X)), pointing to the new key (257 INODE_REF 4096) __btrfs_drop_extents() casts the item at path->nodes[0], slot path->slots[0], to a struct btrfs_file_extent_item - it does not skip keys for the target inode with a type less than BTRFS_EXTENT_DATA_KEY (BTRFS_INODE_REF_KEY < BTRFS_EXTENT_DATA_KEY) sees a bogus value for the type field triggering the WARN_ON in the trace shown above, and sets extent_end = search_start (4096) does the if-then-else logic to fixup 0 length extent items created by a past bug from hole punching: if (extent_end == key.offset && extent_end >= search_start) goto delete_extent_item; that evaluates to true and it ends up deleting the key pointed to by path->slots[0], (257 INODE_REF 4096), from leaf X The same could happen for example for a xattr that ends up having a key with an offset value that matches search_start (very unlikely but not impossible). So fix this by ensuring that keys smaller than BTRFS_EXTENT_DATA_KEY are skipped, never casted to struct btrfs_file_extent_item and never deleted by accident. Also protect against the unexpected case of getting a key for a lower inode number by skipping that key and issuing a warning. Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 8c6f247ba81d4..10b1471e17905 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -756,8 +756,16 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid > ino || - key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) + + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY) { + ASSERT(del_nr == 0); + path->slots[0]++; + goto next_slot; + } + if (key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end) break; fi = btrfs_item_ptr(leaf, path->slots[0], @@ -776,8 +784,8 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, btrfs_file_extent_inline_len(leaf, path->slots[0], fi); } else { - WARN_ON(1); - extent_end = search_start; + /* can't happen */ + BUG(); } /* From 42c6cb15b97363f55f34f4845fe2c1503a9558a4 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Nov 2015 00:33:58 +0000 Subject: [PATCH 179/201] Btrfs: fix race leading to BUG_ON when running delalloc for nodatacow commit 1d512cb77bdbda80f0dd0620a3b260d697fd581d upstream. If we are using the NO_HOLES feature, we have a tiny time window when running delalloc for a nodatacow inode where we can race with a concurrent link or xattr add operation leading to a BUG_ON. This happens because at run_delalloc_nocow() we end up casting a leaf item of type BTRFS_INODE_[REF|EXTREF]_KEY or of type BTRFS_XATTR_ITEM_KEY to a file extent item (struct btrfs_file_extent_item) and then analyse its extent type field, which won't match any of the expected extent types (values BTRFS_FILE_EXTENT_[REG|PREALLOC|INLINE]) and therefore trigger an explicit BUG_ON(1). The following sequence diagram shows how the race happens when running a no-cow dellaloc range [4K, 8K[ for inode 257 and we have the following neighbour leafs: Leaf X (has N items) Leaf Y [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 EXTENT_DATA 8192), ... ] slot N - 2 slot N - 1 slot 0 (Note the implicit hole for inode 257 regarding the [0, 8K[ range) CPU 1 CPU 2 run_dealloc_nocow() btrfs_lookup_file_extent() --> searches for a key with value (257 EXTENT_DATA 4096) in the fs/subvol tree --> returns us a path with path->nodes[0] == leaf X and path->slots[0] == N because path->slots[0] is >= btrfs_header_nritems(leaf X), it calls btrfs_next_leaf() btrfs_next_leaf() --> releases the path hard link added to our inode, with key (257 INODE_REF 500) added to the end of leaf X, so leaf X now has N + 1 keys --> searches for the key (257 INODE_REF 256), because it was the last key in leaf X before it released the path, with path->keep_locks set to 1 --> ends up at leaf X again and it verifies that the key (257 INODE_REF 256) is no longer the last key in the leaf, so it returns with path->nodes[0] == leaf X and path->slots[0] == N, pointing to the new item with key (257 INODE_REF 500) the loop iteration of run_dealloc_nocow() does not break out the loop and continues because the key referenced in the path at path->nodes[0] and path->slots[0] is for inode 257, its type is < BTRFS_EXTENT_DATA_KEY and its offset (500) is less then our delalloc range's end (8192) the item pointed by the path, an inode reference item, is (incorrectly) interpreted as a file extent item and we get an invalid extent type, leading to the BUG_ON(1): if (extent_type == BTRFS_FILE_EXTENT_REG || extent_type == BTRFS_FILE_EXTENT_PREALLOC) { (...) } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { (...) } else { BUG_ON(1) } The same can happen if a xattr is added concurrently and ends up having a key with an offset smaller then the delalloc's range end. So fix this by skipping keys with a type smaller than BTRFS_EXTENT_DATA_KEY. Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/inode.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1141090423e7e..e0c17e072a7a7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1294,8 +1294,14 @@ static noinline int run_delalloc_nocow(struct inode *inode, num_bytes = 0; btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); - if (found_key.objectid > ino || - found_key.type > BTRFS_EXTENT_DATA_KEY || + if (found_key.objectid > ino) + break; + if (WARN_ON_ONCE(found_key.objectid < ino) || + found_key.type < BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; + goto next_slot; + } + if (found_key.type > BTRFS_EXTENT_DATA_KEY || found_key.offset > end) break; From 82c82fadbd75f62bfb9d83eb3a308b55f4c4507d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 9 Nov 2015 18:06:38 +0000 Subject: [PATCH 180/201] Btrfs: fix race when listing an inode's xattrs commit f1cd1f0b7d1b5d4aaa5711e8f4e4898b0045cb6d upstream. When listing a inode's xattrs we have a time window where we race against a concurrent operation for adding a new hard link for our inode that makes us not return any xattr to user space. In order for this to happen, the first xattr of our inode needs to be at slot 0 of a leaf and the previous leaf must still have room for an inode ref (or extref) item, and this can happen because an inode's listxattrs callback does not lock the inode's i_mutex (nor does the VFS does it for us), but adding a hard link to an inode makes the VFS lock the inode's i_mutex before calling the inode's link callback. If we have the following leafs: Leaf X (has N items) Leaf Y [ ... (257 INODE_ITEM 0) (257 INODE_REF 256) ] [ (257 XATTR_ITEM 12345), ... ] slot N - 2 slot N - 1 slot 0 The race illustrated by the following sequence diagram is possible: CPU 1 CPU 2 btrfs_listxattr() searches for key (257 XATTR_ITEM 0) gets path with path->nodes[0] == leaf X and path->slots[0] == N because path->slots[0] is >= btrfs_header_nritems(leaf X), it calls btrfs_next_leaf() btrfs_next_leaf() releases the path adds key (257 INODE_REF 666) to the end of leaf X (slot N), and leaf X now has N + 1 items searches for the key (257 INODE_REF 256), with path->keep_locks == 1, because that is the last key it saw in leaf X before releasing the path ends up at leaf X again and it verifies that the key (257 INODE_REF 256) is no longer the last key in leaf X, so it returns with path->nodes[0] == leaf X and path->slots[0] == N, pointing to the new item with key (257 INODE_REF 666) btrfs_listxattr's loop iteration sees that the type of the key pointed by the path is different from the type BTRFS_XATTR_ITEM_KEY and so it breaks the loop and stops looking for more xattr items --> the application doesn't get any xattr listed for our inode So fix this by breaking the loop only if the key's type is greater than BTRFS_XATTR_ITEM_KEY and skip the current key if its type is smaller. Signed-off-by: Filipe Manana Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 6f518c90e1c14..1fcd7b6e7564d 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -313,8 +313,10 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) /* check to make sure this item is what we want */ if (found_key.objectid != key.objectid) break; - if (found_key.type != BTRFS_XATTR_ITEM_KEY) + if (found_key.type > BTRFS_XATTR_ITEM_KEY) break; + if (found_key.type < BTRFS_XATTR_ITEM_KEY) + goto next; di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); if (verify_dir_item(root, leaf, di)) From f0009492b51e67fc498ab6bcc127c9b418806e71 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Mon, 9 Nov 2015 11:44:45 +0100 Subject: [PATCH 181/201] btrfs: fix signed overflows in btrfs_sync_file commit 9dcbeed4d7e11e1dcf5e55475de3754f0855d1c2 upstream. The calculation of range length in btrfs_sync_file leads to signed overflow. This was caught by PaX gcc SIZE_OVERFLOW plugin. https://forums.grsecurity.net/viewtopic.php?f=1&t=4284 The fsync call passes 0 and LLONG_MAX, the range length does not fit to loff_t and overflows, but the value is converted to u64 so it silently works as expected. The minimal fix is a typecast to u64, switching functions to take (start, end) instead of (start, len) would be more intrusive. Coccinelle script found that there's one more opencoded calculation of the length. @@ loff_t start, end; @@ * end - start Signed-off-by: David Sterba Signed-off-by: Chris Mason Signed-off-by: Greg Kroah-Hartman --- fs/btrfs/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 10b1471e17905..6e80b78f797f8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1876,8 +1876,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_log_ctx ctx; int ret = 0; bool full_sync = 0; - const u64 len = end - start + 1; + u64 len; + /* + * The range length can be represented by u64, we have to do the typecasts + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() + */ + len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); /* @@ -2065,8 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } } if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, - end - start + 1); + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { btrfs_end_transaction(trans, root); goto out; From 4d8dce2c10dfaba12c767e48d254082333424c7e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 27 Nov 2015 19:23:24 +0100 Subject: [PATCH 182/201] rbd: don't put snap_context twice in rbd_queue_workfn() commit 70b16db86f564977df074072143284aec2cb1162 upstream. Commit 4e752f0ab0e8 ("rbd: access snapshot context and mapping size safely") moved ceph_get_snap_context() out of rbd_img_request_create() and into rbd_queue_workfn(), adding a ceph_put_snap_context() to the error path in rbd_queue_workfn(). However, rbd_img_request_create() consumes a ref on snapc, so calling ceph_put_snap_context() after a successful rbd_img_request_create() leads to an extra put. Fix it. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin Signed-off-by: Greg Kroah-Hartman --- drivers/block/rbd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 128e7df5b8072..8630a77ea4625 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -3444,6 +3444,7 @@ static void rbd_queue_workfn(struct work_struct *work) goto err_rq; } img_request->rq = rq; + snapc = NULL; /* img_request consumes a ref */ if (op_type == OBJ_OP_DISCARD) result = rbd_img_request_fill(img_request, OBJ_REQUEST_NODATA, From 99d17a1f12d6e56fa197f9e6c131236df50ef391 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Fri, 2 Oct 2015 23:54:58 -0400 Subject: [PATCH 183/201] ext4 crypto: fix memory leak in ext4_bio_write_page() commit 937d7b84dca58f2565715f2c8e52f14c3d65fb22 upstream. There are times when ext4_bio_write_page() is called even though we don't actually need to do any I/O. This happens when ext4_writepage() gets called by the jbd2 commit path when an inode needs to force its pages written out in order to provide data=ordered guarantees --- and a page is backed by an unwritten (e.g., uninitialized) block on disk, or if delayed allocation means the page's backing store hasn't been allocated yet. In that case, we need to skip the call to ext4_encrypt_page(), since in addition to wasting CPU, it leads to a bounce page and an ext4 crypto context getting leaked. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/page-io.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 84ba4d2b3a35f..17fbe3882b8eb 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -425,6 +425,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, struct buffer_head *bh, *head; int ret = 0; int nr_submitted = 0; + int nr_to_submit = 0; blocksize = 1 << inode->i_blkbits; @@ -477,11 +478,13 @@ int ext4_bio_write_page(struct ext4_io_submit *io, unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); } set_buffer_async_write(bh); + nr_to_submit++; } while ((bh = bh->b_this_page) != head); bh = head = page_buffers(page); - if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { + if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) && + nr_to_submit) { data_page = ext4_encrypt(inode, page); if (IS_ERR(data_page)) { ret = PTR_ERR(data_page); From bcdde051c4086a43f97119fefd70735b26f924b6 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sat, 3 Oct 2015 10:49:29 -0400 Subject: [PATCH 184/201] ext4 crypto: fix bugs in ext4_encrypted_zeroout() commit 36086d43f6575c081067de9855786a2fc91df77b upstream. Fix multiple bugs in ext4_encrypted_zeroout(), including one that could cause us to write an encrypted zero page to the wrong location on disk, potentially causing data and file system corruption. Fortunately, this tends to only show up in stress tests, but even with these fixes, we are seeing some test failures with generic/127 --- but these are now caused by data failures instead of metadata corruption. Since ext4_encrypted_zeroout() is only used for some optimizations to keep the extent tree from being too fragmented, and ext4_encrypted_zeroout() itself isn't all that optimized from a time or IOPS perspective, disable the extent tree optimization for encrypted inodes for now. This prevents the data corruption issues reported by generic/127 until we can figure out what's going wrong. Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/crypto.c | 23 +++++++++++++++++++---- fs/ext4/extents.c | 3 +++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index 45731558138c8..2fab243a4c9e9 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -411,7 +411,13 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) ext4_lblk_t lblk = ex->ee_block; ext4_fsblk_t pblk = ext4_ext_pblock(ex); unsigned int len = ext4_ext_get_actual_len(ex); - int err = 0; + int ret, err = 0; + +#if 0 + ext4_msg(inode->i_sb, KERN_CRIT, + "ext4_encrypted_zeroout ino %lu lblk %u len %u", + (unsigned long) inode->i_ino, lblk, len); +#endif BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE); @@ -437,17 +443,26 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) goto errout; } bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_iter.bi_sector = pblk; - err = bio_add_page(bio, ciphertext_page, + bio->bi_iter.bi_sector = + pblk << (inode->i_sb->s_blocksize_bits - 9); + ret = bio_add_page(bio, ciphertext_page, inode->i_sb->s_blocksize, 0); - if (err) { + if (ret != inode->i_sb->s_blocksize) { + /* should never happen! */ + ext4_msg(inode->i_sb, KERN_ERR, + "bio_add_page failed: %d", ret); + WARN_ON(1); bio_put(bio); + err = -EIO; goto errout; } err = submit_bio_wait(WRITE, bio); + if ((err == 0) && bio->bi_error) + err = -EIO; bio_put(bio); if (err) goto errout; + lblk++; pblk++; } err = 0; errout: diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 2553aa8b608d8..7f486e350d15d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -3558,6 +3558,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle, max_zeroout = sbi->s_extent_max_zeroout_kb >> (inode->i_sb->s_blocksize_bits - 10); + if (ext4_encrypted_inode(inode)) + max_zeroout = 0; + /* If extent is less than s_max_zeroout_kb, zeroout directly */ if (max_zeroout && (ee_len <= max_zeroout)) { err = ext4_ext_zeroout(inode, ex); From 5a4ead78e6a00d20924ea1485d51529d9d6c335f Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Sat, 17 Oct 2015 22:57:06 -0400 Subject: [PATCH 185/201] ext4: fix potential use after free in __ext4_journal_stop commit 6934da9238da947628be83635e365df41064b09b upstream. There is a use-after-free possibility in __ext4_journal_stop() in the case that we free the handle in the first jbd2_journal_stop() because we're referencing handle->h_err afterwards. This was introduced in 9705acd63b125dee8b15c705216d7186daea4625 and it is wrong. Fix it by storing the handle->h_err value beforehand and avoid referencing potentially freed handle. Fixes: 9705acd63b125dee8b15c705216d7186daea4625 Signed-off-by: Lukas Czerner Reviewed-by: Andreas Dilger Signed-off-by: Greg Kroah-Hartman --- fs/ext4/ext4_jbd2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index d418431818187..e770c1ee4613e 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -88,13 +88,13 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) return 0; } + err = handle->h_err; if (!handle->h_transaction) { - err = jbd2_journal_stop(handle); - return handle->h_err ? handle->h_err : err; + rc = jbd2_journal_stop(handle); + return err ? err : rc; } sb = handle->h_transaction->t_journal->j_private; - err = handle->h_err; rc = jbd2_journal_stop(handle); if (!err) From 5cbe4871e7fd45910472918126b9f1e77a5bda84 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 18 Oct 2015 17:02:56 -0400 Subject: [PATCH 186/201] ext4, jbd2: ensure entering into panic after recording an error in superblock commit 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 upstream. If a EXT4 filesystem utilizes JBD2 journaling and an error occurs, the journaling will be aborted first and the error number will be recorded into JBD2 superblock and, finally, the system will enter into the panic state in "errors=panic" option. But, in the rare case, this sequence is little twisted like the below figure and it will happen that the system enters into panic state, which means the system reset in mobile environment, before completion of recording an error in the journal superblock. In this case, e2fsck cannot recognize that the filesystem failure occurred in the previous run and the corruption wouldn't be fixed. Task A Task B ext4_handle_error() -> jbd2_journal_abort() -> __journal_abort_soft() -> __jbd2_journal_abort_hard() | -> journal->j_flags |= JBD2_ABORT; | | __ext4_abort() | -> jbd2_journal_abort() | | -> __journal_abort_soft() | | -> if (journal->j_flags & JBD2_ABORT) | | return; | -> panic() | -> jbd2_journal_update_sb_errno() Tested-by: Hobin Woo Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 12 ++++++++++-- fs/jbd2/journal.c | 6 +++++- include/linux/jbd2.h | 1 + 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index a63c7b0a10cfc..df84bd256c9f5 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -394,9 +394,13 @@ static void ext4_handle_error(struct super_block *sb) smp_wmb(); sb->s_flags |= MS_RDONLY; } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); + } } #define ext4_error_ratelimit(sb) \ @@ -585,8 +589,12 @@ void __ext4_abort(struct super_block *sb, const char *function, jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); save_error_info(sb, function, line); } - if (test_opt(sb, ERRORS_PANIC)) + if (test_opt(sb, ERRORS_PANIC)) { + if (EXT4_SB(sb)->s_journal && + !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) + return; panic("EXT4-fs panic from previous error\n"); + } } void __ext4_msg(struct super_block *sb, diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8270fe9e3641e..37023d0bdae49 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -2071,8 +2071,12 @@ static void __journal_abort_soft (journal_t *journal, int errno) __jbd2_journal_abort_hard(journal); - if (errno) + if (errno) { jbd2_journal_update_sb_errno(journal); + write_lock(&journal->j_state_lock); + journal->j_flags |= JBD2_REC_ERR; + write_unlock(&journal->j_state_lock); + } } /** diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index df07e78487d56..1abeb820a630b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1046,6 +1046,7 @@ struct journal_s #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer From eee26fbfd7a6970ad29daa83f5d7d83df17b017c Mon Sep 17 00:00:00 2001 From: Stefan Richter Date: Tue, 3 Nov 2015 01:46:21 +0100 Subject: [PATCH 187/201] firewire: ohci: fix JMicron JMB38x IT context discovery commit 100ceb66d5c40cc0c7018e06a9474302470be73c upstream. Reported by Clifford and Craig for JMicron OHCI-1394 + SDHCI combo controllers: Often or even most of the time, the controller is initialized with the message "added OHCI v1.10 device as card 0, 4 IR + 0 IT contexts, quirks 0x10". With 0 isochronous transmit DMA contexts (IT contexts), applications like audio output are impossible. However, OHCI-1394 demands that at least 4 IT contexts are implemented by the link layer controller, and indeed JMicron JMB38x do implement four of them. Only their IsoXmitIntMask register is unreliable at early access. With my own JMB381 single function controller I found: - I can reproduce the problem with a lower probability than Craig's. - If I put a loop around the section which clears and reads IsoXmitIntMask, then either the first or the second attempt will return the correct initial mask of 0x0000000f. I never encountered a case of needing more than a second attempt. - Consequently, if I put a dummy reg_read(...IsoXmitIntMaskSet) before the first write, the subsequent read will return the correct result. - If I merely ignore a wrong read result and force the known real result, later isochronous transmit DMA usage works just fine. So let's just fix this chip bug up by the latter method. Tested with JMB381 on kernel 3.13 and 4.3. Since OHCI-1394 generally requires 4 IT contexts at a minium, this workaround is simply applied whenever the initial read of IsoXmitIntMask returns 0, regardless whether it's a JMicron chip or not. I never heard of this issue together with any other chip though. I am not 100% sure that this fix works on the OHCI-1394 part of JMB380 and JMB388 combo controllers exactly the same as on the JMB381 single- function controller, but so far I haven't had a chance to let an owner of a combo chip run a patched kernel. Strangely enough, IsoRecvIntMask is always reported correctly, even though it is probed right before IsoXmitIntMask. Reported-by: Clifford Dunn Reported-by: Craig Moore Signed-off-by: Stefan Richter Signed-off-by: Greg Kroah-Hartman --- drivers/firewire/ohci.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index f51d376d10ba6..c2f5117fd8cb0 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -3675,6 +3675,11 @@ static int pci_probe(struct pci_dev *dev, reg_write(ohci, OHCI1394_IsoXmitIntMaskSet, ~0); ohci->it_context_support = reg_read(ohci, OHCI1394_IsoXmitIntMaskSet); + /* JMicron JMB38x often shows 0 at first read, just ignore it */ + if (!ohci->it_context_support) { + ohci_notice(ohci, "overriding IsoXmitIntMask\n"); + ohci->it_context_support = 0xf; + } reg_write(ohci, OHCI1394_IsoXmitIntMaskClear, ~0); ohci->it_context_mask = ohci->it_context_support; ohci->n_it = hweight32(ohci->it_context_mask); From 9a4a72786164f9aa3ca99189ef2948e020d31068 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Thu, 17 Sep 2015 07:47:08 -0400 Subject: [PATCH 188/201] nfsd: serialize state seqid morphing operations commit 35a92fe8770ce54c5eb275cd76128645bea2d200 upstream. Andrew was seeing a race occur when an OPEN and OPEN_DOWNGRADE were running in parallel. The server would receive the OPEN_DOWNGRADE first and check its seqid, but then an OPEN would race in and bump it. The OPEN_DOWNGRADE would then complete and bump the seqid again. The result was that the OPEN_DOWNGRADE would be applied after the OPEN, even though it should have been rejected since the seqid changed. The only recourse we have here I think is to serialize operations that bump the seqid in a stateid, particularly when we're given a seqid in the call. To address this, we add a new rw_semaphore to the nfs4_ol_stateid struct. We do a down_write prior to checking the seqid after looking up the stateid to ensure that nothing else is going to bump it while we're operating on it. In the case of OPEN, we do a down_read, as the call doesn't contain a seqid. Those can run in parallel -- we just need to serialize them when there is a concurrent OPEN_DOWNGRADE or CLOSE. LOCK and LOCKU however always take the write lock as there is no opportunity for parallelizing those. Reported-and-Tested-by: Andrew W Elble Signed-off-by: Jeff Layton Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 33 ++++++++++++++++++++++++++++----- fs/nfsd/state.h | 19 ++++++++++--------- 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0f1d5691b7957..1b39edf10b67d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3360,6 +3360,7 @@ static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, stp->st_access_bmap = 0; stp->st_deny_bmap = 0; stp->st_openstp = NULL; + init_rwsem(&stp->st_rwsem); spin_lock(&oo->oo_owner.so_client->cl_lock); list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -4187,15 +4188,20 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf */ if (stp) { /* Stateid was found, this is an OPEN upgrade */ + down_read(&stp->st_rwsem); status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open); - if (status) + if (status) { + up_read(&stp->st_rwsem); goto out; + } } else { stp = open->op_stp; open->op_stp = NULL; init_open_stateid(stp, fp, open); + down_read(&stp->st_rwsem); status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open); if (status) { + up_read(&stp->st_rwsem); release_open_stateid(stp); goto out; } @@ -4207,6 +4213,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_read(&stp->st_rwsem); if (nfsd4_has_session(&resp->cstate)) { if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) { @@ -4819,10 +4826,13 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_ * revoked delegations are kept only for free_stateid. */ return nfserr_bad_stateid; + down_write(&stp->st_rwsem); status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate)); - if (status) - return status; - return nfs4_check_fh(current_fh, &stp->st_stid); + if (status == nfs_ok) + status = nfs4_check_fh(current_fh, &stp->st_stid); + if (status != nfs_ok) + up_write(&stp->st_rwsem); + return status; } /* @@ -4869,6 +4879,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs return status; oo = openowner(stp->st_stateowner); if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) { + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); return nfserr_bad_stateid; } @@ -4899,11 +4910,14 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; oo = openowner(stp->st_stateowner); status = nfserr_bad_stateid; - if (oo->oo_flags & NFS4_OO_CONFIRMED) + if (oo->oo_flags & NFS4_OO_CONFIRMED) { + up_write(&stp->st_rwsem); goto put_stateid; + } oo->oo_flags |= NFS4_OO_CONFIRMED; update_stateid(&stp->st_stid.sc_stateid); memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n", __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid)); @@ -4982,6 +4996,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); status = nfs_ok; put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); @@ -5035,6 +5050,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); + up_write(&stp->st_rwsem); nfsd4_close_open_stateid(stp); @@ -5260,6 +5276,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo, stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; + init_rwsem(&stp->st_rwsem); list_add(&stp->st_locks, &open_stp->st_locks); list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids); spin_lock(&fp->fi_lock); @@ -5428,6 +5445,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, &open_stp, nn); if (status) goto out; + up_write(&open_stp->st_rwsem); open_sop = openowner(open_stp->st_stateowner); status = nfserr_bad_stateid; if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid, @@ -5435,6 +5453,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, goto out; status = lookup_or_create_lock_state(cstate, open_stp, lock, &lock_stp, &new); + if (status == nfs_ok) + down_write(&lock_stp->st_rwsem); } else { status = nfs4_preprocess_seqid_op(cstate, lock->lk_old_lock_seqid, @@ -5540,6 +5560,8 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, seqid_mutating_err(ntohl(status))) lock_sop->lo_owner.so_seqid++; + up_write(&lock_stp->st_rwsem); + /* * If this is a new, never-before-used stateid, and we are * returning an error, then just go ahead and release it. @@ -5709,6 +5731,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput: fput(filp); put_stateid: + up_write(&stp->st_rwsem); nfs4_put_stid(&stp->st_stid); out: nfsd4_bump_seqid(cstate, status); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 583ffc13cae27..31bde12feefee 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -534,15 +534,16 @@ struct nfs4_file { * Better suggestions welcome. */ struct nfs4_ol_stateid { - struct nfs4_stid st_stid; /* must be first field */ - struct list_head st_perfile; - struct list_head st_perstateowner; - struct list_head st_locks; - struct nfs4_stateowner * st_stateowner; - struct nfs4_clnt_odstate * st_clnt_odstate; - unsigned char st_access_bmap; - unsigned char st_deny_bmap; - struct nfs4_ol_stateid * st_openstp; + struct nfs4_stid st_stid; + struct list_head st_perfile; + struct list_head st_perstateowner; + struct list_head st_locks; + struct nfs4_stateowner *st_stateowner; + struct nfs4_clnt_odstate *st_clnt_odstate; + unsigned char st_access_bmap; + unsigned char st_deny_bmap; + struct nfs4_ol_stateid *st_openstp; + struct rw_semaphore st_rwsem; }; static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s) From 4e9f452926507ecabd3030efda61413a7cd9bef2 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Thu, 15 Oct 2015 12:07:28 -0400 Subject: [PATCH 189/201] nfsd: eliminate sending duplicate and repeated delegations commit 34ed9872e745fa56f10e9bef2cf3d2336c6c8816 upstream. We've observed the nfsd server in a state where there are multiple delegations on the same nfs4_file for the same client. The nfs client does attempt to DELEGRETURN these when they are presented to it - but apparently under some (unknown) circumstances the client does not manage to return all of them. This leads to the eventual attempt to CB_RECALL more than one delegation with the same nfs filehandle to the same client. The first recall will succeed, but the next recall will fail with NFS4ERR_BADHANDLE. This leads to the server having delegations on cl_revoked that the client has no way to FREE or DELEGRETURN, with resulting inability to recover. The state manager on the server will continually assert SEQ4_STATUS_RECALLABLE_STATE_REVOKED, and the state manager on the client will be looping unable to satisfy the server. List discussion also reports a race between OPEN and DELEGRETURN that will be avoided by only sending the delegation once to the client. This is also logically in accordance with RFC5561 9.1.1 and 10.2. So, let's: 1.) Not hand out duplicate delegations. 2.) Only send them to the client once. RFC 5561: 9.1.1: "Delegations and layouts, on the other hand, are not associated with a specific owner but are associated with the client as a whole (identified by a client ID)." 10.2: "...the stateid for a delegation is associated with a client ID and may be used on behalf of all the open-owners for the given client. A delegation is made to the client as a whole and not to any specific process or thread of control within it." Reported-by: Eric Meddaugh Cc: Trond Myklebust Cc: Olga Kornievskaia Signed-off-by: Andrew Elble Signed-off-by: J. Bruce Fields Signed-off-by: Greg Kroah-Hartman --- fs/nfsd/nfs4state.c | 94 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 84 insertions(+), 10 deletions(-) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1b39edf10b67d..0dea0c254ddf1 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -765,16 +765,68 @@ void nfs4_unhash_stid(struct nfs4_stid *s) s->sc_type = 0; } -static void +/** + * nfs4_get_existing_delegation - Discover if this delegation already exists + * @clp: a pointer to the nfs4_client we're granting a delegation to + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if an existing delegation was not found. + * + * On error: -EAGAIN if one was previously granted to this nfs4_client + * for this nfs4_file. + * + */ + +static int +nfs4_get_existing_delegation(struct nfs4_client *clp, struct nfs4_file *fp) +{ + struct nfs4_delegation *searchdp = NULL; + struct nfs4_client *searchclp = NULL; + + lockdep_assert_held(&state_lock); + lockdep_assert_held(&fp->fi_lock); + + list_for_each_entry(searchdp, &fp->fi_delegations, dl_perfile) { + searchclp = searchdp->dl_stid.sc_client; + if (clp == searchclp) { + return -EAGAIN; + } + } + return 0; +} + +/** + * hash_delegation_locked - Add a delegation to the appropriate lists + * @dp: a pointer to the nfs4_delegation we are adding. + * @fp: a pointer to the nfs4_file we're granting a delegation on + * + * Return: + * On success: NULL if the delegation was successfully hashed. + * + * On error: -EAGAIN if one was previously granted to this + * nfs4_client for this nfs4_file. Delegation is not hashed. + * + */ + +static int hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp) { + int status; + struct nfs4_client *clp = dp->dl_stid.sc_client; + lockdep_assert_held(&state_lock); lockdep_assert_held(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + if (status) + return status; + ++fp->fi_delegees; atomic_inc(&dp->dl_stid.sc_count); dp->dl_stid.sc_type = NFS4_DELEG_STID; list_add(&dp->dl_perfile, &fp->fi_delegations); - list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations); + list_add(&dp->dl_perclnt, &clp->cl_delegations); + return 0; } static bool @@ -3946,6 +3998,18 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag) return fl; } +/** + * nfs4_setlease - Obtain a delegation by requesting lease from vfs layer + * @dp: a pointer to the nfs4_delegation we're adding. + * + * Return: + * On success: Return code will be 0 on success. + * + * On error: -EAGAIN if there was an existing delegation. + * nonzero if there is an error in other cases. + * + */ + static int nfs4_setlease(struct nfs4_delegation *dp) { struct nfs4_file *fp = dp->dl_stid.sc_file; @@ -3977,16 +4041,19 @@ static int nfs4_setlease(struct nfs4_delegation *dp) goto out_unlock; /* Race breaker */ if (fp->fi_deleg_file) { - status = 0; - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); + status = hash_delegation_locked(dp, fp); goto out_unlock; } fp->fi_deleg_file = filp; - fp->fi_delegees = 1; - hash_delegation_locked(dp, fp); + fp->fi_delegees = 0; + status = hash_delegation_locked(dp, fp); spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); + if (status) { + /* Should never happen, this is a new fi_deleg_file */ + WARN_ON_ONCE(1); + goto out_fput; + } return 0; out_unlock: spin_unlock(&fp->fi_lock); @@ -4006,6 +4073,15 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); + spin_lock(&state_lock); + spin_lock(&fp->fi_lock); + status = nfs4_get_existing_delegation(clp, fp); + spin_unlock(&fp->fi_lock); + spin_unlock(&state_lock); + + if (status) + return ERR_PTR(status); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -4024,9 +4100,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, status = -EAGAIN; goto out_unlock; } - ++fp->fi_delegees; - hash_delegation_locked(dp, fp); - status = 0; + status = hash_delegation_locked(dp, fp); out_unlock: spin_unlock(&fp->fi_lock); spin_unlock(&state_lock); From 8489733094edbec8f3d62f96cd8b15402e66350a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 5 Nov 2015 00:01:51 +0100 Subject: [PATCH 190/201] debugfs: fix refcount imbalance in start_creating commit 0ee9608c89e81a1ccee52ecb58a7ff040e2522d9 upstream. In debugfs' start_creating(), we pin the file system to safely access its root. When we failed to create a file, we unpin the file system via failed_creating() to release the mount count and eventually the reference of the vfsmount. However, when we run into an error during lookup_one_len() when still in start_creating(), we only release the parent's mutex but not so the reference on the mount. Looks like it was done in the past, but after splitting portions of __create_file() into start_creating() and end_creating() via 190afd81e4a5 ("debugfs: split the beginning and the end of __create_file() off"), this seemed missed. Noticed during code review. Fixes: 190afd81e4a5 ("debugfs: split the beginning and the end of __create_file() off") Signed-off-by: Daniel Borkmann Signed-off-by: Al Viro Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c711be8d6a3cc..9c8d23316da18 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -271,8 +271,12 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) dput(dentry); dentry = ERR_PTR(-EEXIST); } - if (IS_ERR(dentry)) + + if (IS_ERR(dentry)) { mutex_unlock(&d_inode(parent)->i_mutex); + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + } + return dentry; } From 9a08c6f60bbc067199214b4730df27979716f242 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 20 Nov 2015 09:56:20 -0500 Subject: [PATCH 191/201] nfs4: start callback_ident at idr 1 commit c68a027c05709330fe5b2f50c50d5fa02124b5d8 upstream. If clp->cl_cb_ident is zero, then nfs_cb_idr_remove_locked() skips removing it when the nfs_client is freed. A decoding or server bug can then find and try to put that first nfs_client which would lead to a crash. Signed-off-by: Benjamin Coddington Fixes: d6870312659d ("nfs4client: convert to idr_alloc()") Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 223bedda64ae4..10410e8b58530 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) return ret; idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); if (ret >= 0) clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); From 4ffcc6f992ca1bef2e710ebe2fde80ced92ec263 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 25 Nov 2015 13:43:14 -0500 Subject: [PATCH 192/201] nfs4: resend LAYOUTGET when there is a race that changes the seqid commit 4f2e9dce0c6348a95eaa56ade9bab18572221088 upstream. pnfs_layout_process will check the returned layout stateid against what the kernel has in-core. If it turns out that the stateid we received is older, then we should resend the LAYOUTGET instead of falling back to MDS I/O. Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/pnfs.c | 56 ++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 8abe27165ad04..abf5caea20c93 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - lgp = kzalloc(sizeof(*lgp), gfp_flags); - if (lgp == NULL) - return NULL; + /* + * Synchronously retrieve layout information from server and + * store in lseg. If we race with a concurrent seqid morphing + * op, then re-send the LAYOUTGET. + */ + do { + lgp = kzalloc(sizeof(*lgp), gfp_flags); + if (lgp == NULL) + return NULL; + + i_size = i_size_read(ino); + + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; + if (range->iomode == IOMODE_READ) { + if (range->offset >= i_size) + lgp->args.minlength = 0; + else if (i_size - range->offset < lgp->args.minlength) + lgp->args.minlength = i_size - range->offset; + } + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; + lgp->args.range = *range; + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); + lgp->gfp_flags = gfp_flags; + lgp->cred = lo->plh_lc_cred; - i_size = i_size_read(ino); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + } while (lseg == ERR_PTR(-EAGAIN)); - lgp->args.minlength = PAGE_CACHE_SIZE; - if (lgp->args.minlength > range->length) - lgp->args.minlength = range->length; - if (range->iomode == IOMODE_READ) { - if (range->offset >= i_size) - lgp->args.minlength = 0; - else if (i_size - range->offset < lgp->args.minlength) - lgp->args.minlength = i_size - range->offset; - } - lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range = *range; - lgp->args.type = server->pnfs_curr_ld->id; - lgp->args.inode = ino; - lgp->args.ctx = get_nfs_open_context(ctx); - lgp->gfp_flags = gfp_flags; - lgp->cred = lo->plh_lc_cred; - - /* Synchronously retrieve layout information from server and - * store in lseg. - */ - lseg = nfs4_proc_layoutget(lgp, gfp_flags); if (IS_ERR(lseg)) { switch (PTR_ERR(lseg)) { case -ENOMEM: @@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); + status = -EAGAIN; goto out_forget_reply; } pnfs_set_layout_stateid(lo, &res->stateid, false); From 3994c2dee338ddc51adc2426d1208b41f04d15eb Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 25 Nov 2015 13:50:11 -0500 Subject: [PATCH 193/201] nfs: if we have no valid attrs, then don't declare the attribute cache valid commit c812012f9ca7cf89c9e1a1cd512e6c3b5be04b85 upstream. If we pass in an empty nfs_fattr struct to nfs_update_inode, it will (correctly) not update any of the attributes, but it then clears the NFS_INO_INVALID_ATTR flag, which indicates that the attributes are up to date. Don't clear the flag if the fattr struct has no valid attrs to apply. Reviewed-by: Steve French Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust Signed-off-by: Greg Kroah-Hartman --- fs/nfs/inode.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 326d9e10d8337..ffdf9b9e88ab6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1824,7 +1824,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) nfsi->attr_gencount = fattr->gencount; } - invalid &= ~NFS_INO_INVALID_ATTR; + + /* Don't declare attrcache up to date if there were no attrs! */ + if (fattr->valid != 0) + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) From eeec50cb5826af4c70564ed49d88626439598327 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 20 Nov 2015 15:57:30 -0800 Subject: [PATCH 194/201] ocfs2: fix umask ignored issue commit 8f1eb48758aacf6c1ffce18179295adbf3bd7640 upstream. New created file's mode is not masked with umask, and this makes umask not work for ocfs2 volume. Fixes: 702e5bc ("ocfs2: use generic posix ACL infrastructure") Signed-off-by: Junxiao Bi Cc: Gang He Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/ocfs2/namei.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index b7dfac226b1e2..12bfa9ca55832 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -374,6 +374,8 @@ static int ocfs2_mknod(struct inode *dir, mlog_errno(status); goto leave; } + /* update inode->i_mode after mask with "umask". */ + inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), From 278b54df14299e1cfbe6175e2a098bf0b6616611 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 24 Nov 2015 10:35:29 +0800 Subject: [PATCH 195/201] block: fix segment split commit 578270bfbd2803dc7b0b03fbc2ac119efbc73195 upstream. Inside blk_bio_segment_split(), previous bvec pointer(bvprvp) always points to the iterator local variable, which is obviously wrong, so fix it by pointing to the local variable of 'bvprv'. Fixes: 5014c311baa2b(block: fix bogus compiler warnings in blk-merge.c) Reported-by: Michael Ellerman Reported-by: Mark Salter Tested-by: Laurent Dufour Tested-by: Mark Salter Signed-off-by: Ming Lei Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- block/blk-merge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/blk-merge.c b/block/blk-merge.c index c4e9c37f3e381..0e5f4fc124491 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -91,7 +91,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, seg_size += bv.bv_len; bvprv = bv; - bvprvp = &bv; + bvprvp = &bvprv; sectors += bv.bv_len >> 9; continue; } @@ -101,7 +101,7 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, nsegs++; bvprv = bv; - bvprvp = &bv; + bvprvp = &bvprv; seg_size = bv.bv_len; sectors += bv.bv_len >> 9; } From 4bf8855ea964d7faa5dece0a3dd032d47f4b9a42 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 Sep 2015 15:04:42 +0200 Subject: [PATCH 196/201] ceph: fix message length computation commit 777d738a5e58ba3b6f3932ab1543ce93703f4873 upstream. create_request_message() computes the maximum length of a message, but uses the wrong type for the time stamp: sizeof(struct timespec) may be 8 or 16 depending on the architecture, while sizeof(struct ceph_timespec) is always 8, and that is what gets put into the message. Found while auditing the uses of timespec for y2038 problems. Fixes: b8e69066d8af ("ceph: include time stamp in every MDS request") Signed-off-by: Arnd Bergmann Signed-off-by: Yan, Zheng Signed-off-by: Greg Kroah-Hartman --- fs/ceph/mds_client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 51cb02da75d98..fe2c982764e78 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1935,7 +1935,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, len = sizeof(*head) + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + - sizeof(struct timespec); + sizeof(struct ceph_timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * From 4ba2bd2c4729caaf6702c5426ffb7c65b9b1405d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 12 Nov 2015 12:13:57 -0800 Subject: [PATCH 197/201] ALSA: pci: depend on ZONE_DMA commit 2db1a57986d37653583e67ccbf13082aadc8f25d upstream. There are several sound drivers that 'select ZONE_DMA'. This is backwards as ZONE_DMA is an architecture capability exported to drivers. Switch the polarity of the dependency to disable these drivers when the architecture does not support ZONE_DMA. This was discovered in the context of testing/enabling devm_memremap_pages() which depends on ZONE_DEVICE. ZONE_DEVICE in turn depends on !ZONE_DMA. Reported-by: Jeff Moyer Signed-off-by: Dan Williams Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/Kconfig | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig index edfc1b8d553ee..656ce39bddbc5 100644 --- a/sound/pci/Kconfig +++ b/sound/pci/Kconfig @@ -25,7 +25,7 @@ config SND_ALS300 select SND_PCM select SND_AC97_CODEC select SND_OPL3_LIB - select ZONE_DMA + depends on ZONE_DMA help Say 'Y' or 'M' to include support for Avance Logic ALS300/ALS300+ @@ -50,7 +50,7 @@ config SND_ALI5451 tristate "ALi M5451 PCI Audio Controller" select SND_MPU401_UART select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for the integrated AC97 sound device on motherboards using the ALi M5451 Audio Controller @@ -155,7 +155,7 @@ config SND_AZT3328 select SND_PCM select SND_RAWMIDI select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for Aztech AZF3328 (PCI168) soundcards. @@ -463,7 +463,7 @@ config SND_EMU10K1 select SND_HWDEP select SND_RAWMIDI select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y to include support for Sound Blaster PCI 512, Live!, Audigy and E-mu APS (partially supported) soundcards. @@ -479,7 +479,7 @@ config SND_EMU10K1X tristate "Emu10k1X (Dell OEM Version)" select SND_AC97_CODEC select SND_RAWMIDI - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for the Dell OEM version of the Sound Blaster Live!. @@ -513,7 +513,7 @@ config SND_ES1938 select SND_OPL3_LIB select SND_MPU401_UART select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for soundcards based on ESS Solo-1 (ES1938, ES1946, ES1969) chips. @@ -525,7 +525,7 @@ config SND_ES1968 tristate "ESS ES1968/1978 (Maestro-1/2/2E)" select SND_MPU401_UART select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for soundcards based on ESS Maestro 1/2/2E chips. @@ -612,7 +612,7 @@ config SND_ICE1712 select SND_MPU401_UART select SND_AC97_CODEC select BITREVERSE - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for soundcards based on the ICE1712 (Envy24) chip. @@ -700,7 +700,7 @@ config SND_LX6464ES config SND_MAESTRO3 tristate "ESS Allegro/Maestro3" select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for soundcards based on ESS Maestro 3 (Allegro) chips. @@ -806,7 +806,7 @@ config SND_SIS7019 tristate "SiS 7019 Audio Accelerator" depends on X86_32 select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for the SiS 7019 Audio Accelerator. @@ -818,7 +818,7 @@ config SND_SONICVIBES select SND_OPL3_LIB select SND_MPU401_UART select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for soundcards based on the S3 SonicVibes chip. @@ -830,7 +830,7 @@ config SND_TRIDENT tristate "Trident 4D-Wave DX/NX; SiS 7018" select SND_MPU401_UART select SND_AC97_CODEC - select ZONE_DMA + depends on ZONE_DMA help Say Y here to include support for soundcards based on Trident 4D-Wave DX/NX or SiS 7018 chips. From de48652f0e6acd07b4fa0e58a5e76f43477f8e2a Mon Sep 17 00:00:00 2001 From: "Lu, Han" Date: Wed, 11 Nov 2015 16:54:27 +0800 Subject: [PATCH 198/201] ALSA: hda/hdmi - apply Skylake fix-ups to Broxton display codec commit e2656412f2a7343ecfd13eb74bac0a6e6e9c5aad upstream. Broxton and Skylake have the same behavior on display audio. So this patch applys Skylake fix-ups to Broxton. Signed-off-by: Lu, Han Signed-off-by: Takashi Iwai Signed-off-by: Greg Kroah-Hartman --- sound/pci/hda/patch_hdmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index acbfbe087ee86..f22f5c4094473 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -50,8 +50,9 @@ MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info"); #define is_haswell(codec) ((codec)->core.vendor_id == 0x80862807) #define is_broadwell(codec) ((codec)->core.vendor_id == 0x80862808) #define is_skylake(codec) ((codec)->core.vendor_id == 0x80862809) +#define is_broxton(codec) ((codec)->core.vendor_id == 0x8086280a) #define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \ - || is_skylake(codec)) + || is_skylake(codec) || is_broxton(codec)) #define is_valleyview(codec) ((codec)->core.vendor_id == 0x80862882) #define is_cherryview(codec) ((codec)->core.vendor_id == 0x80862883) From 4f2347e64803932bc354a12830e3cd10d86e5b49 Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Mon, 21 Sep 2015 08:42:04 -0300 Subject: [PATCH 199/201] cobalt: fix Kconfig dependency commit fc88dd16a0e430f57458e6bd9b62a631c6ea53a1 upstream. The cobalt driver should depend on VIDEO_V4L2_SUBDEV_API. This fixes this kbuild error: tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 99bc7215bc60f6cd414cf1b85cd9d52cc596cccb commit: 85756a069c55e0315ac5990806899cfb607b987f [media] cobalt: add new driver config: x86_64-randconfig-s0-09201514 (attached as .config) reproduce: git checkout 85756a069c55e0315ac5990806899cfb607b987f # save the attached .config to linux build tree make ARCH=x86_64 All error/warnings (new ones prefixed by >>): drivers/media/i2c/adv7604.c: In function 'adv76xx_get_format': >> drivers/media/i2c/adv7604.c:1853:9: error: implicit declaration of function 'v4l2_subdev_get_try_format' [-Werror=implicit-function-declaration] fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad); ^ drivers/media/i2c/adv7604.c:1853:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion] fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad); ^ drivers/media/i2c/adv7604.c: In function 'adv76xx_set_format': drivers/media/i2c/adv7604.c:1882:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion] fmt = v4l2_subdev_get_try_format(sd, cfg, format->pad); ^ cc1: some warnings being treated as errors Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Greg Kroah-Hartman --- drivers/media/pci/cobalt/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/media/pci/cobalt/Kconfig b/drivers/media/pci/cobalt/Kconfig index 1f88ccc174daa..a01f0cc745cc3 100644 --- a/drivers/media/pci/cobalt/Kconfig +++ b/drivers/media/pci/cobalt/Kconfig @@ -1,6 +1,6 @@ config VIDEO_COBALT tristate "Cisco Cobalt support" - depends on VIDEO_V4L2 && I2C && MEDIA_CONTROLLER + depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API depends on PCI_MSI && MTD_COMPLEX_MAPPINGS depends on GPIOLIB || COMPILE_TEST depends on SND From e0aa614dc9ce697f51a0992507b36b3edbc7a641 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 23 Oct 2015 07:52:54 +0100 Subject: [PATCH 200/201] Btrfs: fix regression running delayed references when using qgroups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit b06c4bf5c874a57254b197f53ddf588e7a24a2bf upstream. In the kernel 4.2 merge window we had a big changes to the implementation of delayed references and qgroups which made the no_quota field of delayed references not used anymore. More specifically the no_quota field is not used anymore as of: commit 0ed4792af0e8 ("btrfs: qgroup: Switch to new extent-oriented qgroup mechanism.") Leaving the no_quota field actually prevents delayed references from getting merged, which in turn cause the following BUG_ON(), at fs/btrfs/extent-tree.c, to be hit when qgroups are enabled: static int run_delayed_tree_ref(...) { (...) BUG_ON(node->ref_mod != 1); (...) } This happens on a scenario like the following: 1) Ref1 bytenr X, action = BTRFS_ADD_DELAYED_REF, no_quota = 1, added. 2) Ref2 bytenr X, action = BTRFS_DROP_DELAYED_REF, no_quota = 0, added. It's not merged with Ref1 because Ref1->no_quota != Ref2->no_quota. 3) Ref3 bytenr X, action = BTRFS_ADD_DELAYED_REF, no_quota = 1, added. It's not merged with the reference at the tail of the list of refs for bytenr X because the reference at the tail, Ref2 is incompatible due to Ref2->no_quota != Ref3->no_quota. 4) Ref4 bytenr X, action = BTRFS_DROP_DELAYED_REF, no_quota = 0, added. It's not merged with the reference at the tail of the list of refs for bytenr X because the reference at the tail, Ref3 is incompatible due to Ref3->no_quota != Ref4->no_quota. 5) We run delayed references, trigger merging of delayed references, through __btrfs_run_delayed_refs() -> btrfs_merge_delayed_refs(). 6) Ref1 and Ref3 are merged as Ref1->no_quota = Ref3->no_quota and all other conditions are satisfied too. So Ref1 gets a ref_mod value of 2. 7) Ref2 and Ref4 are merged as Ref2->no_quota = Ref4->no_quota and all other conditions are satisfied too. So Ref2 gets a ref_mod value of 2. 8) Ref1 and Ref2 aren't merged, because they have different values for their no_quota field. 9) Delayed reference Ref1 is picked for running (select_delayed_ref() always prefers references with an action == BTRFS_ADD_DELAYED_REF). So run_delayed_tree_ref() is called for Ref1 which triggers the BUG_ON because Ref1->red_mod != 1 (equals 2). So fix this by removing the no_quota field, as it's not used anymore as of commit 0ed4792af0e8 ("btrfs: qgroup: Switch to new extent-oriented qgroup mechanism."). The use of no_quota was also buggy in at least two places: 1) At delayed-refs.c:btrfs_add_delayed_tree_ref() - we were setting no_quota to 0 instead of 1 when the following condition was true: is_fstree(ref_root) || !fs_info->quota_enabled 2) At extent-tree.c:__btrfs_inc_extent_ref() - we were attempting to reset a node's no_quota when the condition "!is_fstree(root_objectid) || !root->fs_info->quota_enabled" was true but we did it only in an unused local stack variable, that is, we never reset the no_quota value in the node itself. This fixes the remainder of problems several people have been having when running delayed references, mostly while a balance is running in parallel, on a 4.2+ kernel. Very special thanks to Stéphane Lesimple for helping debugging this issue and testing this fix on his multi terabyte filesystem (which took more than one day to balance alone, plus fsck, etc). Also, this fixes deadlock issue when using the clone ioctl with qgroups enabled, as reported by Elias Probst in the mailing list. The deadlock happens because after calling btrfs_insert_empty_item we have our path holding a write lock on a leaf of the fs/subvol tree and then before releasing the path we called check_ref() which did backref walking, when qgroups are enabled, and tried to read lock the same leaf. The trace for this case is the following: INFO: task systemd-nspawn:6095 blocked for more than 120 seconds. (...) Call Trace: [] schedule+0x74/0x83 [] btrfs_tree_read_lock+0xc0/0xea [] ? wait_woken+0x74/0x74 [] btrfs_search_old_slot+0x51a/0x810 [] btrfs_next_old_leaf+0xdf/0x3ce [] ? ulist_add_merge+0x1b/0x127 [] __resolve_indirect_refs+0x62a/0x667 [] ? btrfs_clear_lock_blocking_rw+0x78/0xbe [] find_parent_nodes+0xaf3/0xfc6 [] __btrfs_find_all_roots+0x92/0xf0 [] btrfs_find_all_roots+0x45/0x65 [] ? btrfs_get_tree_mod_seq+0x2b/0x88 [] check_ref+0x64/0xc4 [] btrfs_clone+0x66e/0xb5d [] btrfs_ioctl_clone+0x48f/0x5bb [] ? native_sched_clock+0x28/0x77 [] btrfs_ioctl+0xabc/0x25cb (...) The problem goes away by eleminating check_ref(), which no longer is needed as its purpose was to get a value for the no_quota field of a delayed reference (this patch removes the no_quota field as mentioned earlier). Reported-by: Stéphane Lesimple Tested-by: Stéphane Lesimple Reported-by: Elias Probst Reported-by: Peter Becker Reported-by: Malte Schröder Reported-by: Derek Dongray Reported-by: Erkki Seppala Cc: stable@vger.kernel.org # 4.2+ Signed-off-by: Filipe Manana Reviewed-by: Qu Wenruo --- fs/btrfs/ctree.h | 4 +-- fs/btrfs/delayed-ref.c | 28 ++++++------------- fs/btrfs/delayed-ref.h | 7 ++--- fs/btrfs/extent-tree.c | 45 +++++++++++------------------- fs/btrfs/file.c | 10 +++---- fs/btrfs/inode.c | 4 +-- fs/btrfs/ioctl.c | 62 +----------------------------------------- fs/btrfs/relocation.c | 16 +++++------ fs/btrfs/tree-log.c | 2 +- 9 files changed, 44 insertions(+), 134 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 938efe33be809..94eea1f432801 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3398,7 +3398,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int no_quota); + u64 owner, u64 offset); int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len, int delalloc); @@ -3411,7 +3411,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, int no_quota); + u64 root_objectid, u64 owner, u64 offset); int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans, struct btrfs_root *root); diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 4832943aaa5cd..7832031fef68d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -220,7 +220,7 @@ static bool merge_ref(struct btrfs_trans_handle *trans, if (seq && next->seq >= seq) goto next; - if (next->type != ref->type || next->no_quota != ref->no_quota) + if (next->type != ref->type) goto next; if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY || @@ -405,8 +405,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node, list); /* No need to compare bytenr nor is_head */ - if (exist->type != ref->type || exist->no_quota != ref->no_quota || - exist->seq != ref->seq) + if (exist->type != ref->type || exist->seq != ref->seq) goto add_tail; if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY || @@ -637,7 +636,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, - int action, int no_quota) + int action) { struct btrfs_delayed_tree_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; @@ -659,7 +658,6 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref->action = action; ref->is_head = 0; ref->in_tree = 1; - ref->no_quota = no_quota; ref->seq = seq; full_ref = btrfs_delayed_node_to_tree_ref(ref); @@ -692,7 +690,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *head_ref, struct btrfs_delayed_ref_node *ref, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, - u64 offset, int action, int no_quota) + u64 offset, int action) { struct btrfs_delayed_data_ref *full_ref; struct btrfs_delayed_ref_root *delayed_refs; @@ -715,7 +713,6 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref->action = action; ref->is_head = 0; ref->in_tree = 1; - ref->no_quota = no_quota; ref->seq = seq; full_ref = btrfs_delayed_node_to_data_ref(ref); @@ -746,17 +743,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_tree_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - no_quota = 0; - BUG_ON(extent_op && extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); if (!ref) @@ -785,8 +778,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, bytenr, num_bytes, action, 0); add_delayed_tree_ref(fs_info, trans, head_ref, &ref->node, bytenr, - num_bytes, parent, ref_root, level, action, - no_quota); + num_bytes, parent, ref_root, level, action); spin_unlock(&delayed_refs->lock); return 0; @@ -807,17 +799,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_delayed_data_ref *ref; struct btrfs_delayed_ref_head *head_ref; struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_qgroup_extent_record *record = NULL; - if (!is_fstree(ref_root) || !fs_info->quota_enabled) - no_quota = 0; - BUG_ON(extent_op && !extent_op->is_data); ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); if (!ref) @@ -853,7 +841,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, add_delayed_data_ref(fs_info, trans, head_ref, &ref->node, bytenr, num_bytes, parent, ref_root, owner, offset, - action, no_quota); + action); spin_unlock(&delayed_refs->lock); return 0; diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 13fb5e6090fe0..930887a4275f5 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -68,7 +68,6 @@ struct btrfs_delayed_ref_node { unsigned int action:8; unsigned int type:8; - unsigned int no_quota:1; /* is this node still in the rbtree? */ unsigned int is_head:1; unsigned int in_tree:1; @@ -233,15 +232,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, int level, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota); + struct btrfs_delayed_extent_op *extent_op); int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, u64 owner, u64 offset, int action, - struct btrfs_delayed_extent_op *extent_op, - int no_quota); + struct btrfs_delayed_extent_op *extent_op); int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 00158bfd1771e..cadacf643bd0a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -95,8 +95,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, - int level, struct btrfs_key *ins, - int no_quota); + int level, struct btrfs_key *ins); static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 flags, int force); @@ -2009,8 +2008,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, - u64 root_objectid, u64 owner, u64 offset, - int no_quota) + u64 root_objectid, u64 owner, u64 offset) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -2022,12 +2020,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, (int)owner, - BTRFS_ADD_DELAYED_REF, NULL, no_quota); + BTRFS_ADD_DELAYED_REF, NULL); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_REF, NULL, no_quota); + BTRFS_ADD_DELAYED_REF, NULL); } return ret; } @@ -2048,15 +2046,11 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, u64 num_bytes = node->num_bytes; u64 refs; int ret; - int no_quota = node->no_quota; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - if (!is_fstree(root_objectid) || !root->fs_info->quota_enabled) - no_quota = 1; - path->reada = 1; path->leave_spinning = 1; /* this will setup the path even if it fails to insert the back ref */ @@ -2291,8 +2285,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, parent, ref_root, extent_op->flags_to_set, &extent_op->key, - ref->level, &ins, - node->no_quota); + ref->level, &ins); } else if (node->action == BTRFS_ADD_DELAYED_REF) { ret = __btrfs_inc_extent_ref(trans, root, node, parent, ref_root, @@ -3123,7 +3116,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, int level; int ret = 0; int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, - u64, u64, u64, u64, u64, u64, int); + u64, u64, u64, u64, u64, u64); if (btrfs_test_is_dummy_root(root)) @@ -3164,15 +3157,14 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, key.offset -= btrfs_file_extent_offset(buf, fi); ret = process_func(trans, root, bytenr, num_bytes, parent, ref_root, key.objectid, - key.offset, 1); + key.offset); if (ret) goto fail; } else { bytenr = btrfs_node_blockptr(buf, i); num_bytes = root->nodesize; ret = process_func(trans, root, bytenr, num_bytes, - parent, ref_root, level - 1, 0, - 1); + parent, ref_root, level - 1, 0); if (ret) goto fail; } @@ -6247,7 +6239,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, int extent_slot = 0; int found_extent = 0; int num_to_del = 1; - int no_quota = node->no_quota; u32 item_size; u64 refs; u64 bytenr = node->bytenr; @@ -6256,9 +6247,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, bool skinny_metadata = btrfs_fs_incompat(root->fs_info, SKINNY_METADATA); - if (!info->quota_enabled || !is_fstree(root_objectid)) - no_quota = 1; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -6584,7 +6572,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, buf->start, buf->len, parent, root->root_key.objectid, btrfs_header_level(buf), - BTRFS_DROP_DELAYED_REF, NULL, 0); + BTRFS_DROP_DELAYED_REF, NULL); BUG_ON(ret); /* -ENOMEM */ } @@ -6632,7 +6620,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, /* Can return -ENOMEM */ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, - u64 owner, u64 offset, int no_quota) + u64 owner, u64 offset) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -6655,13 +6643,13 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, (int)owner, - BTRFS_DROP_DELAYED_REF, NULL, no_quota); + BTRFS_DROP_DELAYED_REF, NULL); } else { ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, num_bytes, parent, root_objectid, owner, offset, BTRFS_DROP_DELAYED_REF, - NULL, no_quota); + NULL); } return ret; } @@ -7443,8 +7431,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 parent, u64 root_objectid, u64 flags, struct btrfs_disk_key *key, - int level, struct btrfs_key *ins, - int no_quota) + int level, struct btrfs_key *ins) { int ret; struct btrfs_fs_info *fs_info = root->fs_info; @@ -7534,7 +7521,7 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ret = btrfs_add_delayed_data_ref(root->fs_info, trans, ins->objectid, ins->offset, 0, root_objectid, owner, offset, - BTRFS_ADD_DELAYED_EXTENT, NULL, 0); + BTRFS_ADD_DELAYED_EXTENT, NULL); return ret; } @@ -7748,7 +7735,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, parent, root_objectid, level, BTRFS_ADD_DELAYED_EXTENT, - extent_op, 0); + extent_op); if (ret) goto out_free_delayed; } @@ -8296,7 +8283,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } } ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, - root->root_key.objectid, level - 1, 0, 0); + root->root_key.objectid, level - 1, 0); BUG_ON(ret); /* -ENOMEM */ } btrfs_tree_unlock(next); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6e80b78f797f8..e27ea7ae7f26a 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -855,7 +855,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, disk_bytenr, num_bytes, 0, root->root_key.objectid, new_key.objectid, - start - extent_offset, 1); + start - extent_offset); BUG_ON(ret); /* -ENOMEM */ } key.offset = start; @@ -933,7 +933,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, disk_bytenr, num_bytes, 0, root->root_key.objectid, key.objectid, key.offset - - extent_offset, 0); + extent_offset); BUG_ON(ret); /* -ENOMEM */ inode_sub_bytes(inode, extent_end - key.offset); @@ -1212,7 +1212,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset, 1); + ino, orig_offset); BUG_ON(ret); /* -ENOMEM */ if (split == start) { @@ -1239,7 +1239,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset, 0); + ino, orig_offset); BUG_ON(ret); /* -ENOMEM */ } other_start = 0; @@ -1256,7 +1256,7 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, del_nr++; ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 0, root->root_key.objectid, - ino, orig_offset, 0); + ino, orig_offset); BUG_ON(ret); /* -ENOMEM */ } if (del_nr == 0) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index e0c17e072a7a7..396e3d5c4e835 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2579,7 +2579,7 @@ static noinline int relink_extent_backref(struct btrfs_path *path, ret = btrfs_inc_extent_ref(trans, root, new->bytenr, new->disk_len, 0, backref->root_id, backref->inum, - new->file_pos, 0); /* start - extent_offset */ + new->file_pos); /* start - extent_offset */ if (ret) { btrfs_abort_transaction(trans, root, ret); goto out_free_path; @@ -4521,7 +4521,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, extent_start, extent_num_bytes, 0, btrfs_header_owner(leaf), - ino, extent_offset, 0); + ino, extent_offset); BUG_ON(ret); if (btrfs_should_throttle_delayed_refs(trans, root)) btrfs_async_run_delayed_refs(root, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 30dca32804eeb..6548a36823bc9 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -3203,41 +3203,6 @@ static long btrfs_ioctl_file_extent_same(struct file *file, return ret; } -/* Helper to check and see if this root currently has a ref on the given disk - * bytenr. If it does then we need to update the quota for this root. This - * doesn't do anything if quotas aren't enabled. - */ -static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - u64 disko) -{ - struct seq_list tree_mod_seq_elem = SEQ_LIST_INIT(tree_mod_seq_elem); - struct ulist *roots; - struct ulist_iterator uiter; - struct ulist_node *root_node = NULL; - int ret; - - if (!root->fs_info->quota_enabled) - return 1; - - btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); - ret = btrfs_find_all_roots(trans, root->fs_info, disko, - tree_mod_seq_elem.seq, &roots); - if (ret < 0) - goto out; - ret = 0; - ULIST_ITER_INIT(&uiter); - while ((root_node = ulist_next(roots, &uiter))) { - if (root_node->val == root->objectid) { - ret = 1; - break; - } - } - ulist_free(roots); -out: - btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); - return ret; -} - static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, @@ -3496,9 +3461,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode, u32 nritems; int slot; int ret; - int no_quota; const u64 len = olen_aligned; - u64 last_disko = 0; u64 last_dest_end = destoff; ret = -ENOMEM; @@ -3544,7 +3507,6 @@ static int btrfs_clone(struct inode *src, struct inode *inode, nritems = btrfs_header_nritems(path->nodes[0]); process_slot: - no_quota = 1; if (path->slots[0] >= nritems) { ret = btrfs_next_leaf(BTRFS_I(src)->root, path); if (ret < 0) @@ -3696,35 +3658,13 @@ static int btrfs_clone(struct inode *src, struct inode *inode, btrfs_set_file_extent_num_bytes(leaf, extent, datal); - /* - * We need to look up the roots that point at - * this bytenr and see if the new root does. If - * it does not we need to make sure we update - * quotas appropriately. - */ - if (disko && root != BTRFS_I(src)->root && - disko != last_disko) { - no_quota = check_ref(trans, root, - disko); - if (no_quota < 0) { - btrfs_abort_transaction(trans, - root, - ret); - btrfs_end_transaction(trans, - root); - ret = no_quota; - goto out; - } - } - if (disko) { inode_add_bytes(inode, datal); ret = btrfs_inc_extent_ref(trans, root, disko, diskl, 0, root->root_key.objectid, btrfs_ino(inode), - new_key.offset - datao, - no_quota); + new_key.offset - datao); if (ret) { btrfs_abort_transaction(trans, root, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 303babeef5057..ab507e3d536bf 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1716,7 +1716,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, new_bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset, 1); + key.objectid, key.offset); if (ret) { btrfs_abort_transaction(trans, root, ret); break; @@ -1724,7 +1724,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset, 1); + key.objectid, key.offset); if (ret) { btrfs_abort_transaction(trans, root, ret); break; @@ -1900,23 +1900,21 @@ int replace_path(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0, - 1); + src->root_key.objectid, level - 1, 0); BUG_ON(ret); ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0, 1); + 0); BUG_ON(ret); ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0, - 1); + src->root_key.objectid, level - 1, 0); BUG_ON(ret); ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0, 1); + 0); BUG_ON(ret); btrfs_unlock_up_safe(path, 0); @@ -2745,7 +2743,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, node->eb->start, blocksize, upper->eb->start, btrfs_header_owner(upper->eb), - node->level, 0, 1); + node->level, 0); BUG_ON(ret); ret = btrfs_drop_subtree(trans, root, eb, upper->eb); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1bbaace733838..6f8af2de5912c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -691,7 +691,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, ins.objectid, ins.offset, 0, root->root_key.objectid, - key->objectid, offset, 0); + key->objectid, offset); if (ret) goto out; } else { From 35483418917d63df90bae5b2d0b7b047d7ed8ec7 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 14 Dec 2015 21:41:43 -0800 Subject: [PATCH 201/201] Linux 4.3.3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1a4953b3e10ff..2070d16bb5a4d 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ VERSION = 4 PATCHLEVEL = 3 -SUBLEVEL = 2 +SUBLEVEL = 3 EXTRAVERSION = NAME = Blurry Fish Butt