From 8053f21675b073b379cbca258ee4a3f3850dfa94 Mon Sep 17 00:00:00 2001 From: Nicolin Chen <nicoleotsuka@gmail.com> Date: Tue, 26 Jul 2016 14:55:51 -0700 Subject: [PATCH 001/513] ASoC: dapm: Add a dummy snd_pcm_runtime to avoid NULL pointer access The SND_SOC_DAPM_PRE_PMU case would call startup()/hw_params() that might access substream->runtime through other functions. For example: Unable to handle kernel NULL pointer dereference at virtual address [....] PC is at snd_pcm_hw_rule_add+0x24/0x1b0 LR is at snd_pcm_hw_constraint_list+0x20/0x28 [....] Process arecord (pid: 424, stack limit = 0xffffffc1ecaf0020) Call trace: [<ffffffc00086be68>] snd_pcm_hw_rule_add+0x24/0x1b0 [<ffffffc00086c014>] snd_pcm_hw_constraint_list+0x20/0x28 [<ffffffc0008b47a4>] cs53l30_pcm_startup+0x24/0x30 [<ffffffc0008a6260>] snd_soc_dai_link_event+0x290/0x354 [<ffffffc0008a7528>] dapm_seq_check_event.isra.31+0x134/0x2c8 [<ffffffc0008a7768>] dapm_seq_run_coalesced+0x94/0x1c8 [<ffffffc0008a7940>] dapm_seq_run+0xa4/0x404 [<ffffffc0008a8bac>] dapm_power_widgets+0x524/0x984 [<ffffffc0008ab1c4>] snd_soc_dapm_stream_event+0x8c/0xa8 [<ffffffc0008ac7f4>] soc_pcm_prepare+0x10c/0x1ec [<ffffffc000865b9c>] snd_pcm_do_prepare+0x1c/0x38 [<ffffffc000865600>] snd_pcm_action_single+0x40/0x88 [<ffffffc0008656b8>] snd_pcm_action_nonatomic+0x70/0x90 [<ffffffc000868d28>] snd_pcm_common_ioctl1+0xb6c/0xdd8 [<ffffffc000869508>] snd_pcm_capture_ioctl1+0x200/0x334 [<ffffffc00086a084>] snd_pcm_ioctl_compat+0x648/0x95c [<ffffffc0001ff4b4>] compat_SyS_ioctl+0xac/0xfc4 [<ffffffc000084cf0>] el0_svc_naked+0x24/0x28 ---[ end trace 0dc4f99c2759c35c ]--- So this patch adds a dummy runtime for the original dummy substream to merely avoid the NULL pointer access. Signed-off-by: Nicolin Chen <nicoleotsuka@gmail.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/soc-dapm.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index 8698c26773b3c..d908ff8f97554 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -3493,6 +3493,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, const struct snd_soc_pcm_stream *config = w->params + w->params_select; struct snd_pcm_substream substream; struct snd_pcm_hw_params *params = NULL; + struct snd_pcm_runtime *runtime = NULL; u64 fmt; int ret; @@ -3541,6 +3542,14 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, memset(&substream, 0, sizeof(substream)); + /* Allocate a dummy snd_pcm_runtime for startup() and other ops() */ + runtime = kzalloc(sizeof(*runtime), GFP_KERNEL); + if (!runtime) { + ret = -ENOMEM; + goto out; + } + substream.runtime = runtime; + switch (event) { case SND_SOC_DAPM_PRE_PMU: substream.stream = SNDRV_PCM_STREAM_CAPTURE; @@ -3606,6 +3615,7 @@ static int snd_soc_dai_link_event(struct snd_soc_dapm_widget *w, } out: + kfree(runtime); kfree(params); return ret; } From 93ca33c99f22a0a096f83f19c9f887aeb67507a1 Mon Sep 17 00:00:00 2001 From: Hiroyuki Yokoyama <hiroyuki.yokoyama.vx@renesas.com> Date: Mon, 25 Jul 2016 01:52:43 +0000 Subject: [PATCH 002/513] ASoC: rsnd: Fixup SRCm_IFSVR calculate method This patch fixes the calculation accuracy degradation of SRCm_IFSVR register value. Signed-off-by: Hiroyuki Yokoyama <hiroyuki.yokoyama.vx@renesas.com> Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/sh/rcar/src.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sound/soc/sh/rcar/src.c b/sound/soc/sh/rcar/src.c index e39f916d0f2fa..969a5169de255 100644 --- a/sound/soc/sh/rcar/src.c +++ b/sound/soc/sh/rcar/src.c @@ -226,8 +226,12 @@ static void rsnd_src_set_convert_rate(struct rsnd_dai_stream *io, ifscr = 0; fsrate = 0; if (fin != fout) { + u64 n; + ifscr = 1; - fsrate = 0x0400000 / fout * fin; + n = (u64)0x0400000 * fin; + do_div(n, fout); + fsrate = n; } /* From 2392f7fd695246df4fb5f0b5fb88ce37cdb01764 Mon Sep 17 00:00:00 2001 From: Vinod Koul <vinod.koul@intel.com> Date: Tue, 26 Jul 2016 18:06:39 +0530 Subject: [PATCH 003/513] ASoC: Intel: Skylake: Check list empty while getting module info Module list can be NULL so check if the list is empty before accessing the list. Signed-off-by: Senthilnathan Veppur <senthilnathanx.veppur@intel.com> Signed-off-by: Vinod Koul <vinod.koul@intel.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/intel/skylake/skl-sst-utils.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/soc/intel/skylake/skl-sst-utils.c b/sound/soc/intel/skylake/skl-sst-utils.c index 25fcb796bd864..ddcb52a518548 100644 --- a/sound/soc/intel/skylake/skl-sst-utils.c +++ b/sound/soc/intel/skylake/skl-sst-utils.c @@ -123,6 +123,11 @@ int snd_skl_get_module_info(struct skl_sst *ctx, u8 *uuid, uuid_mod = (uuid_le *)uuid; + if (list_empty(&ctx->uuid_list)) { + dev_err(ctx->dev, "Module list is empty\n"); + return -EINVAL; + } + list_for_each_entry(module, &ctx->uuid_list, list) { if (uuid_le_cmp(*uuid_mod, module->uuid) == 0) { dfw_config->module_id = module->id; From 680682d4d537565e2c358483e1feeca30a8cf3d4 Mon Sep 17 00:00:00 2001 From: Colin Ian King <colin.king@canonical.com> Date: Sun, 17 Jul 2016 19:55:27 +0100 Subject: [PATCH 004/513] cfg80211: fix missing break in NL8211_CHAN_WIDTH_80P80 case The switch on chandef->width is missing a break on the NL8211_CHAN_WIDTH_80P80 case; currently we get a WARN_ON when center_freq2 is non-zero because of the missing break. Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- net/wireless/chan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/wireless/chan.c b/net/wireless/chan.c index da49c0b1fd32e..bb3d64ee68aaa 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -513,6 +513,7 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy, r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq2, width); + break; default: WARN_ON(chandef->center_freq2); break; From 4e3f21bc7bfbdc4a348852e4da1540227e1b0171 Mon Sep 17 00:00:00 2001 From: Felix Fietkau <nbd@nbd.name> Date: Mon, 11 Jul 2016 15:09:15 +0200 Subject: [PATCH 005/513] mac80211: fix check for buffered powersave frames with txq The logic was inverted here, set the bit if frames are pending. Fixes: ba8c3d6f16a1 ("mac80211: add an intermediate software queue implementation") Signed-off-by: Felix Fietkau <nbd@nbd.name> Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2e8a9024625a2..9dce3b157908b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1268,7 +1268,7 @@ static void sta_ps_start(struct sta_info *sta) for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!txqi->tin.backlog_packets) + if (txqi->tin.backlog_packets) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); From c37a54ac37e7fbf8abe2b7b886ac5f1b3e8f8d29 Mon Sep 17 00:00:00 2001 From: Maital Hahn <maitalm@ti.com> Date: Wed, 13 Jul 2016 14:44:41 +0300 Subject: [PATCH 006/513] mac80211: mesh: flush stations before beacons are stopped Some drivers (e.g. wl18xx) expect that the last stage in the de-initialization process will be stopping the beacons, similar to AP flow. Update ieee80211_stop_mesh() flow accordingly. As peers can be removed dynamically, this would not impact other drivers. Tested also on Ralink RT3572 chipset. Signed-off-by: Maital Hahn <maitalm@ti.com> Signed-off-by: Yaniv Machani <yanivma@ti.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- net/mac80211/mesh.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index c66411df98631..42120d965263d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -881,20 +881,22 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) netif_carrier_off(sdata->dev); + /* flush STAs and mpaths on this iface */ + sta_info_flush(sdata); + mesh_path_flush_by_iface(sdata); + /* stop the beacon */ ifmsh->mesh_id_len = 0; sdata->vif.bss_conf.enable_beacon = false; clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED); + + /* remove beacon */ bcn = rcu_dereference_protected(ifmsh->beacon, lockdep_is_held(&sdata->wdev.mtx)); RCU_INIT_POINTER(ifmsh->beacon, NULL); kfree_rcu(bcn, rcu_head); - /* flush STAs and mpaths on this iface */ - sta_info_flush(sdata); - mesh_path_flush_by_iface(sdata); - /* free all potentially still buffered group-addressed frames */ local->total_ps_buffered -= skb_queue_len(&ifmsh->ps.bc_buf); skb_queue_purge(&ifmsh->ps.bc_buf); From 1f85e118c81d15aa9e002604dfb69c823d4aac16 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Date: Wed, 3 Aug 2016 01:24:05 +0000 Subject: [PATCH 007/513] ASoC: simple-card-utils: add missing MODULE_xxx() simple-card-utils might be used as module, but MODULE_xxx() information was missed. This patch adds it. Otherwise, we will have below error, and can't use it. Specil thanks to Kevin. > insmod simple-card-utils.ko simple_card_utils: module license 'unspecified' taints kernel. Disabling lock debugging due to kernel taint simple_card_utils: Unknown symbol snd_soc_of_parse_daifmt (err 0) simple_card_utils: Unknown symbol snd_soc_of_parse_card_name (err 0) insmod: can't insert 'simple-card-utils.ko': \ unknown symbol in module, or unknown parameter Reported-by: Kevin Hilman <khilman@baylibre.com> Signed-off-by: Kuninori Morimoto <kuninori.morimoto.gx@renesas.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/generic/Makefile | 6 +++--- sound/soc/generic/simple-card-utils.c | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/sound/soc/generic/Makefile b/sound/soc/generic/Makefile index 45602ca8536ea..2d53c8d70705b 100644 --- a/sound/soc/generic/Makefile +++ b/sound/soc/generic/Makefile @@ -1,5 +1,5 @@ -obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) := simple-card-utils.o - +snd-soc-simple-card-utils-objs := simple-card-utils.o snd-soc-simple-card-objs := simple-card.o -obj-$(CONFIG_SND_SIMPLE_CARD) += snd-soc-simple-card.o +obj-$(CONFIG_SND_SIMPLE_CARD_UTILS) += snd-soc-simple-card-utils.o +obj-$(CONFIG_SND_SIMPLE_CARD) += snd-soc-simple-card.o diff --git a/sound/soc/generic/simple-card-utils.c b/sound/soc/generic/simple-card-utils.c index d89a9a1b24715..9599de69a880e 100644 --- a/sound/soc/generic/simple-card-utils.c +++ b/sound/soc/generic/simple-card-utils.c @@ -7,6 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#include <linux/module.h> #include <linux/of.h> #include <sound/simple_card_utils.h> @@ -95,3 +96,8 @@ int asoc_simple_card_parse_card_name(struct snd_soc_card *card, return 0; } EXPORT_SYMBOL_GPL(asoc_simple_card_parse_card_name); + +/* Module information */ +MODULE_AUTHOR("Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>"); +MODULE_DESCRIPTION("ALSA SoC Simple Card Utils"); +MODULE_LICENSE("GPL v2"); From bce91f8a4247905b8c40a53f72c14db908cd0710 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme <jarno@ovn.org> Date: Mon, 1 Aug 2016 19:36:07 -0700 Subject: [PATCH 008/513] openvswitch: Remove incorrect WARN_ONCE(). ovs_ct_find_existing() issues a warning if an existing conntrack entry classified as IP_CT_NEW is found, with the premise that this should not happen. However, a newly confirmed, non-expected conntrack entry remains IP_CT_NEW as long as no reply direction traffic is seen. This has resulted into somewhat confusing kernel log messages. This patch removes this check and warning. Fixes: 289f2253 ("openvswitch: Find existing conntrack entry after upcall.") Suggested-by: Joe Stringer <joe@ovn.org> Signed-off-by: Jarno Rajahalme <jarno@ovn.org> Acked-by: Joe Stringer <joe@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/openvswitch/conntrack.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index c644c78ed485d..e054a748ff250 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -433,7 +433,6 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; - enum ip_conntrack_info ctinfo; struct nf_conn *ct; unsigned int dataoff; u8 protonum; @@ -458,13 +457,8 @@ ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, ct = nf_ct_tuplehash_to_ctrack(h); - ctinfo = ovs_ct_get_info(h); - if (ctinfo == IP_CT_NEW) { - /* This should not happen. */ - WARN_ONCE(1, "ovs_ct_find_existing: new packet for %p\n", ct); - } skb->nfct = &ct->ct_general; - skb->nfctinfo = ctinfo; + skb->nfctinfo = ovs_ct_get_info(h); return ct; } From ea966cb6b9e11e87f7b146549aef8e13cad5c6ff Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Tue, 2 Aug 2016 12:23:29 +0200 Subject: [PATCH 009/513] net: xgene: fix maybe-uninitialized variable Building with -Wmaybe-uninitialized shows a potential use of an uninitialized variable: drivers/net/ethernet/apm/xgene/xgene_enet_hw.c: In function 'xgene_enet_phy_connect': drivers/net/ethernet/apm/xgene/xgene_enet_hw.c:802:23: warning: 'phy_dev' may be used uninitialized in this function [-Wmaybe-uninitialized] Although the compiler correctly identified this based on the function, the current code is still safe as long dev->of_node is non-NULL for the case of CONFIG_ACPI=n, which is currently the case. The warning is now disabled by default, but still appears when building with W=1, and other build test tools should be able to detect it as well. Adding an #else clause here makes the code more robust and makes it clear to the compiler that this cannot happen. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Fixes: 8089a96f601b ("drivers: net: xgene: Add backward compatibility") Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/apm/xgene/xgene_enet_hw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 37a0f463b8de5..18bb9556dd006 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -793,6 +793,8 @@ int xgene_enet_phy_connect(struct net_device *ndev) netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; } +#else + return -ENODEV; #endif } From 1f415a74b0ca64b5bfacbb12d71ed2ec050a8cfb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski <jakub.kicinski@netronome.com> Date: Tue, 2 Aug 2016 16:12:14 +0100 Subject: [PATCH 010/513] bpf: fix method of PTR_TO_PACKET reg id generation Using per-register incrementing ID can lead to find_good_pkt_pointers() confusing registers which have completely different values. Consider example: 0: (bf) r6 = r1 1: (61) r8 = *(u32 *)(r6 +76) 2: (61) r0 = *(u32 *)(r6 +80) 3: (bf) r7 = r8 4: (07) r8 += 32 5: (2d) if r8 > r0 goto pc+9 R0=pkt_end R1=ctx R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=0,off=32,r=32) R10=fp 6: (bf) r8 = r7 7: (bf) r9 = r7 8: (71) r1 = *(u8 *)(r7 +0) 9: (0f) r8 += r1 10: (71) r1 = *(u8 *)(r7 +1) 11: (0f) r9 += r1 12: (07) r8 += 32 13: (2d) if r8 > r0 goto pc+1 R0=pkt_end R1=inv56 R6=ctx R7=pkt(id=0,off=0,r=32) R8=pkt(id=1,off=32,r=32) R9=pkt(id=1,off=0,r=32) R10=fp 14: (71) r1 = *(u8 *)(r9 +16) 15: (b7) r7 = 0 16: (bf) r0 = r7 17: (95) exit We need to get a UNKNOWN_VALUE with imm to force id generation so lines 0-5 make r7 a valid packet pointer. We then read two different bytes from the packet and add them to copies of the constructed packet pointer. r8 (line 9) and r9 (line 11) will get the same id of 1, independently. When either of them is validated (line 13) - find_good_pkt_pointers() will also mark the other as safe. This leads to access on line 14 being mistakenly considered safe. Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com> Acked-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f72f23b8fdab4..7094c69ac1995 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -194,6 +194,7 @@ struct verifier_env { struct verifier_state_list **explored_states; /* search pruning optimization */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ bool allow_ptr_leaks; }; @@ -1301,7 +1302,7 @@ static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) /* dst_reg stays as pkt_ptr type and since some positive * integer value was added to the pointer, increment its 'id' */ - dst_reg->id++; + dst_reg->id = ++env->id_gen; /* something was added to pkt_ptr, set range and off to zero */ dst_reg->off = 0; From 087d7a8c917491e6e7feb707a858d624bf5b5f14 Mon Sep 17 00:00:00 2001 From: Satish Baddipadige <satish.baddipadige@broadcom.com> Date: Wed, 3 Aug 2016 09:43:59 +0530 Subject: [PATCH 011/513] tg3: Fix for diasllow rx coalescing time to be 0 When the rx coalescing time is 0, interrupts are not generated from the controller and rx path hangs. To avoid this rx hang, updating the driver to not allow rx coalescing time to be 0. Signed-off-by: Satish Baddipadige <satish.baddipadige@broadcom.com> Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com> Signed-off-by: Michael Chan <michael.chan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/broadcom/tg3.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ff300f7cf5295..f3c6c915e4235 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -14014,6 +14014,7 @@ static int tg3_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec) } if ((ec->rx_coalesce_usecs > MAX_RXCOL_TICKS) || + (!ec->rx_coalesce_usecs) || (ec->tx_coalesce_usecs > MAX_TXCOL_TICKS) || (ec->rx_max_coalesced_frames > MAX_RXMAX_FRAMES) || (ec->tx_max_coalesced_frames > MAX_TXMAX_FRAMES) || From 9ce6fd7a81e6f787756be2f4b85f4f7bb3658de3 Mon Sep 17 00:00:00 2001 From: Siva Reddy Kallam <siva.kallam@broadcom.com> Date: Wed, 3 Aug 2016 09:44:00 +0530 Subject: [PATCH 012/513] tg3: Report the correct number of RSS queues through tg3_get_rxnfc This patch remove the wrong substraction from info->data in tg3_get_rxnfc function. Without this patch, the number of RSS queues reported is less by one. Reported-by: Michal Soltys <soltys@ziu.info> Signed-off-by: Siva Reddy Kallam <siva.kallam@broadcom.com> Signed-off-by: Michael Chan <michael.chan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/broadcom/tg3.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index f3c6c915e4235..659261218d9f6 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12552,10 +12552,6 @@ static int tg3_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, info->data = TG3_RSS_MAX_NUM_QS; } - /* The first interrupt vector only - * handles link interrupts. - */ - info->data -= 1; return 0; default: From 2b10d3ecf2dac737653828889ff85f614318f01a Mon Sep 17 00:00:00 2001 From: Manish Chopra <manish.chopra@qlogic.com> Date: Wed, 3 Aug 2016 04:02:02 -0400 Subject: [PATCH 013/513] qlcnic: fix data structure corruption in async mbx command handling This patch fixes a data structure corruption bug in the SRIOV VF mailbox handler code. While handling mailbox commands from the atomic context, driver is accessing and updating qlcnic_async_work_list_struct entry fields in the async work list. These fields could be concurrently accessed by the work function resulting in data corruption. This patch restructures async mbx command handling by using a separate async command list instead of using a list of work_struct structures. A single work_struct is used to schedule and handle the async commands with proper locking mechanism. Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com> Signed-off-by: Sony Chacko <sony.chacko@qlogic.com> Signed-off-by: Manish Chopra <manish.chopra@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/qlogic/qlcnic/qlcnic_sriov.h | 9 +- .../qlogic/qlcnic/qlcnic_sriov_common.c | 95 +++++++++++-------- 2 files changed, 60 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 017d8c2c8285a..24061b9b92e8c 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -156,10 +156,8 @@ struct qlcnic_vf_info { spinlock_t vlan_list_lock; /* Lock for VLAN list */ }; -struct qlcnic_async_work_list { +struct qlcnic_async_cmd { struct list_head list; - struct work_struct work; - void *ptr; struct qlcnic_cmd_args *cmd; }; @@ -168,7 +166,10 @@ struct qlcnic_back_channel { struct workqueue_struct *bc_trans_wq; struct workqueue_struct *bc_async_wq; struct workqueue_struct *bc_flr_wq; - struct list_head async_list; + struct qlcnic_adapter *adapter; + struct list_head async_cmd_list; + struct work_struct vf_async_work; + spinlock_t queue_lock; /* async_cmd_list queue lock */ }; struct qlcnic_sriov { diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c index 7327b729ba2ea..d7107055ec603 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c @@ -29,6 +29,7 @@ #define QLC_83XX_VF_RESET_FAIL_THRESH 8 #define QLC_BC_CMD_MAX_RETRY_CNT 5 +static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work); static void qlcnic_sriov_vf_free_mac_list(struct qlcnic_adapter *); static int qlcnic_sriov_alloc_bc_mbx_args(struct qlcnic_cmd_args *, u32); static void qlcnic_sriov_vf_poll_dev_state(struct work_struct *); @@ -177,7 +178,10 @@ int qlcnic_sriov_init(struct qlcnic_adapter *adapter, int num_vfs) } bc->bc_async_wq = wq; - INIT_LIST_HEAD(&bc->async_list); + INIT_LIST_HEAD(&bc->async_cmd_list); + INIT_WORK(&bc->vf_async_work, qlcnic_sriov_handle_async_issue_cmd); + spin_lock_init(&bc->queue_lock); + bc->adapter = adapter; for (i = 0; i < num_vfs; i++) { vf = &sriov->vf_info[i]; @@ -1517,17 +1521,21 @@ static void qlcnic_vf_add_mc_list(struct net_device *netdev, const u8 *mac, void qlcnic_sriov_cleanup_async_list(struct qlcnic_back_channel *bc) { - struct list_head *head = &bc->async_list; - struct qlcnic_async_work_list *entry; + struct list_head *head = &bc->async_cmd_list; + struct qlcnic_async_cmd *entry; flush_workqueue(bc->bc_async_wq); + cancel_work_sync(&bc->vf_async_work); + + spin_lock(&bc->queue_lock); while (!list_empty(head)) { - entry = list_entry(head->next, struct qlcnic_async_work_list, + entry = list_entry(head->next, struct qlcnic_async_cmd, list); - cancel_work_sync(&entry->work); list_del(&entry->list); + kfree(entry->cmd); kfree(entry); } + spin_unlock(&bc->queue_lock); } void qlcnic_sriov_vf_set_multi(struct net_device *netdev) @@ -1587,57 +1595,64 @@ void qlcnic_sriov_vf_set_multi(struct net_device *netdev) static void qlcnic_sriov_handle_async_issue_cmd(struct work_struct *work) { - struct qlcnic_async_work_list *entry; - struct qlcnic_adapter *adapter; + struct qlcnic_async_cmd *entry, *tmp; + struct qlcnic_back_channel *bc; struct qlcnic_cmd_args *cmd; + struct list_head *head; + LIST_HEAD(del_list); + + bc = container_of(work, struct qlcnic_back_channel, vf_async_work); + head = &bc->async_cmd_list; + + spin_lock(&bc->queue_lock); + list_splice_init(head, &del_list); + spin_unlock(&bc->queue_lock); + + list_for_each_entry_safe(entry, tmp, &del_list, list) { + list_del(&entry->list); + cmd = entry->cmd; + __qlcnic_sriov_issue_cmd(bc->adapter, cmd); + kfree(entry); + } + + if (!list_empty(head)) + queue_work(bc->bc_async_wq, &bc->vf_async_work); - entry = container_of(work, struct qlcnic_async_work_list, work); - adapter = entry->ptr; - cmd = entry->cmd; - __qlcnic_sriov_issue_cmd(adapter, cmd); return; } -static struct qlcnic_async_work_list * -qlcnic_sriov_get_free_node_async_work(struct qlcnic_back_channel *bc) +static struct qlcnic_async_cmd * +qlcnic_sriov_alloc_async_cmd(struct qlcnic_back_channel *bc, + struct qlcnic_cmd_args *cmd) { - struct list_head *node; - struct qlcnic_async_work_list *entry = NULL; - u8 empty = 0; + struct qlcnic_async_cmd *entry = NULL; - list_for_each(node, &bc->async_list) { - entry = list_entry(node, struct qlcnic_async_work_list, list); - if (!work_pending(&entry->work)) { - empty = 1; - break; - } - } + entry = kzalloc(sizeof(*entry), GFP_ATOMIC); + if (!entry) + return NULL; - if (!empty) { - entry = kzalloc(sizeof(struct qlcnic_async_work_list), - GFP_ATOMIC); - if (entry == NULL) - return NULL; - list_add_tail(&entry->list, &bc->async_list); - } + entry->cmd = cmd; + + spin_lock(&bc->queue_lock); + list_add_tail(&entry->list, &bc->async_cmd_list); + spin_unlock(&bc->queue_lock); return entry; } static void qlcnic_sriov_schedule_async_cmd(struct qlcnic_back_channel *bc, - work_func_t func, void *data, struct qlcnic_cmd_args *cmd) { - struct qlcnic_async_work_list *entry = NULL; + struct qlcnic_async_cmd *entry = NULL; - entry = qlcnic_sriov_get_free_node_async_work(bc); - if (!entry) + entry = qlcnic_sriov_alloc_async_cmd(bc, cmd); + if (!entry) { + qlcnic_free_mbx_args(cmd); + kfree(cmd); return; + } - entry->ptr = data; - entry->cmd = cmd; - INIT_WORK(&entry->work, func); - queue_work(bc->bc_async_wq, &entry->work); + queue_work(bc->bc_async_wq, &bc->vf_async_work); } static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter, @@ -1649,8 +1664,8 @@ static int qlcnic_sriov_async_issue_cmd(struct qlcnic_adapter *adapter, if (adapter->need_fw_reset) return -EIO; - qlcnic_sriov_schedule_async_cmd(bc, qlcnic_sriov_handle_async_issue_cmd, - adapter, cmd); + qlcnic_sriov_schedule_async_cmd(bc, cmd); + return 0; } From fc4ca987f7cc0cb7ea8cb8bb673447939a84bb07 Mon Sep 17 00:00:00 2001 From: Manish Chopra <manish.chopra@qlogic.com> Date: Wed, 3 Aug 2016 04:02:03 -0400 Subject: [PATCH 014/513] qlcnic: fix napi budget alteration Driver modifies the supplied NAPI budget in qlcnic_83xx_msix_tx_poll() function. Instead, it should use the budget as it is. Signed-off-by: Manish Chopra <manish.chopra@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 87c642d3b075b..fedd7366713cf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -102,7 +102,6 @@ #define QLCNIC_RESPONSE_DESC 0x05 #define QLCNIC_LRO_DESC 0x12 -#define QLCNIC_TX_POLL_BUDGET 128 #define QLCNIC_TCP_HDR_SIZE 20 #define QLCNIC_TCP_TS_OPTION_SIZE 12 #define QLCNIC_FETCH_RING_ID(handle) ((handle) >> 63) @@ -2008,7 +2007,6 @@ static int qlcnic_83xx_msix_tx_poll(struct napi_struct *napi, int budget) struct qlcnic_host_tx_ring *tx_ring; struct qlcnic_adapter *adapter; - budget = QLCNIC_TX_POLL_BUDGET; tx_ring = container_of(napi, struct qlcnic_host_tx_ring, napi); adapter = tx_ring->adapter; work_done = qlcnic_process_cmd_ring(adapter, tx_ring, budget); From b8b2372de9cc00d5ed667c7b8db29b6cfbf037f5 Mon Sep 17 00:00:00 2001 From: Manish Chopra <manish.chopra@qlogic.com> Date: Wed, 3 Aug 2016 04:02:04 -0400 Subject: [PATCH 015/513] qlcnic: Update version to 5.3.65 Signed-off-by: Manish Chopra <manish.chopra@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h index fd973f4f16c7d..49bad00a0f8f9 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h @@ -37,8 +37,8 @@ #define _QLCNIC_LINUX_MAJOR 5 #define _QLCNIC_LINUX_MINOR 3 -#define _QLCNIC_LINUX_SUBVERSION 64 -#define QLCNIC_LINUX_VERSIONID "5.3.64" +#define _QLCNIC_LINUX_SUBVERSION 65 +#define QLCNIC_LINUX_VERSIONID "5.3.65" #define QLCNIC_DRV_IDC_VER 0x01 #define QLCNIC_DRIVER_VERSION ((_QLCNIC_LINUX_MAJOR << 16) |\ (_QLCNIC_LINUX_MINOR << 8) | (_QLCNIC_LINUX_SUBVERSION)) From 1bc610e7a17dcf5165f1ed4e0201ee080ba1a0df Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki <s.nawrocki@samsung.com> Date: Thu, 4 Aug 2016 11:51:25 +0200 Subject: [PATCH 016/513] ASoC: samsung: Fix clock handling in S3C24XX_UDA134X card There is no "pclk" alias in the s3c2440 clk driver for "soc-audio" device so related clk_get() fails, which prevents any operation of the S3C24XX_UDA134X sound card. Instead we get the clock on behalf of the I2S device, i.e. we use the I2S block gate clock which has PCLK is its parent clock. Without this patch there is an error like: s3c24xx_uda134x_startup cannot get pclk ASoC: UDA134X startup failed: -2 Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/samsung/s3c24xx_uda134x.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/soc/samsung/s3c24xx_uda134x.c b/sound/soc/samsung/s3c24xx_uda134x.c index 50849e137fc0e..92e88bca386e8 100644 --- a/sound/soc/samsung/s3c24xx_uda134x.c +++ b/sound/soc/samsung/s3c24xx_uda134x.c @@ -58,10 +58,12 @@ static struct platform_device *s3c24xx_uda134x_snd_device; static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream) { - int ret = 0; + struct snd_soc_pcm_runtime *rtd = substream->private_data; + struct snd_soc_dai *cpu_dai = rtd->cpu_dai; #ifdef ENFORCE_RATES struct snd_pcm_runtime *runtime = substream->runtime; #endif + int ret = 0; mutex_lock(&clk_lock); pr_debug("%s %d\n", __func__, clk_users); @@ -71,8 +73,7 @@ static int s3c24xx_uda134x_startup(struct snd_pcm_substream *substream) printk(KERN_ERR "%s cannot get xtal\n", __func__); ret = PTR_ERR(xtal); } else { - pclk = clk_get(&s3c24xx_uda134x_snd_device->dev, - "pclk"); + pclk = clk_get(cpu_dai->dev, "iis"); if (IS_ERR(pclk)) { printk(KERN_ERR "%s cannot get pclk\n", __func__); From ca6ac305f017472a172e53345264abdb495eba46 Mon Sep 17 00:00:00 2001 From: John Hsu <KCHSU0@nuvoton.com> Date: Thu, 4 Aug 2016 16:52:06 +0800 Subject: [PATCH 017/513] ASoC: nau8825: fix bug in playback when suspend In chromium, the following steps will make codec function fail. \1. plug in headphones, Play music \2. run "powerd_dbus_suspend" \3. resume from S3 After resume, the jack detection will restart and make configuration for the headset. Meanwhile, the playback prepares and starts to work. The two sequences will conflict and make wrong register configuration. Originally, the driver adds protection for the case when it finds the playback is active. But the "powerd_dbus_suspend" command will close the pcm stream before suspend. Therefore, the driver can't detect the playback after resume, and the protection not works. For the issue, the driver raises protection every time after resume. The protection will release after jack detection and configuration completes, and then the playback just will goes on. Signed-off-by: John Hsu <KCHSU0@nuvoton.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/nau8825.c | 51 +++++++++----------------------------- 1 file changed, 12 insertions(+), 39 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 5c9707ac4bbff..20d2f1578b876 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -212,31 +212,6 @@ static const unsigned short logtable[256] = { 0xfa2f, 0xfaea, 0xfba5, 0xfc60, 0xfd1a, 0xfdd4, 0xfe8e, 0xff47 }; -static struct snd_soc_dai *nau8825_get_codec_dai(struct nau8825 *nau8825) -{ - struct snd_soc_codec *codec = snd_soc_dapm_to_codec(nau8825->dapm); - struct snd_soc_component *component = &codec->component; - struct snd_soc_dai *codec_dai, *_dai; - - list_for_each_entry_safe(codec_dai, _dai, &component->dai_list, list) { - if (!strncmp(codec_dai->name, NUVOTON_CODEC_DAI, - strlen(NUVOTON_CODEC_DAI))) - return codec_dai; - } - return NULL; -} - -static bool nau8825_dai_is_active(struct nau8825 *nau8825) -{ - struct snd_soc_dai *codec_dai = nau8825_get_codec_dai(nau8825); - - if (codec_dai) { - if (codec_dai->playback_active || codec_dai->capture_active) - return true; - } - return false; -} - /** * nau8825_sema_acquire - acquire the semaphore of nau88l25 * @nau8825: component to register the codec private data with @@ -1205,6 +1180,8 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); unsigned int val_len = 0; + nau8825_sema_acquire(nau8825, 2 * HZ); + switch (params_width(params)) { case 16: val_len |= NAU8825_I2S_DL_16; @@ -1225,6 +1202,9 @@ static int nau8825_hw_params(struct snd_pcm_substream *substream, regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL1, NAU8825_I2S_DL_MASK, val_len); + /* Release the semaphone. */ + nau8825_sema_release(nau8825); + return 0; } @@ -1234,6 +1214,8 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); unsigned int ctrl1_val = 0, ctrl2_val = 0; + nau8825_sema_acquire(nau8825, 2 * HZ); + switch (fmt & SND_SOC_DAIFMT_MASTER_MASK) { case SND_SOC_DAIFMT_CBM_CFM: ctrl2_val |= NAU8825_I2S_MS_MASTER; @@ -1282,6 +1264,9 @@ static int nau8825_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) regmap_update_bits(nau8825->regmap, NAU8825_REG_I2S_PCM_CTRL2, NAU8825_I2S_MS_MASK, ctrl2_val); + /* Release the semaphone. */ + nau8825_sema_release(nau8825); + return 0; } @@ -2241,20 +2226,8 @@ static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec) regcache_cache_only(nau8825->regmap, false); regcache_sync(nau8825->regmap); - if (nau8825_is_jack_inserted(nau8825->regmap)) { - /* If the jack is inserted, we need to check whether the play- - * back is active before suspend. If active, the driver has to - * raise the protection for cross talk function to avoid the - * playback recovers before cross talk process finish. Other- - * wise, the playback will be interfered by cross talk func- - * tion. It is better to apply hardware related parameters - * before starting playback or record. - */ - if (nau8825_dai_is_active(nau8825)) { - nau8825->xtalk_protect = true; - nau8825_sema_acquire(nau8825, 0); - } - } + nau8825->xtalk_protect = true; + nau8825_sema_acquire(nau8825, 0); enable_irq(nau8825->irq); return 0; From 06746c672cdd60715ee57ab1aced95a9e536fd4d Mon Sep 17 00:00:00 2001 From: John Hsu <KCHSU0@nuvoton.com> Date: Thu, 4 Aug 2016 16:52:07 +0800 Subject: [PATCH 018/513] ASoC: nau8825: fix static check error about semaphone control The patch is to fix the static check error as the following. The patch commit b50455fab459 ("ASoC: nau8825: cross talk suppression measurement function") from Jun 7, 2016, leads to the following static checker warning: sound/soc/codecs/nau8825.c:265 nau8825_sema_acquire() warn: 'sem:&nau8825->xtalk_sem' is sometimes locked here and sometimes unlocked. The semaphone acquire function has return value, and some callers can do error handling when lock fails. Signed-off-by: John Hsu <KCHSU0@nuvoton.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/nau8825.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/sound/soc/codecs/nau8825.c b/sound/soc/codecs/nau8825.c index 20d2f1578b876..2e59a85e360b6 100644 --- a/sound/soc/codecs/nau8825.c +++ b/sound/soc/codecs/nau8825.c @@ -225,19 +225,26 @@ static const unsigned short logtable[256] = { * Acquires the semaphore without jiffies. If no more tasks are allowed * to acquire the semaphore, calling this function will put the task to * sleep until the semaphore is released. - * It returns if the semaphore was acquired. + * If the semaphore is not released within the specified number of jiffies, + * this function returns -ETIME. + * If the sleep is interrupted by a signal, this function will return -EINTR. + * It returns 0 if the semaphore was acquired successfully. */ -static void nau8825_sema_acquire(struct nau8825 *nau8825, long timeout) +static int nau8825_sema_acquire(struct nau8825 *nau8825, long timeout) { int ret; - if (timeout) + if (timeout) { ret = down_timeout(&nau8825->xtalk_sem, timeout); - else + if (ret < 0) + dev_warn(nau8825->dev, "Acquire semaphone timeout\n"); + } else { ret = down_interruptible(&nau8825->xtalk_sem); + if (ret < 0) + dev_warn(nau8825->dev, "Acquire semaphone fail\n"); + } - if (ret < 0) - dev_warn(nau8825->dev, "Acquire semaphone fail\n"); + return ret; } /** @@ -1596,8 +1603,11 @@ static irqreturn_t nau8825_interrupt(int irq, void *data) * cess and restore changes if process * is ongoing when ejection. */ + int ret; nau8825->xtalk_protect = true; - nau8825_sema_acquire(nau8825, 0); + ret = nau8825_sema_acquire(nau8825, 0); + if (ret < 0) + nau8825->xtalk_protect = false; } /* Startup cross talk detection process */ nau8825->xtalk_state = NAU8825_XTALK_PREPARE; @@ -2223,11 +2233,14 @@ static int __maybe_unused nau8825_suspend(struct snd_soc_codec *codec) static int __maybe_unused nau8825_resume(struct snd_soc_codec *codec) { struct nau8825 *nau8825 = snd_soc_codec_get_drvdata(codec); + int ret; regcache_cache_only(nau8825->regmap, false); regcache_sync(nau8825->regmap); nau8825->xtalk_protect = true; - nau8825_sema_acquire(nau8825, 0); + ret = nau8825_sema_acquire(nau8825, 0); + if (ret < 0) + nau8825->xtalk_protect = false; enable_irq(nau8825->irq); return 0; From 5d764912a0ee6db83e962c1501b5b9e58ba14e15 Mon Sep 17 00:00:00 2001 From: Adam Thomson <Adam.Thomson.Opensource@diasemi.com> Date: Thu, 4 Aug 2016 15:35:37 +0100 Subject: [PATCH 019/513] ASoC: da7213: Default to 64 BCLKs per WCLK to support all formats Previously code defaulted to 32 BCLKS per WCLK which meant 24 and 32 bit DAI formats would not work properly. This patch fixes the issue by defaulting to 64 BCLKs per WCLK. Signed-off-by: Adam Thomson <Adam.Thomson.Opensource@diasemi.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/da7213.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sound/soc/codecs/da7213.c b/sound/soc/codecs/da7213.c index e5527bc570ae5..bcf1834c56481 100644 --- a/sound/soc/codecs/da7213.c +++ b/sound/soc/codecs/da7213.c @@ -1247,8 +1247,8 @@ static int da7213_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt) return -EINVAL; } - /* By default only 32 BCLK per WCLK is supported */ - dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_32; + /* By default only 64 BCLK per WCLK is supported */ + dai_clk_mode |= DA7213_DAI_BCLKS_PER_WCLK_64; snd_soc_write(codec, DA7213_DAI_CLK_MODE, dai_clk_mode); snd_soc_update_bits(codec, DA7213_DAI_CTRL, DA7213_DAI_FORMAT_MASK, From ea0a95d7f162bfa1c9df74471f0064f71cdf80ea Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Mon, 25 Jul 2016 07:00:33 +0000 Subject: [PATCH 020/513] fcoe: Use kfree_skb() instead of kfree() Use kfree_skb() instead of kfree() to free sk_buff. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Acked-by: Johannes Thumshirn <jth@kernel.org> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/fcoe/fcoe_ctlr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c index a569c65f22b18..dcf36537a767c 100644 --- a/drivers/scsi/fcoe/fcoe_ctlr.c +++ b/drivers/scsi/fcoe/fcoe_ctlr.c @@ -2923,7 +2923,7 @@ static int fcoe_ctlr_vlan_recv(struct fcoe_ctlr *fip, struct sk_buff *skb) mutex_unlock(&fip->ctlr_mutex); drop: - kfree(skb); + kfree_skb(skb); return rc; } From 6b07d9ca9b5363dda959b9582a3fc9c0b89ef3b5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau <nbd@nbd.name> Date: Tue, 2 Aug 2016 11:13:41 +0200 Subject: [PATCH 021/513] mac80211: fix purging multicast PS buffer queue The code currently assumes that buffered multicast PS frames don't have a pending ACK frame for tx status reporting. However, hostapd sends a broadcast deauth frame on teardown for which tx status is requested. This can lead to the "Have pending ack frames" warning on module reload. Fix this by using ieee80211_free_txskb/ieee80211_purge_tx_queue. Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau <nbd@nbd.name> Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- net/mac80211/cfg.c | 2 +- net/mac80211/tx.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 47e99ab8d97a9..543b1d4fc33d5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -869,7 +869,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev) /* free all potentially still buffered bcast frames */ local->total_ps_buffered -= skb_queue_len(&sdata->u.ap.ps.bc_buf); - skb_queue_purge(&sdata->u.ap.ps.bc_buf); + ieee80211_purge_tx_queue(&local->hw, &sdata->u.ap.ps.bc_buf); mutex_lock(&local->mtx); ieee80211_vif_copy_chanctx_to_vlans(sdata, true); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 91461c4155255..502396694f479 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -368,7 +368,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) skb = skb_dequeue(&ps->bc_buf); if (skb) { purged++; - dev_kfree_skb(skb); + ieee80211_free_txskb(&local->hw, skb); } total += skb_queue_len(&ps->bc_buf); } @@ -451,7 +451,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) if (skb_queue_len(&ps->bc_buf) >= AP_MAX_BC_BUFFER) { ps_dbg(tx->sdata, "BC TX buffer full - dropping the oldest frame\n"); - dev_kfree_skb(skb_dequeue(&ps->bc_buf)); + ieee80211_free_txskb(&tx->local->hw, skb_dequeue(&ps->bc_buf)); } else tx->local->total_ps_buffered++; @@ -4275,7 +4275,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); if (!ieee80211_tx_prepare(sdata, &tx, NULL, skb)) break; - dev_kfree_skb_any(skb); + ieee80211_free_txskb(hw, skb); } info = IEEE80211_SKB_CB(skb); From 71f2c3470fca51be27f6fc5975675f7e1c3b91e5 Mon Sep 17 00:00:00 2001 From: Masashi Honma <masashi.honma@gmail.com> Date: Tue, 2 Aug 2016 17:16:57 +0900 Subject: [PATCH 022/513] mac80211: End the MPSP even if EOSP frame was not acked If QoS frame with EOSP (end of service period) subfield=1 sent by local peer was not acked by remote peer, local peer did not end the MPSP. This prevents local peer from going to DOZE state. And if the remote peer goes away without closing connection, local peer continues AWAKE state and wastes battery. Signed-off-by: Masashi Honma <masashi.honma@gmail.com> Acked-by: Bob Copeland <me@bobcopeland.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- net/mac80211/status.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/mac80211/status.c b/net/mac80211/status.c index c6d5c724e0326..a2a68269675de 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -771,6 +771,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) clear_sta_flag(sta, WLAN_STA_SP); acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + + /* mesh Peer Service Period support */ + if (ieee80211_vif_is_mesh(&sta->sdata->vif) && + ieee80211_is_data_qos(fc)) + ieee80211_mpsp_trigger_process( + ieee80211_get_qos_ctl(hdr), sta, true, acked); + if (!acked && test_sta_flag(sta, WLAN_STA_PS_STA)) { /* * The STA is in power save mode, so assume @@ -781,13 +788,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) return; } - /* mesh Peer Service Period support */ - if (ieee80211_vif_is_mesh(&sta->sdata->vif) && - ieee80211_is_data_qos(fc)) - ieee80211_mpsp_trigger_process( - ieee80211_get_qos_ctl(hdr), - sta, true, acked); - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL) && (ieee80211_is_data(hdr->frame_control)) && (rates_idx != -1)) From 9757235f451c27deaa88925399f070ff6fcea832 Mon Sep 17 00:00:00 2001 From: Masashi Honma <masashi.honma@gmail.com> Date: Wed, 3 Aug 2016 10:07:44 +0900 Subject: [PATCH 023/513] nl80211: correct checks for NL80211_MESHCONF_HT_OPMODE value Previously, NL80211_MESHCONF_HT_OPMODE validation rejected correct flag combinations, e.g. IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED | IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT. Doing just a range-check allows setting flags that don't exist (0x8) and invalid flag combinations. Implements some checks based on IEEE 802.11 2012 8.4.2.59 "HT Operation element". Signed-off-by: Masashi Honma <masashi.honma@gmail.com> [reword commit message, simplify a bit] Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- net/wireless/nl80211.c | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 46417f9cce681..f02653a089933 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5380,6 +5380,7 @@ static int nl80211_parse_mesh_config(struct genl_info *info, { struct nlattr *tb[NL80211_MESHCONF_ATTR_MAX + 1]; u32 mask = 0; + u16 ht_opmode; #define FILL_IN_MESH_PARAM_IF_SET(tb, cfg, param, min, max, mask, attr, fn) \ do { \ @@ -5471,9 +5472,36 @@ do { \ FILL_IN_MESH_PARAM_IF_SET(tb, cfg, rssi_threshold, -255, 0, mask, NL80211_MESHCONF_RSSI_THRESHOLD, nl80211_check_s32); - FILL_IN_MESH_PARAM_IF_SET(tb, cfg, ht_opmode, 0, 16, - mask, NL80211_MESHCONF_HT_OPMODE, - nl80211_check_u16); + /* + * Check HT operation mode based on + * IEEE 802.11 2012 8.4.2.59 HT Operation element. + */ + if (tb[NL80211_MESHCONF_HT_OPMODE]) { + ht_opmode = nla_get_u16(tb[NL80211_MESHCONF_HT_OPMODE]); + + if (ht_opmode & ~(IEEE80211_HT_OP_MODE_PROTECTION | + IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT | + IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) + return -EINVAL; + + if ((ht_opmode & IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT) && + (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) + return -EINVAL; + + switch (ht_opmode & IEEE80211_HT_OP_MODE_PROTECTION) { + case IEEE80211_HT_OP_MODE_PROTECTION_NONE: + case IEEE80211_HT_OP_MODE_PROTECTION_20MHZ: + if (ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT) + return -EINVAL; + break; + case IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER: + case IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED: + if (!(ht_opmode & IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT)) + return -EINVAL; + break; + } + cfg->ht_opmode = ht_opmode; + } FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPactivePathToRootTimeout, 1, 65535, mask, NL80211_MESHCONF_HWMP_PATH_TO_ROOT_TIMEOUT, From 2439ca0402091badb24415e1b073ba12b34ba423 Mon Sep 17 00:00:00 2001 From: Maxim Altshul <maxim.altshul@ti.com> Date: Thu, 4 Aug 2016 15:43:04 +0300 Subject: [PATCH 024/513] mac80211: Add ieee80211_hw pointer to get_expected_throughput The variable is added to allow the driver an easy access to it's own hw->priv when the op is invoked. This fixes a crash in wlcore because it was relying on a station pointer that wasn't initialized yet. It's the wrong way to fix the crash, but it solves the problem for now and it does make sense to have the hw pointer here. Signed-off-by: Maxim Altshul <maxim.altshul@ti.com> [rewrite commit message, fix indentation] Signed-off-by: Johannes Berg <johannes.berg@intel.com> --- drivers/net/wireless/ti/wlcore/main.c | 5 +++-- include/net/mac80211.h | 3 ++- net/mac80211/driver-ops.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 1d689169da768..9e1f2d9c98659 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5700,10 +5700,11 @@ static void wlcore_op_sta_statistics(struct ieee80211_hw *hw, mutex_unlock(&wl->mutex); } -static u32 wlcore_op_get_expected_throughput(struct ieee80211_sta *sta) +static u32 wlcore_op_get_expected_throughput(struct ieee80211_hw *hw, + struct ieee80211_sta *sta) { struct wl1271_station *wl_sta = (struct wl1271_station *)sta->drv_priv; - struct wl1271 *wl = wl_sta->wl; + struct wl1271 *wl = hw->priv; u8 hlid = wl_sta->hlid; /* return in units of Kbps */ diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b4faadbb4e01f..cca510a585c3d 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3620,7 +3620,8 @@ struct ieee80211_ops { int (*join_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*leave_ibss)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); - u32 (*get_expected_throughput)(struct ieee80211_sta *sta); + u32 (*get_expected_throughput)(struct ieee80211_hw *hw, + struct ieee80211_sta *sta); int (*get_txpower)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, int *dbm); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 184473c257eb9..ba5fc1f01e535 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1094,7 +1094,7 @@ static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, trace_drv_get_expected_throughput(sta); if (local->ops->get_expected_throughput) - ret = local->ops->get_expected_throughput(sta); + ret = local->ops->get_expected_throughput(&local->hw, sta); trace_drv_return_u32(local, ret); return ret; From 5ef9f289c4e698054e5687edb54f0da3cdc9173a Mon Sep 17 00:00:00 2001 From: Ian Wienand <iwienand@redhat.com> Date: Wed, 3 Aug 2016 15:44:57 +1000 Subject: [PATCH 025/513] OVS: Ignore negative headroom value net_device->ndo_set_rx_headroom (introduced in 871b642adebe300be2e50aa5f65a418510f636ec) says "Setting a negtaive value reset the rx headroom to the default value". It seems that the OVS implementation in 3a927bc7cf9d0fbe8f4a8189dd5f8440228f64e7 overlooked this and sets dev->needed_headroom unconditionally. This doesn't have an immediate effect, but can mess up later LL_RESERVED_SPACE calculations, such as done in net/ipv6/mcast.c:mld_newpack. For reference, this issue was found from a skb_panic raised there after the length calculations had given the wrong result. Note the other current users of this interface (drivers/net/tun.c:tun_set_headroom and drivers/net/veth.c:veth_set_rx_headroom) are both checking this correctly thus need no modification. Thanks to Ben for some pointers from the crash dumps! Cc: Benjamin Poirier <bpoirier@suse.com> Cc: Paolo Abeni <pabeni@redhat.com> Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1361414 Signed-off-by: Ian Wienand <iwienand@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/openvswitch/vport-internal_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 434e04c3a189b..95c36147a6e13 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -140,7 +140,7 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) static void internal_set_rx_headroom(struct net_device *dev, int new_hr) { - dev->needed_headroom = new_hr; + dev->needed_headroom = new_hr < 0 ? 0 : new_hr; } static const struct net_device_ops internal_dev_netdev_ops = { From 54447f1ad73414ebb052e9a33d079cabed3a03e8 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Wed, 3 Aug 2016 10:58:35 +0000 Subject: [PATCH 026/513] net: arc_emac: add missing of_node_put() in arc_emac_probe() commit a94efbd7cc45 ("ethernet: arc: emac_main: add missing of_node_put after calling of_parse_phandle") added missing of_node_put after calling of_parse_phandle, but missing the devm_ioremap_resource() error handling case. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Reviewed-by: Peter Chen <peter.chen@nxp.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/arc/emac_main.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c index 4bff0f3040df7..b0da9693f28a1 100644 --- a/drivers/net/ethernet/arc/emac_main.c +++ b/drivers/net/ethernet/arc/emac_main.c @@ -771,8 +771,10 @@ int arc_emac_probe(struct net_device *ndev, int interface) priv->dev = dev; priv->regs = devm_ioremap_resource(dev, &res_regs); - if (IS_ERR(priv->regs)) - return PTR_ERR(priv->regs); + if (IS_ERR(priv->regs)) { + err = PTR_ERR(priv->regs); + goto out_put_node; + } dev_dbg(dev, "Registers base address is 0x%p\n", priv->regs); From 372ee16386bbf6dc5eeb0387e1ede963debba82a Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Wed, 3 Aug 2016 14:11:40 +0100 Subject: [PATCH 027/513] rxrpc: Fix races between skb free, ACK generation and replying Inside the kafs filesystem it is possible to occasionally have a call processed and terminated before we've had a chance to check whether we need to clean up the rx queue for that call because afs_send_simple_reply() ends the call when it is done, but this is done in a workqueue item that might happen to run to completion before afs_deliver_to_call() completes. Further, it is possible for rxrpc_kernel_send_data() to be called to send a reply before the last request-phase data skb is released. The rxrpc skb destructor is where the ACK processing is done and the call state is advanced upon release of the last skb. ACK generation is also deferred to a work item because it's possible that the skb destructor is not called in a context where kernel_sendmsg() can be invoked. To this end, the following changes are made: (1) kernel_rxrpc_data_consumed() is added. This should be called whenever an skb is emptied so as to crank the ACK and call states. This does not release the skb, however. kernel_rxrpc_free_skb() must now be called to achieve that. These together replace rxrpc_kernel_data_delivered(). (2) kernel_rxrpc_data_consumed() is wrapped by afs_data_consumed(). This makes afs_deliver_to_call() easier to work as the skb can simply be discarded unconditionally here without trying to work out what the return value of the ->deliver() function means. The ->deliver() functions can, via afs_data_complete(), afs_transfer_reply() and afs_extract_data() mark that an skb has been consumed (thereby cranking the state) without the need to conditionally free the skb to make sure the state is correct on an incoming call for when the call processor tries to send the reply. (3) rxrpc_recvmsg() now has to call kernel_rxrpc_data_consumed() when it has finished with a packet and MSG_PEEK isn't set. (4) rxrpc_packet_destructor() no longer calls rxrpc_hard_ACK_data(). Because of this, we no longer need to clear the destructor and put the call before we free the skb in cases where we don't want the ACK/call state to be cranked. (5) The ->deliver() call-type callbacks are made to return -EAGAIN rather than 0 if they expect more data (afs_extract_data() returns -EAGAIN to the delivery function already), and the caller is now responsible for producing an abort if that was the last packet. (6) There are many bits of unmarshalling code where: ret = afs_extract_data(call, skb, last, ...); switch (ret) { case 0: break; case -EAGAIN: return 0; default: return ret; } is to be found. As -EAGAIN can now be passed back to the caller, we now just return if ret < 0: ret = afs_extract_data(call, skb, last, ...); if (ret < 0) return ret; (7) Checks for trailing data and empty final data packets has been consolidated as afs_data_complete(). So: if (skb->len > 0) return -EBADMSG; if (!last) return 0; becomes: ret = afs_data_complete(call, skb, last); if (ret < 0) return ret; (8) afs_transfer_reply() now checks the amount of data it has against the amount of data desired and the amount of data in the skb and returns an error to induce an abort if we don't get exactly what we want. Without these changes, the following oops can occasionally be observed, particularly if some printks are inserted into the delivery path: general protection fault: 0000 [#1] SMP Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc] CPU: 0 PID: 1305 Comm: kworker/u8:3 Tainted: G E 4.7.0-fsdevel+ #1303 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 Workqueue: kafsd afs_async_workfn [kafs] task: ffff88040be041c0 ti: ffff88040c070000 task.ti: ffff88040c070000 RIP: 0010:[<ffffffff8108fd3c>] [<ffffffff8108fd3c>] __lock_acquire+0xcf/0x15a1 RSP: 0018:ffff88040c073bc0 EFLAGS: 00010002 RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000000 RCX: ffff88040d29a710 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff88040d29a710 RBP: ffff88040c073c70 R08: 0000000000000001 R09: 0000000000000001 R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: ffff88040be041c0 R15: ffffffff814c928f FS: 0000000000000000(0000) GS:ffff88041fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fa4595f4750 CR3: 0000000001c14000 CR4: 00000000001406f0 Stack: 0000000000000006 000000000be04930 0000000000000000 ffff880400000000 ffff880400000000 ffffffff8108f847 ffff88040be041c0 ffffffff81050446 ffff8803fc08a920 ffff8803fc08a958 ffff88040be041c0 ffff88040c073c38 Call Trace: [<ffffffff8108f847>] ? mark_held_locks+0x5e/0x74 [<ffffffff81050446>] ? __local_bh_enable_ip+0x9b/0xa1 [<ffffffff8108f9ca>] ? trace_hardirqs_on_caller+0x16d/0x189 [<ffffffff810915f4>] lock_acquire+0x122/0x1b6 [<ffffffff810915f4>] ? lock_acquire+0x122/0x1b6 [<ffffffff814c928f>] ? skb_dequeue+0x18/0x61 [<ffffffff81609dbf>] _raw_spin_lock_irqsave+0x35/0x49 [<ffffffff814c928f>] ? skb_dequeue+0x18/0x61 [<ffffffff814c928f>] skb_dequeue+0x18/0x61 [<ffffffffa009aa92>] afs_deliver_to_call+0x344/0x39d [kafs] [<ffffffffa009ab37>] afs_process_async_call+0x4c/0xd5 [kafs] [<ffffffffa0099e9c>] afs_async_workfn+0xe/0x10 [kafs] [<ffffffff81063a3a>] process_one_work+0x29d/0x57c [<ffffffff81064ac2>] worker_thread+0x24a/0x385 [<ffffffff81064878>] ? rescuer_thread+0x2d0/0x2d0 [<ffffffff810696f5>] kthread+0xf3/0xfb [<ffffffff8160a6ff>] ret_from_fork+0x1f/0x40 [<ffffffff81069602>] ? kthread_create_on_node+0x1cf/0x1cf Signed-off-by: David Howells <dhowells@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- Documentation/networking/rxrpc.txt | 21 +-- fs/afs/cmservice.c | 78 +++++----- fs/afs/fsclient.c | 221 ++++++++++------------------- fs/afs/internal.h | 14 +- fs/afs/rxrpc.c | 73 ++++++---- fs/afs/vlclient.c | 11 +- include/net/af_rxrpc.h | 2 +- net/rxrpc/ar-internal.h | 1 + net/rxrpc/call_accept.c | 1 + net/rxrpc/call_event.c | 3 + net/rxrpc/call_object.c | 8 +- net/rxrpc/input.c | 12 +- net/rxrpc/recvmsg.c | 25 +--- net/rxrpc/skbuff.c | 41 +++++- 14 files changed, 225 insertions(+), 286 deletions(-) diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 16a924c486bf3..70c926ae212d3 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -790,13 +790,12 @@ The kernel interface functions are as follows: Data messages can have their contents extracted with the usual bunch of socket buffer manipulation functions. A data message can be determined to be the last one in a sequence with rxrpc_kernel_is_data_last(). When a - data message has been used up, rxrpc_kernel_data_delivered() should be - called on it.. + data message has been used up, rxrpc_kernel_data_consumed() should be + called on it. - Non-data messages should be handled to rxrpc_kernel_free_skb() to dispose - of. It is possible to get extra refs on all types of message for later - freeing, but this may pin the state of a call until the message is finally - freed. + Messages should be handled to rxrpc_kernel_free_skb() to dispose of. It + is possible to get extra refs on all types of message for later freeing, + but this may pin the state of a call until the message is finally freed. (*) Accept an incoming call. @@ -821,12 +820,14 @@ The kernel interface functions are as follows: Other errors may be returned if the call had been aborted (-ECONNABORTED) or had timed out (-ETIME). - (*) Record the delivery of a data message and free it. + (*) Record the delivery of a data message. - void rxrpc_kernel_data_delivered(struct sk_buff *skb); + void rxrpc_kernel_data_consumed(struct rxrpc_call *call, + struct sk_buff *skb); - This is used to record a data message as having been delivered and to - update the ACK state for the call. The socket buffer will be freed. + This is used to record a data message as having been consumed and to + update the ACK state for the call. The message must still be passed to + rxrpc_kernel_free_skb() for disposal by the caller. (*) Free a message. diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 4b0eff6da6740..85737e96ab8b5 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -189,11 +189,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 1: _debug("extract FID count"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("FID count: %u", call->count); @@ -210,11 +207,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, _debug("extract FID array"); ret = afs_extract_data(call, skb, last, call->buffer, call->count * 3 * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; _debug("unmarshall FID array"); call->request = kcalloc(call->count, @@ -239,11 +233,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 3: _debug("extract CB count"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; tmp = ntohl(call->tmp); _debug("CB count: %u", tmp); @@ -258,11 +249,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, _debug("extract CB array"); ret = afs_extract_data(call, skb, last, call->request, call->count * 3 * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; _debug("unmarshall CB array"); cb = call->request; @@ -278,9 +266,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, call->unmarshall++; case 5: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* Record that the message was unmarshalled successfully so * that the call destructor can know do the callback breaking @@ -294,8 +282,6 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, break; } - if (!last) - return 0; call->state = AFS_CALL_REPLYING; @@ -335,13 +321,13 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, { struct afs_server *server; struct in_addr addr; + int ret; _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -371,8 +357,10 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, _enter(",{%u},%d", skb->len, last); + /* There are some arguments that we ignore */ + afs_data_consumed(call, skb); if (!last) - return 0; + return -EAGAIN; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -408,12 +396,13 @@ static void SRXAFSCB_Probe(struct work_struct *work) static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, bool last) { + int ret; + _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; @@ -460,10 +449,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; switch (call->unmarshall) { case 0: @@ -509,8 +497,9 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, break; } - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; call->state = AFS_CALL_REPLYING; @@ -588,12 +577,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, struct sk_buff *skb, bool last) { + int ret; + _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; - if (!last) - return 0; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; /* no unmarshalling required */ call->state = AFS_CALL_REPLYING; diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index c2e930ec28889..9312b92e54bed 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -240,15 +240,13 @@ static int afs_deliver_fs_fetch_status(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -335,11 +333,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 1: _debug("extract data length (MSW)"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("DATA length MSW: %u", call->count); @@ -353,11 +348,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 2: _debug("extract data length"); ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("DATA length: %u", call->count); @@ -375,11 +367,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, ret = afs_extract_data(call, skb, last, buffer, call->count); kunmap_atomic(buffer); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } call->offset = 0; @@ -389,11 +378,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, case 4: ret = afs_extract_data(call, skb, last, call->buffer, (21 + 3 + 6) * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; bp = call->buffer; xdr_decode_AFSFetchStatus(&bp, &vnode->status, vnode, NULL); @@ -405,15 +391,12 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; case 5: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; break; } - if (!last) - return 0; - if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; @@ -537,9 +520,8 @@ static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, { _enter(",{%u},%d", skb->len, last); - if (skb->len > 0) - return -EBADMSG; /* shouldn't be any reply data */ - return 0; + /* shouldn't be any reply data */ + return afs_data_complete(call, skb, last); } /* @@ -622,15 +604,13 @@ static int afs_deliver_fs_create_vnode(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -721,15 +701,13 @@ static int afs_deliver_fs_remove(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -804,15 +782,13 @@ static int afs_deliver_fs_link(struct afs_call *call, { struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -892,15 +868,13 @@ static int afs_deliver_fs_symlink(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -999,15 +973,13 @@ static int afs_deliver_fs_rename(struct afs_call *call, { struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -1105,20 +1077,13 @@ static int afs_deliver_fs_store_data(struct afs_call *call, { struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) { - _leave(" = 0 [more]"); - return 0; - } - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; @@ -1292,20 +1257,13 @@ static int afs_deliver_fs_store_status(struct afs_call *call, afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply; const __be32 *bp; + int ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) { - _leave(" = 0 [more]"); - return 0; - } - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ store_version = NULL; @@ -1504,11 +1462,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, _debug("extract status"); ret = afs_extract_data(call, skb, last, call->buffer, 12 * 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; bp = call->buffer; xdr_decode_AFSFetchVolumeStatus(&bp, call->reply2); @@ -1518,11 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the volume name length */ case 2: ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("volname length: %u", call->count); @@ -1537,11 +1489,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, if (call->count > 0) { ret = afs_extract_data(call, skb, last, call->reply3, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } p = call->reply3; @@ -1561,11 +1510,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 4: ret = afs_extract_data(call, skb, last, call->buffer, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->offset = 0; call->unmarshall++; @@ -1574,11 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the offline message length */ case 5: ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("offline msg length: %u", call->count); @@ -1593,11 +1536,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, if (call->count > 0) { ret = afs_extract_data(call, skb, last, call->reply3, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } p = call->reply3; @@ -1617,11 +1557,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 7: ret = afs_extract_data(call, skb, last, call->buffer, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->offset = 0; call->unmarshall++; @@ -1630,11 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the message of the day length */ case 8: ret = afs_extract_data(call, skb, last, &call->tmp, 4); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->count = ntohl(call->tmp); _debug("motd length: %u", call->count); @@ -1649,11 +1583,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, if (call->count > 0) { ret = afs_extract_data(call, skb, last, call->reply3, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; } p = call->reply3; @@ -1673,26 +1604,20 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 10: ret = afs_extract_data(call, skb, last, call->buffer, call->count); - switch (ret) { - case 0: break; - case -EAGAIN: return 0; - default: return ret; - } + if (ret < 0) + return ret; call->offset = 0; call->unmarshall++; no_motd_padding: case 11: - _debug("trailer %d", skb->len); - if (skb->len != 0) - return -EBADMSG; + ret = afs_data_complete(call, skb, last); + if (ret < 0) + return ret; break; } - if (!last) - return 0; - _leave(" = 0 [done]"); return 0; } @@ -1764,15 +1689,13 @@ static int afs_deliver_fs_xxxx_lock(struct afs_call *call, struct sk_buff *skb, bool last) { const __be32 *bp; + int ret; _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ bp = call->buffer; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 71d5982312f3d..df976b2a7f40f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -609,17 +609,29 @@ extern void afs_proc_cell_remove(struct afs_cell *); */ extern int afs_open_socket(void); extern void afs_close_socket(void); +extern void afs_data_consumed(struct afs_call *, struct sk_buff *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, const struct afs_wait_mode *); extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); -extern void afs_transfer_reply(struct afs_call *, struct sk_buff *); +extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, size_t); +static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, + bool last) +{ + if (skb->len > 0) + return -EBADMSG; + afs_data_consumed(call, skb); + if (!last) + return -EAGAIN; + return 0; +} + /* * security.c */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 4832de84d52cb..14d04c848465a 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -150,10 +150,9 @@ void afs_close_socket(void) } /* - * note that the data in a socket buffer is now delivered and that the buffer - * should be freed + * Note that the data in a socket buffer is now consumed. */ -static void afs_data_delivered(struct sk_buff *skb) +void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) { if (!skb) { _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); @@ -161,9 +160,7 @@ static void afs_data_delivered(struct sk_buff *skb) } else { _debug("DLVR %p{%u} [%d]", skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - if (atomic_dec_return(&afs_outstanding_skbs) == -1) - BUG(); - rxrpc_kernel_data_delivered(skb); + rxrpc_kernel_data_consumed(call->rxcall, skb); } } @@ -489,9 +486,15 @@ static void afs_deliver_to_call(struct afs_call *call) last = rxrpc_kernel_is_data_last(skb); ret = call->type->deliver(call, skb, last); switch (ret) { + case -EAGAIN: + if (last) { + _debug("short data"); + goto unmarshal_error; + } + break; case 0: - if (last && - call->state == AFS_CALL_AWAIT_REPLY) + ASSERT(last); + if (call->state == AFS_CALL_AWAIT_REPLY) call->state = AFS_CALL_COMPLETE; break; case -ENOTCONN: @@ -501,6 +504,7 @@ static void afs_deliver_to_call(struct afs_call *call) abort_code = RX_INVALID_OPERATION; goto do_abort; default: + unmarshal_error: abort_code = RXGEN_CC_UNMARSHAL; if (call->state != AFS_CALL_AWAIT_REPLY) abort_code = RXGEN_SS_UNMARSHAL; @@ -511,9 +515,7 @@ static void afs_deliver_to_call(struct afs_call *call) call->state = AFS_CALL_ERROR; break; } - afs_data_delivered(skb); - skb = NULL; - continue; + break; case RXRPC_SKB_MARK_FINAL_ACK: _debug("Rcv ACK"); call->state = AFS_CALL_COMPLETE; @@ -685,15 +687,35 @@ static void afs_process_async_call(struct afs_call *call) } /* - * empty a socket buffer into a flat reply buffer + * Empty a socket buffer into a flat reply buffer. */ -void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) +int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) { size_t len = skb->len; - if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, len) < 0) - BUG(); - call->reply_size += len; + if (len > call->reply_max - call->reply_size) { + _leave(" = -EBADMSG [%zu > %u]", + len, call->reply_max - call->reply_size); + return -EBADMSG; + } + + if (len > 0) { + if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, + len) < 0) + BUG(); + call->reply_size += len; + } + + afs_data_consumed(call, skb); + if (!last) + return -EAGAIN; + + if (call->reply_size != call->reply_max) { + _leave(" = -EBADMSG [%u != %u]", + call->reply_size, call->reply_max); + return -EBADMSG; + } + return 0; } /* @@ -745,7 +767,8 @@ static void afs_collect_incoming_call(struct work_struct *work) } /* - * grab the operation ID from an incoming cache manager call + * Grab the operation ID from an incoming cache manager call. The socket + * buffer is discarded on error or if we don't yet have sufficient data. */ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, bool last) @@ -766,12 +789,9 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, call->offset += len; if (call->offset < 4) { - if (last) { - _leave(" = -EBADMSG [op ID short]"); - return -EBADMSG; - } - _leave(" = 0 [incomplete]"); - return 0; + afs_data_consumed(call, skb); + _leave(" = -EAGAIN"); + return -EAGAIN; } call->state = AFS_CALL_AWAIT_REQUEST; @@ -855,7 +875,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) } /* - * extract a piece of data from the received data socket buffers + * Extract a piece of data from the received data socket buffers. */ int afs_extract_data(struct afs_call *call, struct sk_buff *skb, bool last, void *buf, size_t count) @@ -873,10 +893,7 @@ int afs_extract_data(struct afs_call *call, struct sk_buff *skb, call->offset += len; if (call->offset < count) { - if (last) { - _leave(" = -EBADMSG [%d < %zu]", call->offset, count); - return -EBADMSG; - } + afs_data_consumed(call, skb); _leave(" = -EAGAIN"); return -EAGAIN; } diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index 340afd0cd1829..f94d1abdc3ebc 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -64,16 +64,13 @@ static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, struct afs_cache_vlocation *entry; __be32 *bp; u32 tmp; - int loop; + int loop, ret; _enter(",,%u", last); - afs_transfer_reply(call, skb); - if (!last) - return 0; - - if (call->reply_size != call->reply_max) - return -EBADMSG; + ret = afs_transfer_reply(call, skb, last); + if (ret < 0) + return ret; /* unmarshall the reply once we've received all of it */ entry = call->reply; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index ac1bc3c49fbdf..7b0f88699b25e 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -40,12 +40,12 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, unsigned long, gfp_t); int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t); +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_kernel_abort_call(struct rxrpc_call *, u32); void rxrpc_kernel_end_call(struct rxrpc_call *); bool rxrpc_kernel_is_data_last(struct sk_buff *); u32 rxrpc_kernel_get_abort_code(struct sk_buff *); int rxrpc_kernel_get_error_number(struct sk_buff *); -void rxrpc_kernel_data_delivered(struct sk_buff *); void rxrpc_kernel_free_skb(struct sk_buff *); struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); int rxrpc_kernel_reject_call(struct socket *); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 1bb9e7ac9e142..ff83fb1ddd47f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -425,6 +425,7 @@ struct rxrpc_call { spinlock_t lock; rwlock_t state_lock; /* lock for state transition */ atomic_t usage; + atomic_t skb_count; /* Outstanding packets on this call */ atomic_t sequence; /* Tx data packet sequence counter */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 0b2832141bd07..9bae21e66d654 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -130,6 +130,7 @@ static int rxrpc_accept_incoming_call(struct rxrpc_local *local, call->state = RXRPC_CALL_SERVER_ACCEPTING; list_add_tail(&call->accept_link, &rx->acceptq); rxrpc_get_call(call); + atomic_inc(&call->skb_count); nsp = rxrpc_skb(notification); nsp->call = call; diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index fc32aa5764a24..f5e99163a09e0 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -460,6 +460,7 @@ static void rxrpc_insert_oos_packet(struct rxrpc_call *call, ASSERTCMP(sp->call, ==, NULL); sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); /* insert into the buffer in sequence order */ spin_lock_bh(&call->lock); @@ -734,6 +735,7 @@ static int rxrpc_process_rx_queue(struct rxrpc_call *call, skb->mark = RXRPC_SKB_MARK_FINAL_ACK; sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); spin_lock_bh(&call->lock); if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) BUG(); @@ -793,6 +795,7 @@ static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, sp->error = error; sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); spin_lock_bh(&call->lock); ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 91287c9d01bb4..c47f14fc5e888 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -491,13 +491,6 @@ void rxrpc_release_call(struct rxrpc_call *call) spin_lock_bh(&call->lock); while ((skb = skb_dequeue(&call->rx_queue)) || (skb = skb_dequeue(&call->rx_oos_queue))) { - sp = rxrpc_skb(skb); - if (sp->call) { - ASSERTCMP(sp->call, ==, call); - rxrpc_put_call(call); - sp->call = NULL; - } - skb->destructor = NULL; spin_unlock_bh(&call->lock); _debug("- zap %s %%%u #%u", @@ -605,6 +598,7 @@ void __rxrpc_put_call(struct rxrpc_call *call) if (atomic_dec_and_test(&call->usage)) { _debug("call %d dead", call->debug_id); + WARN_ON(atomic_read(&call->skb_count) != 0); ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); rxrpc_queue_work(&call->destroyer); } diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 991a20d250930..9e0f58edcd016 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -55,9 +55,6 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { _debug("already terminated"); ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); - skb->destructor = NULL; - sp->call = NULL; - rxrpc_put_call(call); rxrpc_free_skb(skb); return 0; } @@ -111,13 +108,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, ret = 0; out: - /* release the socket buffer */ - if (skb) { - skb->destructor = NULL; - sp->call = NULL; - rxrpc_put_call(call); - rxrpc_free_skb(skb); - } + rxrpc_free_skb(skb); _leave(" = %d", ret); return ret; @@ -200,6 +191,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, sp->call = call; rxrpc_get_call(call); + atomic_inc(&call->skb_count); terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index a3fa2ed85d630..9ed66d533002d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -203,6 +203,9 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } /* we transferred the whole data packet */ + if (!(flags & MSG_PEEK)) + rxrpc_kernel_data_consumed(call, skb); + if (sp->hdr.flags & RXRPC_LAST_PACKET) { _debug("last"); if (rxrpc_conn_is_client(call->conn)) { @@ -359,28 +362,6 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, } -/** - * rxrpc_kernel_data_delivered - Record delivery of data message - * @skb: Message holding data - * - * Record the delivery of a data message. This permits RxRPC to keep its - * tracking correct. The socket buffer will be deleted. - */ -void rxrpc_kernel_data_delivered(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; - - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; - - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - rxrpc_free_skb(skb); -} - -EXPORT_SYMBOL(rxrpc_kernel_data_delivered); - /** * rxrpc_kernel_is_data_last - Determine if data message is last one * @skb: Message holding data diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index eee0cfd9ac8c0..06c51d4b622d6 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -98,11 +98,39 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call, spin_unlock_bh(&call->lock); } +/** + * rxrpc_kernel_data_consumed - Record consumption of data message + * @call: The call to which the message pertains. + * @skb: Message holding data + * + * Record the consumption of a data message and generate an ACK if appropriate. + * The call state is shifted if this was the final packet. The caller must be + * in process context with no spinlocks held. + * + * TODO: Actually generate the ACK here rather than punting this to the + * workqueue. + */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); + + ASSERTCMP(sp->call, ==, call); + ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); + + /* TODO: Fix the sequence number tracking */ + ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); + ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); + ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); + + call->rx_data_recv = sp->hdr.seq; + rxrpc_hard_ACK_data(call, sp); +} +EXPORT_SYMBOL(rxrpc_kernel_data_consumed); + /* - * destroy a packet that has an RxRPC control buffer - * - advance the hard-ACK state of the parent call (done here in case something - * in the kernel bypasses recvmsg() and steals the packet directly off of the - * socket receive queue) + * Destroy a packet that has an RxRPC control buffer */ void rxrpc_packet_destructor(struct sk_buff *skb) { @@ -112,9 +140,8 @@ void rxrpc_packet_destructor(struct sk_buff *skb) _enter("%p{%p}", skb, call); if (call) { - /* send the final ACK on a client call */ - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) - rxrpc_hard_ACK_data(call, sp); + if (atomic_dec_return(&call->skb_count) < 0) + BUG(); rxrpc_put_call(call); sp->call = NULL; } From 94d9f1c5906b20053efe375b6d66610bca4b8b64 Mon Sep 17 00:00:00 2001 From: David Forster <dforster@brocade.com> Date: Wed, 3 Aug 2016 15:13:01 +0100 Subject: [PATCH 028/513] ipv4: panic in leaf_walk_rcu due to stale node pointer Panic occurs when issuing "cat /proc/net/route" whilst populating FIB with > 1M routes. Use of cached node pointer in fib_route_get_idx is unsafe. BUG: unable to handle kernel paging request at ffffc90001630024 IP: [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0 PGD 11b08d067 PUD 11b08e067 PMD dac4b067 PTE 0 Oops: 0000 [#1] SMP Modules linked in: nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscac snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep virti acpi_cpufreq button parport_pc ppdev lp parport autofs4 ext4 crc16 mbcache jbd tio_ring virtio floppy uhci_hcd ehci_hcd usbcore usb_common libata scsi_mod CPU: 1 PID: 785 Comm: cat Not tainted 4.2.0-rc8+ #4 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 task: ffff8800da1c0bc0 ti: ffff88011a05c000 task.ti: ffff88011a05c000 RIP: 0010:[<ffffffff814cf6a0>] [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0 RSP: 0018:ffff88011a05fda0 EFLAGS: 00010202 RAX: ffff8800d8a40c00 RBX: ffff8800da4af940 RCX: ffff88011a05ff20 RDX: ffffc90001630020 RSI: 0000000001013531 RDI: ffff8800da4af950 RBP: 0000000000000000 R08: ffff8800da1f9a00 R09: 0000000000000000 R10: ffff8800db45b7e4 R11: 0000000000000246 R12: ffff8800da4af950 R13: ffff8800d97a74c0 R14: 0000000000000000 R15: ffff8800d97a7480 FS: 00007fd3970e0700(0000) GS:ffff88011fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: ffffc90001630024 CR3: 000000011a7e4000 CR4: 00000000000006e0 Stack: ffffffff814d00d3 0000000000000000 ffff88011a05ff20 ffff8800da1f9a00 ffffffff811dd8b9 0000000000000800 0000000000020000 00007fd396f35000 ffffffff811f8714 0000000000003431 ffffffff8138dce0 0000000000000f80 Call Trace: [<ffffffff814d00d3>] ? fib_route_seq_start+0x93/0xc0 [<ffffffff811dd8b9>] ? seq_read+0x149/0x380 [<ffffffff811f8714>] ? fsnotify+0x3b4/0x500 [<ffffffff8138dce0>] ? process_echoes+0x70/0x70 [<ffffffff8121cfa7>] ? proc_reg_read+0x47/0x70 [<ffffffff811bb823>] ? __vfs_read+0x23/0xd0 [<ffffffff811bbd42>] ? rw_verify_area+0x52/0xf0 [<ffffffff811bbe61>] ? vfs_read+0x81/0x120 [<ffffffff811bcbc2>] ? SyS_read+0x42/0xa0 [<ffffffff81549ab2>] ? entry_SYSCALL_64_fastpath+0x16/0x75 Code: 48 85 c0 75 d8 f3 c3 31 c0 c3 f3 c3 66 66 66 66 66 66 2e 0f 1f 84 00 00 a 04 89 f0 33 02 44 89 c9 48 d3 e8 0f b6 4a 05 49 89 RIP [<ffffffff814cf6a0>] leaf_walk_rcu+0x10/0xe0 RSP <ffff88011a05fda0> CR2: ffffc90001630024 Signed-off-by: Dave Forster <dforster@brocade.com> Acked-by: Alexander Duyck <alexander.h.duyck@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/fib_trie.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d07fc076bea0a..febca0f1008cb 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2452,9 +2452,7 @@ struct fib_route_iter { static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos) { - struct fib_table *tb = iter->main_tb; struct key_vector *l, **tp = &iter->tnode; - struct trie *t; t_key key; /* use cache location of next-to-find key */ @@ -2462,8 +2460,6 @@ static struct key_vector *fib_route_get_idx(struct fib_route_iter *iter, pos -= iter->pos; key = iter->key; } else { - t = (struct trie *)tb->tb_data; - iter->tnode = t->kv; iter->pos = 0; key = 0; } @@ -2504,12 +2500,12 @@ static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos) return NULL; iter->main_tb = tb; + t = (struct trie *)tb->tb_data; + iter->tnode = t->kv; if (*pos != 0) return fib_route_get_idx(iter, *pos); - t = (struct trie *)tb->tb_data; - iter->tnode = t->kv; iter->pos = 0; iter->key = 0; From 5e3b724e2767fb6495df1dcccaf7c79585c78ae9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert@linux-m68k.org> Date: Wed, 3 Aug 2016 21:42:18 +0200 Subject: [PATCH 029/513] net: dsa: b53: Add missing ULL suffix for 64-bit constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): drivers/net/dsa/b53/b53_common.c: In function ‘b53_arl_read’: drivers/net/dsa/b53/b53_common.c:1072: warning: integer constant is too large for ‘long’ type Fixes: 1da6df85c6fbed8f ("net: dsa: b53: Implement ARL add/del/dump operations") Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/dsa/b53/b53_regs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index 8f12bddd5dc90..a0b453ea34c90 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -258,7 +258,7 @@ * BCM5325 and BCM5365 share most definitions below */ #define B53_ARLTBL_MAC_VID_ENTRY(n) (0x10 * (n)) -#define ARLTBL_MAC_MASK 0xffffffffffff +#define ARLTBL_MAC_MASK 0xffffffffffffULL #define ARLTBL_VID_S 48 #define ARLTBL_VID_MASK_25 0xff #define ARLTBL_VID_MASK 0xfff From a6ed3ea65d9868fdf9eff84e6fe4f666b8d14b02 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov <ast@fb.com> Date: Fri, 5 Aug 2016 14:01:27 -0700 Subject: [PATCH 030/513] bpf: restore behavior of bpf_map_update_elem The introduction of pre-allocated hash elements inadvertently broke the behavior of bpf hash maps where users expected to call bpf_map_update_elem() without considering that the map can be full. Some programs do: old_value = bpf_map_lookup_elem(map, key); if (old_value) { ... prepare new_value on stack ... bpf_map_update_elem(map, key, new_value); } Before pre-alloc the update() for existing element would work even in 'map full' condition. Restore this behavior. The above program could have updated old_value in place instead of update() which would be faster and most programs use that approach, but sometimes the values are large and the programs use update() helper to do atomic replacement of the element. Note we cannot simply update element's value in-place like percpu hash map does and have to allocate extra num_possible_cpu elements and use this extra reserve when the map is full. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- kernel/bpf/hashtab.c | 84 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 73 insertions(+), 11 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index fff3650d52fc7..570eeca7bdfa7 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -26,11 +26,18 @@ struct bpf_htab { struct bucket *buckets; void *elems; struct pcpu_freelist freelist; + void __percpu *extra_elems; atomic_t count; /* number of elements in this hashtable */ u32 n_buckets; /* number of hash buckets */ u32 elem_size; /* size of each element in bytes */ }; +enum extra_elem_state { + HTAB_NOT_AN_EXTRA_ELEM = 0, + HTAB_EXTRA_ELEM_FREE, + HTAB_EXTRA_ELEM_USED +}; + /* each htab element is struct htab_elem + key + value */ struct htab_elem { union { @@ -38,7 +45,10 @@ struct htab_elem { struct bpf_htab *htab; struct pcpu_freelist_node fnode; }; - struct rcu_head rcu; + union { + struct rcu_head rcu; + enum extra_elem_state state; + }; u32 hash; char key[0] __aligned(8); }; @@ -113,6 +123,23 @@ static int prealloc_elems_and_freelist(struct bpf_htab *htab) return err; } +static int alloc_extra_elems(struct bpf_htab *htab) +{ + void __percpu *pptr; + int cpu; + + pptr = __alloc_percpu_gfp(htab->elem_size, 8, GFP_USER | __GFP_NOWARN); + if (!pptr) + return -ENOMEM; + + for_each_possible_cpu(cpu) { + ((struct htab_elem *)per_cpu_ptr(pptr, cpu))->state = + HTAB_EXTRA_ELEM_FREE; + } + htab->extra_elems = pptr; + return 0; +} + /* Called from syscall */ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) { @@ -185,6 +212,8 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) if (percpu) cost += (u64) round_up(htab->map.value_size, 8) * num_possible_cpus() * htab->map.max_entries; + else + cost += (u64) htab->elem_size * num_possible_cpus(); if (cost >= U32_MAX - PAGE_SIZE) /* make sure page count doesn't overflow */ @@ -212,14 +241,22 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr) raw_spin_lock_init(&htab->buckets[i].lock); } + if (!percpu) { + err = alloc_extra_elems(htab); + if (err) + goto free_buckets; + } + if (!(attr->map_flags & BPF_F_NO_PREALLOC)) { err = prealloc_elems_and_freelist(htab); if (err) - goto free_buckets; + goto free_extra_elems; } return &htab->map; +free_extra_elems: + free_percpu(htab->extra_elems); free_buckets: kvfree(htab->buckets); free_htab: @@ -349,7 +386,6 @@ static void htab_elem_free(struct bpf_htab *htab, struct htab_elem *l) if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) free_percpu(htab_elem_get_ptr(l, htab->map.key_size)); kfree(l); - } static void htab_elem_free_rcu(struct rcu_head *head) @@ -370,6 +406,11 @@ static void htab_elem_free_rcu(struct rcu_head *head) static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) { + if (l->state == HTAB_EXTRA_ELEM_USED) { + l->state = HTAB_EXTRA_ELEM_FREE; + return; + } + if (!(htab->map.map_flags & BPF_F_NO_PREALLOC)) { pcpu_freelist_push(&htab->freelist, &l->fnode); } else { @@ -381,25 +422,44 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key, void *value, u32 key_size, u32 hash, - bool percpu, bool onallcpus) + bool percpu, bool onallcpus, + bool old_elem_exists) { u32 size = htab->map.value_size; bool prealloc = !(htab->map.map_flags & BPF_F_NO_PREALLOC); struct htab_elem *l_new; void __percpu *pptr; + int err = 0; if (prealloc) { l_new = (struct htab_elem *)pcpu_freelist_pop(&htab->freelist); if (!l_new) - return ERR_PTR(-E2BIG); + err = -E2BIG; } else { if (atomic_inc_return(&htab->count) > htab->map.max_entries) { atomic_dec(&htab->count); - return ERR_PTR(-E2BIG); + err = -E2BIG; + } else { + l_new = kmalloc(htab->elem_size, + GFP_ATOMIC | __GFP_NOWARN); + if (!l_new) + return ERR_PTR(-ENOMEM); } - l_new = kmalloc(htab->elem_size, GFP_ATOMIC | __GFP_NOWARN); - if (!l_new) - return ERR_PTR(-ENOMEM); + } + + if (err) { + if (!old_elem_exists) + return ERR_PTR(err); + + /* if we're updating the existing element and the hash table + * is full, use per-cpu extra elems + */ + l_new = this_cpu_ptr(htab->extra_elems); + if (l_new->state != HTAB_EXTRA_ELEM_FREE) + return ERR_PTR(-E2BIG); + l_new->state = HTAB_EXTRA_ELEM_USED; + } else { + l_new->state = HTAB_NOT_AN_EXTRA_ELEM; } memcpy(l_new->key, key, key_size); @@ -489,7 +549,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value, if (ret) goto err; - l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false); + l_new = alloc_htab_elem(htab, key, value, key_size, hash, false, false, + !!l_old); if (IS_ERR(l_new)) { /* all pre-allocated elements are in use or memory exhausted */ ret = PTR_ERR(l_new); @@ -563,7 +624,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key, } } else { l_new = alloc_htab_elem(htab, key, value, key_size, - hash, true, onallcpus); + hash, true, onallcpus, false); if (IS_ERR(l_new)) { ret = PTR_ERR(l_new); goto err; @@ -652,6 +713,7 @@ static void htab_map_free(struct bpf_map *map) htab_free_elems(htab); pcpu_freelist_destroy(&htab->freelist); } + free_percpu(htab->extra_elems); kvfree(htab->buckets); kfree(htab); } From ba0cc3c153590e3d31985b8f8914d205a20b0d7a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov <ast@fb.com> Date: Fri, 5 Aug 2016 14:01:28 -0700 Subject: [PATCH 031/513] samples/bpf: add bpf_map_update_elem() tests increase test coverage to check previously missing 'update when full' Signed-off-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- samples/bpf/test_maps.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/samples/bpf/test_maps.c b/samples/bpf/test_maps.c index 47bf0858f9e47..cce2b59751ebc 100644 --- a/samples/bpf/test_maps.c +++ b/samples/bpf/test_maps.c @@ -68,7 +68,16 @@ static void test_hashmap_sanity(int i, void *data) assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == -1 && errno == E2BIG); + /* update existing element, thought the map is full */ + key = 1; + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0); + key = 2; + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + key = 1; + assert(bpf_update_elem(map_fd, &key, &value, BPF_ANY) == 0); + /* check that key = 0 doesn't exist */ + key = 0; assert(bpf_delete_elem(map_fd, &key) == -1 && errno == ENOENT); /* iterate over two elements */ @@ -413,10 +422,12 @@ static void do_work(int fn, void *data) for (i = fn; i < MAP_SIZE; i += TASKS) { key = value = i; - if (do_update) + if (do_update) { assert(bpf_update_elem(map_fd, &key, &value, BPF_NOEXIST) == 0); - else + assert(bpf_update_elem(map_fd, &key, &value, BPF_EXIST) == 0); + } else { assert(bpf_delete_elem(map_fd, &key) == 0); + } } } From c518189567eaf42b2ec50a4d982484c8e38799f8 Mon Sep 17 00:00:00 2001 From: Harini Katakam <harini.katakam@xilinx.com> Date: Fri, 5 Aug 2016 10:31:58 +0530 Subject: [PATCH 032/513] net: macb: Correct CAPS mask USRIO and JUMBO CAPS have the same mask. Fix the same. Fixes: ce721a702197 ("net: ethernet: cadence-macb: Add disabled usrio caps") Cc: stable@vger.kernel.org # v4.5+ Signed-off-by: Harini Katakam <harinik@xilinx.com> Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/cadence/macb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index 36893d8958d4d..b6fcf10621b63 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -403,11 +403,11 @@ #define MACB_CAPS_USRIO_DEFAULT_IS_MII_GMII 0x00000004 #define MACB_CAPS_NO_GIGABIT_HALF 0x00000008 #define MACB_CAPS_USRIO_DISABLED 0x00000010 +#define MACB_CAPS_JUMBO 0x00000020 #define MACB_CAPS_FIFO_MODE 0x10000000 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 #define MACB_CAPS_SG_DISABLED 0x40000000 #define MACB_CAPS_MACB_IS_GEM 0x80000000 -#define MACB_CAPS_JUMBO 0x00000010 /* Bit manipulation macros */ #define MACB_BIT(name) \ From c5b48fa7e298b9a8968a1c1fc0ef013069ca2dd2 Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba <lukasz.odzioba@intel.com> Date: Sat, 23 Jul 2016 01:44:49 +0200 Subject: [PATCH 033/513] EDAC, sb_edac: Fix channel reporting on Knights Landing On Intel Xeon Phi Knights Landing processor family the channels of the memory controller have untypical arrangement - MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2. This causes the EDAC driver to report the channel name incorrectly. We missed this change earlier, so the code already contains similar comment, but the translation function is incorrect. Without this patch: errors in DIMM_A and DIMM_D were reported in DIMM_D errors in DIMM_B and DIMM_E were reported in DIMM_E errors in DIMM_C and DIMM_F were reported in DIMM_F Correct this. Hubert Chrzaniuk: - rebased to 4.8 - comments and code cleanup Fixes: d0cdf9003140 ("sb_edac: Add Knights Landing (Xeon Phi gen 2) support") Reviewed-by: Tony Luck <tony.luck@intel.com> Cc: Mauro Carvalho Chehab <mchehab@kernel.org> Cc: Hubert Chrzaniuk <hubert.chrzaniuk@intel.com> Cc: linux-edac <linux-edac@vger.kernel.org> Cc: lukasz.anaczkowski@intel.com Cc: lukasz.odzioba@intel.com Cc: mchehab@kernel.org Cc: <stable@vger.kernel.org> # v4.5.. Link: http://lkml.kernel.org/r/1469231089-22837-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Lukasz Odzioba <lukasz.odzioba@intel.com> [ Boris: Simplify a bit by removing char mc. ] Signed-off-by: Borislav Petkov <bp@suse.de> --- drivers/edac/sb_edac.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 4fb2eb7c800d8..ce0067b7a2f67 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -552,9 +552,9 @@ static const struct pci_id_table pci_dev_descr_haswell_table[] = { /* Knight's Landing Support */ /* * KNL's memory channels are swizzled between memory controllers. - * MC0 is mapped to CH3,5,6 and MC1 is mapped to CH0,1,2 + * MC0 is mapped to CH3,4,5 and MC1 is mapped to CH0,1,2 */ -#define knl_channel_remap(channel) ((channel + 3) % 6) +#define knl_channel_remap(mc, chan) ((mc) ? (chan) : (chan) + 3) /* Memory controller, TAD tables, error injection - 2-8-0, 2-9-0 (2 of these) */ #define PCI_DEVICE_ID_INTEL_KNL_IMC_MC 0x7840 @@ -1286,7 +1286,7 @@ static u32 knl_get_mc_route(int entry, u32 reg) mc = GET_BITFIELD(reg, entry*3, (entry*3)+2); chan = GET_BITFIELD(reg, (entry*2) + 18, (entry*2) + 18 + 1); - return knl_channel_remap(mc*3 + chan); + return knl_channel_remap(mc, chan); } /* @@ -2997,8 +2997,15 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } else { char A = *("A"); - channel = knl_channel_remap(channel); + /* + * Reported channel is in range 0-2, so we can't map it + * back to mc. To figure out mc we check machine check + * bank register that reported this error. + * bank15 means mc0 and bank16 means mc1. + */ + channel = knl_channel_remap(m->bank == 16, channel); channel_mask = 1 << channel; + snprintf(msg, sizeof(msg), "%s%s err_code:%04x:%04x channel:%d (DIMM_%c)", overflow ? " OVERFLOW" : "", From 707e6835f89419ff1c05e828c2d9939fd95a7ad8 Mon Sep 17 00:00:00 2001 From: Liping Zhang <liping.zhang@spreadtrum.com> Date: Sat, 23 Jul 2016 22:16:56 +0800 Subject: [PATCH 034/513] netfilter: nf_ct_h323: do not re-activate already expired timer Commit 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of set_expect_timeout") just simplify the source codes if (!del_timer(&exp->timeout)) return 0; add_timer(&exp->timeout); to mod_timer(&exp->timeout, jiffies + info->timeout * HZ); This is not correct, and introduce a race codition: CPU0 CPU1 - timer expire process_rcf expectation_timed_out lock(exp_lock) - find_exp waiting exp_lock... re-activate timer!! waiting exp_lock... unlock(exp_lock) lock(exp_lock) - unlink expect - free(expect) - unlock(exp_lock) So when the timer expires again, we will access the memory that was already freed. Replace mod_timer with mod_timer_pending here to fix this problem. Fixes: 96d1327ac2e3 ("netfilter: h323: Use mod_timer instead of set_expect_timeout") Cc: Gao Feng <fgao@ikuai8.com> Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_conntrack_h323_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index bb77a97961bfd..5c0db5c64734a 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1473,7 +1473,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, "timeout to %u seconds for", info->timeout); nf_ct_dump_tuple(&exp->tuple); - mod_timer(&exp->timeout, jiffies + info->timeout * HZ); + mod_timer_pending(&exp->timeout, + jiffies + info->timeout * HZ); } spin_unlock_bh(&nf_conntrack_expect_lock); } From 2c86943c20e375b0fe562af0626f2e5461d8d203 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Sun, 24 Jul 2016 19:53:19 +0200 Subject: [PATCH 035/513] netfilter: nf_tables: s/MFT_REG32_01/NFT_REG32_01 MFT_REG32_01 is a typo, rename this to NFT_REG32_01. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- include/uapi/linux/netfilter/nf_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 01751faccaf87..c674ba2563b7d 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -24,7 +24,7 @@ enum nft_registers { __NFT_REG_MAX, NFT_REG32_00 = 8, - MFT_REG32_01, + NFT_REG32_01, NFT_REG32_02, NFT_REG32_03, NFT_REG32_04, From 925baf394bb0e65d85330afe467393f66f910353 Mon Sep 17 00:00:00 2001 From: Baruch Siach <baruch@tkos.co.il> Date: Wed, 27 Jul 2016 08:41:49 +0300 Subject: [PATCH 036/513] tools/gpio: fix gpio-event-mon header comment Fixes: 97f69747d8b1 ('tools/gpio: add the gpio-event-mon tool') Signed-off-by: Baruch Siach <baruch@tkos.co.il> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- tools/gpio/gpio-event-mon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 448ed96b3b4f8..1c14c25951583 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -1,5 +1,5 @@ /* - * gpio-hammer - example swiss army knife to shake GPIO lines on a system + * gpio-event-mon - monitor GPIO line events from userspace * * Copyright (C) 2016 Linus Walleij * From c1eda3c6394f805886b2afa8c7ea5e04305ec698 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso <pablo@netfilter.org> Date: Mon, 1 Aug 2016 13:13:08 +0200 Subject: [PATCH 037/513] netfilter: nft_rbtree: ignore inactive matching element with no descendants If we find a matching element that is inactive with no descendants, we jump to the found label, then crash because of nul-dereference on the left branch. Fix this by checking that the element is active and not an interval end and skipping the logic that only applies to the tree iteration. Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> Tested-by: Anders K. Pedersen <akp@akp.dk> --- net/netfilter/nft_rbtree.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 6473936d05c67..ffe9ae062d23e 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -70,7 +70,6 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } else if (d > 0) parent = parent->rb_right; else { -found: if (!nft_set_elem_active(&rbe->ext, genmask)) { parent = parent->rb_left; continue; @@ -84,9 +83,12 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, } } - if (set->flags & NFT_SET_INTERVAL && interval != NULL) { - rbe = interval; - goto found; + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && + !nft_rbtree_interval_end(interval)) { + spin_unlock_bh(&nft_rbtree_lock); + *ext = &interval->ext; + return true; } out: spin_unlock_bh(&nft_rbtree_lock); From cf1b18030de29e4e5b0a57695ae5db4a89da0ff7 Mon Sep 17 00:00:00 2001 From: Lubomir Rintel <lkundrak@v3.sk> Date: Sun, 24 Jul 2016 13:53:30 +0200 Subject: [PATCH 038/513] USB: serial: option: add D-Link DWM-156/A3 The device has four interfaces; the three serial ports ought to be handled by this driver: 00 Diagnostic interface serial port 01 NMEA device serial port 02 Mass storage (sd card) 03 Modem serial port The other product ids listed in the Windows driver are present already. Signed-off-by: Lubomir Rintel <lkundrak@v3.sk> Cc: stable <stable@vger.kernel.org> Signed-off-by: Johan Hovold <johan@kernel.org> --- drivers/usb/serial/option.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 8e07536c233a0..0338851e31a2b 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1966,6 +1966,7 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&net_intf4_blacklist }, { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, From 0d35d0815b19216f852f257afe420f7c7d182780 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@c-s.fr> Date: Wed, 3 Aug 2016 12:41:40 +0200 Subject: [PATCH 039/513] netfilter: nf_conntrack_sip: CSeq 0 is a valid CSeq Do not drop packet when CSeq is 0 as 0 is also a valid value for CSeq. simple_strtoul() will return 0 either when all digits are 0 or if there are no digits at all. Therefore when simple_strtoul() returns 0 we check if first character is digit 0 or not. Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_conntrack_sip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 8d9db9d4702b0..7d77217de6a3b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1383,7 +1383,7 @@ static int process_sip_response(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) { + if (!cseq && *(*dptr + matchoff) != '0') { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } @@ -1446,7 +1446,7 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } cseq = simple_strtoul(*dptr + matchoff, NULL, 10); - if (!cseq) { + if (!cseq && *(*dptr + matchoff) != '0') { nf_ct_helper_log(skb, ct, "cannot get cseq"); return NF_DROP; } From 6977495c06f7f47636a076ee5a0ca571279d9697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Robert=20Deli=C3=ABn?= <robert@delien.nl> Date: Thu, 28 Jul 2016 18:52:55 +0000 Subject: [PATCH 040/513] USB: serial: ftdi_sio: add PIDs for Ivium Technologies devices Ivium Technologies uses the FTDI VID with custom PIDs for their line of electrochemical interfaces and the PalmSens they developed for PalmSens BV. Signed-off-by: Robert Delien <robert@delien.nl> Cc: stable <stable@vger.kernel.org> Signed-off-by: Johan Hovold <johan@kernel.org> --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 00820809139a0..8962cdcfe9ff5 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -648,6 +648,8 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(FTDI_VID, FTDI_ELV_TFD128_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_FM3RX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_ELV_WS777_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_PALMSENS_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_IVIUM_XSTAT_PID) }, { USB_DEVICE(FTDI_VID, LINX_SDMUSBQSS_PID) }, { USB_DEVICE(FTDI_VID, LINX_MASTERDEVEL2_PID) }, { USB_DEVICE(FTDI_VID, LINX_FUTURE_0_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index c5d6c1e73e8e0..067e3a6300d15 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -405,6 +405,12 @@ #define FTDI_4N_GALAXY_DE_2_PID 0xF3C1 #define FTDI_4N_GALAXY_DE_3_PID 0xF3C2 +/* + * Ivium Technologies product IDs + */ +#define FTDI_PALMSENS_PID 0xf440 +#define FTDI_IVIUM_XSTAT_PID 0xf441 + /* * Linx Technologies product ids */ From ae34d12cc1e212ffcd92e069030e54dae69c832f Mon Sep 17 00:00:00 2001 From: "Sheng-Hui J. Chu" <s.jeffrey.chu@gmail.com> Date: Thu, 28 Jul 2016 17:01:45 -0400 Subject: [PATCH 041/513] USB: serial: ftdi_sio: add device ID for WICED USB UART dev board BCM20706V2_EVAL is a WICED dev board designed with FT2232H USB 2.0 UART/FIFO IC. To support BCM920706V2_EVAL dev board for WICED development on Linux. Add the VID(0a5c) and PID(6422) to ftdi_sio driver to allow loading ftdi_sio for this board. Signed-off-by: Sheng-Hui J. Chu <s.jeffrey.chu@gmail.com> Cc: stable <stable@vger.kernel.org> Signed-off-by: Johan Hovold <johan@kernel.org> --- drivers/usb/serial/ftdi_sio.c | 1 + drivers/usb/serial/ftdi_sio_ids.h | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8962cdcfe9ff5..b2d767e743fc2 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1010,6 +1010,7 @@ static const struct usb_device_id id_table_combined[] = { { USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) }, { USB_DEVICE(ICPDAS_VID, ICPDAS_I7563U_PID) }, + { USB_DEVICE(WICED_VID, WICED_USB20706V2_PID) }, { } /* Terminating entry */ }; diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index 067e3a6300d15..f87a938cf0057 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -678,6 +678,12 @@ #define INTREPID_VALUECAN_PID 0x0601 #define INTREPID_NEOVI_PID 0x0701 +/* + * WICED USB UART + */ +#define WICED_VID 0x0A5C +#define WICED_USB20706V2_PID 0x6422 + /* * Definitions for ID TECH (www.idt-net.com) devices */ From 01d7956b58e644ea0d2e8d9340c5727a8fc39d70 Mon Sep 17 00:00:00 2001 From: Daniele Palmas <dnlplm@gmail.com> Date: Tue, 2 Aug 2016 11:29:25 +0200 Subject: [PATCH 042/513] USB: serial: option: add support for Telit LE920A4 This patch adds a set of compositions for Telit LE920A4. Compositions in short are: 0x1207: tty + tty 0x1208: tty + adb + tty + tty 0x1211: tty + adb + ecm 0x1212: tty + adb 0x1213: ecm + tty 0x1214: tty + adb + ecm + tty telit_le922_blacklist_usbcfg3 is reused for compositions 0x1211 and 0x1214 due to the same interfaces positions. Signed-off-by: Daniele Palmas <dnlplm@gmail.com> Cc: stable <stable@vger.kernel.org> Signed-off-by: Johan Hovold <johan@kernel.org> --- drivers/usb/serial/option.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 0338851e31a2b..bc472584a229d 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -274,6 +274,12 @@ static void option_instat_callback(struct urb *urb); #define TELIT_PRODUCT_LE920 0x1200 #define TELIT_PRODUCT_LE910 0x1201 #define TELIT_PRODUCT_LE910_USBCFG4 0x1206 +#define TELIT_PRODUCT_LE920A4_1207 0x1207 +#define TELIT_PRODUCT_LE920A4_1208 0x1208 +#define TELIT_PRODUCT_LE920A4_1211 0x1211 +#define TELIT_PRODUCT_LE920A4_1212 0x1212 +#define TELIT_PRODUCT_LE920A4_1213 0x1213 +#define TELIT_PRODUCT_LE920A4_1214 0x1214 /* ZTE PRODUCTS */ #define ZTE_VENDOR_ID 0x19d2 @@ -628,6 +634,11 @@ static const struct option_blacklist_info telit_le920_blacklist = { .reserved = BIT(1) | BIT(5), }; +static const struct option_blacklist_info telit_le920a4_blacklist_1 = { + .sendsetup = BIT(0), + .reserved = BIT(1), +}; + static const struct option_blacklist_info telit_le922_blacklist_usbcfg0 = { .sendsetup = BIT(2), .reserved = BIT(0) | BIT(1) | BIT(3), @@ -1203,6 +1214,16 @@ static const struct usb_device_id option_ids[] = { .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920), .driver_info = (kernel_ulong_t)&telit_le920_blacklist }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1207) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1208), + .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1211), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1212), + .driver_info = (kernel_ulong_t)&telit_le920a4_blacklist_1 }, + { USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1213, 0xff) }, + { USB_DEVICE(TELIT_VENDOR_ID, TELIT_PRODUCT_LE920A4_1214), + .driver_info = (kernel_ulong_t)&telit_le922_blacklist_usbcfg3 }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, ZTE_PRODUCT_MF622, 0xff, 0xff, 0xff) }, /* ZTE WCDMA products */ { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x0002, 0xff, 0xff, 0xff), .driver_info = (kernel_ulong_t)&net_intf1_blacklist }, From 647024a7df36014bbc4479d92d88e6b77c0afcf6 Mon Sep 17 00:00:00 2001 From: Alexey Klimov <klimov.linux@gmail.com> Date: Mon, 8 Aug 2016 02:34:46 +0100 Subject: [PATCH 043/513] USB: serial: fix memleak in driver-registration error path udriver struct allocated by kzalloc() will not be freed if usb_register() and next calls fail. This patch fixes this by adding one more step with kfree(udriver) in error path. Signed-off-by: Alexey Klimov <klimov.linux@gmail.com> Acked-by: Alan Stern <stern@rowland.harvard.edu> Cc: stable <stable@vger.kernel.org> Signed-off-by: Johan Hovold <johan@kernel.org> --- drivers/usb/serial/usb-serial.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c index b1b9bac440161..d213cf44a7e45 100644 --- a/drivers/usb/serial/usb-serial.c +++ b/drivers/usb/serial/usb-serial.c @@ -1433,7 +1433,7 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[] rc = usb_register(udriver); if (rc) - return rc; + goto failed_usb_register; for (sd = serial_drivers; *sd; ++sd) { (*sd)->usb_driver = udriver; @@ -1451,6 +1451,8 @@ int usb_serial_register_drivers(struct usb_serial_driver *const serial_drivers[] while (sd-- > serial_drivers) usb_serial_deregister(*sd); usb_deregister(udriver); +failed_usb_register: + kfree(udriver); return rc; } EXPORT_SYMBOL_GPL(usb_serial_register_drivers); From bf47dc572aaf53c3f2311c71e03ec01be3131fd6 Mon Sep 17 00:00:00 2001 From: Sascha Silbe <silbe@linux.vnet.ibm.com> Date: Mon, 4 Jul 2016 17:16:47 +0200 Subject: [PATCH 044/513] s390: clarify compressed image code path The way the decompressor is hooked into the start-up code is rather subtle, with a mix of multiply-defined symbols and hardcoded address literals. Add some comments at the junction points to clarify how it works. Signed-off-by: Sascha Silbe <silbe@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/boot/compressed/head.S | 11 ++++++++--- arch/s390/kernel/head.S | 4 +++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/s390/boot/compressed/head.S b/arch/s390/boot/compressed/head.S index f86a4eef28a92..28c4f96a2d9ce 100644 --- a/arch/s390/boot/compressed/head.S +++ b/arch/s390/boot/compressed/head.S @@ -21,16 +21,21 @@ ENTRY(startup_continue) lg %r15,.Lstack-.LPG1(%r13) aghi %r15,-160 brasl %r14,decompress_kernel - # setup registers for memory mover & branch to target + # Set up registers for memory mover. We move the decompressed image to + # 0x11000, starting at offset 0x11000 in the decompressed image so + # that code living at 0x11000 in the image will end up at 0x11000 in + # memory. lgr %r4,%r2 lg %r2,.Loffset-.LPG1(%r13) la %r4,0(%r2,%r4) lg %r3,.Lmvsize-.LPG1(%r13) lgr %r5,%r3 - # move the memory mover someplace safe + # Move the memory mover someplace safe so it doesn't overwrite itself. la %r1,0x200 mvc 0(mover_end-mover,%r1),mover-.LPG1(%r13) - # decompress image is started at 0x11000 + # When the memory mover is done we pass control to + # arch/s390/kernel/head64.S:startup_continue which lives at 0x11000 in + # the decompressed image. lgr %r6,%r2 br %r1 mover: diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S index 56e4d8234ef2e..4431905f8cfa2 100644 --- a/arch/s390/kernel/head.S +++ b/arch/s390/kernel/head.S @@ -309,7 +309,9 @@ ENTRY(startup_kdump) l %r15,.Lstack-.LPG0(%r13) ahi %r15,-STACK_FRAME_OVERHEAD brasl %r14,verify_facilities - /* Continue with startup code in head64.S */ +# For uncompressed images, continue in +# arch/s390/kernel/head64.S. For compressed images, continue in +# arch/s390/boot/compressed/head.S. jg startup_continue .Lstack: From 134a24cd895c5d138d639a0741ad27e389cd4562 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger <borntraeger@de.ibm.com> Date: Wed, 3 Aug 2016 17:55:00 +0200 Subject: [PATCH 045/513] s390/crc32-vx: Fix checksum calculation for small sizes The current prealign logic will fail for sizes < alignment, as the new datalen passed to the vector function is smaller than zero. Being a size_t this gets wrapped to a huge number causing memory overruns and wrong data. Let's add an early exit if the size is smaller than the minimal size with alignment. This will also avoid calling the software fallback twice for all sizes smaller than the minimum size (prealign + remaining) Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Fixes: f848dbd3bc1a ("s390/crc32-vx: add crypto API module for optimized CRC-32 algorithms") Reviewed-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/crypto/crc32-vx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c index 577ae1d4ae894..2bad9d8370299 100644 --- a/arch/s390/crypto/crc32-vx.c +++ b/arch/s390/crypto/crc32-vx.c @@ -51,6 +51,9 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); struct kernel_fpu vxstate; \ unsigned long prealign, aligned, remaining; \ \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ + return ___crc32_sw(crc, data, datalen); \ + \ if ((unsigned long)data & VX_ALIGN_MASK) { \ prealign = VX_ALIGNMENT - \ ((unsigned long)data & VX_ALIGN_MASK); \ @@ -59,9 +62,6 @@ u32 crc32c_le_vgfm_16(u32 crc, unsigned char const *buf, size_t size); data = (void *)((unsigned long)data + prealign); \ } \ \ - if (datalen < VX_MIN_LEN) \ - return ___crc32_sw(crc, data, datalen); \ - \ aligned = datalen & ~VX_ALIGN_MASK; \ remaining = datalen & VX_ALIGN_MASK; \ \ From e2efc424549eb66d227dfe6d073d1789a24bc698 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger <borntraeger@de.ibm.com> Date: Thu, 4 Aug 2016 10:24:30 +0200 Subject: [PATCH 046/513] s390/lib: fix memcmp and strstr if two string compare equal the clcle instruction will update the string addresses to point _after_ the string. This might already be on a different page, so we should not use these pointer to calculate the difference as in that case the calculation of the difference can cause oopses. The return value of memcmp does not need the difference, we can just reuse the condition code and return for CC=1 (All bytes compared, first operand low) -1 and for CC=2 (All bytes compared, first operand high) +1 strstr also does not need the diff. While fixing this, make the common function clcle "correct on its own" by using l1 instead of l2 for the first length. strstr will call this with l2 for both strings. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Fixes: db7f5eef3dc0 ("s390/lib: use basic blocks for inline assemblies") Reviewed-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/lib/string.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/s390/lib/string.c b/arch/s390/lib/string.c index e390bbb16443d..48352bffbc929 100644 --- a/arch/s390/lib/string.c +++ b/arch/s390/lib/string.c @@ -237,11 +237,10 @@ char * strrchr(const char * s, int c) EXPORT_SYMBOL(strrchr); static inline int clcle(const char *s1, unsigned long l1, - const char *s2, unsigned long l2, - int *diff) + const char *s2, unsigned long l2) { register unsigned long r2 asm("2") = (unsigned long) s1; - register unsigned long r3 asm("3") = (unsigned long) l2; + register unsigned long r3 asm("3") = (unsigned long) l1; register unsigned long r4 asm("4") = (unsigned long) s2; register unsigned long r5 asm("5") = (unsigned long) l2; int cc; @@ -252,7 +251,6 @@ static inline int clcle(const char *s1, unsigned long l1, " srl %0,28" : "=&d" (cc), "+a" (r2), "+a" (r3), "+a" (r4), "+a" (r5) : : "cc"); - *diff = *(char *)r2 - *(char *)r4; return cc; } @@ -270,9 +268,9 @@ char * strstr(const char * s1,const char * s2) return (char *) s1; l1 = __strend(s1) - s1; while (l1-- >= l2) { - int cc, dummy; + int cc; - cc = clcle(s1, l1, s2, l2, &dummy); + cc = clcle(s1, l2, s2, l2); if (!cc) return (char *) s1; s1++; @@ -313,11 +311,11 @@ EXPORT_SYMBOL(memchr); */ int memcmp(const void *cs, const void *ct, size_t n) { - int ret, diff; + int ret; - ret = clcle(cs, n, ct, n, &diff); + ret = clcle(cs, n, ct, n); if (ret) - ret = diff; + ret = ret == 1 ? -1 : 1; return ret; } EXPORT_SYMBOL(memcmp); From 6e127efeeae5bbbc323e028d96c43dec0cc7b1b8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger <borntraeger@de.ibm.com> Date: Thu, 4 Aug 2016 12:51:09 +0200 Subject: [PATCH 047/513] s390/config: make the vector optimized crc function builtin For all configs with CONFIG_BTRFS_FS = y we should also make the optimized crc module builtin. Otherwise early mounts will fall back to the software variant. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/configs/default_defconfig | 2 +- arch/s390/configs/gcov_defconfig | 2 +- arch/s390/configs/performance_defconfig | 2 +- arch/s390/defconfig | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig index 889ea34502109..26e0c7f088141 100644 --- a/arch/s390/configs/default_defconfig +++ b/arch/s390/configs/default_defconfig @@ -678,7 +678,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig index 1bcfd764910a7..24879dab47bc1 100644 --- a/arch/s390/configs/gcov_defconfig +++ b/arch/s390/configs/gcov_defconfig @@ -616,7 +616,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index 13ff090139c86..a5c1e5f2a0cab 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -615,7 +615,7 @@ CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m CONFIG_CRYPTO_GHASH_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_ASYMMETRIC_KEY_TYPE=y CONFIG_ASYMMETRIC_PUBLIC_KEY_SUBTYPE=m CONFIG_X509_CERTIFICATE_PARSER=m diff --git a/arch/s390/defconfig b/arch/s390/defconfig index ccccebeeaaf67..73610f2e3b4fb 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -234,7 +234,7 @@ CONFIG_CRYPTO_SHA256_S390=m CONFIG_CRYPTO_SHA512_S390=m CONFIG_CRYPTO_DES_S390=m CONFIG_CRYPTO_AES_S390=m -CONFIG_CRYPTO_CRC32_S390=m +CONFIG_CRYPTO_CRC32_S390=y CONFIG_CRC7=m # CONFIG_XZ_DEC_X86 is not set # CONFIG_XZ_DEC_POWERPC is not set From 7de6a63ca2ff21ea67e50a546ca0e9bb6e2b0718 Mon Sep 17 00:00:00 2001 From: Sebastian Ott <sebott@linux.vnet.ibm.com> Date: Thu, 28 Jul 2016 20:26:37 +0200 Subject: [PATCH 048/513] s390/cio: stop using subchannel_id from ccw_device_private We want to get rid of the copy of struct subchannel_id maintained in ccw_device_private, so obtain it from the subchannel directly. Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/device_status.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index 15b56a15db151..9bc3512374c90 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -26,6 +26,7 @@ static void ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) { + struct subchannel *sch = to_subchannel(cdev->dev.parent); char dbf_text[15]; if (!scsw_is_valid_cstat(&irb->scsw) || @@ -36,10 +37,10 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) "received" " ... device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->dev_id.devno, cdev->private->schid.ssid, - cdev->private->schid.sch_no, + cdev->private->dev_id.devno, sch->schid.ssid, + sch->schid.sch_no, scsw_dstat(&irb->scsw), scsw_cstat(&irb->scsw)); - sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no); + sprintf(dbf_text, "chk%x", sch->schid.sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof(struct irb)); } From 9080c92494f0d7e2bca1197bbddcc417117057c3 Mon Sep 17 00:00:00 2001 From: Sebastian Ott <sebott@linux.vnet.ibm.com> Date: Thu, 28 Jul 2016 20:30:31 +0200 Subject: [PATCH 049/513] s390/qdio: obtain subchannel_id via ccw_device_get_schid() We want to get rid of the copy of struct subchannel_id maintained in ccw_device_private, so obtain it using ccw_device_get_schid(). Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com> Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/qdio_main.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 4bb5262f7aee7..94e96207d42dd 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1066,10 +1066,12 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; int cstat, dstat; if (!intparm || !irq_ptr) { - DBF_ERROR("qint:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_ERROR("qint:%4x", schid.sch_no); return; } @@ -1122,12 +1124,14 @@ void qdio_int_handler(struct ccw_device *cdev, unsigned long intparm, int qdio_get_ssqd_desc(struct ccw_device *cdev, struct qdio_ssqd_desc *data) { + struct subchannel_id schid; if (!cdev || !cdev->private) return -EINVAL; - DBF_EVENT("get ssqd:%4x", cdev->private->schid.sch_no); - return qdio_setup_get_ssqd(NULL, &cdev->private->schid, data); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("get ssqd:%4x", schid.sch_no); + return qdio_setup_get_ssqd(NULL, &schid, data); } EXPORT_SYMBOL_GPL(qdio_get_ssqd_desc); @@ -1154,6 +1158,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev) int qdio_shutdown(struct ccw_device *cdev, int how) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; int rc; unsigned long flags; @@ -1161,7 +1166,8 @@ int qdio_shutdown(struct ccw_device *cdev, int how) return -ENODEV; WARN_ON_ONCE(irqs_disabled()); - DBF_EVENT("qshutdown:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qshutdown:%4x", schid.sch_no); mutex_lock(&irq_ptr->setup_mutex); /* @@ -1228,11 +1234,13 @@ EXPORT_SYMBOL_GPL(qdio_shutdown); int qdio_free(struct ccw_device *cdev) { struct qdio_irq *irq_ptr = cdev->private->qdio_data; + struct subchannel_id schid; if (!irq_ptr) return -ENODEV; - DBF_EVENT("qfree:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qfree:%4x", schid.sch_no); DBF_DEV_EVENT(DBF_ERR, irq_ptr, "dbf abandoned"); mutex_lock(&irq_ptr->setup_mutex); @@ -1251,9 +1259,11 @@ EXPORT_SYMBOL_GPL(qdio_free); */ int qdio_allocate(struct qdio_initialize *init_data) { + struct subchannel_id schid; struct qdio_irq *irq_ptr; - DBF_EVENT("qallocate:%4x", init_data->cdev->private->schid.sch_no); + ccw_device_get_schid(init_data->cdev, &schid); + DBF_EVENT("qallocate:%4x", schid.sch_no); if ((init_data->no_input_qs && !init_data->input_handler) || (init_data->no_output_qs && !init_data->output_handler)) @@ -1331,12 +1341,14 @@ static void qdio_detect_hsicq(struct qdio_irq *irq_ptr) */ int qdio_establish(struct qdio_initialize *init_data) { - struct qdio_irq *irq_ptr; struct ccw_device *cdev = init_data->cdev; + struct subchannel_id schid; + struct qdio_irq *irq_ptr; unsigned long saveflags; int rc; - DBF_EVENT("qestablish:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qestablish:%4x", schid.sch_no); irq_ptr = cdev->private->qdio_data; if (!irq_ptr) @@ -1407,11 +1419,13 @@ EXPORT_SYMBOL_GPL(qdio_establish); */ int qdio_activate(struct ccw_device *cdev) { + struct subchannel_id schid; struct qdio_irq *irq_ptr; int rc; unsigned long saveflags; - DBF_EVENT("qactivate:%4x", cdev->private->schid.sch_no); + ccw_device_get_schid(cdev, &schid); + DBF_EVENT("qactivate:%4x", schid.sch_no); irq_ptr = cdev->private->qdio_data; if (!irq_ptr) From 1ab50a99f8cc6f4d761421d55a4044ad7f185cb5 Mon Sep 17 00:00:00 2001 From: Sebastian Ott <sebott@linux.vnet.ibm.com> Date: Fri, 29 Jul 2016 10:43:53 +0200 Subject: [PATCH 050/513] s390/cio: remove subchannel_id from ccw_device_private A copy of struct subchannel_id is maintained in ccw_device_private. The subchannel id is a property of the subchannel. The additional copy is not needed. Internal users can obtain it from subchannel.schid - device drivers can use ccw_device_get_schid(). Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/device.c | 2 -- drivers/s390/cio/io_sch.h | 1 - 2 files changed, 3 deletions(-) diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 7ada078ffdd04..6a58bc8f46e2a 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -762,7 +762,6 @@ static int io_subchannel_initialize_dev(struct subchannel *sch, priv->state = DEV_STATE_NOT_OPER; priv->dev_id.devno = sch->schib.pmcw.dev; priv->dev_id.ssid = sch->schid.ssid; - priv->schid = sch->schid; INIT_WORK(&priv->todo_work, ccw_device_todo); INIT_LIST_HEAD(&priv->cmb_list); @@ -1000,7 +999,6 @@ static int ccw_device_move_to_sch(struct ccw_device *cdev, put_device(&old_sch->dev); /* Initialize new subchannel. */ spin_lock_irq(sch->lock); - cdev->private->schid = sch->schid; cdev->ccwlock = sch->lock; if (!sch_is_pseudo_sch(sch)) sch_set_cdev(sch, cdev); diff --git a/drivers/s390/cio/io_sch.h b/drivers/s390/cio/io_sch.h index 8975060af96cb..220f49145b2f9 100644 --- a/drivers/s390/cio/io_sch.h +++ b/drivers/s390/cio/io_sch.h @@ -120,7 +120,6 @@ struct ccw_device_private { int state; /* device state */ atomic_t onoff; struct ccw_dev_id dev_id; /* device id */ - struct subchannel_id schid; /* subchannel number */ struct ccw_request req; /* internal I/O request */ int iretry; u8 pgid_valid_mask; /* mask of valid PGIDs */ From a48ed867153c6d2f6d058267213a82dbd8b6737a Mon Sep 17 00:00:00 2001 From: Sebastian Ott <sebott@linux.vnet.ibm.com> Date: Fri, 29 Jul 2016 13:41:20 +0200 Subject: [PATCH 051/513] s390/qdio: get rid of spin_lock_irqsave usage All qdio functions that use spin_lock_irqsave are never used from irq context. Thus it is safe to convert all of them to use spin_lock_irq. Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/qdio_main.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index 94e96207d42dd..b200f27489a70 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1160,7 +1160,6 @@ int qdio_shutdown(struct ccw_device *cdev, int how) struct qdio_irq *irq_ptr = cdev->private->qdio_data; struct subchannel_id schid; int rc; - unsigned long flags; if (!irq_ptr) return -ENODEV; @@ -1190,7 +1189,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how) qdio_shutdown_debug_entries(irq_ptr); /* cleanup subchannel */ - spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + spin_lock_irq(get_ccwdev_lock(cdev)); if (how & QDIO_FLAG_CLEANUP_USING_CLEAR) rc = ccw_device_clear(cdev, QDIO_DOING_CLEANUP); @@ -1204,12 +1203,12 @@ int qdio_shutdown(struct ccw_device *cdev, int how) } qdio_set_state(irq_ptr, QDIO_IRQ_STATE_CLEANUP); - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + spin_unlock_irq(get_ccwdev_lock(cdev)); wait_event_interruptible_timeout(cdev->private->wait_q, irq_ptr->state == QDIO_IRQ_STATE_INACTIVE || irq_ptr->state == QDIO_IRQ_STATE_ERR, 10 * HZ); - spin_lock_irqsave(get_ccwdev_lock(cdev), flags); + spin_lock_irq(get_ccwdev_lock(cdev)); no_cleanup: qdio_shutdown_thinint(irq_ptr); @@ -1217,7 +1216,7 @@ int qdio_shutdown(struct ccw_device *cdev, int how) /* restore interrupt handler */ if ((void *)cdev->handler == (void *)qdio_int_handler) cdev->handler = irq_ptr->orig_handler; - spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags); + spin_unlock_irq(get_ccwdev_lock(cdev)); qdio_set_state(irq_ptr, QDIO_IRQ_STATE_INACTIVE); mutex_unlock(&irq_ptr->setup_mutex); @@ -1344,7 +1343,6 @@ int qdio_establish(struct qdio_initialize *init_data) struct ccw_device *cdev = init_data->cdev; struct subchannel_id schid; struct qdio_irq *irq_ptr; - unsigned long saveflags; int rc; ccw_device_get_schid(cdev, &schid); @@ -1373,7 +1371,7 @@ int qdio_establish(struct qdio_initialize *init_data) irq_ptr->ccw.count = irq_ptr->equeue.count; irq_ptr->ccw.cda = (u32)((addr_t)irq_ptr->qdr); - spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + spin_lock_irq(get_ccwdev_lock(cdev)); ccw_device_set_options_mask(cdev, 0); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); @@ -1381,7 +1379,7 @@ int qdio_establish(struct qdio_initialize *init_data) DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); } - spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { mutex_unlock(&irq_ptr->setup_mutex); @@ -1422,7 +1420,6 @@ int qdio_activate(struct ccw_device *cdev) struct subchannel_id schid; struct qdio_irq *irq_ptr; int rc; - unsigned long saveflags; ccw_device_get_schid(cdev, &schid); DBF_EVENT("qactivate:%4x", schid.sch_no); @@ -1445,7 +1442,7 @@ int qdio_activate(struct ccw_device *cdev) irq_ptr->ccw.count = irq_ptr->aqueue.count; irq_ptr->ccw.cda = 0; - spin_lock_irqsave(get_ccwdev_lock(cdev), saveflags); + spin_lock_irq(get_ccwdev_lock(cdev)); ccw_device_set_options(cdev, CCWDEV_REPORT_ALL); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, @@ -1454,7 +1451,7 @@ int qdio_activate(struct ccw_device *cdev) DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); } - spin_unlock_irqrestore(get_ccwdev_lock(cdev), saveflags); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) goto out; From ddebf6612c5dcc479725867b9fd9a4d98f41350f Mon Sep 17 00:00:00 2001 From: Sebastian Ott <sebott@linux.vnet.ibm.com> Date: Fri, 29 Jul 2016 14:00:27 +0200 Subject: [PATCH 052/513] s390/qdio: fix double return code evaluation qdio sometimes checks return codes twice. First with the ccw device's lock held and then a 2nd time after the lock is released. Simplify the code by releasing the lock earlier and unify the return code evaluation. Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Ursula Braun <ubraun@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/qdio_main.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index b200f27489a70..da969789936d5 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1375,13 +1375,10 @@ int qdio_establish(struct qdio_initialize *init_data) ccw_device_set_options_mask(cdev, 0); rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ESTABLISH, 0, 0); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { DBF_ERROR("%4x est IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); - } - spin_unlock_irq(get_ccwdev_lock(cdev)); - - if (rc) { mutex_unlock(&irq_ptr->setup_mutex); qdio_shutdown(cdev, QDIO_FLAG_CLEANUP_USING_CLEAR); return rc; @@ -1447,14 +1444,12 @@ int qdio_activate(struct ccw_device *cdev) rc = ccw_device_start(cdev, &irq_ptr->ccw, QDIO_DOING_ACTIVATE, 0, DOIO_DENY_PREFETCH); + spin_unlock_irq(get_ccwdev_lock(cdev)); if (rc) { DBF_ERROR("%4x act IO ERR", irq_ptr->schid.sch_no); DBF_ERROR("rc:%4x", rc); - } - spin_unlock_irq(get_ccwdev_lock(cdev)); - - if (rc) goto out; + } if (is_thinint_irq(irq_ptr)) tiqdio_add_input_queues(irq_ptr); From 6e30c549f6cab7a41d0934cea80822a2211132ae Mon Sep 17 00:00:00 2001 From: Sebastian Ott <sebott@linux.vnet.ibm.com> Date: Tue, 2 Aug 2016 16:01:39 +0200 Subject: [PATCH 053/513] s390/qdio: remove checks for ccw device internal state Prior to starting IO qdio checks for the internal state of the ccw device. These checks happen without locking, so consistency between state evaluation and starting of the IO is not guaranteed. Since the internal state is checked during ccw_device_start it is safe to get rid of these additional checks. Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/qdio_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index da969789936d5..fda3b2617ec94 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -1352,9 +1352,6 @@ int qdio_establish(struct qdio_initialize *init_data) if (!irq_ptr) return -ENODEV; - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - mutex_lock(&irq_ptr->setup_mutex); qdio_setup_irq(init_data); @@ -1425,9 +1422,6 @@ int qdio_activate(struct ccw_device *cdev) if (!irq_ptr) return -ENODEV; - if (cdev->private->state != DEV_STATE_ONLINE) - return -EINVAL; - mutex_lock(&irq_ptr->setup_mutex); if (irq_ptr->state == QDIO_IRQ_STATE_INACTIVE) { rc = -EBUSY; From 9bce8b2cbebf9f11b41021ccb98c6b18d1796edd Mon Sep 17 00:00:00 2001 From: Ursula Braun <ursula.braun@de.ibm.com> Date: Fri, 5 Aug 2016 12:33:10 +0200 Subject: [PATCH 054/513] s390/qdio: avoid reschedule of outbound tasklet once killed During qdio_shutdown the queue tasklets are killed for all inbound and outbound queues. The queue structures might be freed after qdio_shutdown. Thus it must be guaranteed that these queue tasklets are not rescheduled after that. In addition the outbound queue timers are deleted and it must be guaranteed that these timers are not restarted after qdio_shutdown processing. Timer deletion should make use of del_timer_sync() to make sure qdio_outbound_timer() is finished on other CPUs as well. Queue tasklets should be scheduled in state QDIO_IRQ_STATE_ACTIVE only. Signed-off-by: Ursula Braun <ubraun@linux.vnet.ibm.com> Reviewed-by: Benjamin Block <bblock@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/qdio_main.c | 49 ++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/drivers/s390/cio/qdio_main.c b/drivers/s390/cio/qdio_main.c index fda3b2617ec94..71bf9bded4851 100644 --- a/drivers/s390/cio/qdio_main.c +++ b/drivers/s390/cio/qdio_main.c @@ -686,6 +686,15 @@ static void qdio_kick_handler(struct qdio_q *q) q->qdio_error = 0; } +static inline int qdio_tasklet_schedule(struct qdio_q *q) +{ + if (likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) { + tasklet_schedule(&q->tasklet); + return 0; + } + return -EPERM; +} + static void __qdio_inbound_processing(struct qdio_q *q) { qperf_inc(q, tasklet_inbound); @@ -698,10 +707,8 @@ static void __qdio_inbound_processing(struct qdio_q *q) if (!qdio_inbound_q_done(q)) { /* means poll time is not yet over */ qperf_inc(q, tasklet_inbound_resched); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { - tasklet_schedule(&q->tasklet); + if (!qdio_tasklet_schedule(q)) return; - } } qdio_stop_polling(q); @@ -711,8 +718,7 @@ static void __qdio_inbound_processing(struct qdio_q *q) */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -869,16 +875,15 @@ static void __qdio_outbound_processing(struct qdio_q *q) * is noticed and outbound_handler is called after some time. */ if (qdio_outbound_q_done(q)) - del_timer(&q->u.out.timer); + del_timer_sync(&q->u.out.timer); else - if (!timer_pending(&q->u.out.timer)) + if (!timer_pending(&q->u.out.timer) && + likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) mod_timer(&q->u.out.timer, jiffies + 10 * HZ); return; sched: - if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) - return; - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } /* outbound tasklet */ @@ -892,9 +897,7 @@ void qdio_outbound_timer(unsigned long data) { struct qdio_q *q = (struct qdio_q *)data; - if (unlikely(q->irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) - return; - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) @@ -907,7 +910,7 @@ static inline void qdio_check_outbound_after_thinint(struct qdio_q *q) for_each_output_queue(q->irq_ptr, out, i) if (!qdio_outbound_q_done(out)) - tasklet_schedule(&out->tasklet); + qdio_tasklet_schedule(out); } static void __tiqdio_inbound_processing(struct qdio_q *q) @@ -929,10 +932,8 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) { - tasklet_schedule(&q->tasklet); + if (!qdio_tasklet_schedule(q)) return; - } } qdio_stop_polling(q); @@ -942,8 +943,7 @@ static void __tiqdio_inbound_processing(struct qdio_q *q) */ if (!qdio_inbound_q_done(q)) { qperf_inc(q, tasklet_inbound_resched2); - if (likely(q->irq_ptr->state != QDIO_IRQ_STATE_STOPPED)) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -977,7 +977,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) int i; struct qdio_q *q; - if (unlikely(irq_ptr->state == QDIO_IRQ_STATE_STOPPED)) + if (unlikely(irq_ptr->state != QDIO_IRQ_STATE_ACTIVE)) return; for_each_input_queue(irq_ptr, q, i) { @@ -1003,7 +1003,7 @@ static void qdio_int_handler_pci(struct qdio_irq *irq_ptr) continue; if (need_siga_sync(q) && need_siga_sync_out_after_pci(q)) qdio_siga_sync_q(q); - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); } } @@ -1145,7 +1145,7 @@ static void qdio_shutdown_queues(struct ccw_device *cdev) tasklet_kill(&q->tasklet); for_each_output_queue(irq_ptr, q, i) { - del_timer(&q->u.out.timer); + del_timer_sync(&q->u.out.timer); tasklet_kill(&q->tasklet); } } @@ -1585,10 +1585,11 @@ static int handle_outbound(struct qdio_q *q, unsigned int callflags, /* in case of SIGA errors we must process the error immediately */ if (used >= q->u.out.scan_threshold || rc) - tasklet_schedule(&q->tasklet); + qdio_tasklet_schedule(q); else /* free the SBALs in case of no further traffic */ - if (!timer_pending(&q->u.out.timer)) + if (!timer_pending(&q->u.out.timer) && + likely(q->irq_ptr->state == QDIO_IRQ_STATE_ACTIVE)) mod_timer(&q->u.out.timer, jiffies + HZ); return rc; } From f743e70e8f8f3200dde1dcf50c14aa2900d65071 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert@linux-m68k.org> Date: Wed, 3 Aug 2016 19:37:03 +0200 Subject: [PATCH 055/513] crypto: sha3 - Add missing ULL suffixes for 64-bit constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On 32-bit (e.g. with m68k-linux-gnu-gcc-4.1): crypto/sha3_generic.c:27: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:28: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:29: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:29: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:31: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:31: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:32: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:32: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:32: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:33: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:33: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:34: warning: integer constant is too large for ‘long’ type crypto/sha3_generic.c:34: warning: integer constant is too large for ‘long’ type Fixes: 53964b9ee63b7075 ("crypto: sha3 - Add SHA-3 hash algorithm") Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- crypto/sha3_generic.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/crypto/sha3_generic.c b/crypto/sha3_generic.c index 62264397a2d28..7e8ed96236cef 100644 --- a/crypto/sha3_generic.c +++ b/crypto/sha3_generic.c @@ -24,14 +24,14 @@ #define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y)))) static const u64 keccakf_rndc[24] = { - 0x0000000000000001, 0x0000000000008082, 0x800000000000808a, - 0x8000000080008000, 0x000000000000808b, 0x0000000080000001, - 0x8000000080008081, 0x8000000000008009, 0x000000000000008a, - 0x0000000000000088, 0x0000000080008009, 0x000000008000000a, - 0x000000008000808b, 0x800000000000008b, 0x8000000000008089, - 0x8000000000008003, 0x8000000000008002, 0x8000000000000080, - 0x000000000000800a, 0x800000008000000a, 0x8000000080008081, - 0x8000000000008080, 0x0000000080000001, 0x8000000080008008 + 0x0000000000000001ULL, 0x0000000000008082ULL, 0x800000000000808aULL, + 0x8000000080008000ULL, 0x000000000000808bULL, 0x0000000080000001ULL, + 0x8000000080008081ULL, 0x8000000000008009ULL, 0x000000000000008aULL, + 0x0000000000000088ULL, 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, + 0x8000000000008003ULL, 0x8000000000008002ULL, 0x8000000000000080ULL, + 0x000000000000800aULL, 0x800000008000000aULL, 0x8000000080008081ULL, + 0x8000000000008080ULL, 0x0000000080000001ULL, 0x8000000080008008ULL }; static const int keccakf_rotc[24] = { From 1d2d87e81ea21f64c19b95ef228b865a6880e17e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com> Date: Thu, 4 Aug 2016 20:02:46 +0300 Subject: [PATCH 056/513] crypto: caam - fix echainiv(authenc) encrypt shared descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are a few things missed by the conversion to the new AEAD interface: 1 - echainiv(authenc) encrypt shared descriptor The shared descriptor is incorrect: due to the order of operations, at some point in time MATH3 register is being overwritten. 2 - buffer used for echainiv(authenc) encrypt shared descriptor Encrypt and givencrypt shared descriptors (for AEAD ops) are mutually exclusive and thus use the same buffer in context state: sh_desc_enc. However, there's one place missed by s/sh_desc_givenc/sh_desc_enc, leading to errors when echainiv(authenc(...)) algorithms are used: DECO: desc idx 14: Header Error. Invalid length or parity, or certain other problems. While here, also fix a typo: dma_mapping_error() is checking for validity of sh_desc_givenc_dma instead of sh_desc_enc_dma. Cc: <stable@vger.kernel.org> # 4.3+ Fixes: 479bcc7c5b9e ("crypto: caam - Convert authenc to new AEAD interface") Signed-off-by: Horia Geantă <horia.geanta@nxp.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/caam/caamalg.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index ea8189f4b0212..e356005a72123 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -614,7 +614,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) keys_fit_inline = true; /* aead_givencrypt shared descriptor */ - desc = ctx->sh_desc_givenc; + desc = ctx->sh_desc_enc; /* Note: Context registers are saved. */ init_sh_desc_key_aead(desc, ctx, keys_fit_inline, is_rfc3686); @@ -645,13 +645,13 @@ static int aead_set_sh_desc(struct crypto_aead *aead) append_operation(desc, ctx->class2_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); - /* ivsize + cryptlen = seqoutlen - authsize */ - append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); - /* Read and write assoclen bytes */ append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + /* ivsize + cryptlen = seqoutlen - authsize */ + append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + /* Skip assoc data */ append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF); @@ -697,7 +697,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); - if (dma_mapping_error(jrdev, ctx->sh_desc_givenc_dma)) { + if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { dev_err(jrdev, "unable to map shared descriptor\n"); return -ENOMEM; } From 2fdea258fde036a87d3396ec9c0ef66f10768530 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= <horia.geanta@nxp.com> Date: Thu, 4 Aug 2016 20:02:47 +0300 Subject: [PATCH 057/513] crypto: caam - defer aead_set_sh_desc in case of zero authsize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to generate shared descriptors for AEAD, the authentication size needs to be known. However, there is no imposed order of calling .setkey, .setauthsize callbacks. Thus, in case authentication size is not known at .setkey time, defer it until .setauthsize is called. The authsize != 0 check was incorrectly removed when converting the driver to the new AEAD interface. Cc: <stable@vger.kernel.org> # 4.3+ Fixes: 479bcc7c5b9e ("crypto: caam - Convert authenc to new AEAD interface") Signed-off-by: Horia Geantă <horia.geanta@nxp.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/caam/caamalg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index e356005a72123..6dc597126b79e 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -441,6 +441,9 @@ static int aead_set_sh_desc(struct crypto_aead *aead) OP_ALG_AAI_CTR_MOD128); const bool is_rfc3686 = alg->caam.rfc3686; + if (!ctx->authsize) + return 0; + /* NULL encryption / decryption */ if (!ctx->enckeylen) return aead_null_set_sh_desc(aead); From dca3f53c02e325bb19dfc05b1571b9a706226fea Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Thu, 4 Aug 2016 12:11:55 +0200 Subject: [PATCH 058/513] sctp: Export struct sctp_info to userspace This is required to correctly interpret INET_DIAG_INFO messages exported by sctp_diag module. Signed-off-by: Phil Sutter <phil@nwl.cc> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/sctp.h | 64 --------------------------------------- include/uapi/linux/sctp.h | 64 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 64 deletions(-) diff --git a/include/linux/sctp.h b/include/linux/sctp.h index de1f64318fc4e..fcb4c36461732 100644 --- a/include/linux/sctp.h +++ b/include/linux/sctp.h @@ -705,70 +705,6 @@ typedef struct sctp_auth_chunk { sctp_authhdr_t auth_hdr; } __packed sctp_auth_chunk_t; -struct sctp_info { - __u32 sctpi_tag; - __u32 sctpi_state; - __u32 sctpi_rwnd; - __u16 sctpi_unackdata; - __u16 sctpi_penddata; - __u16 sctpi_instrms; - __u16 sctpi_outstrms; - __u32 sctpi_fragmentation_point; - __u32 sctpi_inqueue; - __u32 sctpi_outqueue; - __u32 sctpi_overall_error; - __u32 sctpi_max_burst; - __u32 sctpi_maxseg; - __u32 sctpi_peer_rwnd; - __u32 sctpi_peer_tag; - __u8 sctpi_peer_capable; - __u8 sctpi_peer_sack; - __u16 __reserved1; - - /* assoc status info */ - __u64 sctpi_isacks; - __u64 sctpi_osacks; - __u64 sctpi_opackets; - __u64 sctpi_ipackets; - __u64 sctpi_rtxchunks; - __u64 sctpi_outofseqtsns; - __u64 sctpi_idupchunks; - __u64 sctpi_gapcnt; - __u64 sctpi_ouodchunks; - __u64 sctpi_iuodchunks; - __u64 sctpi_oodchunks; - __u64 sctpi_iodchunks; - __u64 sctpi_octrlchunks; - __u64 sctpi_ictrlchunks; - - /* primary transport info */ - struct sockaddr_storage sctpi_p_address; - __s32 sctpi_p_state; - __u32 sctpi_p_cwnd; - __u32 sctpi_p_srtt; - __u32 sctpi_p_rto; - __u32 sctpi_p_hbinterval; - __u32 sctpi_p_pathmaxrxt; - __u32 sctpi_p_sackdelay; - __u32 sctpi_p_sackfreq; - __u32 sctpi_p_ssthresh; - __u32 sctpi_p_partial_bytes_acked; - __u32 sctpi_p_flight_size; - __u16 sctpi_p_error; - __u16 __reserved2; - - /* sctp sock info */ - __u32 sctpi_s_autoclose; - __u32 sctpi_s_adaptation_ind; - __u32 sctpi_s_pd_point; - __u8 sctpi_s_nodelay; - __u8 sctpi_s_disable_fragments; - __u8 sctpi_s_v4mapped; - __u8 sctpi_s_frag_interleave; - __u32 sctpi_s_type; - __u32 __reserved3; -}; - struct sctp_infox { struct sctp_info *sctpinfo; struct sctp_association *asoc; diff --git a/include/uapi/linux/sctp.h b/include/uapi/linux/sctp.h index d304f4c9792c4..a406adcc0793e 100644 --- a/include/uapi/linux/sctp.h +++ b/include/uapi/linux/sctp.h @@ -944,4 +944,68 @@ struct sctp_default_prinfo { __u16 pr_policy; }; +struct sctp_info { + __u32 sctpi_tag; + __u32 sctpi_state; + __u32 sctpi_rwnd; + __u16 sctpi_unackdata; + __u16 sctpi_penddata; + __u16 sctpi_instrms; + __u16 sctpi_outstrms; + __u32 sctpi_fragmentation_point; + __u32 sctpi_inqueue; + __u32 sctpi_outqueue; + __u32 sctpi_overall_error; + __u32 sctpi_max_burst; + __u32 sctpi_maxseg; + __u32 sctpi_peer_rwnd; + __u32 sctpi_peer_tag; + __u8 sctpi_peer_capable; + __u8 sctpi_peer_sack; + __u16 __reserved1; + + /* assoc status info */ + __u64 sctpi_isacks; + __u64 sctpi_osacks; + __u64 sctpi_opackets; + __u64 sctpi_ipackets; + __u64 sctpi_rtxchunks; + __u64 sctpi_outofseqtsns; + __u64 sctpi_idupchunks; + __u64 sctpi_gapcnt; + __u64 sctpi_ouodchunks; + __u64 sctpi_iuodchunks; + __u64 sctpi_oodchunks; + __u64 sctpi_iodchunks; + __u64 sctpi_octrlchunks; + __u64 sctpi_ictrlchunks; + + /* primary transport info */ + struct sockaddr_storage sctpi_p_address; + __s32 sctpi_p_state; + __u32 sctpi_p_cwnd; + __u32 sctpi_p_srtt; + __u32 sctpi_p_rto; + __u32 sctpi_p_hbinterval; + __u32 sctpi_p_pathmaxrxt; + __u32 sctpi_p_sackdelay; + __u32 sctpi_p_sackfreq; + __u32 sctpi_p_ssthresh; + __u32 sctpi_p_partial_bytes_acked; + __u32 sctpi_p_flight_size; + __u16 sctpi_p_error; + __u16 __reserved2; + + /* sctp sock info */ + __u32 sctpi_s_autoclose; + __u32 sctpi_s_adaptation_ind; + __u32 sctpi_s_pd_point; + __u8 sctpi_s_nodelay; + __u8 sctpi_s_disable_fragments; + __u8 sctpi_s_v4mapped; + __u8 sctpi_s_frag_interleave; + __u32 sctpi_s_type; + __u32 __reserved3; +}; + #endif /* _UAPI_SCTP_H */ From 12474e8e58d82e2b815cd35956c0c06fab104ee7 Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Thu, 4 Aug 2016 12:11:56 +0200 Subject: [PATCH 059/513] sctp_diag: Fix T3_rtx timer export The asoc's timer value is not kept in asoc->timeouts array but in it's primary transport instead. Furthermore, we must export the timer only if it is pending, otherwise the value will underrun when stored in an unsigned variable and user space will only see a very large timeout value. Signed-off-by: Phil Sutter <phil@nwl.cc> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sctp/sctp_diag.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f69edcf219e51..f728ef04a7b2d 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -13,6 +13,7 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, { union sctp_addr laddr, paddr; struct dst_entry *dst; + struct timer_list *t3_rtx = &asoc->peer.primary_path->T3_rtx_timer; laddr = list_entry(asoc->base.bind_addr.address_list.next, struct sctp_sockaddr_entry, list)->a; @@ -40,10 +41,15 @@ static void inet_diag_msg_sctpasoc_fill(struct inet_diag_msg *r, } r->idiag_state = asoc->state; - r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; - r->idiag_retrans = asoc->rtx_data_chunks; - r->idiag_expires = jiffies_to_msecs( - asoc->timeouts[SCTP_EVENT_TIMEOUT_T3_RTX] - jiffies); + if (timer_pending(t3_rtx)) { + r->idiag_timer = SCTP_EVENT_TIMEOUT_T3_RTX; + r->idiag_retrans = asoc->rtx_data_chunks; + r->idiag_expires = jiffies_to_msecs(t3_rtx->expires - jiffies); + } else { + r->idiag_timer = 0; + r->idiag_retrans = 0; + r->idiag_expires = 0; + } } static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, From 1ba8d77f410d3b7423df39e8a10a02af272d3c42 Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Thu, 4 Aug 2016 12:11:57 +0200 Subject: [PATCH 060/513] sctp_diag: Respect ss adding TCPF_CLOSE to idiag_states Since 'ss' always adds TCPF_CLOSE to idiag_states flags, sctp_diag can't rely upon TCPF_LISTEN flag solely being present when listening sockets are requested. Signed-off-by: Phil Sutter <phil@nwl.cc> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sctp/sctp_diag.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index f728ef04a7b2d..bb691538adc8e 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -356,7 +356,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) if (cb->args[4] < cb->args[1]) goto next; - if ((r->idiag_states & ~TCPF_LISTEN) && !list_empty(&ep->asocs)) + if (!(r->idiag_states & TCPF_LISTEN) && !list_empty(&ep->asocs)) goto next; if (r->sdiag_family != AF_UNSPEC && @@ -471,7 +471,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, * 3 : to mark if we have dumped the ep info of the current asoc * 4 : to work as a temporary variable to traversal list */ - if (!(idiag_states & ~TCPF_LISTEN)) + if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; sctp_for_each_transport(sctp_tsp_dump, net, cb->args[2], &commp); done: From 3b3bf80b994f0c6c35a25ef8965ab956b4bcced5 Mon Sep 17 00:00:00 2001 From: Phil Sutter <phil@nwl.cc> Date: Thu, 4 Aug 2016 12:37:17 +0200 Subject: [PATCH 061/513] rhashtable-test: Fix max_size parameter description Looks like a simple copy'n'paste error. Fixes: 1aa661f5c3df1 ("rhashtable-test: Measure time to insert, remove & traverse entries") Signed-off-by: Phil Sutter <phil@nwl.cc> Signed-off-by: David S. Miller <davem@davemloft.net> --- lib/test_rhashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 297fdb5e74bd0..64e899b633371 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -38,7 +38,7 @@ MODULE_PARM_DESC(runs, "Number of test runs per variant (default: 4)"); static int max_size = 0; module_param(max_size, int, 0); -MODULE_PARM_DESC(runs, "Maximum table size (default: calculated)"); +MODULE_PARM_DESC(max_size, "Maximum table size (default: calculated)"); static bool shrinking = false; module_param(shrinking, bool, 0); From b489a2000f19e414710d2887fe3e24e903242766 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Thu, 4 Aug 2016 17:36:20 +0300 Subject: [PATCH 062/513] mlxsw: spectrum: Do not assume PAUSE frames are disabled When ieee_setpfc() gets called, PAUSE frames are not necessarily disabled on the port. Check if PAUSE frames are disabled or enabled and configure the port's headroom buffer accordingly. Fixes: d81a6bdb87ce ("mlxsw: spectrum: Add IEEE 802.1Qbb PFC support") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 01cfb75128278..3c4a178730ae7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -351,17 +351,17 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + bool pause_en = mlxsw_sp_port_is_pause_en(mlxsw_sp_port); int err; - if ((mlxsw_sp_port->link.tx_pause || mlxsw_sp_port->link.rx_pause) && - pfc->pfc_en) { + if (pause_en && pfc->pfc_en) { netdev_err(dev, "PAUSE frames already enabled on port\n"); return -EINVAL; } err = __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, mlxsw_sp_port->dcb.ets->prio_tc, - false, pfc); + pause_en, pfc); if (err) { netdev_err(dev, "Failed to configure port's headroom for PFC\n"); return err; @@ -380,7 +380,7 @@ static int mlxsw_sp_dcbnl_ieee_setpfc(struct net_device *dev, err_port_pfc_set: __mlxsw_sp_port_headroom_set(mlxsw_sp_port, dev->mtu, - mlxsw_sp_port->dcb.ets->prio_tc, false, + mlxsw_sp_port->dcb.ets->prio_tc, pause_en, mlxsw_sp_port->dcb.pfc); return err; } From 07d50cae0661e5479d54d6e3e21cad15b1198103 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Thu, 4 Aug 2016 17:36:21 +0300 Subject: [PATCH 063/513] mlxsw: spectrum: Do not override PAUSE settings The PFCC register is used to configure both PAUSE and PFC frames. Therefore, when PFC frames are disabled we must make sure we don't mistakenly also disable PAUSE frames (which might be enabled). Fix this by packing the PFCC register with the current PAUSE settings. Note that this register is also accessed via ethtool ops, but there we are guaranteed to have PFC disabled. Fixes: d81a6bdb87ce ("mlxsw: spectrum: Add IEEE 802.1Qbb PFC support") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c index 3c4a178730ae7..b6ed7f7c531ee 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dcb.c @@ -341,6 +341,8 @@ static int mlxsw_sp_port_pfc_set(struct mlxsw_sp_port *mlxsw_sp_port, char pfcc_pl[MLXSW_REG_PFCC_LEN]; mlxsw_reg_pfcc_pack(pfcc_pl, mlxsw_sp_port->local_port); + mlxsw_reg_pfcc_pprx_set(pfcc_pl, mlxsw_sp_port->link.rx_pause); + mlxsw_reg_pfcc_pptx_set(pfcc_pl, mlxsw_sp_port->link.tx_pause); mlxsw_reg_pfcc_prio_pack(pfcc_pl, pfc->pfc_en); return mlxsw_reg_write(mlxsw_sp_port->mlxsw_sp->core, MLXSW_REG(pfcc), From 4de34eb5743f720dc4798f0647f75c21d44aa1f8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Thu, 4 Aug 2016 17:36:22 +0300 Subject: [PATCH 064/513] mlxsw: spectrum: Add missing DCB rollback in error path We correctly execute mlxsw_sp_port_dcb_fini() when port is removed, but I missed its rollback in the error path of port creation, so add it. Fixes: f00817df2b42 ("mlxsw: spectrum: Introduce support for Data Center Bridging (DCB)") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Reviewed-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index c3e61500819d0..e1b8f62ccaedd 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2220,6 +2220,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp_port_dcb_fini(mlxsw_sp_port); err_port_dcb_init: err_port_ets_init: err_port_buffers_init: From 66cf3504f4c528793b3d8986bab606f7cfb1c4bb Mon Sep 17 00:00:00 2001 From: Paul Gortmaker <paul.gortmaker@windriver.com> Date: Thu, 4 Aug 2016 16:07:58 -0400 Subject: [PATCH 065/513] net/ethernet: tundra: fix dump_eth_one warning in tsi108_eth The call site for this function appears as: #ifdef DEBUG data->msg_enable = DEBUG; dump_eth_one(dev); #endif ...leading to the following warning for !DEBUG builds: drivers/net/ethernet/tundra/tsi108_eth.c:169:13: warning: 'dump_eth_one' defined but not used [-Wunused-function] static void dump_eth_one(struct net_device *dev) ^ ...when using the arch/powerpc/configs/mpc7448_hpc2_defconfig Put the function definition under the same #ifdef as the call site to avoid the warning. Cc: "David S. Miller" <davem@davemloft.net> Cc: netdev@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/tundra/tsi108_eth.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index 01a77145a0fa4..8fd131207ee10 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -166,6 +166,7 @@ static struct platform_driver tsi_eth_driver = { static void tsi108_timed_checker(unsigned long dev_ptr); +#ifdef DEBUG static void dump_eth_one(struct net_device *dev) { struct tsi108_prv_data *data = netdev_priv(dev); @@ -190,6 +191,7 @@ static void dump_eth_one(struct net_device *dev) TSI_READ(TSI108_EC_RXESTAT), TSI_READ(TSI108_EC_RXERR), data->rxpending); } +#endif /* Synchronization is needed between the thread and up/down events. * Note that the PHY is accessed through the same registers for both From a2bfe6bf09a5f38df2bd0b1734f5fdee76f9366f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann <daniel@iogearbox.net> Date: Fri, 5 Aug 2016 00:11:11 +0200 Subject: [PATCH 066/513] bpf: also call skb_postpush_rcsum on xmit occasions Follow-up to commit f8ffad69c9f8 ("bpf: add skb_postpush_rcsum and fix dev_forward_skb occasions") to fix an issue for dev_queue_xmit() redirect locations which need CHECKSUM_COMPLETE fixups on ingress. For the same reasons as described in f8ffad69c9f8 already, we of course also need this here, since dev_queue_xmit() on a veth device will let us end up in the dev_forward_skb() helper again to cross namespaces. Latter then calls into skb_postpull_rcsum() to pull out L2 header, so that netif_rx_internal() sees CHECKSUM_COMPLETE as it is expected. That is, CHECKSUM_COMPLETE on ingress covering L2 _payload_, not L2 headers. Also here we have to address bpf_redirect() and bpf_clone_redirect(). Fixes: 3896d655f4d4 ("bpf: introduce bpf_clone_redirect() helper") Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/filter.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 5708999f8a794..c46244f83a8f9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1365,6 +1365,12 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, return err; } +static inline void bpf_push_mac_rcsum(struct sk_buff *skb) +{ + if (skb_at_tc_ingress(skb)) + skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); +} + static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); @@ -1607,9 +1613,6 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { - if (skb_at_tc_ingress(skb)) - skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); - return dev_forward_skb(dev, skb); } @@ -1648,6 +1651,8 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb)) return -ENOMEM; + bpf_push_mac_rcsum(skb); + return flags & BPF_F_INGRESS ? __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } @@ -1693,6 +1698,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } + bpf_push_mac_rcsum(skb); + return ri->flags & BPF_F_INGRESS ? __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); } From 479ffcccefd7b04442b0e949f8779b0612e8c75f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann <daniel@iogearbox.net> Date: Fri, 5 Aug 2016 00:11:12 +0200 Subject: [PATCH 067/513] bpf: fix checksum fixups on bpf_skb_store_bytes bpf_skb_store_bytes() invocations above L2 header need BPF_F_RECOMPUTE_CSUM flag for updates, so that CHECKSUM_COMPLETE will be fixed up along the way. Where we ran into an issue with bpf_skb_store_bytes() is when we did a single-byte update on the IPv6 hoplimit despite using BPF_F_RECOMPUTE_CSUM flag; simple ping via ICMPv6 triggered a hw csum failure as a result. The underlying issue has been tracked down to a buffer alignment issue. Meaning, that csum_partial() computations via skb_postpull_rcsum() and skb_postpush_rcsum() pair invoked had a wrong result since they operated on an odd address for the hoplimit, while other computations were done on an even address. This mix doesn't work as-is with skb_postpull_rcsum(), skb_postpush_rcsum() pair as it always expects at least half-word alignment of input buffers, which is normally the case. Thus, instead of these helpers using csum_sub() and (implicitly) csum_add(), we need to use csum_block_sub(), csum_block_add(), respectively. For unaligned offsets, they rotate the sum to align it to a half-word boundary again, otherwise they work the same as csum_sub() and csum_add(). Adding __skb_postpull_rcsum(), __skb_postpush_rcsum() variants that take the offset as an input and adapting bpf_skb_store_bytes() to them fixes the hw csum failures again. The skb_postpull_rcsum(), skb_postpush_rcsum() helpers use a 0 constant for offset so that the compiler optimizes the offset & 1 test away and generates the same code as with csum_sub()/_add(). Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/skbuff.h | 52 +++++++++++++++++++++++++++--------------- net/core/filter.c | 4 ++-- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f0b3e0adc736..0f665cb26b505 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2847,6 +2847,18 @@ static inline int skb_linearize_cow(struct sk_buff *skb) __skb_linearize(skb) : 0; } +static __always_inline void +__skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len, + unsigned int off) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_block_sub(skb->csum, + csum_partial(start, len, 0), off); + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; +} + /** * skb_postpull_rcsum - update checksum for received skb after pull * @skb: buffer to update @@ -2857,36 +2869,38 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * update the CHECKSUM_COMPLETE checksum, or set ip_summed to * CHECKSUM_NONE so that it can be recomputed from scratch. */ - static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); - else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) < 0) - skb->ip_summed = CHECKSUM_NONE; + __skb_postpull_rcsum(skb, start, len, 0); } -unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); +static __always_inline void +__skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len, + unsigned int off) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_block_add(skb->csum, + csum_partial(start, len, 0), off); +} +/** + * skb_postpush_rcsum - update checksum for received skb after push + * @skb: buffer to update + * @start: start of data after push + * @len: length of data pushed + * + * After doing a push on a received packet, you need to call this to + * update the CHECKSUM_COMPLETE checksum. + */ static inline void skb_postpush_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - /* For performing the reverse operation to skb_postpull_rcsum(), - * we can instead of ... - * - * skb->csum = csum_add(skb->csum, csum_partial(start, len, 0)); - * - * ... just use this equivalent version here to save a few - * instructions. Feeding csum of 0 in csum_partial() and later - * on adding skb->csum is equivalent to feed skb->csum in the - * first place. - */ - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_partial(start, len, skb->csum); + __skb_postpush_rcsum(skb, start, len, 0); } +unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); + /** * skb_push_rcsum - push skb and update receive checksum * @skb: buffer to update diff --git a/net/core/filter.c b/net/core/filter.c index c46244f83a8f9..5ecd5c9836c37 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1401,7 +1401,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) return -EFAULT; if (flags & BPF_F_RECOMPUTE_CSUM) - skb_postpull_rcsum(skb, ptr, len); + __skb_postpull_rcsum(skb, ptr, len, offset); memcpy(ptr, from, len); @@ -1410,7 +1410,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) skb_store_bits(skb, offset, ptr, len); if (flags & BPF_F_RECOMPUTE_CSUM) - skb_postpush_rcsum(skb, ptr, len); + __skb_postpush_rcsum(skb, ptr, len, offset); if (flags & BPF_F_INVALIDATE_HASH) skb_clear_hash(skb); From 8065694e6519d234ccee93d952127e3f05b81ba5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann <daniel@iogearbox.net> Date: Fri, 5 Aug 2016 00:11:13 +0200 Subject: [PATCH 068/513] bpf: fix checksum for vlan push/pop helper When having skbs on ingress with CHECKSUM_COMPLETE, tc BPF programs don't push rcsum of mac header back in and after BPF run back pull out again as opposed to some other subsystems (ovs, for example). For cases like q-in-q, meaning when a vlan tag for offloading is already present and we're about to push another one, then skb_vlan_push() pushes the inner one into the skb, increasing mac header and skb_postpush_rcsum()'ing the 4 bytes vlan header diff. Likewise, for the reverse operation in skb_vlan_pop() for the case where vlan header needs to be pulled out of the skb, we're decreasing the mac header and skb_postpull_rcsum()'ing the 4 bytes rcsum of the vlan header that was removed. However mangling the rcsum here will lead to hw csum failure for BPF case, since we're pulling or pushing data that was not part of the current rcsum. Changing tc BPF programs in general to push/pull rcsum around BPF_PROG_RUN() is also not really an option since current behaviour is ABI by now, but apart from that would also mean to do quite a bit of useless work in the sense that usually 12 bytes need to be rcsum pushed/pulled also when we don't need to touch this vlan related corner case. One way to fix it would be to push the necessary rcsum fixup down into vlan helpers that are (mostly) slow-path anyway. Fixes: 4e10df9a60d9 ("bpf: introduce bpf_skb_vlan_push/pop() helpers") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/filter.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/net/core/filter.c b/net/core/filter.c index 5ecd5c9836c37..b5add4ef0d1d2 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1371,6 +1371,12 @@ static inline void bpf_push_mac_rcsum(struct sk_buff *skb) skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); } +static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) +{ + if (skb_at_tc_ingress(skb)) + skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); +} + static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); @@ -1763,7 +1769,10 @@ static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) vlan_proto != htons(ETH_P_8021AD))) vlan_proto = htons(ETH_P_8021Q); + bpf_push_mac_rcsum(skb); ret = skb_vlan_push(skb, vlan_proto, vlan_tci); + bpf_pull_mac_rcsum(skb); + bpf_compute_data_end(skb); return ret; } @@ -1783,7 +1792,10 @@ static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; int ret; + bpf_push_mac_rcsum(skb); ret = skb_vlan_pop(skb); + bpf_pull_mac_rcsum(skb); + bpf_compute_data_end(skb); return ret; } From 272d96a5ab10662691b4ec90c4a66fdbf30ea7ba Mon Sep 17 00:00:00 2001 From: pravin shelar <pshelar@ovn.org> Date: Fri, 5 Aug 2016 17:45:36 -0700 Subject: [PATCH 069/513] net: vxlan: lwt: Use source ip address during route lookup. LWT user can specify destination as well as source ip address for given tunnel endpoint. But vxlan is ignoring given source ip address. Following patch uses both ip address to route the tunnel packet. This consistent with other LWT implementations, like GENEVE and GRE. Fixes: ee122c79d42 ("vxlan: Flow based tunneling"). Signed-off-by: Pravin B Shelar <pshelar@ovn.org> Acked-by: Jiri Benc <jbenc@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/vxlan.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index da4e3d6632f64..b81223422cf07 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1811,7 +1811,7 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, fl4.flowi4_mark = skb->mark; fl4.flowi4_proto = IPPROTO_UDP; fl4.daddr = daddr; - fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr; + fl4.saddr = *saddr; rt = ip_route_output_key(vxlan->net, &fl4); if (!IS_ERR(rt)) { @@ -1847,7 +1847,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; fl6.daddr = *daddr; - fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr; + fl6.saddr = *saddr; fl6.flowlabel = ip6_make_flowinfo(RT_TOS(tos), label); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; @@ -1920,7 +1920,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, struct rtable *rt = NULL; const struct iphdr *old_iph; union vxlan_addr *dst; - union vxlan_addr remote_ip; + union vxlan_addr remote_ip, local_ip; + union vxlan_addr *src; struct vxlan_metadata _md; struct vxlan_metadata *md = &_md; __be16 src_port = 0, dst_port; @@ -1938,6 +1939,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; vni = rdst->remote_vni; dst = &rdst->remote_ip; + src = &vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; } else { if (!info) { @@ -1948,11 +1950,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; vni = vxlan_tun_id_to_vni(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); - if (remote_ip.sa.sa_family == AF_INET) + if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; - else + local_ip.sin.sin_addr.s_addr = info->key.u.ipv4.src; + } else { remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst; + local_ip.sin6.sin6_addr = info->key.u.ipv6.src; + } dst = &remote_ip; + src = &local_ip; dst_cache = &info->dst_cache; } @@ -1992,15 +1998,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } if (dst->sa.sa_family == AF_INET) { - __be32 saddr; - if (!vxlan->vn4_sock) goto drop; sk = vxlan->vn4_sock->sock->sk; rt = vxlan_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - dst->sin.sin_addr.s_addr, &saddr, + dst->sin.sin_addr.s_addr, + &src->sin.sin_addr.s_addr, dst_cache, info); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", @@ -2043,13 +2048,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (err < 0) goto xmit_tx_error; - udp_tunnel_xmit_skb(rt, sk, skb, saddr, + udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr, dst->sin.sin_addr.s_addr, tos, ttl, df, src_port, dst_port, xnet, !udp_sum); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; - struct in6_addr saddr; u32 rt6i_flags; if (!vxlan->vn6_sock) @@ -2058,7 +2062,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ndst = vxlan6_get_route(vxlan, skb, rdst ? rdst->remote_ifindex : 0, tos, - label, &dst->sin6.sin6_addr, &saddr, + label, &dst->sin6.sin6_addr, + &src->sin6.sin6_addr, dst_cache, info); if (IS_ERR(ndst)) { netdev_dbg(dev, "no route to %pI6\n", @@ -2104,7 +2109,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, - &saddr, &dst->sin6.sin6_addr, tos, ttl, + &src->sin6.sin6_addr, + &dst->sin6.sin6_addr, tos, ttl, label, src_port, dst_port, !udp_sum); #endif } From bbec7802c6948c8626b71a4fe31283cb4691c358 Mon Sep 17 00:00:00 2001 From: pravin shelar <pshelar@ovn.org> Date: Fri, 5 Aug 2016 17:45:37 -0700 Subject: [PATCH 070/513] net: vxlan: lwt: Fix vxlan local traffic. vxlan driver has bypass for local vxlan traffic, but that depends on information about all VNIs on local system in vxlan driver. This is not available in case of LWT. Therefore following patch disable encap bypass for LWT vxlan traffic. Fixes: ee122c79d42 ("vxlan: Flow based tunneling"). Reported-by: Jakub Libosvar <jlibosva@redhat.com> Signed-off-by: Pravin B Shelar <pshelar@ovn.org> Acked-by: Jiri Benc <jbenc@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/vxlan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index b81223422cf07..c0dda6fc09217 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2022,7 +2022,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } /* Bypass encapsulation if the destination is local */ - if (rt->rt_flags & RTCF_LOCAL && + if (!info && rt->rt_flags & RTCF_LOCAL && !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { struct vxlan_dev *dst_vxlan; @@ -2082,7 +2082,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, /* Bypass encapsulation if the destination is local */ rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags; - if (rt6i_flags & RTF_LOCAL && + if (!info && rt6i_flags & RTF_LOCAL && !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) { struct vxlan_dev *dst_vxlan; From 1fe323aa1b2390a0c57fb0b06a782f128d49094c Mon Sep 17 00:00:00 2001 From: Xin Long <lucien.xin@gmail.com> Date: Sun, 7 Aug 2016 14:15:13 +0800 Subject: [PATCH 071/513] sctp: use event->chunk when it's valid Commit 52253db924d1 ("sctp: also point GSO head_skb to the sk when it's available") used event->chunk->head_skb to get the head_skb in sctp_ulpevent_set_owner(). But at that moment, the event->chunk was NULL, as it cloned the skb in sctp_ulpevent_make_rcvmsg(). Therefore, that patch didn't really work. This patch is to move the event->chunk initialization before calling sctp_ulpevent_receive_data() so that it uses event->chunk when it's valid. Fixes: 52253db924d1 ("sctp: also point GSO head_skb to the sk when it's available") Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sctp/ulpevent.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 1bc4f71aaba86..d85b803da11d2 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -702,14 +702,14 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, */ sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff)); - sctp_ulpevent_receive_data(event, asoc); - /* And hold the chunk as we need it for getting the IP headers * later in recvmsg */ sctp_chunk_hold(chunk); event->chunk = chunk; + sctp_ulpevent_receive_data(event, asoc); + event->stream = ntohs(chunk->subh.data_hdr->stream); event->ssn = ntohs(chunk->subh.data_hdr->ssn); event->ppid = chunk->subh.data_hdr->ppid; From fa00c437eef8dc2e7b25f8cd868cfa405fcc2bb3 Mon Sep 17 00:00:00 2001 From: Dave Carroll <david.carroll@microsemi.com> Date: Fri, 5 Aug 2016 13:44:10 -0600 Subject: [PATCH 072/513] aacraid: Check size values after double-fetch from user In aacraid's ioctl_send_fib() we do two fetches from userspace, one the get the fib header's size and one for the fib itself. Later we use the size field from the second fetch to further process the fib. If for some reason the size from the second fetch is different than from the first fix, we may encounter an out-of- bounds access in aac_fib_send(). We also check the sender size to insure it is not out of bounds. This was reported in https://bugzilla.kernel.org/show_bug.cgi?id=116751 and was assigned CVE-2016-6480. Reported-by: Pengfei Wang <wpengfeinudt@gmail.com> Fixes: 7c00ffa31 '[SCSI] 2.6 aacraid: Variable FIB size (updated patch)' Cc: stable@vger.kernel.org Signed-off-by: Dave Carroll <david.carroll@microsemi.com> Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/aacraid/commctrl.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/aacraid/commctrl.c b/drivers/scsi/aacraid/commctrl.c index b381b3718a98f..5648b715fed9c 100644 --- a/drivers/scsi/aacraid/commctrl.c +++ b/drivers/scsi/aacraid/commctrl.c @@ -63,7 +63,7 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg) struct fib *fibptr; struct hw_fib * hw_fib = (struct hw_fib *)0; dma_addr_t hw_fib_pa = (dma_addr_t)0LL; - unsigned size; + unsigned int size, osize; int retval; if (dev->in_reset) { @@ -87,7 +87,8 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg) * will not overrun the buffer when we copy the memory. Return * an error if we would. */ - size = le16_to_cpu(kfib->header.Size) + sizeof(struct aac_fibhdr); + osize = size = le16_to_cpu(kfib->header.Size) + + sizeof(struct aac_fibhdr); if (size < le16_to_cpu(kfib->header.SenderSize)) size = le16_to_cpu(kfib->header.SenderSize); if (size > dev->max_fib_size) { @@ -118,6 +119,14 @@ static int ioctl_send_fib(struct aac_dev * dev, void __user *arg) goto cleanup; } + /* Sanity check the second copy */ + if ((osize != le16_to_cpu(kfib->header.Size) + + sizeof(struct aac_fibhdr)) + || (size < le16_to_cpu(kfib->header.SenderSize))) { + retval = -EINVAL; + goto cleanup; + } + if (kfib->header.Command == cpu_to_le16(TakeABreakPt)) { aac_adapter_interrupt(dev); /* From c39b487f195b93235ee76384427467786f7bf29f Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 9 Aug 2016 00:20:28 -0400 Subject: [PATCH 073/513] Revert "drm/amdgpu: work around lack of upstream ACPI support for D3cold" This reverts commit c63695cc5e5f685e924e25a8f9555f6e846f1fc6. Now that d3cold support is upstream, there is no more need for this workaround. bug: https://bugs.freedesktop.org/show_bug.cgi?id=97248 --- drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c index 49de92600074d..10b5ddf2c5887 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c @@ -200,16 +200,7 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { printk("ATPX Hybrid Graphics\n"); -#if 1 - /* This is a temporary hack until the D3 cold support - * makes it upstream. The ATPX power_control method seems - * to still work on even if the system should be using - * the new standardized hybrid D3 cold ACPI interface. - */ - atpx->functions.power_cntl = true; -#else atpx->functions.power_cntl = false; -#endif atpx->is_hybrid = true; } From b817634276f7f68c9d1d6d4a27117ff3c2f16956 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Tue, 9 Aug 2016 00:21:45 -0400 Subject: [PATCH 074/513] Revert "drm/radeon: work around lack of upstream ACPI support for D3cold" This reverts commit bdfb76040068d960cb9e226876be8a508d741c4a. Now that d3cold is upstream, there is no more need for this workaround. --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 6de3428612027..ddef0d4940843 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -198,16 +198,7 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx) atpx->is_hybrid = false; if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) { printk("ATPX Hybrid Graphics\n"); -#if 1 - /* This is a temporary hack until the D3 cold support - * makes it upstream. The ATPX power_control method seems - * to still work on even if the system should be using - * the new standardized hybrid D3 cold ACPI interface. - */ - atpx->functions.power_cntl = true; -#else atpx->functions.power_cntl = false; -#endif atpx->is_hybrid = true; } From c0c45a6bd7d054efd80c1033bf4285830c72835b Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Date: Mon, 8 Aug 2016 21:57:40 -0400 Subject: [PATCH 075/513] qed: Remove the endian-ness conversion for pri_to_tc value. Endian-ness conversion is not needed for priority-to-TC field as the field is already being read/written by the driver in big-endian way. Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index d0dc28f93c0e2..68693309f59ef 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -483,7 +483,7 @@ qed_dcbx_get_ets_data(struct qed_hwfn *p_hwfn, bw_map[1] = be32_to_cpu(p_ets->tc_bw_tbl[1]); tsa_map[0] = be32_to_cpu(p_ets->tc_tsa_tbl[0]); tsa_map[1] = be32_to_cpu(p_ets->tc_tsa_tbl[1]); - pri_map = be32_to_cpu(p_ets->pri_tc_tbl[0]); + pri_map = p_ets->pri_tc_tbl[0]; for (i = 0; i < QED_MAX_PFC_PRIORITIES; i++) { p_params->ets_tc_bw_tbl[i] = ((u8 *)bw_map)[i]; p_params->ets_tc_tsa_tbl[i] = ((u8 *)tsa_map)[i]; @@ -944,7 +944,6 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn, val = (((u32)p_params->ets_pri_tc_tbl[i]) << ((7 - i) * 4)); p_ets->pri_tc_tbl[0] |= val; } - p_ets->pri_tc_tbl[0] = cpu_to_be32(p_ets->pri_tc_tbl[0]); for (i = 0; i < 2; i++) { p_ets->tc_bw_tbl[i] = cpu_to_be32(p_ets->tc_bw_tbl[i]); p_ets->tc_tsa_tbl[i] = cpu_to_be32(p_ets->tc_tsa_tbl[i]); From fb9ea8a9b70c79f38d2758c25d3acff4a2cd5bfb Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Date: Mon, 8 Aug 2016 21:57:41 -0400 Subject: [PATCH 076/513] qed: Use ieee mfw-mask to get ethtype in ieee-dcbx mode. Ethtype value is being read incorrectly in ieee-dcbx mode. Use the correct mfw mask value. Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 88 ++++++++++++++-------- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 8 ++ 2 files changed, 66 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 68693309f59ef..f07f0ac239f67 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -52,16 +52,33 @@ static bool qed_dcbx_app_ethtype(u32 app_info_bitmap) DCBX_APP_SF_ETHTYPE); } +static bool qed_dcbx_ieee_app_ethtype(u32 app_info_bitmap) +{ + u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE); + + /* Old MFW */ + if (mfw_val == DCBX_APP_SF_IEEE_RESERVED) + return qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(mfw_val == DCBX_APP_SF_IEEE_ETHTYPE); +} + static bool qed_dcbx_app_port(u32 app_info_bitmap) { return !!(QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF) == DCBX_APP_SF_PORT); } -static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_DEFAULT); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT)); } static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) @@ -70,16 +87,28 @@ static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) proto_id == QED_TCP_PORT_ISCSI); } -static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_FCOE); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_FCOE)); } -static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_ethtype(app_info_bitmap) && - proto_id == QED_ETH_TYPE_ROCE); + bool ethtype; + + if (ieee) + ethtype = qed_dcbx_ieee_app_ethtype(app_info_bitmap); + else + ethtype = qed_dcbx_app_ethtype(app_info_bitmap); + + return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE)); } static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id) @@ -164,15 +193,15 @@ qed_dcbx_update_app_info(struct qed_dcbx_results *p_data, static bool qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, u32 app_prio_bitmap, - u16 id, enum dcbx_protocol_type *type) + u16 id, enum dcbx_protocol_type *type, bool ieee) { - if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id)) { + if (qed_dcbx_fcoe_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_FCOE; - } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE; } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) { *type = DCBX_PROTOCOL_ISCSI; - } else if (qed_dcbx_default_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ETH; } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) { *type = DCBX_PROTOCOL_ROCE_V2; @@ -194,17 +223,18 @@ static int qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, struct qed_dcbx_results *p_data, struct dcbx_app_priority_entry *p_tbl, - u32 pri_tc_tbl, int count, bool dcbx_enabled) + u32 pri_tc_tbl, int count, u8 dcbx_version) { u8 tc, priority_map; enum dcbx_protocol_type type; + bool enable, ieee; u16 protocol_id; int priority; - bool enable; int i; DP_VERBOSE(p_hwfn, QED_MSG_DCB, "Num APP entries = %d\n", count); + ieee = (dcbx_version == DCBX_CONFIG_VERSION_IEEE); /* Parse APP TLV */ for (i = 0; i < count; i++) { protocol_id = QED_MFW_GET_FIELD(p_tbl[i].entry, @@ -219,7 +249,7 @@ qed_dcbx_process_tlv(struct qed_hwfn *p_hwfn, tc = QED_DCBX_PRIO2TC(pri_tc_tbl, priority); if (qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry, - protocol_id, &type)) { + protocol_id, &type, ieee)) { /* ETH always have the enable bit reset, as it gets * vlan information per packet. For other protocols, * should be set according to the dcbx_enabled @@ -275,15 +305,12 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) struct dcbx_ets_feature *p_ets; struct qed_hw_info *p_info; u32 pri_tc_tbl, flags; - bool dcbx_enabled; + u8 dcbx_version; int num_entries; int rc = 0; - /* If DCBx version is non zero, then negotiation was - * successfuly performed - */ flags = p_hwfn->p_dcbx_info->operational.flags; - dcbx_enabled = !!QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION); + dcbx_version = QED_MFW_GET_FIELD(flags, DCBX_CONFIG_VERSION); p_app = &p_hwfn->p_dcbx_info->operational.features.app; p_tbl = p_app->app_pri_tbl; @@ -295,13 +322,13 @@ static int qed_dcbx_process_mib_info(struct qed_hwfn *p_hwfn) num_entries = QED_MFW_GET_FIELD(p_app->flags, DCBX_APP_NUM_ENTRIES); rc = qed_dcbx_process_tlv(p_hwfn, &data, p_tbl, pri_tc_tbl, - num_entries, dcbx_enabled); + num_entries, dcbx_version); if (rc) return rc; p_info->num_tc = QED_MFW_GET_FIELD(p_ets->flags, DCBX_ETS_MAX_TCS); data.pf_id = p_hwfn->rel_pf_id; - data.dcbx_enabled = dcbx_enabled; + data.dcbx_enabled = !!dcbx_version; qed_dcbx_dp_protocol(p_hwfn, &data); @@ -400,7 +427,7 @@ static void qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, struct dcbx_app_priority_entry *p_tbl, - struct qed_dcbx_params *p_params) + struct qed_dcbx_params *p_params, bool ieee) { struct qed_app_entry *entry; u8 pri_map; @@ -422,7 +449,7 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, DCBX_APP_PROTOCOL_ID); qed_dcbx_get_app_protocol_type(p_hwfn, p_tbl[i].entry, entry->proto_id, - &entry->proto_type); + &entry->proto_type, ieee); } DP_VERBOSE(p_hwfn, QED_MSG_DCB, @@ -500,9 +527,9 @@ qed_dcbx_get_common_params(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, struct dcbx_app_priority_entry *p_tbl, struct dcbx_ets_feature *p_ets, - u32 pfc, struct qed_dcbx_params *p_params) + u32 pfc, struct qed_dcbx_params *p_params, bool ieee) { - qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params); + qed_dcbx_get_app_data(p_hwfn, p_app, p_tbl, p_params, ieee); qed_dcbx_get_ets_data(p_hwfn, p_ets, p_params); qed_dcbx_get_pfc_data(p_hwfn, pfc, p_params); } @@ -516,7 +543,7 @@ qed_dcbx_get_local_params(struct qed_hwfn *p_hwfn, p_feat = &p_hwfn->p_dcbx_info->local_admin.features; qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->local.params); + p_feat->pfc, ¶ms->local.params, false); params->local.valid = true; } @@ -529,7 +556,7 @@ qed_dcbx_get_remote_params(struct qed_hwfn *p_hwfn, p_feat = &p_hwfn->p_dcbx_info->remote.features; qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->remote.params); + p_feat->pfc, ¶ms->remote.params, false); params->remote.valid = true; } @@ -574,7 +601,8 @@ qed_dcbx_get_operational_params(struct qed_hwfn *p_hwfn, qed_dcbx_get_common_params(p_hwfn, &p_feat->app, p_feat->app.app_pri_tbl, &p_feat->ets, - p_feat->pfc, ¶ms->operational.params); + p_feat->pfc, ¶ms->operational.params, + p_operational->ieee); qed_dcbx_get_priority_info(p_hwfn, &p_operational->app_prio, p_results); err = QED_MFW_GET_FIELD(p_feat->app.flags, DCBX_APP_ERROR); p_operational->err = err; diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 592784019994f..6f9d3b831a2a0 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -6850,6 +6850,14 @@ struct dcbx_app_priority_entry { #define DCBX_APP_SF_SHIFT 8 #define DCBX_APP_SF_ETHTYPE 0 #define DCBX_APP_SF_PORT 1 +#define DCBX_APP_SF_IEEE_MASK 0x0000f000 +#define DCBX_APP_SF_IEEE_SHIFT 12 +#define DCBX_APP_SF_IEEE_RESERVED 0 +#define DCBX_APP_SF_IEEE_ETHTYPE 1 +#define DCBX_APP_SF_IEEE_TCP_PORT 2 +#define DCBX_APP_SF_IEEE_UDP_PORT 3 +#define DCBX_APP_SF_IEEE_TCP_UDP_PORT 4 + #define DCBX_APP_PROTOCOL_ID_MASK 0xffff0000 #define DCBX_APP_PROTOCOL_ID_SHIFT 16 }; From 59bcb7972fc5d53a621ee6b2c3cf1654cebb3dc5 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Date: Mon, 8 Aug 2016 21:57:42 -0400 Subject: [PATCH 077/513] qed: Add dcbx app support for IEEE Selection Field. MFW now supports the Selection field for IEEE mode. Add driver changes to use the newer MFW masks to read/write the port-id value. Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 124 +++++++++++++++++---- include/linux/qed/qed_if.h | 8 ++ 2 files changed, 112 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index f07f0ac239f67..b157a6aba277b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -69,6 +69,17 @@ static bool qed_dcbx_app_port(u32 app_info_bitmap) DCBX_APP_SF_PORT); } +static bool qed_dcbx_ieee_app_port(u32 app_info_bitmap, u8 type) +{ + u8 mfw_val = QED_MFW_GET_FIELD(app_info_bitmap, DCBX_APP_SF_IEEE); + + /* Old MFW */ + if (mfw_val == DCBX_APP_SF_IEEE_RESERVED) + return qed_dcbx_app_port(app_info_bitmap); + + return !!(mfw_val == type || mfw_val == DCBX_APP_SF_IEEE_TCP_UDP_PORT); +} + static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { bool ethtype; @@ -81,10 +92,17 @@ static bool qed_dcbx_default_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) return !!(ethtype && (proto_id == QED_ETH_TYPE_DEFAULT)); } -static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_iscsi_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_port(app_info_bitmap) && - proto_id == QED_TCP_PORT_ISCSI); + bool port; + + if (ieee) + port = qed_dcbx_ieee_app_port(app_info_bitmap, + DCBX_APP_SF_IEEE_TCP_PORT); + else + port = qed_dcbx_app_port(app_info_bitmap); + + return !!(port && (proto_id == QED_TCP_PORT_ISCSI)); } static bool qed_dcbx_fcoe_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) @@ -111,10 +129,17 @@ static bool qed_dcbx_roce_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) return !!(ethtype && (proto_id == QED_ETH_TYPE_ROCE)); } -static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id) +static bool qed_dcbx_roce_v2_tlv(u32 app_info_bitmap, u16 proto_id, bool ieee) { - return !!(qed_dcbx_app_port(app_info_bitmap) && - proto_id == QED_UDP_PORT_TYPE_ROCE_V2); + bool port; + + if (ieee) + port = qed_dcbx_ieee_app_port(app_info_bitmap, + DCBX_APP_SF_IEEE_UDP_PORT); + else + port = qed_dcbx_app_port(app_info_bitmap); + + return !!(port && (proto_id == QED_UDP_PORT_TYPE_ROCE_V2)); } static void @@ -199,11 +224,11 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn, *type = DCBX_PROTOCOL_FCOE; } else if (qed_dcbx_roce_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE; - } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_iscsi_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ISCSI; } else if (qed_dcbx_default_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ETH; - } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id)) { + } else if (qed_dcbx_roce_v2_tlv(app_prio_bitmap, id, ieee)) { *type = DCBX_PROTOCOL_ROCE_V2; } else { *type = DCBX_MAX_PROTOCOL_TYPE; @@ -441,8 +466,39 @@ qed_dcbx_get_app_data(struct qed_hwfn *p_hwfn, DCBX_APP_NUM_ENTRIES); for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_params->app_entry[i]; - entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry, - DCBX_APP_SF)); + if (ieee) { + u8 sf_ieee; + u32 val; + + sf_ieee = QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF_IEEE); + switch (sf_ieee) { + case DCBX_APP_SF_IEEE_RESERVED: + /* Old MFW */ + val = QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF); + entry->sf_ieee = val ? + QED_DCBX_SF_IEEE_TCP_UDP_PORT : + QED_DCBX_SF_IEEE_ETHTYPE; + break; + case DCBX_APP_SF_IEEE_ETHTYPE: + entry->sf_ieee = QED_DCBX_SF_IEEE_ETHTYPE; + break; + case DCBX_APP_SF_IEEE_TCP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_PORT; + break; + case DCBX_APP_SF_IEEE_UDP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_UDP_PORT; + break; + case DCBX_APP_SF_IEEE_TCP_UDP_PORT: + entry->sf_ieee = QED_DCBX_SF_IEEE_TCP_UDP_PORT; + break; + } + } else { + entry->ethtype = !(QED_MFW_GET_FIELD(p_tbl[i].entry, + DCBX_APP_SF)); + } + pri_map = QED_MFW_GET_FIELD(p_tbl[i].entry, DCBX_APP_PRI_MAP); entry->prio = ffs(pri_map) - 1; entry->proto_id = QED_MFW_GET_FIELD(p_tbl[i].entry, @@ -981,7 +1037,7 @@ qed_dcbx_set_ets_data(struct qed_hwfn *p_hwfn, static void qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, struct dcbx_app_priority_feature *p_app, - struct qed_dcbx_params *p_params) + struct qed_dcbx_params *p_params, bool ieee) { u32 *entry; int i; @@ -1002,12 +1058,36 @@ qed_dcbx_set_app_data(struct qed_hwfn *p_hwfn, for (i = 0; i < DCBX_MAX_APP_PROTOCOL; i++) { entry = &p_app->app_pri_tbl[i].entry; - *entry &= ~DCBX_APP_SF_MASK; - if (p_params->app_entry[i].ethtype) - *entry |= ((u32)DCBX_APP_SF_ETHTYPE << - DCBX_APP_SF_SHIFT); - else - *entry |= ((u32)DCBX_APP_SF_PORT << DCBX_APP_SF_SHIFT); + if (ieee) { + *entry &= ~DCBX_APP_SF_IEEE_MASK; + switch (p_params->app_entry[i].sf_ieee) { + case QED_DCBX_SF_IEEE_ETHTYPE: + *entry |= ((u32)DCBX_APP_SF_IEEE_ETHTYPE << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_TCP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_UDP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_UDP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + case QED_DCBX_SF_IEEE_TCP_UDP_PORT: + *entry |= ((u32)DCBX_APP_SF_IEEE_TCP_UDP_PORT << + DCBX_APP_SF_IEEE_SHIFT); + break; + } + } else { + *entry &= ~DCBX_APP_SF_MASK; + if (p_params->app_entry[i].ethtype) + *entry |= ((u32)DCBX_APP_SF_ETHTYPE << + DCBX_APP_SF_SHIFT); + else + *entry |= ((u32)DCBX_APP_SF_PORT << + DCBX_APP_SF_SHIFT); + } + *entry &= ~DCBX_APP_PROTOCOL_ID_MASK; *entry |= ((u32)p_params->app_entry[i].proto_id << DCBX_APP_PROTOCOL_ID_SHIFT); @@ -1022,15 +1102,19 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn, struct dcbx_local_params *local_admin, struct qed_dcbx_set *params) { + bool ieee = false; + local_admin->flags = 0; memcpy(&local_admin->features, &p_hwfn->p_dcbx_info->operational.features, sizeof(local_admin->features)); - if (params->enabled) + if (params->enabled) { local_admin->config = params->ver_num; - else + ieee = !!(params->ver_num & DCBX_CONFIG_VERSION_IEEE); + } else { local_admin->config = DCBX_CONFIG_VERSION_DISABLED; + } if (params->override_flags & QED_DCBX_OVERRIDE_PFC_CFG) qed_dcbx_set_pfc_data(p_hwfn, &local_admin->features.pfc, @@ -1042,7 +1126,7 @@ qed_dcbx_set_local_params(struct qed_hwfn *p_hwfn, if (params->override_flags & QED_DCBX_OVERRIDE_APP_CFG) qed_dcbx_set_app_data(p_hwfn, &local_admin->features.app, - ¶ms->config.params); + ¶ms->config.params, ieee); } int qed_dcbx_config_params(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index b1e3c57c7117c..d6c4177df7cb6 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -70,8 +70,16 @@ struct qed_dbcx_pfc_params { u8 max_tc; }; +enum qed_dcbx_sf_ieee_type { + QED_DCBX_SF_IEEE_ETHTYPE, + QED_DCBX_SF_IEEE_TCP_PORT, + QED_DCBX_SF_IEEE_UDP_PORT, + QED_DCBX_SF_IEEE_TCP_UDP_PORT +}; + struct qed_app_entry { bool ethtype; + enum qed_dcbx_sf_ieee_type sf_ieee; bool enabled; u8 prio; u16 proto_id; From 1d7406ce7bdfc48cd7390f793d23ef81fff75880 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Date: Mon, 8 Aug 2016 21:57:43 -0400 Subject: [PATCH 078/513] qed: Update app count when adding a new dcbx app entry to the table. App count is not updated while adding new app entry to the dcbx app table. Signed-off-by: Sudarsana Reddy Kalluru <sudarsana.kalluru@qlogic.com> Signed-off-by: Yuval Mintz <Yuval.Mintz@qlogic.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/qlogic/qed/qed_dcbx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index b157a6aba277b..226cb08cc055f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -1707,8 +1707,10 @@ static int qed_dcbnl_setapp(struct qed_dev *cdev, if ((entry->ethtype == ethtype) && (entry->proto_id == idval)) break; /* First empty slot */ - if (!entry->proto_id) + if (!entry->proto_id) { + dcbx_set.config.params.num_app_entries++; break; + } } if (i == QED_DCBX_MAX_APP_PROTOCOL) { @@ -2228,8 +2230,10 @@ int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) (entry->proto_id == app->protocol)) break; /* First empty slot */ - if (!entry->proto_id) + if (!entry->proto_id) { + dcbx_set.config.params.num_app_entries++; break; + } } if (i == QED_DCBX_MAX_APP_PROTOCOL) { From 6dae61627d1004895bfcee81d24482ec64cbecc5 Mon Sep 17 00:00:00 2001 From: Colin Ian King <colin.king@canonical.com> Date: Wed, 13 Jul 2016 08:36:03 +0100 Subject: [PATCH 079/513] drm/amdkfd: print doorbell offset as a hex value The doorbell offset is formatted with a 0x prefix to suggest it is a hexadecimal value, when in fact %d is being used and this is confusing. Use %X instead to match the proceeding 0x prefix. Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com> --- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index e621eba63126a..a7d3cb3fead0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -184,7 +184,7 @@ u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, sizeof(u32)) + inx; pr_debug("kfd: get kernel queue doorbell\n" - " doorbell offset == 0x%08d\n" + " doorbell offset == 0x%08X\n" " kernel address == 0x%08lX\n", *doorbell_off, (uintptr_t)(kfd->doorbell_kernel_ptr + inx)); From b173a28f62cf929324a8a6adcc45adadce311d16 Mon Sep 17 00:00:00 2001 From: Liping Zhang <liping.zhang@spreadtrum.com> Date: Mon, 8 Aug 2016 21:57:58 +0800 Subject: [PATCH 080/513] netfilter: nf_ct_expect: remove the redundant slash when policy name is empty The 'name' filed in struct nf_conntrack_expect_policy{} is not a pointer, so check it is NULL or not will always return true. Even if the name is empty, slash will always be displayed like follows: # cat /proc/net/nf_conntrack_expect 297 l3proto = 2 proto=6 src=1.1.1.1 dst=2.2.2.2 sport=1 dport=1025 ftp/ ^ Fixes: 3a8fc53a45c4 ("netfilter: nf_ct_helper: allocate 16 bytes for the helper and policy names") Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_conntrack_expect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 9e3693128313f..f8dbacf66795d 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -574,7 +574,7 @@ static int exp_seq_show(struct seq_file *s, void *v) helper = rcu_dereference(nfct_help(expect->master)->helper); if (helper) { seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name); - if (helper->expect_policy[expect->class].name) + if (helper->expect_policy[expect->class].name[0]) seq_printf(s, "/%s", helper->expect_policy[expect->class].name); } From b18bcb0019cf57a3db0917b77e65c29bd9b00f54 Mon Sep 17 00:00:00 2001 From: Liping Zhang <liping.zhang@spreadtrum.com> Date: Mon, 8 Aug 2016 22:03:40 +0800 Subject: [PATCH 081/513] netfilter: nfnetlink_queue: fix memory leak when attach expectation successfully User can use NFQA_EXP to attach expectations to conntracks, but we forget to put back nf_conntrack_expect when it is inserted successfully, i.e. in this normal case, expect's use refcnt will be 3. So even we unlink it and put it back later, the use refcnt is still 1, then the memory will be leaked forever. Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_conntrack_netlink.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 050bb3420a6ba..b9bfe64e232c6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2362,12 +2362,8 @@ ctnetlink_glue_attach_expect(const struct nlattr *attr, struct nf_conn *ct, return PTR_ERR(exp); err = nf_ct_expect_related_report(exp, portid, report); - if (err < 0) { - nf_ct_expect_put(exp); - return err; - } - - return 0; + nf_ct_expect_put(exp); + return err; } static void ctnetlink_glue_seqadj(struct sk_buff *skb, struct nf_conn *ct, From 00a3101f561816e58de054a470484996f78eb5eb Mon Sep 17 00:00:00 2001 From: Liping Zhang <liping.zhang@spreadtrum.com> Date: Mon, 8 Aug 2016 22:07:27 +0800 Subject: [PATCH 082/513] netfilter: nfnetlink_queue: reject verdict request from different portid Like NFQNL_MSG_VERDICT_BATCH do, we should also reject the verdict request when the portid is not same with the initial portid(maybe from another process). Fixes: 97d32cf9440d ("netfilter: nfnetlink_queue: batch verdict support") Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com> Reviewed-by: Florian Westphal <fw@strlen.de> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nfnetlink_queue.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 5d36a0926b4a4..f49f45081acb2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1145,10 +1145,8 @@ static int nfqnl_recv_verdict(struct net *net, struct sock *ctnl, struct nfnl_queue_net *q = nfnl_queue_pernet(net); int err; - queue = instance_lookup(q, queue_num); - if (!queue) - queue = verdict_instance_lookup(q, queue_num, - NETLINK_CB(skb).portid); + queue = verdict_instance_lookup(q, queue_num, + NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); From aa0c2c68abd1e2915656cc81afdb195bc8595dec Mon Sep 17 00:00:00 2001 From: Liping Zhang <liping.zhang@spreadtrum.com> Date: Mon, 8 Aug 2016 22:10:26 +0800 Subject: [PATCH 083/513] netfilter: ctnetlink: reject new conntrack request with different l4proto Currently, user can add a conntrack with different l4proto via nfnetlink. For example, original tuple is TCP while reply tuple is SCTP. This is invalid combination, we should report EINVAL to userspace. Signed-off-by: Liping Zhang <liping.zhang@spreadtrum.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nf_conntrack_netlink.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b9bfe64e232c6..fdfc71f416b7a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1894,6 +1894,8 @@ static int ctnetlink_new_conntrack(struct net *net, struct sock *ctnl, if (!cda[CTA_TUPLE_ORIG] || !cda[CTA_TUPLE_REPLY]) return -EINVAL; + if (otuple.dst.protonum != rtuple.dst.protonum) + return -EINVAL; ct = ctnetlink_create_conntrack(net, &zone, cda, &otuple, &rtuple, u3); From 55cae7a403f3b52de3dd6e4a614582541c9631af Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Mon, 8 Aug 2016 12:13:45 +0200 Subject: [PATCH 084/513] rxrpc: fix uninitialized pointer dereference in debug code A newly added bugfix caused an uninitialized variable to be used for printing debug output. This is harmless as long as the debug setting is disabled, but otherwise leads to an immediate crash. gcc warns about this when -Wmaybe-uninitialized is enabled: net/rxrpc/call_object.c: In function 'rxrpc_release_call': net/rxrpc/call_object.c:496:163: error: 'sp' may be used uninitialized in this function [-Werror=maybe-uninitialized] The initialization was removed but one of the users remains. This adds back the initialization. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Fixes: 372ee16386bb ("rxrpc: Fix races between skb free, ACK generation and replying") Signed-off-by: David Howells <dhowells@redhat.com> --- net/rxrpc/call_object.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index c47f14fc5e888..e8c953c48cb8f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -493,6 +493,7 @@ void rxrpc_release_call(struct rxrpc_call *call) (skb = skb_dequeue(&call->rx_oos_queue))) { spin_unlock_bh(&call->lock); + sp = rxrpc_skb(skb); _debug("- zap %s %%%u #%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial, sp->hdr.seq); From c12abf346456416ca7f7ba45f363cf92d2480a99 Mon Sep 17 00:00:00 2001 From: Michael Ellerman <mpe@ellerman.id.au> Date: Tue, 9 Aug 2016 08:46:15 +1000 Subject: [PATCH 085/513] crypto: powerpc - CRYPT_CRC32C_VPMSUM should depend on ALTIVEC The optimised crc32c implementation depends on VMX (aka. Altivec) instructions, so the kernel must be built with Altivec support in order for the crc32c code to build. Fixes: 6dd7a82cc54e ("crypto: powerpc - Add POWER8 optimised crc32c") Acked-by: Anton Blanchard <anton@samba.org> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- crypto/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index a9377bef25e3e..84d71482bf080 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -439,7 +439,7 @@ config CRYPTO_CRC32C_INTEL config CRYPT_CRC32C_VPMSUM tristate "CRC32c CRC algorithm (powerpc64)" - depends on PPC64 + depends on PPC64 && ALTIVEC select CRYPTO_HASH select CRC32 help From a0118c8b2be9297aed8e915c60b4013326b256d4 Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Tue, 9 Aug 2016 08:27:17 +0100 Subject: [PATCH 086/513] crypto: caam - fix non-hmac hashes Since 6de62f15b581 ("crypto: algif_hash - Require setkey before accept(2)"), the AF_ALG interface requires userspace to provide a key to any algorithm that has a setkey method. However, the non-HMAC algorithms are not keyed, so setting a key is unnecessary. Fix this by removing the setkey method from the non-keyed hash algorithms. Fixes: 6de62f15b581 ("crypto: algif_hash - Require setkey before accept(2)") Cc: <stable@vger.kernel.org> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- drivers/crypto/caam/caamhash.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index f1ecc8df8d41e..36365b3efdfdc 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -1898,6 +1898,7 @@ caam_hash_alloc(struct caam_hash_template *template, template->name); snprintf(alg->cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s", template->driver_name); + t_alg->ahash_alg.setkey = NULL; } alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; From 6bb47e8ab98accb1319bd43c64966340ba3bba9a Mon Sep 17 00:00:00 2001 From: Viresh Kumar <viresh.kumar@linaro.org> Date: Thu, 4 Aug 2016 13:32:22 -0700 Subject: [PATCH 087/513] usb: hub: Fix unbalanced reference count/memory leak/deadlocks Memory leak and unbalanced reference count: If the hub gets disconnected while the core is still activating it, this can result in leaking memory of few USB structures. This will happen if we have done a kref_get() from hub_activate() and scheduled a delayed work item for HUB_INIT2/3. Now if hub_disconnect() gets called before the delayed work expires, then we will cancel the work from hub_quiesce(), but wouldn't do a kref_put(). And so the unbalance. kmemleak reports this as (with the commit e50293ef9775 backported to 3.10 kernel with other changes, though the same is true for mainline as well): unreferenced object 0xffffffc08af5b800 (size 1024): comm "khubd", pid 73, jiffies 4295051211 (age 6482.350s) hex dump (first 32 bytes): 30 68 f3 8c c0 ff ff ff 00 a0 b2 2e c0 ff ff ff 0h.............. 01 00 00 00 00 00 00 00 00 94 7d 40 c0 ff ff ff ..........}@.... backtrace: [<ffffffc0003079ec>] create_object+0x148/0x2a0 [<ffffffc000cc150c>] kmemleak_alloc+0x80/0xbc [<ffffffc000303a7c>] kmem_cache_alloc_trace+0x120/0x1ac [<ffffffc0006fa610>] hub_probe+0x120/0xb84 [<ffffffc000702b20>] usb_probe_interface+0x1ec/0x298 [<ffffffc0005d50cc>] driver_probe_device+0x160/0x374 [<ffffffc0005d5308>] __device_attach+0x28/0x4c [<ffffffc0005d3164>] bus_for_each_drv+0x78/0xac [<ffffffc0005d4ee0>] device_attach+0x6c/0x9c [<ffffffc0005d42b8>] bus_probe_device+0x28/0xa0 [<ffffffc0005d23a4>] device_add+0x324/0x604 [<ffffffc000700fcc>] usb_set_configuration+0x660/0x6cc [<ffffffc00070a350>] generic_probe+0x44/0x84 [<ffffffc000702914>] usb_probe_device+0x54/0x74 [<ffffffc0005d50cc>] driver_probe_device+0x160/0x374 [<ffffffc0005d5308>] __device_attach+0x28/0x4c Deadlocks: If the hub gets disconnected early enough (i.e. before INIT2/INIT3 are finished and the init_work is still queued), the core may call hub_quiesce() after acquiring interface device locks and it will wait for the work to be cancelled synchronously. But if the work handler is already running in parallel, it may try to acquire the same interface device lock and this may result in deadlock. Fix both the issues by removing the call to cancel_delayed_work_sync(). CC: <stable@vger.kernel.org> #4.4+ Fixes: e50293ef9775 ("USB: fix invalid memory access in hub_activate()") Reported-by: Manu Gautam <mgautam@codeaurora.org> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org> Acked-by: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/core/hub.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bee13517676f9..3ccffac0f6472 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1315,8 +1315,6 @@ static void hub_quiesce(struct usb_hub *hub, enum hub_quiescing_type type) struct usb_device *hdev = hub->hdev; int i; - cancel_delayed_work_sync(&hub->init_work); - /* hub_wq and related activity won't re-trigger */ hub->quiescing = 1; From ca5cbc8b02f9b21cc8cd1ab36668763ec34f9ee8 Mon Sep 17 00:00:00 2001 From: Alan Stern <stern@rowland.harvard.edu> Date: Fri, 5 Aug 2016 11:49:45 -0400 Subject: [PATCH 088/513] USB: hub: fix up early-exit pathway in hub_activate The early-exit pathway in hub_activate, added by commit e50293ef9775 ("USB: fix invalid memory access in hub_activate()") needs improvement. It duplicates code that is already present at the end of the subroutine, and it neglects to undo the effect of a usb_autopm_get_interface_no_resume() call. This patch fixes both problems by making the early-exit pathway jump directly to the end of the subroutine. It simplifies the code at the end by merging two conditionals that actually test the same condition although they appear different: If type < HUB_INIT3 then type must be either HUB_INIT2 or HUB_INIT, and it can't be HUB_INIT because in that case the subroutine would have exited earlier. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> CC: <stable@vger.kernel.org> #4.4+ Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/core/hub.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index 3ccffac0f6472..bb69d262b6f95 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1055,11 +1055,8 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) device_lock(hub->intfdev); /* Was the hub disconnected while we were waiting? */ - if (hub->disconnected) { - device_unlock(hub->intfdev); - kref_put(&hub->kref, hub_release); - return; - } + if (hub->disconnected) + goto disconnected; if (type == HUB_INIT2) goto init2; goto init3; @@ -1281,12 +1278,12 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Scan all ports that need attention */ kick_hub_wq(hub); - /* Allow autosuspend if it was suppressed */ - if (type <= HUB_INIT3) + if (type == HUB_INIT2 || type == HUB_INIT3) { + /* Allow autosuspend if it was suppressed */ + disconnected: usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); - - if (type == HUB_INIT2 || type == HUB_INIT3) device_unlock(hub->intfdev); + } kref_put(&hub->kref, hub_release); } From 07d316a22e119fa301fd7dba7f1e1adfd4f72c05 Mon Sep 17 00:00:00 2001 From: Alan Stern <stern@rowland.harvard.edu> Date: Fri, 5 Aug 2016 11:51:30 -0400 Subject: [PATCH 089/513] USB: hub: change the locking in hub_activate The locking in hub_activate() is not adequate to provide full mutual exclusion with hub_quiesce(). The subroutine locks the hub's usb_interface, but the callers of hub_quiesce() (such as hub_pre_reset() and hub_event()) hold the lock to the hub's usb_device. This patch changes hub_activate() to make it acquire the same lock as those other routines. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> CC: <stable@vger.kernel.org> #4.4+ Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/core/hub.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index bb69d262b6f95..1d5fc32d06d00 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1052,7 +1052,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Continue a partial initialization */ if (type == HUB_INIT2 || type == HUB_INIT3) { - device_lock(hub->intfdev); + device_lock(&hdev->dev); /* Was the hub disconnected while we were waiting? */ if (hub->disconnected) @@ -1259,7 +1259,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) queue_delayed_work(system_power_efficient_wq, &hub->init_work, msecs_to_jiffies(delay)); - device_unlock(hub->intfdev); + device_unlock(&hdev->dev); return; /* Continues at init3: below */ } else { msleep(delay); @@ -1282,7 +1282,7 @@ static void hub_activate(struct usb_hub *hub, enum hub_activation_type type) /* Allow autosuspend if it was suppressed */ disconnected: usb_autopm_put_interface_async(to_usb_interface(hub->intfdev)); - device_unlock(hub->intfdev); + device_unlock(&hdev->dev); } kref_put(&hub->kref, hub_release); From 28324936f3d672bbf83472fece8f36a158a52276 Mon Sep 17 00:00:00 2001 From: Peter Chen <peter.chen@nxp.com> Date: Tue, 26 Jul 2016 16:01:30 +0800 Subject: [PATCH 090/513] usb: misc: usbtest: usbtest_do_ioctl may return positive integer For case 14 and case 21, their correct return value is the number of bytes transferred, so it is a positive integer. But in usbtest_ioctl, it takes non-zero as false return value for usbtest_do_ioctl, so it will treat the correct test as wrong test, then the time on tests will be the minus value. Signed-off-by: Peter Chen <peter.chen@nxp.com> Cc: stable <stable@vger.kernel.org> Fixes: 18fc4ebdc705 ("usb: misc: usbtest: Remove timeval usage") Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/misc/usbtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 6b978f04b8d72..5e3464e8d4e5a 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -2602,7 +2602,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); - if (retval) + if (retval < 0) goto free_mutex; ktime_get_ts64(&end); From 9c6256a5e707a9eb8b91962b550050b13aa75334 Mon Sep 17 00:00:00 2001 From: Xiao Han <xiao.han@orange.fr> Date: Tue, 14 Jun 2016 16:22:54 +0200 Subject: [PATCH 091/513] usb: misc: ftdi-elan: Fix off-by-one memory corruptions This patch fixes fives off-by-one bugs in the ftdi-elan driver code. The bug can be triggered by plugging a USB adapter for CardBus 3G cards (model U132 manufactured by Elan Digital Systems, Ltd), causing a kernel panic. The fix was tested on Ubuntu 14.04.4 with 4.7.0-rc14.2.0-27-generic+ and 4.4.0-22-generic+ kernel. In the ftdi_elan_synchronize function, an off-by-one memory corruption occurs when packet_bytes is equal or bigger than m. After having read m bytes, that is bytes_read is equal to m, " ..\x00" is still copied to the stack variable causing an out bounds write of 4 bytes, which overwrites the stack canary and results in a kernel panic. This off-by-one requires physical access to the machine. It is not exploitable since we have no control on the overwritten data. Similar off-by-one bugs have been observed in 4 other functions: ftdi_elan_stuck_waiting, ftdi_elan_read, ftdi_elan_edset_output and ftdi_elan_flush_input_fifo. Reported-by: Alex Palesandro <palexster@gmail.com> Signed-off-by: Xiao Han <xiao.han@orange.fr> Tested-by: Paul Chaignon <pchaigno@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/misc/ftdi-elan.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c index 52c27cab78c3e..9b5b3b2281cae 100644 --- a/drivers/usb/misc/ftdi-elan.c +++ b/drivers/usb/misc/ftdi-elan.c @@ -665,7 +665,7 @@ static ssize_t ftdi_elan_read(struct file *file, char __user *buffer, { char data[30 *3 + 4]; char *d = data; - int m = (sizeof(data) - 1) / 3; + int m = (sizeof(data) - 1) / 3 - 1; int bytes_read = 0; int retry_on_empty = 10; int retry_on_timeout = 5; @@ -1684,7 +1684,7 @@ wait:if (ftdi->disconnected > 0) { int i = 0; char data[30 *3 + 4]; char *d = data; - int m = (sizeof(data) - 1) / 3; + int m = (sizeof(data) - 1) / 3 - 1; int l = 0; struct u132_target *target = &ftdi->target[ed]; struct u132_command *command = &ftdi->command[ @@ -1876,7 +1876,7 @@ more:{ if (packet_bytes > 2) { char diag[30 *3 + 4]; char *d = diag; - int m = (sizeof(diag) - 1) / 3; + int m = (sizeof(diag) - 1) / 3 - 1; char *b = ftdi->bulk_in_buffer; int bytes_read = 0; diag[0] = 0; @@ -2053,7 +2053,7 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi) if (packet_bytes > 2) { char diag[30 *3 + 4]; char *d = diag; - int m = (sizeof(diag) - 1) / 3; + int m = (sizeof(diag) - 1) / 3 - 1; char *b = ftdi->bulk_in_buffer; int bytes_read = 0; unsigned char c = 0; @@ -2155,7 +2155,7 @@ more:{ if (packet_bytes > 2) { char diag[30 *3 + 4]; char *d = diag; - int m = (sizeof(diag) - 1) / 3; + int m = (sizeof(diag) - 1) / 3 - 1; char *b = ftdi->bulk_in_buffer; int bytes_read = 0; diag[0] = 0; From 9a8a5dcf20eee254ce490f12d579ef80ee776eb6 Mon Sep 17 00:00:00 2001 From: Joerg Roedel <jroedel@suse.de> Date: Tue, 9 Aug 2016 15:46:46 +0200 Subject: [PATCH 092/513] iommu/mediatek: Mark static functions in headers inline This was an oversight while merging these functions. Fix it. Cc: Honghui Zhang <honghui.zhang@mediatek.com> Fixes: 9ca340c98c0d ('iommu/mediatek: move the common struct into header file') Signed-off-by: Joerg Roedel <jroedel@suse.de> --- drivers/iommu/mtk_iommu.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 9ed0a8462ccf2..3dab13b4a2112 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -55,19 +55,19 @@ struct mtk_iommu_data { bool enable_4GB; }; -static int compare_of(struct device *dev, void *data) +static inline int compare_of(struct device *dev, void *data) { return dev->of_node == data; } -static int mtk_iommu_bind(struct device *dev) +static inline int mtk_iommu_bind(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); return component_bind_all(dev, &data->smi_imu); } -static void mtk_iommu_unbind(struct device *dev) +static inline void mtk_iommu_unbind(struct device *dev) { struct mtk_iommu_data *data = dev_get_drvdata(dev); From aed9d65ac3278d4febd8665bd7db59ef53e825fe Mon Sep 17 00:00:00 2001 From: Alan Stern <stern@rowland.harvard.edu> Date: Mon, 1 Aug 2016 15:25:56 -0400 Subject: [PATCH 093/513] USB: validate wMaxPacketValue entries in endpoint descriptors Erroneous or malicious endpoint descriptors may have non-zero bits in reserved positions, or out-of-bounds values. This patch helps prevent these from causing problems by bounds-checking the wMaxPacketValue entries in endpoint descriptors and capping the values at the maximum allowed. This issue was first discovered and tests were conducted by Jake Lamberson <jake.lamberson1@gmail.com>, an intern working for Rosie Hall. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Reported-by: roswest <roswest@cisco.com> Tested-by: roswest <roswest@cisco.com> CC: <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/core/config.c | 66 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c index 31ccdccd7a04f..051163189810d 100644 --- a/drivers/usb/core/config.c +++ b/drivers/usb/core/config.c @@ -171,6 +171,31 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno, ep, buffer, size); } +static const unsigned short low_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 8, + [USB_ENDPOINT_XFER_ISOC] = 0, + [USB_ENDPOINT_XFER_BULK] = 0, + [USB_ENDPOINT_XFER_INT] = 8, +}; +static const unsigned short full_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 64, + [USB_ENDPOINT_XFER_ISOC] = 1023, + [USB_ENDPOINT_XFER_BULK] = 64, + [USB_ENDPOINT_XFER_INT] = 64, +}; +static const unsigned short high_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 64, + [USB_ENDPOINT_XFER_ISOC] = 1024, + [USB_ENDPOINT_XFER_BULK] = 512, + [USB_ENDPOINT_XFER_INT] = 1023, +}; +static const unsigned short super_speed_maxpacket_maxes[4] = { + [USB_ENDPOINT_XFER_CONTROL] = 512, + [USB_ENDPOINT_XFER_ISOC] = 1024, + [USB_ENDPOINT_XFER_BULK] = 1024, + [USB_ENDPOINT_XFER_INT] = 1024, +}; + static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, int asnum, struct usb_host_interface *ifp, int num_ep, unsigned char *buffer, int size) @@ -179,6 +204,8 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, struct usb_endpoint_descriptor *d; struct usb_host_endpoint *endpoint; int n, i, j, retval; + unsigned int maxp; + const unsigned short *maxpacket_maxes; d = (struct usb_endpoint_descriptor *) buffer; buffer += d->bLength; @@ -286,6 +313,42 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, endpoint->desc.wMaxPacketSize = cpu_to_le16(8); } + /* Validate the wMaxPacketSize field */ + maxp = usb_endpoint_maxp(&endpoint->desc); + + /* Find the highest legal maxpacket size for this endpoint */ + i = 0; /* additional transactions per microframe */ + switch (to_usb_device(ddev)->speed) { + case USB_SPEED_LOW: + maxpacket_maxes = low_speed_maxpacket_maxes; + break; + case USB_SPEED_FULL: + maxpacket_maxes = full_speed_maxpacket_maxes; + break; + case USB_SPEED_HIGH: + /* Bits 12..11 are allowed only for HS periodic endpoints */ + if (usb_endpoint_xfer_int(d) || usb_endpoint_xfer_isoc(d)) { + i = maxp & (BIT(12) | BIT(11)); + maxp &= ~i; + } + /* fallthrough */ + default: + maxpacket_maxes = high_speed_maxpacket_maxes; + break; + case USB_SPEED_SUPER: + case USB_SPEED_SUPER_PLUS: + maxpacket_maxes = super_speed_maxpacket_maxes; + break; + } + j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)]; + + if (maxp > j) { + dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n", + cfgno, inum, asnum, d->bEndpointAddress, maxp, j); + maxp = j; + endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp); + } + /* * Some buggy high speed devices have bulk endpoints using * maxpacket sizes other than 512. High speed HCDs may not @@ -293,9 +356,6 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum, */ if (to_usb_device(ddev)->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) { - unsigned maxp; - - maxp = usb_endpoint_maxp(&endpoint->desc) & 0x07ff; if (maxp != 512) dev_warn(ddev, "config %d interface %d altsetting %d " "bulk endpoint 0x%X has invalid maxpacket %d\n", From bc337b51508beb2d039aff5074a76cfe1c212030 Mon Sep 17 00:00:00 2001 From: Marc Ohlf <ohlf@mkt-sys.de> Date: Wed, 3 Aug 2016 11:51:54 +0200 Subject: [PATCH 094/513] usb: ehci: change order of register cleanup during shutdown In ehci_turn_off_all_ports() all EHCI port registers are cleared to zero. On some hardware, this can lead to an system hang, when ehci_port_power() accesses the already cleared registers. This patch changes the order of cleanup. First call ehci_port_power() which respects the current bits in port status registers and afterwards cleanup the hard way by setting everything to zero. Signed-off-by: Marc Ohlf <ohlf@mkt-sys.de> Acked-by: Alan Stern <stern@rowland.harvard.edu> CC: <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/host/ehci-hcd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c index a962b89b65a66..1e5f529d51a21 100644 --- a/drivers/usb/host/ehci-hcd.c +++ b/drivers/usb/host/ehci-hcd.c @@ -332,11 +332,11 @@ static void ehci_turn_off_all_ports(struct ehci_hcd *ehci) int port = HCS_N_PORTS(ehci->hcs_params); while (port--) { - ehci_writel(ehci, PORT_RWC_BITS, - &ehci->regs->port_status[port]); spin_unlock_irq(&ehci->lock); ehci_port_power(ehci, port, false); spin_lock_irq(&ehci->lock); + ehci_writel(ehci, PORT_RWC_BITS, + &ehci->regs->port_status[port]); } } From 70f7ca9a0262784d0b80727860a63d64ab228e7b Mon Sep 17 00:00:00 2001 From: Jiri Slaby <jslaby@suse.cz> Date: Wed, 15 Jun 2016 15:56:11 +0200 Subject: [PATCH 095/513] usb: devio, do not warn when allocation fails usbdev_mmap allocates a buffer. The size of the buffer is determined by a user. So with this code (no need to be root): int fd = open("/dev/bus/usb/001/001", O_RDONLY); mmap(NULL, 0x800000, PROT_READ, MAP_SHARED, fd, 0); we can see a warning: WARNING: CPU: 0 PID: 21771 at ../mm/page_alloc.c:3563 __alloc_pages_slowpath+0x1036/0x16e0() ... Call Trace: [<ffffffff8117a3ae>] ? warn_slowpath_null+0x2e/0x40 [<ffffffff815178b6>] ? __alloc_pages_slowpath+0x1036/0x16e0 [<ffffffff81516880>] ? warn_alloc_failed+0x250/0x250 [<ffffffff8151226b>] ? get_page_from_freelist+0x75b/0x28b0 [<ffffffff815184e3>] ? __alloc_pages_nodemask+0x583/0x6b0 [<ffffffff81517f60>] ? __alloc_pages_slowpath+0x16e0/0x16e0 [<ffffffff810565d4>] ? dma_generic_alloc_coherent+0x104/0x220 [<ffffffffa0269e56>] ? hcd_buffer_alloc+0x1d6/0x3e0 [usbcore] [<ffffffffa0269c80>] ? hcd_buffer_destroy+0xa0/0xa0 [usbcore] [<ffffffffa0228f05>] ? usb_alloc_coherent+0x65/0x90 [usbcore] [<ffffffffa0275c05>] ? usbdev_mmap+0x1a5/0x770 [usbcore] ... Allocations like this one should be marked as __GFP_NOWARN. So do so. The size could be also clipped by something like: if (size >= (1 << (MAX_ORDER + PAGE_SHIFT - 1))) return -ENOMEM; But I think the overall limit of 16M (by usbfs_increase_memory_usage) is enough, so that we only silence the warning here. Signed-off-by: Jiri Slaby <jslaby@suse.cz> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Alan Stern <stern@rowland.harvard.edu> Cc: Steinar H. Gunderson <sesse@google.com> Cc: Markus Rechberger <mrechberger@gmail.com> Fixes: f7d34b445a (USB: Add support for usbfs zerocopy.) Cc: 4.6+ <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/core/devio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index e9f5043a2167c..472cbcdf74560 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -241,7 +241,8 @@ static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) goto error_decrease_mem; } - mem = usb_alloc_coherent(ps->dev, size, GFP_USER, &dma_handle); + mem = usb_alloc_coherent(ps->dev, size, GFP_USER | __GFP_NOWARN, + &dma_handle); if (!mem) { ret = -ENOMEM; goto error_free_usbm; From 5cce438298a0d2a7a857a4a3c3e26aeb8f77b941 Mon Sep 17 00:00:00 2001 From: Alan Stern <stern@rowland.harvard.edu> Date: Fri, 10 Jun 2016 14:42:55 -0400 Subject: [PATCH 096/513] USB: remove race condition in usbfs/libusb when using reap-after-disconnect Hans de Goede has reported a difficulty in the Linux port of libusb. When a device is removed, the poll() system call in usbfs starts returning POLLERR as soon as udev->state is set to USB_STATE_NOTATTACHED, but the outstanding URBs are not available for reaping until some time later (after usbdev_remove() has been called). This is awkward for libusb or other usbfs clients, although not an insuperable problem. At any rate, it's easy to change usbfs so that it returns POLLHUP as soon as the state becomes USB_STATE_NOTATTACHED but it doesn't return POLLERR until after the outstanding URBs have completed. That's what this patch does; it uses the fact that ps->list is always on the dev->filelist list until usbdev_remove() takes it off, which happens after all the outstanding URBs have been cancelled. Signed-off-by: Alan Stern <stern@rowland.harvard.edu> Reported-by: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/core/devio.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 472cbcdf74560..e6a6d67c87058 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -2583,7 +2583,9 @@ static unsigned int usbdev_poll(struct file *file, if (file->f_mode & FMODE_WRITE && !list_empty(&ps->async_completed)) mask |= POLLOUT | POLLWRNORM; if (!connected(ps)) - mask |= POLLERR | POLLHUP; + mask |= POLLHUP; + if (list_empty(&ps->list)) + mask |= POLLERR; return mask; } From 59b71f774fc2ec2d985251e72fde0f9f88164547 Mon Sep 17 00:00:00 2001 From: Jaewon Kim <jaewon02.kim@gmail.com> Date: Thu, 21 Jul 2016 22:20:53 +0900 Subject: [PATCH 097/513] usb: host: max3421-hcd: fix mask of IO control register GPIO control register is divided into IOPINS1 and IOPINS2. And low 4-bit of register is controls output. So, this patch fixes wrong mask of GPIO output. Signed-off-by: Jaewon Kim <jaewon02.kim@samsung.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/host/max3421-hcd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/host/max3421-hcd.c b/drivers/usb/host/max3421-hcd.c index c369c29e496d7..2f7690092a7ff 100644 --- a/drivers/usb/host/max3421-hcd.c +++ b/drivers/usb/host/max3421-hcd.c @@ -1675,7 +1675,7 @@ max3421_gpout_set_value(struct usb_hcd *hcd, u8 pin_number, u8 value) if (pin_number > 7) return; - mask = 1u << pin_number; + mask = 1u << (pin_number % 4); idx = pin_number / 4; if (value) From fd837b08d98c0c9f4f31998f2ed55b9d8694082c Mon Sep 17 00:00:00 2001 From: Andre Przywara <andre.przywara@arm.com> Date: Mon, 8 Aug 2016 17:29:28 +0100 Subject: [PATCH 098/513] KVM: arm64: ITS: return 1 on successful MSI injection According to the KVM API documentation a successful MSI injection should return a value > 0 on success. Return possible errors in vgic_its_trigger_msi() and report a successful injection back to userland, while also reporting the case where the MSI could not be delivered due to the guest not having the LPI mapped, for instance. Signed-off-by: Andre Przywara <andre.przywara@arm.com> Reviewed-by: Eric Auger <eric.auger@redhat.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/vgic/vgic-its.c | 56 ++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 56b0b7ec66aac..99ac022edc606 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -337,6 +337,7 @@ */ #define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 #define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 #define E_ITS_MAPD_DEVICE_OOR 0x010801 #define E_ITS_MAPC_PROCNUM_OOR 0x010902 diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 07411cf967b98..1bd8adbeae260 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -441,39 +441,48 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, * Find the target VCPU and the LPI number for a given devid/eventid pair * and make this IRQ pending, possibly injecting it. * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. */ -static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, - u32 devid, u32 eventid) +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) { + struct kvm_vcpu *vcpu; struct its_itte *itte; if (!its->enabled) - return; + return -EBUSY; itte = find_itte(its, devid, eventid); - /* Triggering an unmapped IRQ gets silently dropped. */ - if (itte && its_is_collection_mapped(itte->collection)) { - struct kvm_vcpu *vcpu; - - vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); - if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) { - spin_lock(&itte->irq->irq_lock); - itte->irq->pending = true; - vgic_queue_irq_unlock(kvm, itte->irq); - } - } + if (!itte || !its_is_collection_mapped(itte->collection)) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + if (!vcpu) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + if (!vcpu->arch.vgic_cpu.lpis_enabled) + return -EBUSY; + + spin_lock(&itte->irq->irq_lock); + itte->irq->pending = true; + vgic_queue_irq_unlock(kvm, itte->irq); + + return 0; } /* * Queries the KVM IO bus framework to get the ITS pointer from the given * doorbell address. * We then call vgic_its_trigger_msi() with the decoded data. + * According to the KVM_SIGNAL_MSI API description returns 1 on success. */ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) { u64 address; struct kvm_io_device *kvm_io_dev; struct vgic_io_device *iodev; + int ret; if (!vgic_has_its(kvm)) return -ENODEV; @@ -490,10 +499,21 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); mutex_lock(&iodev->its->its_lock); - vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); + ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); mutex_unlock(&iodev->its->its_lock); - return 0; + if (ret < 0) + return ret; + + /* + * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 + * if the guest has blocked the MSI. So we map any LPI mapping + * related error to that. + */ + if (ret) + return 0; + else + return 1; } /* Requires the its_lock to be held. */ @@ -981,9 +1001,7 @@ static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, u32 msi_data = its_cmd_get_id(its_cmd); u64 msi_devid = its_cmd_get_deviceid(its_cmd); - vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); - - return 0; + return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); } /* From 3ec60043f7c02e1f79e4a90045ff2d2e80042941 Mon Sep 17 00:00:00 2001 From: Robin Murphy <robin.murphy@arm.com> Date: Tue, 9 Aug 2016 16:23:17 +0100 Subject: [PATCH 099/513] iommu/dma: Don't put uninitialised IOVA domains Due to the limitations of having to wait until we see a device's DMA restrictions before we know how we want an IOVA domain initialised, there is a window for error if a DMA ops domain is allocated but later freed without ever being used. In that case, init_iova_domain() was never called, so calling put_iova_domain() from iommu_put_dma_cookie() ends up trying to take an uninitialised lock and crashing. Make things robust by skipping the call unless the IOVA domain actually has been initialised, as we probably should have done from the start. Fixes: 0db2e5d18f76 ("iommu: Implement common IOMMU ops for DMA mapping") Cc: stable@vger.kernel.org Reported-by: Nate Watterson <nwatters@codeaurora.org> Reviewed-by: Nate Watterson <nwatters@codeaurora.org> Tested-by: Nate Watterson <nwatters@codeaurora.org> Reviewed-by: Eric Auger <eric.auger@redhat.com> Tested-by: Eric Auger <eric.auger@redhat.com> Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Joerg Roedel <jroedel@suse.de> --- drivers/iommu/dma-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 08a1e2f3690f1..7d991c81c4fa1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -68,7 +68,8 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) if (!iovad) return; - put_iova_domain(iovad); + if (iovad->granule) + put_iova_domain(iovad); kfree(iovad); domain->iova_cookie = NULL; } From 17b963e319449f709e78dc1ef445d797a13eecbc Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Mon, 8 Aug 2016 13:06:41 +0100 Subject: [PATCH 100/513] rxrpc: Need to flag call as being released on connect failure If rxrpc_new_client_call() fails to make a connection, the call record that it allocated needs to be marked as RXRPC_CALL_RELEASED before it is passed to rxrpc_put_call() to indicate that it no longer has any attachment to the AF_RXRPC socket. Without this, an assertion failure may occur at: net/rxrpc/call_object:635 Signed-off-by: David Howells <dhowells@redhat.com> --- net/rxrpc/call_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index e8c953c48cb8f..ae057e0740f3d 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -275,6 +275,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, list_del_init(&call->link); write_unlock_bh(&rxrpc_call_lock); + set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; rxrpc_put_call(call); _leave(" = %d", ret); @@ -287,6 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ found_user_ID_now_present: write_unlock(&rx->call_lock); + set_bit(RXRPC_CALL_RELEASED, &call->flags); call->state = RXRPC_CALL_DEAD; rxrpc_put_call(call); _leave(" = -EEXIST [%p]", call); From f9dc575725baeaad8eb5262589f9aebeeaf13433 Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Mon, 8 Aug 2016 10:27:26 +0100 Subject: [PATCH 101/513] rxrpc: Don't access connection from call if pointer is NULL The call state machine processor sets up the message parameters for a UDP message that it might need to transmit in advance on the basis that there's a very good chance it's going to have to transmit either an ACK or an ABORT. This requires it to look in the connection struct to retrieve some of the parameters. However, if the call is complete, the call connection pointer may be NULL to dissuade the processor from transmitting a message. However, there are some situations where the processor is still going to be called - and it's still going to set up message parameters whether it needs them or not. This results in a NULL pointer dereference at: net/rxrpc/call_event.c:837 To fix this, skip the message pre-initialisation if there's no connection attached. Signed-off-by: David Howells <dhowells@redhat.com> --- net/rxrpc/call_event.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f5e99163a09e0..e60cf65c22323 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -837,6 +837,9 @@ void rxrpc_process_call(struct work_struct *work) return; } + if (!call->conn) + goto skip_msg_init; + /* there's a good chance we're going to have to send a message, so set * one up in advance */ msg.msg_name = &call->conn->params.peer->srx.transport; @@ -859,6 +862,7 @@ void rxrpc_process_call(struct work_struct *work) memset(iov, 0, sizeof(iov)); iov[0].iov_base = &whdr; iov[0].iov_len = sizeof(whdr); +skip_msg_init: /* deal with events of a final nature */ if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { From 2e7e9758b2b680fa615d5cf70b7af416e96162d2 Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Tue, 9 Aug 2016 10:11:48 +0100 Subject: [PATCH 102/513] rxrpc: Once packet posted in data_ready, don't retry posting Once a packet has been posted to a connection in the data_ready handler, we mustn't try reposting if we then find that the connection is dying as the refcount has been given over to the dying connection and the packet might no longer exist. Losing the packet isn't a problem as the peer will retransmit. Signed-off-by: David Howells <dhowells@redhat.com> --- net/rxrpc/input.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 9e0f58edcd016..04afdc09557b1 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -567,13 +567,13 @@ static void rxrpc_post_packet_to_call(struct rxrpc_call *call, * post connection-level events to the connection * - this includes challenges, responses and some aborts */ -static bool rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, +static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, struct sk_buff *skb) { _enter("%p,%p", conn, skb); skb_queue_tail(&conn->rx_queue, skb); - return rxrpc_queue_conn(conn); + rxrpc_queue_conn(conn); } /* @@ -694,7 +694,6 @@ void rxrpc_data_ready(struct sock *sk) rcu_read_lock(); -retry_find_conn: conn = rxrpc_find_connection_rcu(local, skb); if (!conn) goto cant_route_call; @@ -702,8 +701,7 @@ void rxrpc_data_ready(struct sock *sk) if (sp->hdr.callNumber == 0) { /* Connection-level packet */ _debug("CONN %p {%d}", conn, conn->debug_id); - if (!rxrpc_post_packet_to_conn(conn, skb)) - goto retry_find_conn; + rxrpc_post_packet_to_conn(conn, skb); } else { /* Call-bound packets are routed by connection channel. */ unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; From 50fd85a1f94753b86a7f540794dfd4f799e81ac2 Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Tue, 9 Aug 2016 11:30:43 +0100 Subject: [PATCH 103/513] rxrpc: Fix a use-after-push in data_ready handler Fix a use of a packet after it has been enqueued onto the packet processing queue in the data_ready handler. Once on a call's Rx queue, we mustn't touch it any more as it may be dequeued and freed by the call processor running on a work queue. Save the values we need before enqueuing. Without this, we can get an oops like the following: BUG: unable to handle kernel NULL pointer dereference at 000000000000009c IP: [<ffffffffa01854e8>] rxrpc_fast_process_packet+0x724/0xa11 [af_rxrpc] PGD 0 Oops: 0000 [#1] SMP Modules linked in: kafs(E) af_rxrpc(E) [last unloaded: af_rxrpc] CPU: 2 PID: 0 Comm: swapper/2 Tainted: G E 4.7.0-fsdevel+ #1336 Hardware name: ASUS All Series/H97-PLUS, BIOS 2306 10/09/2014 task: ffff88040d6863c0 task.stack: ffff88040d68c000 RIP: 0010:[<ffffffffa01854e8>] [<ffffffffa01854e8>] rxrpc_fast_process_packet+0x724/0xa11 [af_rxrpc] RSP: 0018:ffff88041fb03a78 EFLAGS: 00010246 RAX: ffffffffffffffff RBX: ffff8803ff195b00 RCX: 0000000000000001 RDX: ffffffffa01854d1 RSI: 0000000000000008 RDI: ffff8803ff195b00 RBP: ffff88041fb03ab0 R08: 0000000000000000 R09: 0000000000000001 R10: ffff88041fb038c8 R11: 0000000000000000 R12: ffff880406874800 R13: 0000000000000001 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88041fb00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 000000000000009c CR3: 0000000001c14000 CR4: 00000000001406e0 Stack: ffff8803ff195ea0 ffff880408348800 ffff880406874800 ffff8803ff195b00 ffff880408348800 ffff8803ff195ed8 0000000000000000 ffff88041fb03af0 ffffffffa0186072 0000000000000000 ffff8804054da000 0000000000000000 Call Trace: <IRQ> [<ffffffffa0186072>] rxrpc_data_ready+0x89d/0xbae [af_rxrpc] [<ffffffff814c94d7>] __sock_queue_rcv_skb+0x24c/0x2b2 [<ffffffff8155c59a>] __udp_queue_rcv_skb+0x4b/0x1bd [<ffffffff8155e048>] udp_queue_rcv_skb+0x281/0x4db [<ffffffff8155ea8f>] __udp4_lib_rcv+0x7ed/0x963 [<ffffffff8155ef9a>] udp_rcv+0x15/0x17 [<ffffffff81531d86>] ip_local_deliver_finish+0x1c3/0x318 [<ffffffff81532544>] ip_local_deliver+0xbb/0xc4 [<ffffffff81531bc3>] ? inet_del_offload+0x40/0x40 [<ffffffff815322a9>] ip_rcv_finish+0x3ce/0x42c [<ffffffff81532851>] ip_rcv+0x304/0x33d [<ffffffff81531edb>] ? ip_local_deliver_finish+0x318/0x318 [<ffffffff814dff9d>] __netif_receive_skb_core+0x601/0x6e8 [<ffffffff814e072e>] __netif_receive_skb+0x13/0x54 [<ffffffff814e082a>] netif_receive_skb_internal+0xbb/0x17c [<ffffffff814e1838>] napi_gro_receive+0xf9/0x1bd [<ffffffff8144eb9f>] rtl8169_poll+0x32b/0x4a8 [<ffffffff814e1c7b>] net_rx_action+0xe8/0x357 [<ffffffff81051074>] __do_softirq+0x1aa/0x414 [<ffffffff810514ab>] irq_exit+0x3d/0xb0 [<ffffffff810184a2>] do_IRQ+0xe4/0xfc [<ffffffff81612053>] common_interrupt+0x93/0x93 <EOI> [<ffffffff814af837>] ? cpuidle_enter_state+0x1ad/0x2be [<ffffffff814af832>] ? cpuidle_enter_state+0x1a8/0x2be [<ffffffff814af96a>] cpuidle_enter+0x12/0x14 [<ffffffff8108956f>] call_cpuidle+0x39/0x3b [<ffffffff81089855>] cpu_startup_entry+0x230/0x35d [<ffffffff810312ea>] start_secondary+0xf4/0xf7 Signed-off-by: David Howells <dhowells@redhat.com> --- net/rxrpc/input.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 04afdc09557b1..789719031462b 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -124,11 +124,15 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, struct rxrpc_skb_priv *sp; bool terminal; int ret, ackbit, ack; + u32 serial; + u8 flags; _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); sp = rxrpc_skb(skb); ASSERTCMP(sp->call, ==, NULL); + flags = sp->hdr.flags; + serial = sp->hdr.serial; spin_lock(&call->lock); @@ -192,8 +196,8 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, sp->call = call; rxrpc_get_call(call); atomic_inc(&call->skb_count); - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); + terminal = ((flags & RXRPC_LAST_PACKET) && + !(flags & RXRPC_CLIENT_INITIATED)); ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); if (ret < 0) { if (ret == -ENOMEM || ret == -ENOBUFS) { @@ -205,12 +209,13 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, } skb = NULL; + sp = NULL; _debug("post #%u", seq); ASSERTCMP(call->rx_data_post, ==, seq); call->rx_data_post++; - if (sp->hdr.flags & RXRPC_LAST_PACKET) + if (flags & RXRPC_LAST_PACKET) set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); /* if we've reached an out of sequence packet then we need to drain @@ -226,7 +231,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, spin_unlock(&call->lock); atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, sp->hdr.serial, false); + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false); _leave(" = 0 [posted]"); return 0; @@ -239,7 +244,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, discard_and_ack: _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true); + __rxrpc_propose_ACK(call, ack, serial, true); discard: spin_unlock(&call->lock); rxrpc_free_skb(skb); @@ -247,7 +252,7 @@ static int rxrpc_fast_process_data(struct rxrpc_call *call, return 0; enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, sp->hdr.serial, true); + __rxrpc_propose_ACK(call, ack, serial, true); enqueue_packet: _net("defer skb %p", skb); spin_unlock(&call->lock); From 992c273af9d9f12a2e470731f69bb3baa694562b Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Tue, 9 Aug 2016 16:58:42 +0100 Subject: [PATCH 104/513] rxrpc: Free packets discarded in data_ready Under certain conditions, the data_ready handler will discard a packet. These need to be freed. Signed-off-by: David Howells <dhowells@redhat.com> --- net/rxrpc/input.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 789719031462b..70bb77818deab 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -744,6 +744,8 @@ void rxrpc_data_ready(struct sock *sk) if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { _debug("reject type %d",sp->hdr.type); rxrpc_reject_packet(local, skb); + } else { + rxrpc_free_skb(skb); } _leave(" [no call]"); return; From 8b078c603249239f597dc395ac182667c8e0af9c Mon Sep 17 00:00:00 2001 From: Mathias Koehrer <mathias.koehrer@etas.com> Date: Tue, 9 Aug 2016 10:33:31 +0200 Subject: [PATCH 105/513] PCI: Update "pci=resource_alignment" documentation Some uio based PCI drivers, e.g., uio_cif, do not work if the assigned PCI memory resources are not page aligned. By using the kernel option "pci=resource_alignment=<align>@<bus>:<slot>.<func>" it is possible to request page alignment for memory resources of devices. However, this is cumbersome when using several devices, and the bus/slot/func addresses may change if devices are added to or removed from the system. Extend the "pci=resource_alignment" option so we can specify the relevant devices via PCI vendor, device, subvendor, and subdevice IDs. The specification of the devices via IDs is indicated by a leading string "pci:" as argument to "pci=resource_alignment". The format of the specification is pci:<vendor>:<device>[:<subvendor>:<subdevice>] Examples: pci=resource_alignment=4096@pci:8086:9c22:103c:198f pci=resource_alignment=pci:8086:9c22 # defaults to PAGE_SIZE align [bhelgaas: changelog, use actual vendor/device IDs in examples] Signed-off-by: Mathias Koehrer <mathias.koehrer@etas.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> --- Documentation/kernel-parameters.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 46c030a49186f..a4f4d693e2c12 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3032,6 +3032,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. PAGE_SIZE is used as alignment. PCI-PCI bridge can be specified, if resource windows need to be expanded. + To specify the alignment for several + instances of a device, the PCI vendor, + device, subvendor, and subdevice may be + specified, e.g., 4096@pci:8086:9c22:103c:198f ecrc= Enable/disable PCIe ECRC (transaction layer end-to-end CRC checking). bios: Use BIOS/firmware settings. This is the From beab47d55b883e85bec7771a899ac791c6f92c80 Mon Sep 17 00:00:00 2001 From: Markus Elfring <elfring@users.sourceforge.net> Date: Tue, 19 Jul 2016 22:42:22 +0200 Subject: [PATCH 106/513] of: Delete an unnecessary check before the function call "of_node_put" The of_node_put() function tests whether its argument is NULL and then returns immediately. Thus the test around the call is not needed. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring <elfring@users.sourceforge.net> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/base.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index 7792266db2597..cb255a07baa0f 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -1631,8 +1631,7 @@ static int __of_parse_phandle_with_args(const struct device_node *np, */ err: - if (it.node) - of_node_put(it.node); + of_node_put(it.node); return rc; } From 89c67752ae4dedc1244344b266c837ddb4053ebd Mon Sep 17 00:00:00 2001 From: Gavin Shan <gwshan@linux.vnet.ibm.com> Date: Mon, 1 Aug 2016 17:17:53 +1000 Subject: [PATCH 107/513] drivers/of: Validate device node in __unflatten_device_tree() @mynodes is set to NULL when __unflatten_device_tree() is called to unflatten device sub-tree in PCI hot add scenario on PowerPC PowerNV platform. Marking @mynodes detached unconditionally causes kernel crash as below backtrace shows: Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc000000000b26f64 cpu 0x0: Vector: 300 (Data Access) at [c000003fcc7cf740] pc: c000000000b26f64: __unflatten_device_tree+0xf4/0x190 lr: c000000000b26f40: __unflatten_device_tree+0xd0/0x190 sp: c000003fcc7cf9c0 msr: 900000000280b033 dar: 0 dsisr: 40000000 current = 0xc000003fcc281680 paca = 0xc00000000ff00000 softe: 0 irq_happened: 0x01 pid = 2724, comm = sh Linux version 4.7.0-gavin-07754-g92a6836 (gwshan@gwshan) (gcc version \ 4.9.3 (Buildroot 2016.02-rc2-00093-g5ea3bce) ) #539 SMP Mon Aug 1 \ 12:40:29 AEST 2016 enter ? for help [c000003fcc7cfa50] c000000000b27060 of_fdt_unflatten_tree+0x60/0x90 [c000003fcc7cfaa0] c0000000004c6288 pnv_php_set_slot_power_state+0x118/0x440 [c000003fcc7cfb80] c0000000004c6a10 pnv_php_enable+0xc0/0x170 [c000003fcc7cfbd0] c0000000004c4d80 power_write_file+0xa0/0x190 [c000003fcc7cfc50] c0000000004be93c pci_slot_attr_store+0x3c/0x60 [c000003fcc7cfc70] c0000000002d3fd4 sysfs_kf_write+0x94/0xc0 [c000003fcc7cfcb0] c0000000002d2c30 kernfs_fop_write+0x180/0x260 [c000003fcc7cfd00] c000000000230fe0 __vfs_write+0x40/0x190 [c000003fcc7cfd90] c000000000232278 vfs_write+0xc8/0x240 [c000003fcc7cfde0] c000000000233d90 SyS_write+0x60/0x110 [c000003fcc7cfe30] c000000000009524 system_call+0x38/0x108 This avoids the kernel crash by marking @mynodes detached only when @mynodes is dereferencing valid device node in __unflatten_device_tree(). Fixes: 1d1bde550ea3 ("of: fdt: mark unflattened tree as detached") Reported-by: Meng Li <shlimeng@cn.ibm.com> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/fdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index 55f1b83911492..085c6389afd13 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -517,7 +517,7 @@ static void *__unflatten_device_tree(const void *blob, pr_warning("End of tree marker overwritten: %08x\n", be32_to_cpup(mem + size)); - if (detached) { + if (detached && mynodes) { of_node_set_flag(*mynodes, OF_DETACHED); pr_debug("unflattened tree is detached\n"); } From e55aeb6ba4e8cc3549bff1e75ea1d029324bce21 Mon Sep 17 00:00:00 2001 From: Philipp Zabel <p.zabel@pengutronix.de> Date: Tue, 9 Aug 2016 16:18:51 +0200 Subject: [PATCH 108/513] of/irq: Mark interrupt controllers as populated before initialisation That way the init callback may clear the flag again, in case of drivers split between early irq chip and a normal platform driver. Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Suggested-by: Rob Herring <robh@kernel.org> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de> Acked-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/irq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/of/irq.c b/drivers/of/irq.c index 89a71c6074fc9..a2e68f740edac 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -544,12 +544,15 @@ void __init of_irq_init(const struct of_device_id *matches) list_del(&desc->list); + of_node_set_flag(desc->dev, OF_POPULATED); + pr_debug("of_irq_init: init %s (%p), parent %p\n", desc->dev->full_name, desc->dev, desc->interrupt_parent); ret = desc->irq_init_cb(desc->dev, desc->interrupt_parent); if (ret) { + of_node_clear_flag(desc->dev, OF_POPULATED); kfree(desc); continue; } @@ -559,8 +562,6 @@ void __init of_irq_init(const struct of_device_id *matches) * its children can get processed in a subsequent pass. */ list_add_tail(&desc->list, &intc_parent_list); - - of_node_set_flag(desc->dev, OF_POPULATED); } /* Get the next pending parent that might have children */ From 255c0397bc402e3fcc8833c214f49b2108f04d1a Mon Sep 17 00:00:00 2001 From: Philipp Zabel <p.zabel@pengutronix.de> Date: Tue, 9 Aug 2016 16:18:52 +0200 Subject: [PATCH 109/513] ARM: imx6: mark GPC node as not populated after irq init to probe pm domain driver Since IRQCHIP_DECLARE now flags the GPC node as already populated, the GPC power domain driver is never probed unless we clear the flag again. Fixes: 15cc2ed6dcf9 ("of/irq: Mark initialised interrupt controllers as populated") Suggested-by: Rob Herring <robh@kernel.org> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de> Cc: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Rob Herring <robh@kernel.org> --- arch/arm/mach-imx/gpc.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index fd87205324710..0df062d8b2c94 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -271,6 +271,12 @@ static int __init imx_gpc_init(struct device_node *node, for (i = 0; i < IMR_NUM; i++) writel_relaxed(~0, gpc_base + GPC_IMR1 + i * 4); + /* + * Clear the OF_POPULATED flag set in of_irq_init so that + * later the GPC power domain driver will not be skipped. + */ + of_node_clear_flag(node, OF_POPULATED); + return 0; } IRQCHIP_DECLARE(imx_gpc, "fsl,imx6q-gpc", imx_gpc_init); From a5d0dc810abf3d6b241777467ee1d6efb02575fc Mon Sep 17 00:00:00 2001 From: Lance Richardson <lrichard@redhat.com> Date: Tue, 9 Aug 2016 15:29:42 -0400 Subject: [PATCH 110/513] vti: flush x-netns xfrm cache when vti interface is removed When executing the script included below, the netns delete operation hangs with the following message (repeated at 10 second intervals): kernel:unregister_netdevice: waiting for lo to become free. Usage count = 1 This occurs because a reference to the lo interface in the "secure" netns is still held by a dst entry in the xfrm bundle cache in the init netns. Address this problem by garbage collecting the tunnel netns flow cache when a cross-namespace vti interface receives a NETDEV_DOWN notification. A more detailed description of the problem scenario (referencing commands in the script below): (1) ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 The vti_test interface is created in the init namespace. vti_tunnel_init() attaches a struct ip_tunnel to the vti interface's netdev_priv(dev), setting the tunnel net to &init_net. (2) ip link set vti_test netns secure The vti_test interface is moved to the "secure" netns. Note that the associated struct ip_tunnel still has tunnel->net set to &init_net. (3) ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 The first packet sent using the vti device causes xfrm_lookup() to be called as follows: dst = xfrm_lookup(tunnel->net, skb_dst(skb), fl, NULL, 0); Note that tunnel->net is the init namespace, while skb_dst(skb) references the vti_test interface in the "secure" namespace. The returned dst references an interface in the init namespace. Also note that the first parameter to xfrm_lookup() determines which flow cache is used to store the computed xfrm bundle, so after xfrm_lookup() returns there will be a cached bundle in the init namespace flow cache with a dst referencing a device in the "secure" namespace. (4) ip netns del secure Kernel begins to delete the "secure" namespace. At some point the vti_test interface is deleted, at which point dst_ifdown() changes the dst->dev in the cached xfrm bundle flow from vti_test to lo (still in the "secure" namespace however). Since nothing has happened to cause the init namespace's flow cache to be garbage collected, this dst remains attached to the flow cache, so the kernel loops waiting for the last reference to lo to go away. <Begin script> ip link add br1 type bridge ip link set dev br1 up ip addr add dev br1 1.1.1.1/8 ip netns add secure ip link add vti_test type vti local 1.1.1.1 remote 1.1.1.2 key 1 ip link set vti_test netns secure ip netns exec secure ip link set vti_test up ip netns exec secure ip link s lo up ip netns exec secure ip addr add dev lo 192.168.100.1/24 ip netns exec secure ip route add 192.168.200.0/24 dev vti_test ip xfrm policy flush ip xfrm state flush ip xfrm policy add dir out tmpl src 1.1.1.1 dst 1.1.1.2 \ proto esp mode tunnel mark 1 ip xfrm policy add dir in tmpl src 1.1.1.2 dst 1.1.1.1 \ proto esp mode tunnel mark 1 ip xfrm state add src 1.1.1.1 dst 1.1.1.2 proto esp spi 1 \ mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 ip xfrm state add src 1.1.1.2 dst 1.1.1.1 proto esp spi 1 \ mode tunnel enc des3_ede 0x112233445566778811223344556677881122334455667788 ip netns exec secure ping -c 4 -i 0.02 -I 192.168.100.1 192.168.200.1 ip netns del secure <End script> Reported-by: Hangbin Liu <haliu@redhat.com> Reported-by: Jan Tluka <jtluka@redhat.com> Signed-off-by: Lance Richardson <lrichard@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv4/ip_vti.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index a917903d5e974..cc701fa70b123 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -557,6 +557,33 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; +static bool is_vti_tunnel(const struct net_device *dev) +{ + return dev->netdev_ops == &vti_netdev_ops; +} + +static int vti_device_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct ip_tunnel *tunnel = netdev_priv(dev); + + if (!is_vti_tunnel(dev)) + return NOTIFY_DONE; + + switch (event) { + case NETDEV_DOWN: + if (!net_eq(tunnel->net, dev_net(dev))) + xfrm_garbage_collect(tunnel->net); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block vti_notifier_block __read_mostly = { + .notifier_call = vti_device_event, +}; + static int __init vti_init(void) { const char *msg; @@ -564,6 +591,8 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); + register_netdevice_notifier(&vti_notifier_block); + msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -596,6 +625,7 @@ static int __init vti_init(void) xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: + unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -607,6 +637,7 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); + unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); From 64827adc9e14c8d17cf5f3d5a9ee47a42e95dd8c Mon Sep 17 00:00:00 2001 From: Chunming Zhou <David1.Zhou@amd.com> Date: Thu, 28 Jul 2016 17:20:32 +0800 Subject: [PATCH 111/513] drm/amdgpu: fix vm init error path Signed-off-by: Chunming Zhou <David1.Zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8e642fc48df45..80120fa4092c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1535,7 +1535,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) r = amd_sched_entity_init(&ring->sched, &vm->entity, rq, amdgpu_sched_jobs); if (r) - return r; + goto err; vm->page_directory_fence = NULL; @@ -1565,6 +1565,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm) error_free_sched_entity: amd_sched_entity_fini(&ring->sched, &vm->entity); +err: + drm_free_large(vm->page_tables); + return r; } From 254a49d5139a70828d652ef4faec40763993e403 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko <grygorii.strashko@ti.com> Date: Tue, 9 Aug 2016 15:09:44 +0300 Subject: [PATCH 112/513] drivers: net: cpsw: fix kmemleak false-positive reports for sk buffers Kmemleak reports following false positive memory leaks for each sk buffers allocated by CPSW (__netdev_alloc_skb_ip_align()) in cpsw_ndo_open() and cpsw_rx_handler(): unreferenced object 0xea915000 (size 2048): comm "systemd-network", pid 713, jiffies 4294938323 (age 102.180s) hex dump (first 32 bytes): 00 58 91 ea ff ff ff ff ff ff ff ff ff ff ff ff .X.............. ff ff ff ff ff ff fd 0f 00 00 00 00 00 00 00 00 ................ backtrace: [<c0108680>] __kmalloc_track_caller+0x1a4/0x230 [<c0529eb4>] __alloc_skb+0x68/0x16c [<c052c884>] __netdev_alloc_skb+0x40/0x104 [<bf1ad29c>] cpsw_ndo_open+0x374/0x670 [ti_cpsw] [<c053c3d4>] __dev_open+0xb0/0x114 [<c053c690>] __dev_change_flags+0x9c/0x14c [<c053c760>] dev_change_flags+0x20/0x50 [<c054bdcc>] do_setlink+0x2cc/0x78c [<c054c358>] rtnl_setlink+0xcc/0x100 [<c054b34c>] rtnetlink_rcv_msg+0x184/0x224 [<c056467c>] netlink_rcv_skb+0xa8/0xc4 [<c054b1c0>] rtnetlink_rcv+0x2c/0x34 [<c0564018>] netlink_unicast+0x16c/0x1f8 [<c0564498>] netlink_sendmsg+0x334/0x348 [<c052015c>] sock_sendmsg+0x1c/0x2c [<c05213e0>] SyS_sendto+0xc0/0xe8 unreferenced object 0xec861780 (size 192): comm "softirq", pid 0, jiffies 4294938759 (age 109.540s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 b0 5a ed 00 00 00 00 00 00 00 00 ......Z......... backtrace: [<c0107830>] kmem_cache_alloc+0x190/0x208 [<c052c768>] __build_skb+0x30/0x98 [<c052c8fc>] __netdev_alloc_skb+0xb8/0x104 [<bf1abc54>] cpsw_rx_handler+0x68/0x1e4 [ti_cpsw] [<bf11aa30>] __cpdma_chan_free+0xa8/0xc4 [davinci_cpdma] [<bf11ab98>] __cpdma_chan_process+0x14c/0x16c [davinci_cpdma] [<bf11abfc>] cpdma_chan_process+0x44/0x5c [davinci_cpdma] [<bf1adc78>] cpsw_rx_poll+0x1c/0x9c [ti_cpsw] [<c0539180>] net_rx_action+0x1f0/0x2ec [<c003881c>] __do_softirq+0x134/0x258 [<c0038a00>] do_softirq+0x68/0x70 [<c0038adc>] __local_bh_enable_ip+0xd4/0xe8 [<c0640994>] _raw_spin_unlock_bh+0x30/0x34 [<c05f4e9c>] igmp6_group_added+0x4c/0x1bc [<c05f6600>] ipv6_dev_mc_inc+0x398/0x434 [<c05dba74>] addrconf_dad_work+0x224/0x39c This happens because CPSW allocates SK buffers and then passes pointers on them in CPDMA where they stored in internal CPPI RAM (SRAM) which belongs to DEV MMIO space. Kmemleak does not scan IO memory and so reports memory leaks. Hence, mark allocated sk buffers as false positive explicitly. Cc: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index c51f34693eae4..f85d605e45606 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -734,6 +734,7 @@ static void cpsw_rx_handler(void *token, int len, int status) netif_receive_skb(skb); ndev->stats.rx_bytes += len; ndev->stats.rx_packets++; + kmemleak_not_leak(new_skb); } else { ndev->stats.rx_dropped++; new_skb = skb; @@ -1325,6 +1326,7 @@ static int cpsw_ndo_open(struct net_device *ndev) kfree_skb(skb); goto err_cleanup; } + kmemleak_not_leak(skb); } /* continue even if we didn't manage to submit all * receive descs From 0d039f337f45c48fb78b80cbf7b706b4de7f07ea Mon Sep 17 00:00:00 2001 From: Zhu Yanjun <zyjzyj2000@gmail.com> Date: Tue, 9 Aug 2016 21:36:04 +0800 Subject: [PATCH 113/513] bonding: fix the typo The message "803.ad" should be "802.3ad". Signed-off-by: Zhu Yanjun <zyjzyj2000@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1f276fa30ba68..217e8da0628ca 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -152,7 +152,7 @@ module_param(lacp_rate, charp, 0); MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " "0 for slow, 1 for fast"); module_param(ad_select, charp, 0); -MODULE_PARM_DESC(ad_select, "803.ad aggregation selection logic; " +MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; " "0 for stable (default), 1 for bandwidth, " "2 for count"); module_param(min_links, int, 0); From b9a019899f61acca18df5fb5e38a8fcdfea86fcd Mon Sep 17 00:00:00 2001 From: Nicolas Pitre <nicolas.pitre@linaro.org> Date: Thu, 28 Jul 2016 19:38:07 +0100 Subject: [PATCH 114/513] ARM: 8590/1: sanity_check_meminfo(): avoid overflow on vmalloc_limit To limit the amount of mapped low memory, we determine a physical address boundary based on the start of the vmalloc area using __pa(). Strictly speaking, the vmalloc area location is arbitrary and does not necessarily corresponds to a valid physical address. For example, if PAGE_OFFSET = 0x80000000 PHYS_OFFSET = 0x90000000 vmalloc_min = 0xf0000000 then __pa(vmalloc_min) overflows and returns a wrapped 0 when phys_addr_t is a 32-bit type. Then the code that follows determines that the entire physical memory is above that boundary and no low memory gets mapped at all: |[...] |Machine model: Freescale i.MX51 NA04 Board |Ignoring RAM at 0x90000000-0xb0000000 (!CONFIG_HIGHMEM) |Consider using a HIGHMEM enabled kernel. To avoid this problem let's make vmalloc_limit a 64-bit value all the time and determine that boundary explicitly without using __pa(). Reported-by: Emil Renner Berthing <kernel@esmil.dk> Signed-off-by: Nicolas Pitre <nico@linaro.org> Tested-by: Emil Renner Berthing <kernel@esmil.dk> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- arch/arm/mm/mmu.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 62f4d01941f71..12774c8e770c4 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1155,10 +1155,19 @@ void __init sanity_check_meminfo(void) { phys_addr_t memblock_limit = 0; int highmem = 0; - phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; + u64 vmalloc_limit; struct memblock_region *reg; bool should_use_highmem = false; + /* + * Let's use our own (unoptimized) equivalent of __pa() that is + * not affected by wrap-arounds when sizeof(phys_addr_t) == 4. + * The result is used as the upper bound on physical memory address + * and may itself be outside the valid range for which phys_addr_t + * and therefore __pa() is defined. + */ + vmalloc_limit = (u64)(uintptr_t)vmalloc_min - PAGE_OFFSET + PHYS_OFFSET; + for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; phys_addr_t block_end = reg->base + reg->size; @@ -1183,10 +1192,11 @@ void __init sanity_check_meminfo(void) if (reg->size > size_limit) { phys_addr_t overlap_size = reg->size - size_limit; - pr_notice("Truncating RAM at %pa-%pa to -%pa", - &block_start, &block_end, &vmalloc_limit); - memblock_remove(vmalloc_limit, overlap_size); + pr_notice("Truncating RAM at %pa-%pa", + &block_start, &block_end); block_end = vmalloc_limit; + pr_cont(" to -%pa", &block_end); + memblock_remove(vmalloc_limit, overlap_size); should_use_highmem = true; } } From 61444cde9170e256c238a02c9a4861930db04f5f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Thu, 28 Jul 2016 19:48:44 +0100 Subject: [PATCH 115/513] ARM: 8591/1: mm: use fully constructed struct pages for EFI pgd allocations The late_alloc() PTE allocation function used by create_mapping_late() does not call pgtable_page_ctor() on PTE pages it allocates, leaving the per-page spinlock uninitialized. Since generic page table manipulation code may assume that translation table pages that are not owned by init_mm are covered by fully constructed struct pages, the following crash may occur with the new UEFI memory attributes table code. efi: memattr: Processing EFI Memory Attributes table: efi: memattr: 0x0000ffa16000-0x0000ffa82fff [Runtime Code |RUN| | |XP| | | | | | | | ] Unable to handle kernel NULL pointer dereference at virtual address 00000010 pgd = c0204000 [00000010] *pgd=00000000 Internal error: Oops: 5 [#1] SMP ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc4-00063-g3882aa7b340b #361 Hardware name: Generic DT based system task: ed858000 ti: ed842000 task.ti: ed842000 PC is at __lock_acquire+0xa0/0x19a8 ... [<c038c830>] (__lock_acquire) from [<c038e4f8>] (lock_acquire+0x6c/0x88) [<c038e4f8>] (lock_acquire) from [<c0c06134>] (_raw_spin_lock+0x2c/0x3c) [<c0c06134>] (_raw_spin_lock) from [<c0410384>] (apply_to_page_range+0xe8/0x238) [<c0410384>] (apply_to_page_range) from [<c1205f34>] (efi_set_mapping_permissions+0x54/0x5c) [<c1205f34>] (efi_set_mapping_permissions) from [<c1247474>] (efi_memattr_apply_permissions+0x2b8/0x378) [<c1247474>] (efi_memattr_apply_permissions) from [<c1248258>] (arm_enable_runtime_services+0x1f0/0x22c) [<c1248258>] (arm_enable_runtime_services) from [<c0301f0c>] (do_one_initcall+0x44/0x174) [<c0301f0c>] (do_one_initcall) from [<c1200d10>] (kernel_init_freeable+0x90/0x1e8) [<c1200d10>] (kernel_init_freeable) from [<c0bff690>] (kernel_init+0x8/0x114) [<c0bff690>] (kernel_init) from [<c0307ed0>] (ret_from_fork+0x14/0x24) The crash is due to the fact that the UEFI page tables are not owned by init_mm, but are not covered by fully constructed struct pages. Given that the UEFI subsystem is currently the only user of create_mapping_late(), add an unconditional call to pgtable_page_ctor() to late_alloc(). Fixes: 9fc68b717c24 ("ARM/efi: Apply strict permissions for UEFI Runtime Services regions") Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- arch/arm/mm/mmu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 12774c8e770c4..6344913f0804a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -728,7 +728,8 @@ static void *__init late_alloc(unsigned long sz) { void *ptr = (void *)__get_free_pages(PGALLOC_GFP, get_order(sz)); - BUG_ON(!ptr); + if (!ptr || !pgtable_page_ctor(virt_to_page(ptr))) + BUG(); return ptr; } From 87eed3c74d7c65556f744230a90bf9556dd29146 Mon Sep 17 00:00:00 2001 From: Russell King <rmk+kernel@armlinux.org.uk> Date: Wed, 3 Aug 2016 10:33:35 +0100 Subject: [PATCH 116/513] ARM: fix address limit restoration for undefined instructions During boot, sometimes the kernel will test to see if an instruction causes an undefined instruction exception. Unfortunately, the exit path for these exceptions did not restore the address limit, which causes the rootfs mount code to fail. Fix the missing address limit restoration. Tested-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> --- arch/arm/kernel/entry-armv.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index bc5f50799d756..9f157e7c51e75 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -295,6 +295,7 @@ __und_svc_fault: bl __und_fault __und_svc_finish: + get_thread_info tsk ldr r5, [sp, #S_PSR] @ Get SVC cpsr svc_exit r5 @ return from exception UNWIND(.fnend ) From a96d3b7593a3eefab62dd930e5c99201c3678ee4 Mon Sep 17 00:00:00 2001 From: Sylwester Nawrocki <s.nawrocki@samsung.com> Date: Tue, 9 Aug 2016 18:00:08 +0200 Subject: [PATCH 117/513] dm9000: Fix irq trigger type setup on non-dt platforms Commit b5a099c67a1c36b "net: ethernet: davicom: fix devicetree irq resource" causes an interrupt storm after the ethernet interface is activated on S3C24XX platform (ARM non-dt), due to the interrupt trigger type not being set properly. It seems, after adding parsing of IRQ flags in commit 7085a7401ba54e92b "drivers: platform: parse IRQ flags from resources", there is no path for non-dt platforms where irq_set_type callback could be invoked when we don't pass the trigger type flags to the request_irq() call. In case of a board where the regression is seen the interrupt trigger type flags are passed through a platform device's resource and it is not currently handled properly without passing the irq trigger type flags to the request_irq() call. In case of OF an of_irq_get() call within platform_get_irq() function seems to be ensuring required irq_chip setup, but there is no equivalent code for non OF/ACPI platforms. This patch mostly restores irq trigger type setting code which has been removed in commit ("net: ethernet: davicom: fix devicetree irq resource"). Fixes: b5a099c67a1c36b913 ("net: ethernet: davicom: fix devicetree irq resource") Signed-off-by: Sylwester Nawrocki <s.nawrocki@samsung.com> Acked-by: Robert Jarzmik <robert.jarzmik@free.fr> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/davicom/dm9000.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 1471e16ba7199..f45385f5c6e58 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -1299,6 +1299,7 @@ static int dm9000_open(struct net_device *dev) { struct board_info *db = netdev_priv(dev); + unsigned int irq_flags = irq_get_trigger_type(dev->irq); if (netif_msg_ifup(db)) dev_dbg(db->dev, "enabling %s\n", dev->name); @@ -1306,9 +1307,11 @@ dm9000_open(struct net_device *dev) /* If there is no IRQ type specified, tell the user that this is a * problem */ - if (irq_get_trigger_type(dev->irq) == IRQF_TRIGGER_NONE) + if (irq_flags == IRQF_TRIGGER_NONE) dev_warn(db->dev, "WARNING: no IRQ resource flags set.\n"); + irq_flags |= IRQF_SHARED; + /* GPIO0 on pre-activate PHY, Reg 1F is not set by reset */ iow(db, DM9000_GPR, 0); /* REG_1F bit0 activate phyxcer */ mdelay(1); /* delay needs by DM9000B */ @@ -1316,8 +1319,7 @@ dm9000_open(struct net_device *dev) /* Initialize DM9000 board */ dm9000_init_dm9000(dev); - if (request_irq(dev->irq, dm9000_interrupt, IRQF_SHARED, - dev->name, dev)) + if (request_irq(dev->irq, dm9000_interrupt, irq_flags, dev->name, dev)) return -EAGAIN; /* Now that we have an interrupt handler hooked up we can unmask * our interrupts From 836384d2501dee87b1c437f3e268871980c857bf Mon Sep 17 00:00:00 2001 From: Wenyou Yang <wenyou.yang@atmel.com> Date: Fri, 5 Aug 2016 14:35:41 +0800 Subject: [PATCH 118/513] net: phy: micrel: Add specific suspend Disable all interrupts when suspend, they will be enabled when resume. Otherwise, the suspend/resume process will be blocked occasionally. Signed-off-by: Wenyou Yang <wenyou.yang@atmel.com> Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/phy/micrel.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 1882d9828c998..053e87905b944 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -677,17 +677,28 @@ static void kszphy_get_stats(struct phy_device *phydev, data[i] = kszphy_get_stat(phydev, i); } -static int kszphy_resume(struct phy_device *phydev) +static int kszphy_suspend(struct phy_device *phydev) { - int value; + /* Disable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_DISABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } - mutex_lock(&phydev->lock); + return genphy_suspend(phydev); +} - value = phy_read(phydev, MII_BMCR); - phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN); +static int kszphy_resume(struct phy_device *phydev) +{ + genphy_resume(phydev); - kszphy_config_intr(phydev); - mutex_unlock(&phydev->lock); + /* Enable PHY Interrupts */ + if (phy_interrupt_is_valid(phydev)) { + phydev->interrupts = PHY_INTERRUPT_ENABLED; + if (phydev->drv->config_intr) + phydev->drv->config_intr(phydev); + } return 0; } @@ -900,7 +911,7 @@ static struct phy_driver ksphy_driver[] = { .get_sset_count = kszphy_get_sset_count, .get_strings = kszphy_get_strings, .get_stats = kszphy_get_stats, - .suspend = genphy_suspend, + .suspend = kszphy_suspend, .resume = kszphy_resume, }, { .phy_id = PHY_ID_KSZ8061, From 7bb90c3715a496c650b2e879225030f9dd9cfafb Mon Sep 17 00:00:00 2001 From: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> Date: Thu, 4 Aug 2016 11:11:19 +0900 Subject: [PATCH 119/513] bridge: Fix problems around fdb entries pointing to the bridge device Adding fdb entries pointing to the bridge device uses fdb_insert(), which lacks various checks and does not respect added_by_user flag. As a result, some inconsistent behavior can happen: * Adding temporary entries succeeds but results in permanent entries. * Same goes for "dynamic" and "use". * Changing mac address of the bridge device causes deletion of user-added entries. * Replacing existing entries looks successful from userspace but actually not, regardless of NLM_F_EXCL flag. Use the same logic as other entries and fix them. Fixes: 3741873b4f73 ("bridge: allow adding of fdb entries pointing to the bridge device") Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/bridge/br_fdb.c | 52 +++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index c18080ad40857..cd620fab41b07 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -267,7 +267,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) /* If old entry was unassociated with any port, then delete it. */ f = __br_fdb_get(br, br->dev->dev_addr, 0); - if (f && f->is_local && !f->dst) + if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, 0); @@ -282,7 +282,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!br_vlan_should_use(v)) continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); - if (f && f->is_local && !f->dst) + if (f && f->is_local && !f->dst && !f->added_by_user) fdb_delete_local(br, NULL, f); fdb_insert(br, NULL, newaddr, v->vid); } @@ -764,20 +764,25 @@ int br_fdb_dump(struct sk_buff *skb, } /* Update (create or replace) forwarding database entry */ -static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, - __u16 state, __u16 flags, __u16 vid) +static int fdb_add_entry(struct net_bridge *br, struct net_bridge_port *source, + const __u8 *addr, __u16 state, __u16 flags, __u16 vid) { - struct net_bridge *br = source->br; struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)]; struct net_bridge_fdb_entry *fdb; bool modified = false; /* If the port cannot learn allow only local and static entries */ - if (!(state & NUD_PERMANENT) && !(state & NUD_NOARP) && + if (source && !(state & NUD_PERMANENT) && !(state & NUD_NOARP) && !(source->state == BR_STATE_LEARNING || source->state == BR_STATE_FORWARDING)) return -EPERM; + if (!source && !(state & NUD_PERMANENT)) { + pr_info("bridge: RTM_NEWNEIGH %s without NUD_PERMANENT\n", + br->dev->name); + return -EINVAL; + } + fdb = fdb_find(head, addr, vid); if (fdb == NULL) { if (!(flags & NLM_F_CREATE)) @@ -832,22 +837,28 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr, return 0; } -static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p, - const unsigned char *addr, u16 nlh_flags, u16 vid) +static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge *br, + struct net_bridge_port *p, const unsigned char *addr, + u16 nlh_flags, u16 vid) { int err = 0; if (ndm->ndm_flags & NTF_USE) { + if (!p) { + pr_info("bridge: RTM_NEWNEIGH %s with NTF_USE is not supported\n", + br->dev->name); + return -EINVAL; + } local_bh_disable(); rcu_read_lock(); - br_fdb_update(p->br, p, addr, vid, true); + br_fdb_update(br, p, addr, vid, true); rcu_read_unlock(); local_bh_enable(); } else { - spin_lock_bh(&p->br->hash_lock); - err = fdb_add_entry(p, addr, ndm->ndm_state, + spin_lock_bh(&br->hash_lock); + err = fdb_add_entry(br, p, addr, ndm->ndm_state, nlh_flags, vid); - spin_unlock_bh(&p->br->hash_lock); + spin_unlock_bh(&br->hash_lock); } return err; @@ -884,6 +895,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], dev->name); return -EINVAL; } + br = p->br; vg = nbp_vlan_group(p); } @@ -895,15 +907,9 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } /* VID was specified, so use it. */ - if (dev->priv_flags & IFF_EBRIDGE) - err = br_fdb_insert(br, NULL, addr, vid); - else - err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, vid); } else { - if (dev->priv_flags & IFF_EBRIDGE) - err = br_fdb_insert(br, NULL, addr, 0); - else - err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, 0); if (err || !vg || !vg->num_vlans) goto out; @@ -914,11 +920,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], list_for_each_entry(v, &vg->vlan_list, vlist) { if (!br_vlan_should_use(v)) continue; - if (dev->priv_flags & IFF_EBRIDGE) - err = br_fdb_insert(br, NULL, addr, v->vid); - else - err = __br_fdb_add(ndm, p, addr, nlh_flags, - v->vid); + err = __br_fdb_add(ndm, br, p, addr, nlh_flags, v->vid); if (err) goto out; } From 9ba333dc55cbb9523553df973adb3024d223e905 Mon Sep 17 00:00:00 2001 From: Stefan Haberland <sth@linux.vnet.ibm.com> Date: Mon, 8 Aug 2016 14:08:17 +0200 Subject: [PATCH 120/513] s390/dasd: fix hanging device after clear subchannel When a device is in a status where CIO has killed all I/O by itself the interrupt for a clear request may not contain an irb to determine the clear function. Instead it contains an error pointer -EIO. This was ignored by the DASD int_handler leading to a hanging device waiting for a clear interrupt. Handle -EIO error pointer correctly for requests that are clear pending and treat the clear as successful. Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com> Reviewed-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Cc: stable@vger.kernel.org Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/block/dasd.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 8973d34ce5ba0..fb1b56a714753 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1643,9 +1643,18 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, u8 *sense = NULL; int expires; + cqr = (struct dasd_ccw_req *) intparm; if (IS_ERR(irb)) { switch (PTR_ERR(irb)) { case -EIO: + if (cqr && cqr->status == DASD_CQR_CLEAR_PENDING) { + device = (struct dasd_device *) cqr->startdev; + cqr->status = DASD_CQR_CLEARED; + dasd_device_clear_timer(device); + wake_up(&dasd_flush_wq); + dasd_schedule_device_bh(device); + return; + } break; case -ETIMEDOUT: DBF_EVENT_DEVID(DBF_WARNING, cdev, "%s: " @@ -1661,7 +1670,6 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, } now = get_tod_clock(); - cqr = (struct dasd_ccw_req *) intparm; /* check for conditions that should be handled immediately */ if (!cqr || !(scsw_dstat(&irb->scsw) == (DEV_STAT_CHN_END | DEV_STAT_DEV_END) && From 4d81aaa53c2dea220ddf88e19c33033d6cf4f8cb Mon Sep 17 00:00:00 2001 From: Heiko Carstens <heiko.carstens@de.ibm.com> Date: Tue, 9 Aug 2016 12:26:28 +0200 Subject: [PATCH 121/513] s390/pageattr: handle numpages parameter correctly Both set_memory_ro() and set_memory_rw() will modify the page attributes of at least one page, even if the numpages parameter is zero. The author expected that calling these functions with numpages == zero would never happen. However with the new 444d13ff10fb ("modules: add ro_after_init support") feature this happens frequently. Therefore do the right thing and make these two functions return gracefully if nothing should be done. Fixes crashes on module load like this one: Unable to handle kernel pointer dereference in virtual kernel address space Failing address: 000003ff80008000 TEID: 000003ff80008407 Fault in home space mode while using kernel ASCE. AS:0000000000d18007 R3:00000001e6aa4007 S:00000001e6a10800 P:00000001e34ee21d Oops: 0004 ilc:3 [#1] SMP Modules linked in: x_tables CPU: 10 PID: 1 Comm: systemd Not tainted 4.7.0-11895-g3fa9045 #4 Hardware name: IBM 2964 N96 703 (LPAR) task: 00000001e9118000 task.stack: 00000001e9120000 Krnl PSW : 0704e00180000000 00000000005677f8 (rb_erase+0xf0/0x4d0) R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3 Krnl GPRS: 000003ff80008b20 000003ff80008b20 000003ff80008b70 0000000000b9d608 000003ff80008b20 0000000000000000 00000001e9123e88 000003ff80008950 00000001e485ab40 000003ff00000000 000003ff80008b00 00000001e4858480 0000000100000000 000003ff80008b68 00000000001d5998 00000001e9123c28 Krnl Code: 00000000005677e8: ec1801c3007c cgij %r1,0,8,567b6e 00000000005677ee: e32010100020 cg %r2,16(%r1) #00000000005677f4: a78401c2 brc 8,567b78 >00000000005677f8: e35010080024 stg %r5,8(%r1) 00000000005677fe: ec5801af007c cgij %r5,0,8,567b5c 0000000000567804: e30050000024 stg %r0,0(%r5) 000000000056780a: ebacf0680004 lmg %r10,%r12,104(%r15) 0000000000567810: 07fe bcr 15,%r14 Call Trace: ([<000003ff80008900>] __this_module+0x0/0xffffffffffffd700 [x_tables]) ([<0000000000264fd4>] do_init_module+0x12c/0x220) ([<00000000001da14a>] load_module+0x24e2/0x2b10) ([<00000000001da976>] SyS_finit_module+0xbe/0xd8) ([<0000000000803b26>] system_call+0xd6/0x264) Last Breaking-Event-Address: [<000000000056771a>] rb_erase+0x12/0x4d0 Kernel panic - not syncing: Fatal exception: panic_on_oops Reported-by: Christian Borntraeger <borntraeger@de.ibm.com> Reported-and-tested-by: Sebastian Ott <sebott@linux.vnet.ibm.com> Fixes: e8a97e42dc98 ("s390/pageattr: allow kernel page table splitting") Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/mm/pageattr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 7104ffb5a67f8..af7cf28cf97ed 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -252,6 +252,8 @@ static int change_page_attr(unsigned long addr, unsigned long end, int rc = -EINVAL; pgd_t *pgdp; + if (addr == end) + return 0; if (end >= MODULES_END) return -EINVAL; mutex_lock(&cpa_mutex); From 99e5e886a0a59df267ff6838f763b789847df982 Mon Sep 17 00:00:00 2001 From: Christoffer Dall <christoffer.dall@linaro.org> Date: Mon, 1 Aug 2016 20:25:33 +0200 Subject: [PATCH 122/513] KVM: arm64: vgic-its: Handle errors from vgic_add_lpi During low memory conditions, we could be dereferencing a NULL pointer when vgic_add_lpi fails to allocate memory. Consider for example this call sequence: vgic_its_cmd_handle_mapi itte->irq = vgic_add_lpi(kvm, lpi_nr); update_lpi_config(kvm, itte->irq, NULL); ret = kvm_read_guest(kvm, propbase + irq->intid ^^^^ kaboom? Instead, return an error pointer from vgic_add_lpi and check the return value from its single caller. Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/vgic/vgic-its.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1bd8adbeae260..d06330abd5e86 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -51,7 +51,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); if (!irq) - return NULL; + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&irq->lpi_list); INIT_LIST_HEAD(&irq->ap_list); @@ -522,7 +522,8 @@ static void its_free_itte(struct kvm *kvm, struct its_itte *itte) list_del(&itte->itte_list); /* This put matches the get in vgic_add_lpi. */ - vgic_put_irq(kvm, itte->irq); + if (itte->irq) + vgic_put_irq(kvm, itte->irq); kfree(itte); } @@ -713,10 +714,11 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_itte *itte; + struct its_itte *itte, *new_itte = NULL; struct its_device *device; struct its_collection *collection, *new_coll = NULL; int lpi_nr; + struct vgic_irq *irq; device = find_its_device(its, device_id); if (!device) @@ -747,13 +749,24 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, return -ENOMEM; } + new_itte = itte; itte->event_id = event_id; list_add_tail(&itte->itte_list, &device->itt_head); } itte->collection = collection; itte->lpi = lpi_nr; - itte->irq = vgic_add_lpi(kvm, lpi_nr); + + irq = vgic_add_lpi(kvm, lpi_nr); + if (IS_ERR(irq)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + if (new_itte) + its_free_itte(kvm, new_itte); + return PTR_ERR(irq); + } + itte->irq = irq; + update_affinity_itte(kvm, itte); /* From 2cccbb368a2bf27d98cf36bb424fbbf5572c0fab Mon Sep 17 00:00:00 2001 From: Christoffer Dall <christoffer.dall@linaro.org> Date: Tue, 2 Aug 2016 22:05:42 +0200 Subject: [PATCH 123/513] KVM: arm64: vgic-its: Plug race in vgic_put_irq Right now the following sequence of events can happen: 1. Thread X calls vgic_put_irq 2. Thread Y calls vgic_add_lpi 3. Thread Y gets lpi_list_lock 4. Thread X drops the ref count to 0 and blocks on lpi_list_lock 5. Thread Y finds the irq via the lpi_list_lock, raises the ref count to 1, and release the lpi_list_lock. 6. Thread X proceeds and frees the irq. Avoid this by holding the spinlock around the kref_put. Reviewed-by: Andre Przywara <andre.przywara@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/vgic/vgic.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e7aeac719e091..e83b7fe4baaed 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -117,17 +117,17 @@ static void vgic_irq_release(struct kref *ref) void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { - struct vgic_dist *dist; + struct vgic_dist *dist = &kvm->arch.vgic; if (irq->intid < VGIC_MIN_LPI) return; - if (!kref_put(&irq->refcount, vgic_irq_release)) + spin_lock(&dist->lpi_list_lock); + if (!kref_put(&irq->refcount, vgic_irq_release)) { + spin_unlock(&dist->lpi_list_lock); return; + }; - dist = &kvm->arch.vgic; - - spin_lock(&dist->lpi_list_lock); list_del(&irq->lpi_list); dist->lpi_list_count--; spin_unlock(&dist->lpi_list_lock); From c987ff0d3cb37d7fe1ddaa370811dfd9f73643fa Mon Sep 17 00:00:00 2001 From: Robin Murphy <robin.murphy@arm.com> Date: Tue, 9 Aug 2016 17:31:35 +0100 Subject: [PATCH 124/513] iommu/dma: Respect IOMMU aperture when allocating Where a device driver has set a 64-bit DMA mask to indicate the absence of addressing limitations, we still need to ensure that we don't allocate IOVAs beyond the actual input size of the IOMMU. The reported aperture is the most reliable way we have of inferring that input address size, so use that to enforce a hard upper limit where available. Fixes: 0db2e5d18f76 ("iommu: Implement common IOMMU ops for DMA mapping") Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Joerg Roedel <jroedel@suse.de> --- drivers/iommu/dma-iommu.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 7d991c81c4fa1..00c8a08d56e72 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -152,12 +152,15 @@ int dma_direction_to_prot(enum dma_data_direction dir, bool coherent) } } -static struct iova *__alloc_iova(struct iova_domain *iovad, size_t size, +static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size, dma_addr_t dma_limit) { + struct iova_domain *iovad = domain->iova_cookie; unsigned long shift = iova_shift(iovad); unsigned long length = iova_align(iovad, size) >> shift; + if (domain->geometry.force_aperture) + dma_limit = min(dma_limit, domain->geometry.aperture_end); /* * Enforce size-alignment to be safe - there could perhaps be an * attribute to control this per-device, or at least per-domain... @@ -315,7 +318,7 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, if (!pages) return NULL; - iova = __alloc_iova(iovad, size, dev->coherent_dma_mask); + iova = __alloc_iova(domain, size, dev->coherent_dma_mask); if (!iova) goto out_free_pages; @@ -387,7 +390,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; size_t iova_off = iova_offset(iovad, phys); size_t len = iova_align(iovad, size + iova_off); - struct iova *iova = __alloc_iova(iovad, len, dma_get_mask(dev)); + struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev)); if (!iova) return DMA_ERROR_CODE; @@ -539,7 +542,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, prev = s; } - iova = __alloc_iova(iovad, iova_len, dma_get_mask(dev)); + iova = __alloc_iova(domain, iova_len, dma_get_mask(dev)); if (!iova) goto out_restore_sg; From 4da449ae1df9cfeb167e78f250b250eff64bc65e Mon Sep 17 00:00:00 2001 From: Laura Garcia Liebana <nevola@gmail.com> Date: Tue, 9 Aug 2016 20:46:16 +0200 Subject: [PATCH 125/513] netfilter: nft_exthdr: Add size check on u8 nft_exthdr attributes Fix the direct assignment of offset and length attributes included in nft_exthdr structure from u32 data to u8. Signed-off-by: Laura Garcia Liebana <nevola@gmail.com> Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org> --- net/netfilter/nft_exthdr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index ba7aed13e1749..82c264e402781 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,6 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); + u32 offset, len; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -66,9 +67,15 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, tb[NFTA_EXTHDR_LEN] == NULL) return -EINVAL; + offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); + len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + + if (offset > U8_MAX || len > U8_MAX) + return -ERANGE; + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); - priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); - priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + priv->offset = offset; + priv->len = len; priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); return nft_validate_register_store(ctx, priv->dreg, NULL, From c7de573471832dff7d31f0c13b0f143d6f017799 Mon Sep 17 00:00:00 2001 From: Felipe Balbi <felipe.balbi@linux.intel.com> Date: Fri, 29 Jul 2016 03:17:58 +0300 Subject: [PATCH 126/513] usb: dwc3: gadget: increment request->actual once When using SG lists, we would end up setting request->actual to: num_mapped_sgs * (request->length - count) Let's fix that up by incrementing request->actual only once. Cc: <stable@vger.kernel.org> Reported-by: Brian E Rogers <brian.e.rogers@intel.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/gadget.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 8f8c2157910e6..863c306ebb694 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2013,14 +2013,6 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, s_pkt = 1; } - /* - * We assume here we will always receive the entire data block - * which we should receive. Meaning, if we program RX to - * receive 4K but we receive only 2K, we assume that's all we - * should receive and we simply bounce the request back to the - * gadget driver for further processing. - */ - req->request.actual += req->request.length - count; if (s_pkt) return 1; if ((event->status & DEPEVT_STATUS_LST) && @@ -2040,6 +2032,7 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_trb *trb; unsigned int slot; unsigned int i; + int count = 0; int ret; do { @@ -2054,6 +2047,8 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, slot++; slot %= DWC3_TRB_NUM; trb = &dep->trb_pool[slot]; + count += trb->size & DWC3_TRB_SIZE_MASK; + ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, event, status); @@ -2061,6 +2056,14 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, break; } while (++i < req->request.num_mapped_sgs); + /* + * We assume here we will always receive the entire data block + * which we should receive. Meaning, if we program RX to + * receive 4K but we receive only 2K, we assume that's all we + * should receive and we simply bounce the request back to the + * gadget driver for further processing. + */ + req->request.actual += req->request.length - count; dwc3_gadget_giveback(dep, req, status); if (ret) From e5b36ae2f851024d43c76e51f395d32ce8d769ce Mon Sep 17 00:00:00 2001 From: Felipe Balbi <felipe.balbi@linux.intel.com> Date: Wed, 10 Aug 2016 11:13:26 +0300 Subject: [PATCH 127/513] usb: dwc3: gadget: fix for short pkts during chained xfers DWC3 has one interesting peculiarity with chained transfers. If we setup N chained transfers and we get a short packet before processing all N TRBs, DWC3 will (conditionally) issue a XferComplete or XferInProgress event and retire all TRBs from the one which got a short packet to the last without clearing their HWO bits. This means SW must clear HWO bit manually, which this patch is doing. Cc: <stable@vger.kernel.org> Cc: Brian E Rogers <brian.e.rogers@intel.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/gadget.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 863c306ebb694..241f5c7b34bf6 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1955,7 +1955,8 @@ static void dwc3_gadget_free_endpoints(struct dwc3 *dwc) static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, struct dwc3_request *req, struct dwc3_trb *trb, - const struct dwc3_event_depevt *event, int status) + const struct dwc3_event_depevt *event, int status, + int chain) { unsigned int count; unsigned int s_pkt = 0; @@ -1964,6 +1965,19 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, dep->queued_requests--; trace_dwc3_complete_trb(dep, trb); + /* + * If we're in the middle of series of chained TRBs and we + * receive a short transfer along the way, DWC3 will skip + * through all TRBs including the last TRB in the chain (the + * where CHN bit is zero. DWC3 will also avoid clearing HWO + * bit and SW has to do it manually. + * + * We're going to do that here to avoid problems of HW trying + * to use bogus TRBs for transfers. + */ + if (chain && (trb->ctrl & DWC3_TRB_CTRL_HWO)) + trb->ctrl &= ~DWC3_TRB_CTRL_HWO; + if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) /* * We continue despite the error. There is not much we @@ -1975,6 +1989,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, */ dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n", dep->name, trb); + count = trb->size & DWC3_TRB_SIZE_MASK; if (dep->direction) { @@ -2036,10 +2051,13 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, int ret; do { + int chain; + req = next_request(&dep->started_list); if (WARN_ON_ONCE(!req)) return 1; + chain = req->request.num_mapped_sgs > 0; i = 0; do { slot = req->first_trb_index + i; @@ -2049,9 +2067,8 @@ static int dwc3_cleanup_done_reqs(struct dwc3 *dwc, struct dwc3_ep *dep, trb = &dep->trb_pool[slot]; count += trb->size & DWC3_TRB_SIZE_MASK; - ret = __dwc3_cleanup_done_trbs(dwc, dep, req, trb, - event, status); + event, status, chain); if (ret) break; } while (++i < req->request.num_mapped_sgs); From 7c705dfe2ebe731c8fd068623b6b4df2d3512c08 Mon Sep 17 00:00:00 2001 From: Felipe Balbi <felipe.balbi@linux.intel.com> Date: Wed, 10 Aug 2016 12:35:30 +0300 Subject: [PATCH 128/513] usb: dwc3: gadget: always cleanup all TRBs If we stop earlier due to short packet, we will not be able to giveback all TRBs. Cc: <stable@vger.kernel.org> Cc: Brian E Rogers <brian.e.rogers@intel.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 241f5c7b34bf6..eb820e4ab49d2 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -2028,7 +2028,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, s_pkt = 1; } - if (s_pkt) + if (s_pkt && !chain) return 1; if ((event->status & DEPEVT_STATUS_LST) && (trb->ctrl & (DWC3_TRB_CTRL_LST | From 4491ed5042f0419b22a4b08331adb54af31e2caa Mon Sep 17 00:00:00 2001 From: Heikki Krogerus <heikki.krogerus@linux.intel.com> Date: Fri, 1 Apr 2016 17:13:11 +0300 Subject: [PATCH 129/513] usb: dwc3: pci: add Intel Kabylake PCI ID Intel Kabylake PCH has the same DWC3 than Intel Sunrisepoint. Add the new ID to the supported devices. Cc: <stable@vger.kernel.org> Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/dwc3-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index 45f5a232d9fb6..2eb84d6c24a69 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -37,6 +37,7 @@ #define PCI_DEVICE_ID_INTEL_BXT 0x0aaa #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa +#define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 static const struct acpi_gpio_params reset_gpios = { 0, 0, false }; static const struct acpi_gpio_params cs_gpios = { 1, 0, false }; @@ -227,6 +228,7 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BXT_M), }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_APL), }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBP), }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_NL_USB), }, { } /* Terminating Entry */ }; From 6f4deb18a505523eb7925d646574a95f9e982ff7 Mon Sep 17 00:00:00 2001 From: Christophe Leroy <christophe.leroy@c-s.fr> Date: Mon, 8 Aug 2016 15:58:56 +0200 Subject: [PATCH 130/513] gpio: max730x: set gpiochip data pointer before using it gpiochip_add_data() has to be called before calling max7301_direction_input() [ 4.389883] Unable to handle kernel paging request for data at address 0x00000018 [ 4.397282] Faulting instruction address: 0xc01a8cbc [ 4.402023] Oops: Kernel access of bad area, sig: 11 [#1] [ 4.407331] PREEMPT CMPC885 [ 4.410131] CPU: 0 PID: 6 Comm: kworker/u2:0 Not tainted 4.5.0-gacdfdee #39 [ 4.418592] Workqueue: deferwq deferred_probe_work_func [ 4.423711] task: c60798b0 ti: c608a000 task.ti: c608a000 [ 4.429038] NIP: c01a8cbc LR: c01a8e24 CTR: c01ff028 [ 4.433953] REGS: c608bad0 TRAP: 0300 Not tainted (4.5.0-s3k-dev-gacdfdee-svn-dirty) [ 4.441847] MSR: 00009032 <EE,ME,IR,DR,RI> CR: 33039553 XER: a000f940 [ 4.448395] DAR: 00000018 DSISR: c0000000 GPR00: c01a8e24 c608bb80 c60798b0 c60d6f6c 00000004 00000002 07de2900 00700000 GPR08: 00000000 00000000 c608a000 00001032 35039553 00000000 c002f37c c6010b64 GPR16: c6010a48 c6010a14 c6010a00 00000000 c0450000 c0453568 c0453438 c050db14 GPR24: c62662bc 00000009 ffffffaa c60d6f5d 00000001 00000000 00000000 00000000 [ 4.480371] NIP [c01a8cbc] max7301_direction_input+0x20/0x9c [ 4.485951] LR [c01a8e24] __max730x_probe+0xec/0x138 [ 4.490812] Call Trace: [ 4.493268] [c608bba0] [c01a8e24] __max730x_probe+0xec/0x138 [ 4.498878] [c608bbc0] [c01cc368] driver_probe_device+0x190/0x38c [ 4.504895] [c608bbf0] [c01ca918] bus_for_each_drv+0x58/0xb4 [ 4.510489] [c608bc20] [c01cc04c] __device_attach+0x8c/0x110 [ 4.516082] [c608bc50] [c01cab80] bus_probe_device+0x34/0xb8 [ 4.521673] [c608bc70] [c01c96c8] device_add+0x3c0/0x598 [ 4.526925] [c608bcb0] [c0200f90] spi_add_device+0x114/0x160 [ 4.532512] [c608bcd0] [c02018d0] spi_register_master+0x6e0/0x7c8 [ 4.538537] [c608bd20] [c02019fc] devm_spi_register_master+0x44/0x8c [ 4.544824] [c608bd40] [c0203854] of_fsl_spi_probe+0x458/0x57c [ 4.550587] [c608bda0] [c01cd828] platform_drv_probe+0x30/0x74 [ 4.556366] [c608bdb0] [c01cc368] driver_probe_device+0x190/0x38c [ 4.562383] [c608bde0] [c01ca918] bus_for_each_drv+0x58/0xb4 [ 4.567977] [c608be10] [c01cc04c] __device_attach+0x8c/0x110 [ 4.573572] [c608be40] [c01cab80] bus_probe_device+0x34/0xb8 [ 4.579170] [c608be60] [c01cb9b4] deferred_probe_work_func+0xa4/0xc4 [ 4.585438] [c608be80] [c0029c04] process_one_work+0x22c/0x414 [ 4.591201] [c608bea0] [c002a100] worker_thread+0x314/0x5c0 [ 4.596722] [c608bef0] [c002f444] kthread+0xc8/0xcc [ 4.601538] [c608bf40] [c000af84] ret_from_kernel_thread+0x5c/0x64 [ 4.607596] Instruction dump: [ 4.610530] 7c0803a6 bba10014 38210020 4e800020 7c0802a6 9421ffe0 38840004 bf810010 [ 4.618188] 90010024 549cf0be 83c30010 549d0f7c <813e0018> 7fc3f378 7d3f2430 57ff07fe [ 4.626041] ---[ end trace 303adb021dd4caf2 ]--- Cc: stable@vger.kernel.org fixes: 5e45e01916197 ("gpio: max730x: use gpiochip data pointer") Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/gpio/gpio-max730x.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-max730x.c b/drivers/gpio/gpio-max730x.c index 08807368f0078..946d09195598f 100644 --- a/drivers/gpio/gpio-max730x.c +++ b/drivers/gpio/gpio-max730x.c @@ -192,6 +192,10 @@ int __max730x_probe(struct max7301 *ts) ts->chip.parent = dev; ts->chip.owner = THIS_MODULE; + ret = gpiochip_add_data(&ts->chip, ts); + if (ret) + goto exit_destroy; + /* * initialize pullups according to platform data and cache the * register values for later use. @@ -213,10 +217,6 @@ int __max730x_probe(struct max7301 *ts) } } - ret = gpiochip_add_data(&ts->chip, ts); - if (ret) - goto exit_destroy; - return ret; exit_destroy: From 5b236d0fde21d88351420ef0b9a6cb7aeeea0c54 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Tue, 26 Jul 2016 14:51:58 +0000 Subject: [PATCH 131/513] pinctrl: meson: Drop pinctrl_unregister for devm_ registered device It's not necessary to unregister pin controller device registered with devm_pinctrl_register() and using pinctrl_unregister() leads to a double free. This is detected by Coccinelle semantic patch. Fixes: e649f7ec8c5f ("pinctrl: meson: Use devm_pinctrl_register() for pinctrl registration") Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> Acked-by: Kevin Hilman <khilman@baylibre.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/meson/pinctrl-meson.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/pinctrl/meson/pinctrl-meson.c b/drivers/pinctrl/meson/pinctrl-meson.c index 11623c6b0cb30..44e69c963f5da 100644 --- a/drivers/pinctrl/meson/pinctrl-meson.c +++ b/drivers/pinctrl/meson/pinctrl-meson.c @@ -727,13 +727,7 @@ static int meson_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(pc->pcdev); } - ret = meson_gpiolib_register(pc); - if (ret) { - pinctrl_unregister(pc->pcdev); - return ret; - } - - return 0; + return meson_gpiolib_register(pc); } static struct platform_driver meson_pinctrl_driver = { From b120a3c286520ca465c54e8afa442be10560053b Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Tue, 26 Jul 2016 14:52:57 +0000 Subject: [PATCH 132/513] pinctrl: pistachio: Drop pinctrl_unregister for devm_ registered device It's not necessary to unregister pin controller device registered with devm_pinctrl_register() and using pinctrl_unregister() leads to a double free. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/pinctrl-pistachio.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/pinctrl/pinctrl-pistachio.c b/drivers/pinctrl/pinctrl-pistachio.c index c6d410ef8de08..7bad200bd67c5 100644 --- a/drivers/pinctrl/pinctrl-pistachio.c +++ b/drivers/pinctrl/pinctrl-pistachio.c @@ -1432,7 +1432,6 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) { struct pistachio_pinctrl *pctl; struct resource *res; - int ret; pctl = devm_kzalloc(&pdev->dev, sizeof(*pctl), GFP_KERNEL); if (!pctl) @@ -1464,13 +1463,7 @@ static int pistachio_pinctrl_probe(struct platform_device *pdev) return PTR_ERR(pctl->pctldev); } - ret = pistachio_gpio_register(pctl); - if (ret < 0) { - pinctrl_unregister(pctl->pctldev); - return ret; - } - - return 0; + return pistachio_gpio_register(pctl); } static struct platform_driver pistachio_pinctrl_driver = { From 8cf4345575a416e6856a6856ac6eaa31ad883126 Mon Sep 17 00:00:00 2001 From: "Agrawal, Nitesh-kumar" <Nitesh-kumar.Agrawal@amd.com> Date: Tue, 26 Jul 2016 08:28:19 +0000 Subject: [PATCH 133/513] pinctrl/amd: Remove the default de-bounce time MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the function amd_gpio_irq_enable() and amd_gpio_direction_input(), remove the code which is setting the default de-bounce time to 2.75ms. The driver code shall use the same settings as specified in BIOS. Any default assignment impacts TouchPad behaviour when the LevelTrig is set to EDGE FALLING. Cc: stable@vger.kernel.org Reviewed-by: Ken Xue <Ken.Xue@amd.com> Signed-off-by: Nitesh Kumar Agrawal <Nitesh-kumar.Agrawal@amd.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/pinctrl-amd.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c index 634b4d30eefb1..b3e772390ab66 100644 --- a/drivers/pinctrl/pinctrl-amd.c +++ b/drivers/pinctrl/pinctrl-amd.c @@ -43,17 +43,6 @@ static int amd_gpio_direction_input(struct gpio_chip *gc, unsigned offset) spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + offset * 4); - /* - * Suppose BIOS or Bootloader sets specific debounce for the - * GPIO. if not, set debounce to be 2.75ms and remove glitch. - */ - if ((pin_reg & DB_TMR_OUT_MASK) == 0) { - pin_reg |= 0xf; - pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); - pin_reg |= DB_TYPE_REMOVE_GLITCH << DB_CNTRL_OFF; - pin_reg &= ~BIT(DB_TMR_LARGE_OFF); - } - pin_reg &= ~BIT(OUTPUT_ENABLE_OFF); writel(pin_reg, gpio_dev->base + offset * 4); spin_unlock_irqrestore(&gpio_dev->lock, flags); @@ -326,15 +315,6 @@ static void amd_gpio_irq_enable(struct irq_data *d) spin_lock_irqsave(&gpio_dev->lock, flags); pin_reg = readl(gpio_dev->base + (d->hwirq)*4); - /* - Suppose BIOS or Bootloader sets specific debounce for the - GPIO. if not, set debounce to be 2.75ms. - */ - if ((pin_reg & DB_TMR_OUT_MASK) == 0) { - pin_reg |= 0xf; - pin_reg |= BIT(DB_TMR_OUT_UNIT_OFF); - pin_reg &= ~BIT(DB_TMR_LARGE_OFF); - } pin_reg |= BIT(INTERRUPT_ENABLE_OFF); pin_reg |= BIT(INTERRUPT_MASK_OFF); writel(pin_reg, gpio_dev->base + (d->hwirq)*4); From e95d0dfb229fffe96dc4c29054f6c7a7302e111e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Date: Tue, 2 Aug 2016 18:18:33 +0300 Subject: [PATCH 134/513] pinctrl: intel: merrifield: Add missed header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On x86 builds the absense of <linux/io.h> makes static analyzer and compiler unhappy which fails to build the driver. CHECK drivers/pinctrl/intel/pinctrl-merrifield.c drivers/pinctrl/intel/pinctrl-merrifield.c:518:17: error: undefined identifier 'readl' drivers/pinctrl/intel/pinctrl-merrifield.c:570:17: error: undefined identifier 'readl' drivers/pinctrl/intel/pinctrl-merrifield.c:575:9: error: undefined identifier 'writel' drivers/pinctrl/intel/pinctrl-merrifield.c:645:17: error: undefined identifier 'readl' CC drivers/pinctrl/intel/pinctrl-merrifield.o drivers/pinctrl/intel/pinctrl-merrifield.c: In function ‘mrfld_pin_dbg_show’: drivers/pinctrl/intel/pinctrl-merrifield.c:518:10: error: implicit declaration of function ‘readl’ [-Werror=implicit-function-declaration] value = readl(bufcfg); ^ drivers/pinctrl/intel/pinctrl-merrifield.c: In function ‘mrfld_update_bufcfg’: drivers/pinctrl/intel/pinctrl-merrifield.c:575:2: error: implicit declaration of function ‘writel’ [-Werror=implicit-function-declaration] writel(value, bufcfg); ^ cc1: some warnings being treated as errors Add header to the top of the module. Fixes: 4e80c8f50574 ("pinctrl: intel: Add Intel Merrifield pin controller support") Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/pinctrl/intel/pinctrl-merrifield.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pinctrl/intel/pinctrl-merrifield.c b/drivers/pinctrl/intel/pinctrl-merrifield.c index eb4990ff26ca5..7fb765642ee78 100644 --- a/drivers/pinctrl/intel/pinctrl-merrifield.c +++ b/drivers/pinctrl/intel/pinctrl-merrifield.c @@ -11,6 +11,7 @@ #include <linux/bitops.h> #include <linux/err.h> +#include <linux/io.h> #include <linux/module.h> #include <linux/platform_device.h> #include <linux/pinctrl/pinconf.h> From b06bc7ec4da26dccda040a65896ea22ee6638a7a Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Wed, 13 Jul 2016 09:10:31 +0100 Subject: [PATCH 135/513] drm/i915: Flush GT idle status upon reset Upon resetting the GPU, we force the engines to be idle by clearing their request lists. However, I neglected to clear the GT active status and so the next request following the reset was not marking the device as busy again. (We had to wait until any outstanding retire worker finally ran and cleared the active status.) Fixes: 67d97da34917 ("drm/i915: Only start retire worker when idle") Testcase: igt/pm_rps/reset Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1468397438-21226-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (cherry picked from commit b913b33c43db849778f044d4b9e74b167898a9bc) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 11681501d7b13..27ef10e4923e2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3169,6 +3169,8 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) } intel_ring_init_seqno(engine, engine->last_submitted_seqno); + + engine->i915->gt.active_engines &= ~intel_engine_flag(engine); } void i915_gem_reset(struct drm_device *dev) @@ -3186,6 +3188,7 @@ void i915_gem_reset(struct drm_device *dev) for_each_engine(engine, dev_priv) i915_gem_reset_engine_cleanup(engine); + mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0); i915_gem_context_reset(dev); From c2a4c5b75a717db8bbe1c2fad6473063a7d2f930 Mon Sep 17 00:00:00 2001 From: Jay Cornwall <jay@jcornwall.me> Date: Wed, 3 Aug 2016 13:39:42 -0500 Subject: [PATCH 136/513] drm/amdgpu: Fix memory trashing if UVD ring test fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fence_put was called on an uninitialized variable. Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Jay Cornwall <jay@jcornwall.me> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index b11f4e8868d76..4aa993d190189 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1187,7 +1187,8 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; } -error: fence_put(fence); + +error: return r; } From 18b43e89d295cc65151c505c643c98fb2c320e59 Mon Sep 17 00:00:00 2001 From: Daniel Mentz <danielmentz@google.com> Date: Thu, 4 Aug 2016 17:56:53 -0700 Subject: [PATCH 137/513] ARC: Call trace_hardirqs_on() before enabling irqs trace_hardirqs_on_caller() in lockdep.c expects to be called before, not after interrupts are actually enabled. The following comment in kernel/locking/lockdep.c substantiates this claim: " /* * We're enabling irqs and according to our state above irqs weren't * already enabled, yet we find the hardware thinks they are in fact * enabled.. someone messed up their IRQ state tracing. */ " An example can be found in include/linux/irqflags.h: do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) Without this change, we hit the following DEBUG_LOCKS_WARN_ON. [ 7.760000] ------------[ cut here ]------------ [ 7.760000] WARNING: CPU: 0 PID: 1 at kernel/locking/lockdep.c:2711 resume_user_mode_begin+0x48/0xf0 [ 7.770000] DEBUG_LOCKS_WARN_ON(!irqs_disabled()) [ 7.780000] Modules linked in: [ 7.780000] CPU: 0 PID: 1 Comm: init Not tainted 4.7.0-00003-gc668bb9-dirty #366 [ 7.790000] [ 7.790000] Stack Trace: [ 7.790000] arc_unwind_core.constprop.1+0xa4/0x118 [ 7.800000] warn_slowpath_fmt+0x72/0x158 [ 7.800000] resume_user_mode_begin+0x48/0xf0 [ 7.810000] ---[ end trace 6f6a7a8fae20d2f0 ]--- Signed-off-by: Daniel Mentz <danielmentz@google.com> Cc: <stable@vger.kernel.org> Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/asm/irqflags-compact.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/irqflags-compact.h b/arch/arc/include/asm/irqflags-compact.h index c1d36458bfb7a..4c6eed80cd8ba 100644 --- a/arch/arc/include/asm/irqflags-compact.h +++ b/arch/arc/include/asm/irqflags-compact.h @@ -188,10 +188,10 @@ static inline int arch_irqs_disabled(void) .endm .macro IRQ_ENABLE scratch + TRACE_ASM_IRQ_ENABLE lr \scratch, [status32] or \scratch, \scratch, (STATUS_E1_MASK | STATUS_E2_MASK) flag \scratch - TRACE_ASM_IRQ_ENABLE .endm #endif /* __ASSEMBLY__ */ From 45c3b08a117e2232fc8d7b9e849ead36386f4f96 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Mon, 13 Jun 2016 16:38:27 +0200 Subject: [PATCH 138/513] ARC: Elide redundant setup of DMA callbacks For resources shared by all cores such as SLC and IOC, only the master core needs to do any setups / enabling / disabling etc. Cc: <stable@vger.kernel.org> Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/mm/cache.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 5a294b2c3cb30..0b10efe3a6a75 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -921,6 +921,15 @@ void arc_cache_init(void) printk(arc_cache_mumbojumbo(0, str, sizeof(str))); + /* + * Only master CPU needs to execute rest of function: + * - Assume SMP so all cores will have same cache config so + * any geomtry checks will be same for all + * - IOC setup / dma callbacks only need to be setup once + */ + if (cpu) + return; + if (IS_ENABLED(CONFIG_ARC_HAS_ICACHE)) { struct cpuinfo_arc_cache *ic = &cpuinfo_arc700[cpu].icache; From fae82e59d2ff7d789878c9eb3a007ae7eb6641b2 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Sat, 16 Jul 2016 18:42:36 +0100 Subject: [PATCH 139/513] drm/i915: Handle ENOSPC after failing to insert a mappable node Even after adding individual page support for GTT mmaping, we can still fail to find any space within the mappable region, and drm_mm_insert_node() will then report ENOSPC. We have to then handle this error by using the shmem access to the pages. Fixes: b50a53715f09 ("drm/i915: Support for pread/pwrite ... objects") Testcase: igt/gem_concurrent_blit Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com Link: http://patchwork.freedesktop.org/patch/msgid/1468690956-23480-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> (cherry picked from commit d1054ee492a89b134fb0ac527b0714c277ae9c0f) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 27ef10e4923e2..aceaad0c2f18a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1306,7 +1306,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * textures). Fallback to the shmem path in that case. */ } - if (ret == -EFAULT) { + if (ret == -EFAULT || ret == -ENOSPC) { if (obj->phys_handle) ret = i915_gem_phys_pwrite(obj, args, file); else if (i915_gem_object_has_struct_page(obj)) From 5728e0de741a3581e9900c5cbee3a51425daf211 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Tue, 12 Jul 2016 15:59:28 +0300 Subject: [PATCH 140/513] drm/i915: Fix iboost setting for DDI with 4 lanes on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bspec says: "For DDIA with x4 capability (DDI_BUF_CTL DDIA Lane Capability Control = DDIA x4), the I_boost value has to be programmed in both tx_blnclegsctl_0 and tx_blnclegsctl_4." Currently we only program tx_blnclegsctl_0. Let's do the other one as well. Cc: stable@vger.kernel.org Fixes: f8896f5d58e6 ("drm/i915/skl: Buffer translation improvements") Cc: David Weinehall <david.weinehall@linux.intel.com> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-2-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall <david.weinehall@linux.intel.com> (cherry picked from commit a7d8dbc07c8f0faaace983b1e4c6e9495dd0aa75) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_reg.h | 1 + drivers/gpu/drm/i915/intel_ddi.c | 36 ++++++++++++++++++++------------ 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index ce14fe09d9623..5c06413ae0e61 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -1536,6 +1536,7 @@ enum skl_disp_power_wells { #define BALANCE_LEG_MASK(port) (7<<(8+3*(port))) /* Balance leg disable bits */ #define BALANCE_LEG_DISABLE_SHIFT 23 +#define BALANCE_LEG_DISABLE(port) (1 << (23 + (port))) /* * Fence registers diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index dd1d6fe122976..75354cd9bbab6 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -1379,14 +1379,30 @@ void intel_ddi_disable_pipe_clock(struct intel_crtc *intel_crtc) TRANS_CLK_SEL_DISABLED); } -static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, - u32 level, enum port port, int type) +static void _skl_ddi_set_iboost(struct drm_i915_private *dev_priv, + enum port port, uint8_t iboost) { + u32 tmp; + + tmp = I915_READ(DISPIO_CR_TX_BMU_CR0); + tmp &= ~(BALANCE_LEG_MASK(port) | BALANCE_LEG_DISABLE(port)); + if (iboost) + tmp |= iboost << BALANCE_LEG_SHIFT(port); + else + tmp |= BALANCE_LEG_DISABLE(port); + I915_WRITE(DISPIO_CR_TX_BMU_CR0, tmp); +} + +static void skl_ddi_set_iboost(struct intel_encoder *encoder, u32 level) +{ + struct intel_digital_port *intel_dig_port = enc_to_dig_port(&encoder->base); + struct drm_i915_private *dev_priv = to_i915(intel_dig_port->base.base.dev); + enum port port = intel_dig_port->port; + int type = encoder->type; const struct ddi_buf_trans *ddi_translations; uint8_t iboost; uint8_t dp_iboost, hdmi_iboost; int n_entries; - u32 reg; /* VBT may override standard boost values */ dp_iboost = dev_priv->vbt.ddi_port_info[port].dp_boost_level; @@ -1428,16 +1444,10 @@ static void skl_ddi_set_iboost(struct drm_i915_private *dev_priv, return; } - reg = I915_READ(DISPIO_CR_TX_BMU_CR0); - reg &= ~BALANCE_LEG_MASK(port); - reg &= ~(1 << (BALANCE_LEG_DISABLE_SHIFT + port)); - - if (iboost) - reg |= iboost << BALANCE_LEG_SHIFT(port); - else - reg |= 1 << (BALANCE_LEG_DISABLE_SHIFT + port); + _skl_ddi_set_iboost(dev_priv, port, iboost); - I915_WRITE(DISPIO_CR_TX_BMU_CR0, reg); + if (port == PORT_A && intel_dig_port->max_lanes == 4) + _skl_ddi_set_iboost(dev_priv, PORT_E, iboost); } static void bxt_ddi_vswing_sequence(struct drm_i915_private *dev_priv, @@ -1568,7 +1578,7 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp) level = translate_signal_level(signal_levels); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) - skl_ddi_set_iboost(dev_priv, level, port, encoder->type); + skl_ddi_set_iboost(encoder, level); else if (IS_BROXTON(dev_priv)) bxt_ddi_vswing_sequence(dev_priv, level, port, encoder->type); From 7ff9a55614712adf13ce7990565be0263e620f5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Tue, 12 Jul 2016 15:59:30 +0300 Subject: [PATCH 141/513] drm/i915: Program iboost settings for HDMI/DVI on SKL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently we fail to program the iboost stuff for HDMI/DVI. Let's remedy that. Cc: stable@vger.kernel.org Fixes: f8896f5d58e6 ("drm/i915/skl: Buffer translation improvements") Cc: David Weinehall <david.weinehall@linux.intel.com> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1468328376-6380-4-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: David Weinehall <david.weinehall@linux.intel.com> (cherry picked from commit 8d8bb85eb7d859aa9bbe36e588690a1d22af7608) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_ddi.c | 51 +++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 75354cd9bbab6..415c061e29698 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -388,6 +388,40 @@ skl_get_buf_trans_hdmi(struct drm_i915_private *dev_priv, int *n_entries) } } +static int intel_ddi_hdmi_level(struct drm_i915_private *dev_priv, enum port port) +{ + int n_hdmi_entries; + int hdmi_level; + int hdmi_default_entry; + + hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + + if (IS_BROXTON(dev_priv)) + return hdmi_level; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); + hdmi_default_entry = 8; + } else if (IS_BROADWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } else if (IS_HASWELL(dev_priv)) { + n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); + hdmi_default_entry = 6; + } else { + WARN(1, "ddi translation table missing\n"); + n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); + hdmi_default_entry = 7; + } + + /* Choose a good default if VBT is badly populated */ + if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || + hdmi_level >= n_hdmi_entries) + hdmi_level = hdmi_default_entry; + + return hdmi_level; +} + /* * Starting with Haswell, DDI port buffers must be programmed with correct * values in advance. The buffer values are different for FDI and DP modes, @@ -399,7 +433,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) { struct drm_i915_private *dev_priv = to_i915(encoder->base.dev); u32 iboost_bit = 0; - int i, n_hdmi_entries, n_dp_entries, n_edp_entries, hdmi_default_entry, + int i, n_hdmi_entries, n_dp_entries, n_edp_entries, size; int hdmi_level; enum port port; @@ -410,7 +444,7 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) const struct ddi_buf_trans *ddi_translations; port = intel_ddi_get_encoder_port(encoder); - hdmi_level = dev_priv->vbt.ddi_port_info[port].hdmi_level_shift; + hdmi_level = intel_ddi_hdmi_level(dev_priv, port); if (IS_BROXTON(dev_priv)) { if (encoder->type != INTEL_OUTPUT_HDMI) @@ -430,7 +464,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) skl_get_buf_trans_edp(dev_priv, &n_edp_entries); ddi_translations_hdmi = skl_get_buf_trans_hdmi(dev_priv, &n_hdmi_entries); - hdmi_default_entry = 8; /* If we're boosting the current, set bit 31 of trans1 */ if (dev_priv->vbt.ddi_port_info[port].hdmi_boost_level || dev_priv->vbt.ddi_port_info[port].dp_boost_level) @@ -456,7 +489,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } else if (IS_HASWELL(dev_priv)) { ddi_translations_fdi = hsw_ddi_translations_fdi; ddi_translations_dp = hsw_ddi_translations_dp; @@ -464,7 +496,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) ddi_translations_hdmi = hsw_ddi_translations_hdmi; n_dp_entries = n_edp_entries = ARRAY_SIZE(hsw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(hsw_ddi_translations_hdmi); - hdmi_default_entry = 6; } else { WARN(1, "ddi translation table missing\n"); ddi_translations_edp = bdw_ddi_translations_dp; @@ -474,7 +505,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) n_edp_entries = ARRAY_SIZE(bdw_ddi_translations_edp); n_dp_entries = ARRAY_SIZE(bdw_ddi_translations_dp); n_hdmi_entries = ARRAY_SIZE(bdw_ddi_translations_hdmi); - hdmi_default_entry = 7; } switch (encoder->type) { @@ -505,11 +535,6 @@ void intel_prepare_ddi_buffer(struct intel_encoder *encoder) if (encoder->type != INTEL_OUTPUT_HDMI) return; - /* Choose a good default if VBT is badly populated */ - if (hdmi_level == HDMI_LEVEL_SHIFT_UNKNOWN || - hdmi_level >= n_hdmi_entries) - hdmi_level = hdmi_default_entry; - /* Entry 9 is for HDMI: */ I915_WRITE(DDI_BUF_TRANS_LO(port, i), ddi_translations_hdmi[hdmi_level].trans1 | iboost_bit); @@ -1647,6 +1672,10 @@ static void intel_ddi_pre_enable(struct intel_encoder *intel_encoder) intel_dp_stop_link_train(intel_dp); } else if (type == INTEL_OUTPUT_HDMI) { struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); + int level = intel_ddi_hdmi_level(dev_priv, port); + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + skl_ddi_set_iboost(intel_encoder, level); intel_hdmi->set_infoframes(encoder, crtc->config->has_hdmi_sink, From d8b6161f7294b63180da443c9ff48c7efce182eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Tue, 2 Aug 2016 14:07:33 +0300 Subject: [PATCH 142/513] drm/i915: Clean up the extra RPM ref on CHV with i915.enable_rc6=0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the CHV early bail out from intel_cleanup_gt_powersave() so that we'll clean up the extra RPM reference held due to i915.enable_rc6=0. Cc: Imre Deak <imre.deak@intel.com> Fixes: b268c699aca5 ("drm/i915: refactor RPM disabling due to RC6 being disabled") Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470136053-23276-1-git-send-email-ville.syrjala@linux.intel.com Reviewed-by: Imre Deak <imre.deak@intel.com> (cherry picked from commit 8dac1e1f2068321fb4b7062d3c5408971f7a7e35) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f4f3fcc8b3bec..1ff5c63f17284 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6573,9 +6573,7 @@ void intel_init_gt_powersave(struct drm_i915_private *dev_priv) void intel_cleanup_gt_powersave(struct drm_i915_private *dev_priv) { - if (IS_CHERRYVIEW(dev_priv)) - return; - else if (IS_VALLEYVIEW(dev_priv)) + if (IS_VALLEYVIEW(dev_priv)) valleyview_cleanup_gt_powersave(dev_priv); if (!i915.enable_rc6) From 0a491b96aa59a7232f6c1a81414aa57fb8de8594 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Thu, 4 Aug 2016 08:43:53 +0100 Subject: [PATCH 143/513] drm/i915/fbc: FBC causes display flicker when VT-d is enabled on Skylake MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Erratum SKL075: Display Flicker May Occur When Both VT-d And FBC Are Enabled "Display flickering may occur when both FBC (Frame Buffer Compression) and VT - d (Intel® Virtualization Technology for Directed I/O) are enabled and in use by the display controller." Ville found the w/a name in the database: WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt and also dug out that it affects Broxton. v2: Log when the quirk is applied. v3: Ensure i915.enable_fbc is false when !HAS_FBC() v4: Fix function name after rebase v5: Add Broxton to the workaround Note for backporting to stable, we need to add #define mkwrite_device_info(ptr) \ ((struct intel_device_info *)INTEL_INFO(ptr)) Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Paulo Zanoni <paulo.r.zanoni@intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1470296633-20388-1-git-send-email-chris@chris-wilson.co.uk (cherry picked from commit 36dbc4d76918d7557b686f807106dcc799174b12) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_fbc.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 6a7ad3ed14632..3836a1c797141 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -1230,12 +1230,29 @@ static int intel_sanitize_fbc_option(struct drm_i915_private *dev_priv) if (i915.enable_fbc >= 0) return !!i915.enable_fbc; + if (!HAS_FBC(dev_priv)) + return 0; + if (IS_BROADWELL(dev_priv)) return 1; return 0; } +static bool need_fbc_vtd_wa(struct drm_i915_private *dev_priv) +{ +#ifdef CONFIG_INTEL_IOMMU + /* WaFbcTurnOffFbcWhenHyperVisorIsUsed:skl,bxt */ + if (intel_iommu_gfx_mapped && + (IS_SKYLAKE(dev_priv) || IS_BROXTON(dev_priv))) { + DRM_INFO("Disabling framebuffer compression (FBC) to prevent screen flicker with VT-d enabled\n"); + return true; + } +#endif + + return false; +} + /** * intel_fbc_init - Initialize FBC * @dev_priv: the i915 device @@ -1253,6 +1270,9 @@ void intel_fbc_init(struct drm_i915_private *dev_priv) fbc->active = false; fbc->work.scheduled = false; + if (need_fbc_vtd_wa(dev_priv)) + mkwrite_device_info(dev_priv)->has_fbc = false; + i915.enable_fbc = intel_sanitize_fbc_option(dev_priv); DRM_DEBUG_KMS("Sanitized enable_fbc value: %d\n", i915.enable_fbc); From 2ca17b87e85ffada1e3da941d5c1c9ee4f56ca9f Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Thu, 4 Aug 2016 09:09:53 +0100 Subject: [PATCH 144/513] drm/i915: Add missing rpm wakelock to GGTT pread Joonas spotted a discrepancy between the pwrite and pread ioctls, in that pwrite takes the rpm wakelock around its GGTT access, The wakelock is required in order for the GTT to function. In disregard for the current convention, we take the rpm wakelock around the access itself rather than around the struct_mutex as the nesting is not strictly required and such ordering will one day be fixed by explicitly noting the barrier dependencies between the GGTT and rpm. Fixes: b50a53715f09 ("drm/i915: Support for pread/pwrite ...") Reported-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Cc: drm-intel-fixes@lists.freedesktop.org Link: http://patchwork.freedesktop.org/patch/msgid/1470298193-21765-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (cherry picked from commit 1dd5b6f2020389e75bb3d269c038497f065e68c9) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_gem.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index aceaad0c2f18a..a77ce9983f69c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -879,9 +879,12 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, ret = i915_gem_shmem_pread(dev, obj, args, file); /* pread for non shmem backed objects */ - if (ret == -EFAULT || ret == -ENODEV) + if (ret == -EFAULT || ret == -ENODEV) { + intel_runtime_pm_get(to_i915(dev)); ret = i915_gem_gtt_pread(dev, obj, args->size, args->offset, args->data_ptr); + intel_runtime_pm_put(to_i915(dev)); + } out: drm_gem_object_unreference(&obj->base); From 3cffb0a44750726cdc1cc07399efe3cbb45e028b Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Wed, 3 Aug 2016 17:09:00 +0100 Subject: [PATCH 145/513] drm/i915: Acquire audio powerwell for HD-Audio registers On Haswell/Broadwell, the HD-Audio block is inside the HDMI/display power well and so the sna-hda audio codec acquires the display power well while it is operational. However, Skylake separates the powerwells again, but yet we still need the audio powerwell to setup the registers. (But then the hardware uses those registers even while powered off???) Acquiring the powerwell around setting the chicken bits when setting up the audio channel does at least silence the WARNs from touching our registers whilst unpowered. We silence our own test cases, but maybe there is a latent bug in using the audio channel? v2: Grab both rpm wakelock and audio wakelock Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96214 Fixes: 03b135cebc47 "ALSA: hda - remove dependency on i915 power well for SKL") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Libin Yang <libin.yang@intel.com> Cc: Takashi Iwai <tiwai@suse.de> Cc: Marius Vlad <marius.c.vlad@intel.com> Tested-by: Hans de Goede <hdegoede@redhat.com> Cc: stable@vger.kernel.org Link: http://patchwork.freedesktop.org/patch/msgid/1470240540-29004-1-git-send-email-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> (cherry picked from commit d838a110f0b310d408ebe6b5a97e36ec27555ebf) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_audio.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c index 6700a7be7f787..d32f586f9c057 100644 --- a/drivers/gpu/drm/i915/intel_audio.c +++ b/drivers/gpu/drm/i915/intel_audio.c @@ -600,6 +600,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, if (!IS_SKYLAKE(dev_priv) && !IS_KABYLAKE(dev_priv)) return; + i915_audio_component_get_power(dev); + /* * Enable/disable generating the codec wake signal, overriding the * internal logic to generate the codec wake to controller. @@ -615,6 +617,8 @@ static void i915_audio_component_codec_wake_override(struct device *dev, I915_WRITE(HSW_AUD_CHICKENBIT, tmp); usleep_range(1000, 1500); } + + i915_audio_component_put_power(dev); } /* Get CDCLK in kHz */ @@ -648,6 +652,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, !IS_HASWELL(dev_priv)) return 0; + i915_audio_component_get_power(dev); mutex_lock(&dev_priv->av_mutex); /* 1. get the pipe */ intel_encoder = dev_priv->dig_port_map[port]; @@ -698,6 +703,7 @@ static int i915_audio_component_sync_audio_rate(struct device *dev, unlock: mutex_unlock(&dev_priv->av_mutex); + i915_audio_component_put_power(dev); return err; } From 58e311b09c319183254d9220c50a533e7157c9ab Mon Sep 17 00:00:00 2001 From: Matt Roper <matthew.d.roper@intel.com> Date: Thu, 4 Aug 2016 14:08:00 -0700 Subject: [PATCH 146/513] drm/i915/gen9: Give one extra block per line for SKL plane WM calculations The bspec was updated a couple weeks ago to add an extra block per line to plane watermark calculations for linear pixel formats. Bspec update 115327 description: "Gen9+ - Updated the plane blocks per line calculation for linear cases. Adds +1 for all linear cases to handle the non-block aligned stride cases." Cc: Lyude <cpaul@redhat.com> Cc: drm-intel-fixes@lists.freedesktop.org Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470344880-27394-1-git-send-email-matthew.d.roper@intel.com Reviewed-by: Lyude <cpaul@redhat.com> (cherry picked from commit 055c3ff69d440928964228455ec29b071258d5fa) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 1ff5c63f17284..3c7b382d57885 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3344,6 +3344,8 @@ static uint32_t skl_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal, plane_bytes_per_line *= 4; plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line /= 4; + } else if (tiling == DRM_FORMAT_MOD_NONE) { + plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; } else { plane_blocks_per_line = DIV_ROUND_UP(plane_bytes_per_line, 512); } From 85bf59d188721dca37bc8276457e68351213f38f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com> Date: Tue, 2 Aug 2016 15:21:57 +0300 Subject: [PATCH 147/513] drm/i915: Fix iboost setting for SKL Y/U DP DDI buffer translation entry 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The spec was recently fixed to have the correct iboost setting for the SKL Y/U DP DDI buffer translation table entry 2. Update our tables to match. Cc: David Weinehall <david.weinehall@linux.intel.com> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470140517-13011-1-git-send-email-ville.syrjala@linux.intel.com Cc: stable@vger.kernel.org Reviewed-by: David Weinehall <david.weinehall@linux.intel.com> (cherry picked from commit 5ac9056753e79ac5ad1ccc3c99b311688e46e8c9) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_ddi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 415c061e29698..1a7efac65fd5d 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -145,7 +145,7 @@ static const struct ddi_buf_trans skl_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { { 0x0000201B, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x1 }, { 0x80009010, 0x000000C0, 0x1 }, { 0x0000201B, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x1 }, @@ -158,7 +158,7 @@ static const struct ddi_buf_trans skl_u_ddi_translations_dp[] = { static const struct ddi_buf_trans skl_y_ddi_translations_dp[] = { { 0x00000018, 0x000000A2, 0x0 }, { 0x00005012, 0x00000088, 0x0 }, - { 0x80007011, 0x000000CD, 0x0 }, + { 0x80007011, 0x000000CD, 0x3 }, { 0x80009010, 0x000000C0, 0x3 }, { 0x00000018, 0x0000009D, 0x0 }, { 0x80005012, 0x000000C0, 0x3 }, From bf74c93cd7696793eceadaf02b176500523e0b93 Mon Sep 17 00:00:00 2001 From: Matthew Auld <matthew.auld@intel.com> Date: Tue, 2 Aug 2016 09:36:53 +0100 Subject: [PATCH 148/513] drm/i915: fix WaInsertDummyPushConstPs As pointed out by Chris Harris, we are using the wrong WA name, it should in fact be WaToEnableHwFixForPushConstHWBug, also it should be applied from C0 onwards for both BXT and KBL. Fixes: 7b9005cd45f3 ("drm/i915: Add WaInsertDummyPushConstP for bxt and kbl") Cc: Chris Harris <chris.harris@intel.com> Cc: Mika Kuoppala <mika.kuoppala@intel.com> Reported-by: Chris Harris <chris.harris@intel.com> Signed-off-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Arun Siluvery <arun.siluvery@linux.intel.com> Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470127013-29653-1-git-send-email-matthew.auld@intel.com (cherry picked from commit 575e3ccbce4582395d57612b289178bad4af3be8) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_ringbuffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index cca7792f26d50..1d3161bbea24e 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -1178,8 +1178,8 @@ static int bxt_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG1, L3_GENERAL_PRIO_CREDITS(62) | L3_HIGH_PRIO_CREDITS(2)); - /* WaInsertDummyPushConstPs:bxt */ - if (IS_BXT_REVID(dev_priv, 0, BXT_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:bxt */ + if (IS_BXT_REVID(dev_priv, BXT_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); @@ -1222,8 +1222,8 @@ static int kbl_init_workarounds(struct intel_engine_cs *engine) I915_WRITE(GEN8_L3SQCREG4, I915_READ(GEN8_L3SQCREG4) | GEN8_LQSC_RO_PERF_DIS); - /* WaInsertDummyPushConstPs:kbl */ - if (IS_KBL_REVID(dev_priv, 0, KBL_REVID_B0)) + /* WaToEnableHwFixForPushConstHWBug:kbl */ + if (IS_KBL_REVID(dev_priv, KBL_REVID_C0, REVID_FOREVER)) WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); From 3871f42a57efcdc6a9da751a8cb6fa196c212289 Mon Sep 17 00:00:00 2001 From: Matthew Auld <matthew.auld@intel.com> Date: Fri, 5 Aug 2016 19:04:40 +0100 Subject: [PATCH 149/513] drm/i915: fix aliasing_ppgtt leak In i915_ggtt_cleanup_hw we need to remember to free aliasing_ppgtt. This fixes the following kmemleak message: unreferenced object 0xffff880213cca000 (size 8192): comm "modprobe", pid 1298, jiffies 4294745402 (age 703.930s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<ffffffff817c808e>] kmemleak_alloc+0x4e/0xb0 [<ffffffff8121f9c2>] kmem_cache_alloc_trace+0x142/0x1d0 [<ffffffffa06d11ef>] i915_gem_init_ggtt+0x10f/0x210 [i915] [<ffffffffa06d71bb>] i915_gem_init+0x5b/0xd0 [i915] [<ffffffffa069749a>] i915_driver_load+0x97a/0x1460 [i915] [<ffffffffa06a26ef>] i915_pci_probe+0x4f/0x70 [i915] [<ffffffff81423015>] local_pci_probe+0x45/0xa0 [<ffffffff81424463>] pci_device_probe+0x103/0x150 [<ffffffff81515e6c>] driver_probe_device+0x22c/0x440 [<ffffffff81516151>] __driver_attach+0xd1/0xf0 [<ffffffff8151379c>] bus_for_each_dev+0x6c/0xc0 [<ffffffff8151555e>] driver_attach+0x1e/0x20 [<ffffffff81514fa3>] bus_add_driver+0x1c3/0x280 [<ffffffff81516aa0>] driver_register+0x60/0xe0 [<ffffffff8142297c>] __pci_register_driver+0x4c/0x50 [<ffffffffa013605b>] 0xffffffffa013605b Signed-off-by: Matthew Auld <matthew.auld@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Fixes: b18b6bde300e ("drm/i915/bdw: Free PPGTT struct") Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1470420280-21417-1-git-send-email-matthew.auld@intel.com (cherry picked from commit cb7f27601c81a1e0454e9461e96f65b31fafbea0) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_gem_gtt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 10f1e32767e60..7a30af79d7995 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2873,6 +2873,7 @@ void i915_ggtt_cleanup_hw(struct drm_device *dev) struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt; ppgtt->base.cleanup(&ppgtt->base); + kfree(ppgtt); } i915_gem_cleanup_stolen(dev); From dfa2997055659b4e706a85fba481050cc7e7ad82 Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Date: Fri, 5 Aug 2016 23:28:27 +0300 Subject: [PATCH 150/513] drm/i915: Fix modeset handling during gpu reset, v5. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function would call drm_modeset_lock_all, while the suspend/resume functions already have their own locking. Fix this by factoring out __intel_display_resume, and calling the atomic helpers for duplicating atomic state and disabling all crtc's during suspend. Changes since v1: - Deal with -EDEADLK right after lock_all and clean up calls to hw readout. - Always take all modeset locks so updates during gpu reset are blocked. Changes since v2: - Fix deadlock in intel_update_primary_planes. - Move WARN_ON(EDEADLK) to __intel_display_resume. - pctx -> ctx - only call __intel_display_resume on success in intel_display_resume. Changes since v3: - Rebase on top of dev_priv -> dev change. - Use drm_modeset_lock_all_ctx instead of drm_modeset_lock_all. Changes since v4 [by vsyrjala]: - Deal with skip_intermediate_wm - Update comment w.r.t. mode_config.mutex vs. ->detect() - Rebase due to INTEL_GEN() etc. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Fixes: e2c8b8701e2d ("drm/i915: Use atomic helpers for suspend, v2.") Cc: stable@vger.kernel.org Tested-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1470428910-12125-2-git-send-email-ville.syrjala@linux.intel.com (cherry picked from commit 739748939974791b84629a8790527a16f76873a4) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/intel_display.c | 170 +++++++++++++++++---------- 2 files changed, 111 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 21f939074abc8..20fe9d52e2562 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1854,6 +1854,7 @@ struct drm_i915_private { enum modeset_restore modeset_restore; struct mutex modeset_restore_lock; struct drm_atomic_state *modeset_restore_state; + struct drm_modeset_acquire_ctx reset_ctx; struct list_head vm_list; /* Global list of all address spaces */ struct i915_ggtt ggtt; /* VM representing the global address space */ diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index c457eed76f1f7..fbb8574fef662 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -3093,40 +3093,110 @@ static void intel_update_primary_planes(struct drm_device *dev) for_each_crtc(dev, crtc) { struct intel_plane *plane = to_intel_plane(crtc->primary); - struct intel_plane_state *plane_state; - - drm_modeset_lock_crtc(crtc, &plane->base); - plane_state = to_intel_plane_state(plane->base.state); + struct intel_plane_state *plane_state = + to_intel_plane_state(plane->base.state); if (plane_state->visible) plane->update_plane(&plane->base, to_intel_crtc_state(crtc->state), plane_state); + } +} + +static int +__intel_display_resume(struct drm_device *dev, + struct drm_atomic_state *state) +{ + struct drm_crtc_state *crtc_state; + struct drm_crtc *crtc; + int i, ret; + + intel_modeset_setup_hw_state(dev); + i915_redisable_vga(dev); - drm_modeset_unlock_crtc(crtc); + if (!state) + return 0; + + for_each_crtc_in_state(state, crtc, crtc_state, i) { + /* + * Force recalculation even if we restore + * current state. With fast modeset this may not result + * in a modeset when the state is compatible. + */ + crtc_state->mode_changed = true; } + + /* ignore any reset values/BIOS leftovers in the WM registers */ + to_intel_atomic_state(state)->skip_intermediate_wm = true; + + ret = drm_atomic_commit(state); + + WARN_ON(ret == -EDEADLK); + return ret; } void intel_prepare_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state; + int ret; + /* no reset support for gen2 */ if (IS_GEN2(dev_priv)) return; - /* reset doesn't touch the display */ + /* + * Need mode_config.mutex so that we don't + * trample ongoing ->detect() and whatnot. + */ + mutex_lock(&dev->mode_config.mutex); + drm_modeset_acquire_init(ctx, 0); + while (1) { + ret = drm_modeset_lock_all_ctx(dev, ctx); + if (ret != -EDEADLK) + break; + + drm_modeset_backoff(ctx); + } + + /* reset doesn't touch the display, but flips might get nuked anyway, */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) return; - drm_modeset_lock_all(&dev_priv->drm); /* * Disabling the crtcs gracefully seems nicer. Also the * g33 docs say we should at least disable all the planes. */ - intel_display_suspend(&dev_priv->drm); + state = drm_atomic_helper_duplicate_state(dev, ctx); + if (IS_ERR(state)) { + ret = PTR_ERR(state); + state = NULL; + DRM_ERROR("Duplicating state failed with %i\n", ret); + goto err; + } + + ret = drm_atomic_helper_disable_all(dev, ctx); + if (ret) { + DRM_ERROR("Suspending crtc's failed with %i\n", ret); + goto err; + } + + dev_priv->modeset_restore_state = state; + state->acquire_ctx = ctx; + return; + +err: + drm_atomic_state_free(state); } void intel_finish_reset(struct drm_i915_private *dev_priv) { + struct drm_device *dev = &dev_priv->drm; + struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx; + struct drm_atomic_state *state = dev_priv->modeset_restore_state; + int ret; + /* * Flips in the rings will be nuked by the reset, * so complete all pending flips so that user space @@ -3138,6 +3208,8 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) if (IS_GEN2(dev_priv)) return; + dev_priv->modeset_restore_state = NULL; + /* reset doesn't touch the display */ if (INTEL_GEN(dev_priv) >= 5 || IS_G4X(dev_priv)) { /* @@ -3149,29 +3221,32 @@ void intel_finish_reset(struct drm_i915_private *dev_priv) * FIXME: Atomic will make this obsolete since we won't schedule * CS-based flips (which might get lost in gpu resets) any more. */ - intel_update_primary_planes(&dev_priv->drm); - return; - } - - /* - * The display has been reset as well, - * so need a full re-initialization. - */ - intel_runtime_pm_disable_interrupts(dev_priv); - intel_runtime_pm_enable_interrupts(dev_priv); + intel_update_primary_planes(dev); + } else { + /* + * The display has been reset as well, + * so need a full re-initialization. + */ + intel_runtime_pm_disable_interrupts(dev_priv); + intel_runtime_pm_enable_interrupts(dev_priv); - intel_modeset_init_hw(&dev_priv->drm); + intel_modeset_init_hw(dev); - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->display.hpd_irq_setup) - dev_priv->display.hpd_irq_setup(dev_priv); - spin_unlock_irq(&dev_priv->irq_lock); + spin_lock_irq(&dev_priv->irq_lock); + if (dev_priv->display.hpd_irq_setup) + dev_priv->display.hpd_irq_setup(dev_priv); + spin_unlock_irq(&dev_priv->irq_lock); - intel_display_resume(&dev_priv->drm); + ret = __intel_display_resume(dev, state); + if (ret) + DRM_ERROR("Restoring old state failed with %i\n", ret); - intel_hpd_init(dev_priv); + intel_hpd_init(dev_priv); + } - drm_modeset_unlock_all(&dev_priv->drm); + drm_modeset_drop_locks(ctx); + drm_modeset_acquire_fini(ctx); + mutex_unlock(&dev->mode_config.mutex); } static bool intel_crtc_has_pending_flip(struct drm_crtc *crtc) @@ -16174,9 +16249,10 @@ void intel_display_resume(struct drm_device *dev) struct drm_atomic_state *state = dev_priv->modeset_restore_state; struct drm_modeset_acquire_ctx ctx; int ret; - bool setup = false; dev_priv->modeset_restore_state = NULL; + if (state) + state->acquire_ctx = &ctx; /* * This is a cludge because with real atomic modeset mode_config.mutex @@ -16187,43 +16263,17 @@ void intel_display_resume(struct drm_device *dev) mutex_lock(&dev->mode_config.mutex); drm_modeset_acquire_init(&ctx, 0); -retry: - ret = drm_modeset_lock_all_ctx(dev, &ctx); - - if (ret == 0 && !setup) { - setup = true; - - intel_modeset_setup_hw_state(dev); - i915_redisable_vga(dev); - } - - if (ret == 0 && state) { - struct drm_crtc_state *crtc_state; - struct drm_crtc *crtc; - int i; - - state->acquire_ctx = &ctx; - - /* ignore any reset values/BIOS leftovers in the WM registers */ - to_intel_atomic_state(state)->skip_intermediate_wm = true; - - for_each_crtc_in_state(state, crtc, crtc_state, i) { - /* - * Force recalculation even if we restore - * current state. With fast modeset this may not result - * in a modeset when the state is compatible. - */ - crtc_state->mode_changed = true; - } - - ret = drm_atomic_commit(state); - } + while (1) { + ret = drm_modeset_lock_all_ctx(dev, &ctx); + if (ret != -EDEADLK) + break; - if (ret == -EDEADLK) { drm_modeset_backoff(&ctx); - goto retry; } + if (!ret) + ret = __intel_display_resume(dev, state); + drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); mutex_unlock(&dev->mode_config.mutex); From 672ca65d9aa8578f382784fe73578cd499664828 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com> Date: Wed, 10 Aug 2016 14:07:34 +0200 Subject: [PATCH 151/513] tipc: fix variable dereference before NULL check In commit cf6f7e1d5109 ("tipc: dump monitor attributes"), I dereferenced a pointer before checking if its valid. This is reported by static check Smatch as: net/tipc/monitor.c:733 tipc_nl_add_monitor_peer() warn: variable dereferenced before check 'mon' (see line 731) In this commit, we check for a valid monitor before proceeding with any other operation. Fixes: cf6f7e1d5109 ("tipc: dump monitor attributes") Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/tipc/monitor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index b62caa1c770c0..ed97a5876ebef 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -728,12 +728,13 @@ int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *peer = mon->self; + struct tipc_peer *peer; if (!mon) return -EINVAL; read_lock_bh(&mon->lock); + peer = mon->self; do { if (*prev_node) { if (peer->addr == *prev_node) From dafa6b0db2d62164c5ef81a40312d5ba514126b9 Mon Sep 17 00:00:00 2001 From: Fabian Frederick <fabf@skynet.be> Date: Wed, 10 Aug 2016 17:48:36 +0200 Subject: [PATCH 152/513] net: hns: fix typo in g_gmac_stats_string[] s/gamc/gmac/ Signed-off-by: Fabian Frederick <fabf@skynet.be> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c index 1235c7f2564bd..1e1eb92998fb3 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c @@ -17,7 +17,7 @@ static const struct mac_stats_string g_gmac_stats_string[] = { {"gmac_rx_octets_total_ok", MAC_STATS_FIELD_OFF(rx_good_bytes)}, {"gmac_rx_octets_bad", MAC_STATS_FIELD_OFF(rx_bad_bytes)}, {"gmac_rx_uc_pkts", MAC_STATS_FIELD_OFF(rx_uc_pkts)}, - {"gamc_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)}, + {"gmac_rx_mc_pkts", MAC_STATS_FIELD_OFF(rx_mc_pkts)}, {"gmac_rx_bc_pkts", MAC_STATS_FIELD_OFF(rx_bc_pkts)}, {"gmac_rx_pkts_64octets", MAC_STATS_FIELD_OFF(rx_64bytes)}, {"gmac_rx_pkts_65to127", MAC_STATS_FIELD_OFF(rx_65to127)}, From e7f851684efb3377e9c93aca7fae6e76212e5680 Mon Sep 17 00:00:00 2001 From: Yinghai Lu <yinghai@kernel.org> Date: Fri, 5 Aug 2016 23:37:34 -0700 Subject: [PATCH 153/513] megaraid_sas: Fix probing cards without io port Found one megaraid_sas HBA probe fails, [ 187.235190] scsi host2: Avago SAS based MegaRAID driver [ 191.112365] megaraid_sas 0000:89:00.0: BAR 0: can't reserve [io 0x0000-0x00ff] [ 191.120548] megaraid_sas 0000:89:00.0: IO memory region busy! and the card has resource like, [ 125.097714] pci 0000:89:00.0: [1000:005d] type 00 class 0x010400 [ 125.104446] pci 0000:89:00.0: reg 0x10: [io 0x0000-0x00ff] [ 125.110686] pci 0000:89:00.0: reg 0x14: [mem 0xce400000-0xce40ffff 64bit] [ 125.118286] pci 0000:89:00.0: reg 0x1c: [mem 0xce300000-0xce3fffff 64bit] [ 125.125891] pci 0000:89:00.0: reg 0x30: [mem 0xce200000-0xce2fffff pref] that does not io port resource allocated from BIOS, and kernel can not assign one as io port shortage. The driver is only looking for MEM, and should not fail. It turns out megasas_init_fw() etc are using bar index as mask. index 1 is used as mask 1, so that pci_request_selected_regions() is trying to request BAR0 instead of BAR1. Fix all related reference. Fixes: b6d5d8808b4c ("megaraid_sas: Use lowest memory bar for SR-IOV VF support") Signed-off-by: Yinghai Lu <yinghai@kernel.org> Acked-by: Kashyap Desai <kashyap.desai@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/megaraid/megaraid_sas_base.c | 6 +++--- drivers/scsi/megaraid/megaraid_sas_fusion.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c index 2dab3dc2aa69b..c1ed25adb17ec 100644 --- a/drivers/scsi/megaraid/megaraid_sas_base.c +++ b/drivers/scsi/megaraid/megaraid_sas_base.c @@ -5037,7 +5037,7 @@ static int megasas_init_fw(struct megasas_instance *instance) /* Find first memory bar */ bar_list = pci_select_bars(instance->pdev, IORESOURCE_MEM); instance->bar = find_first_bit(&bar_list, sizeof(unsigned long)); - if (pci_request_selected_regions(instance->pdev, instance->bar, + if (pci_request_selected_regions(instance->pdev, 1<<instance->bar, "megasas: LSI")) { dev_printk(KERN_DEBUG, &instance->pdev->dev, "IO memory region busy!\n"); return -EBUSY; @@ -5339,7 +5339,7 @@ static int megasas_init_fw(struct megasas_instance *instance) iounmap(instance->reg_set); fail_ioremap: - pci_release_selected_regions(instance->pdev, instance->bar); + pci_release_selected_regions(instance->pdev, 1<<instance->bar); return -EINVAL; } @@ -5360,7 +5360,7 @@ static void megasas_release_mfi(struct megasas_instance *instance) iounmap(instance->reg_set); - pci_release_selected_regions(instance->pdev, instance->bar); + pci_release_selected_regions(instance->pdev, 1<<instance->bar); } /** diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c index ec837544f7847..52d8bbf7feb5c 100644 --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c @@ -2603,7 +2603,7 @@ megasas_release_fusion(struct megasas_instance *instance) iounmap(instance->reg_set); - pci_release_selected_regions(instance->pdev, instance->bar); + pci_release_selected_regions(instance->pdev, 1<<instance->bar); } /** From 0d7826ddded60dbe36c451b462c24387e2727275 Mon Sep 17 00:00:00 2001 From: Brian King <brking@linux.vnet.ibm.com> Date: Wed, 10 Aug 2016 22:45:57 -0400 Subject: [PATCH 154/513] ipr: Fix sync scsi scan Commit b195d5e2bffd ("ipr: Wait to do async scan until scsi host is initialized") fixed async scan for ipr, but broke sync scan. This fixes sync scan back up. Signed-off-by: Brian King <brking@linux.vnet.ibm.com> Tested-by: Michael Ellerman <mpe@ellerman.id.au> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/ipr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c index bf85974be8621..17d04c702e1ba 100644 --- a/drivers/scsi/ipr.c +++ b/drivers/scsi/ipr.c @@ -10410,8 +10410,11 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) __ipr_remove(pdev); return rc; } + spin_lock_irqsave(ioa_cfg->host->host_lock, flags); + ioa_cfg->scan_enabled = 1; + schedule_work(&ioa_cfg->work_q); + spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); - scsi_scan_host(ioa_cfg->host); ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight; if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) { @@ -10421,10 +10424,8 @@ static int ipr_probe(struct pci_dev *pdev, const struct pci_device_id *dev_id) } } - spin_lock_irqsave(ioa_cfg->host->host_lock, flags); - ioa_cfg->scan_enabled = 1; - schedule_work(&ioa_cfg->work_q); - spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags); + scsi_scan_host(ioa_cfg->host); + return 0; } From 4b5b9ba553f9aa5f484ab972fc9b58061885ceca Mon Sep 17 00:00:00 2001 From: Martynas Pumputis <martynas@weave.works> Date: Tue, 9 Aug 2016 16:24:50 +0100 Subject: [PATCH 155/513] openvswitch: do not ignore netdev errors when creating tunnel vports The creation of a tunnel vport (geneve, gre, vxlan) brings up a corresponding netdev, a multi-step operation which can fail. For example, changing a vxlan vport's netdev state to 'up' binds the vport's socket to a UDP port - if the binding fails (e.g. due to the port being in use), the error is currently ignored giving the appearance that the tunnel vport creation completed successfully. Signed-off-by: Martynas Pumputis <martynas@weave.works> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/openvswitch/vport-geneve.c | 9 ++++++++- net/openvswitch/vport-gre.c | 11 +++++++++-- net/openvswitch/vport-vxlan.c | 9 ++++++++- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 1a1fcec886959..5aaf3babfc3fa 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -93,7 +93,14 @@ static struct vport *geneve_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + goto error; + } + rtnl_unlock(); return vport; error: diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index 7f8897f33a67f..0e72d95b0e8f1 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -54,6 +54,7 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) struct net *net = ovs_dp_get_net(parms->dp); struct net_device *dev; struct vport *vport; + int err; vport = ovs_vport_alloc(0, &ovs_gre_vport_ops, parms); if (IS_ERR(vport)) @@ -67,9 +68,15 @@ static struct vport *gre_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - dev_change_flags(dev, dev->flags | IFF_UP); - rtnl_unlock(); + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + return ERR_PTR(err); + } + rtnl_unlock(); return vport; } diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 5eb7694348b5b..7eb955e453e6d 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -130,7 +130,14 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) return ERR_CAST(dev); } - dev_change_flags(dev, dev->flags | IFF_UP); + err = dev_change_flags(dev, dev->flags | IFF_UP); + if (err < 0) { + rtnl_delete_link(dev); + rtnl_unlock(); + ovs_vport_free(vport); + goto error; + } + rtnl_unlock(); return vport; error: From 851da9ac53a5e7121f7c38a052f74b574dd73ec3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Wed, 10 Aug 2016 23:54:09 +0200 Subject: [PATCH 156/513] drm/mediatek: add COMMON_CLK dependency On kernel builds without COMMON_CLK, the newly added mediatek drm driver fails to build: drivers/gpu/drm/mediatek/mtk_mipi_tx.c:130:16: error: field 'pll_hw' has incomplete type struct clk_hw pll_hw; ^~~~~~ In file included from ../include/linux/clk.h:16:0, from ../drivers/gpu/drm/mediatek/mtk_mipi_tx.c:14: drivers/gpu/drm/mediatek/mtk_mipi_tx.c: In function 'mtk_mipi_tx_from_clk_hw': include/linux/kernel.h:831:48: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] const typeof( ((type *)0)->member ) *__mptr = (ptr); \ ^ /drivers/gpu/drm/mediatek/mtk_mipi_tx.c:136:9: note: in expansion of macro 'container_of' return container_of(hw, struct mtk_mipi_tx, pll_hw); ^~~~~~~~~~~~ drivers/gpu/drm/mediatek/mtk_mipi_tx.c: At top level: drivers/gpu/drm/mediatek/mtk_mipi_tx.c:302:21: error: variable 'mtk_mipi_tx_pll_ops' has initializer but incomplete type static const struct clk_ops mtk_mipi_tx_pll_ops = { This adds the required Kconfig dependency. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Philipp Zabel <p.zabel@pengutronix.de> Link: https://patchwork.kernel.org/patch/9069061/ Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de> --- drivers/gpu/drm/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 23ac8041c5629..5c2163737691e 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -2,6 +2,7 @@ config DRM_MEDIATEK tristate "DRM Support for Mediatek SoCs" depends on DRM depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST) + depends on COMMON_CLK select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI From b5db361eb7746cc9ada51554a6b996170c439f0b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Wed, 10 Aug 2016 23:54:10 +0200 Subject: [PATCH 157/513] drm/mediatek: add CONFIG_OF dependency The mediatek DRM driver can be configured for compile testing with CONFIG_OF disabled, but then fails to link: drivers/gpu/built-in.o: In function `mtk_drm_bind': analogix_dp_reg.c:(.text+0x52888): undefined reference to `of_find_device_by_node' analogix_dp_reg.c:(.text+0x52930): undefined reference to `of_find_device_by_node' This adds an explicit Kconfig dependency. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Link: https://patchwork.kernel.org/patch/9120871/ Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de> --- drivers/gpu/drm/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 5c2163737691e..96ebf8bb6024b 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -3,6 +3,7 @@ config DRM_MEDIATEK depends on DRM depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST) depends on COMMON_CLK + depends on OF select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER select DRM_MIPI_DSI From 306d674a2ddd565adde7d9d6703157105c8444a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Wed, 10 Aug 2016 23:54:11 +0200 Subject: [PATCH 158/513] drm/mediatek: add ARM_SMCCC dependency ARM SMCCC is only set for ARMv7 and ARMv8 CPUs, but we currently allow the driver to be build for older architecture levels as well, which results in a link failure: drivers/gpu/built-in.o: In function `mtk_hdmi_hw_make_reg_writable': :(.text+0x1e737c): undefined reference to `arm_smccc_smc' This adds a Kconfig dependency. The patch applies on my two previous fixes that are not yet applied, so please apply all three to get randconfig builds to work correctly. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Fixes: 8f83f26891e1 ("drm/mediatek: Add HDMI support") Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com> Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de> --- drivers/gpu/drm/mediatek/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/mediatek/Kconfig b/drivers/gpu/drm/mediatek/Kconfig index 96ebf8bb6024b..294de4549922a 100644 --- a/drivers/gpu/drm/mediatek/Kconfig +++ b/drivers/gpu/drm/mediatek/Kconfig @@ -3,6 +3,7 @@ config DRM_MEDIATEK depends on DRM depends on ARCH_MEDIATEK || (ARM && COMPILE_TEST) depends on COMMON_CLK + depends on HAVE_ARM_SMCCC depends on OF select DRM_GEM_CMA_HELPER select DRM_KMS_HELPER From af7752106e4f12b4ee47b4eca3e7ba4bcec6e7e5 Mon Sep 17 00:00:00 2001 From: Stefan Haberland <sth@linux.vnet.ibm.com> Date: Tue, 9 Aug 2016 15:58:48 +0200 Subject: [PATCH 159/513] s390/dasd: fix failing CUIR assignment under LPAR On LPAR the read message buffer command should be executed on the path it was received on otherwise there is a chance that the CUIR assignment might be faulty and the wrong channel path is set online/offline. Fix by setting the path mask accordingly. On z/VM we might not be able to do I/O on this path but there it does not matter on which path the read message buffer command is executed. Therefor implement a retry with an open path mask. Signed-off-by: Stefan Haberland <sth@linux.vnet.ibm.com> --- drivers/s390/block/dasd_eckd.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index fd2eff4400980..98bbec44bcd05 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -5078,6 +5078,8 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, return PTR_ERR(cqr); } + cqr->lpm = lpum; +retry: cqr->startdev = device; cqr->memdev = device; cqr->block = NULL; @@ -5122,6 +5124,14 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, (prssdp + 1); memcpy(messages, message_buf, sizeof(struct dasd_rssd_messages)); + } else if (cqr->lpm) { + /* + * on z/VM we might not be able to do I/O on the requested path + * but instead we get the required information on any path + * so retry with open path mask + */ + cqr->lpm = 0; + goto retry; } else DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "Reading messages failed with rc=%d\n" From 772ce81264b179c0e61340998e3b29e900b2fa6d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Date: Mon, 8 Aug 2016 21:50:51 +0900 Subject: [PATCH 160/513] usb: renesas_usbhs: Fix receiving data corrupt on R-Car Gen3 with dmac Since R-Car Gen3 SoC has the USB-DMAC, this driver should set dparam->has_usb_dmac to 1. Otherwise, behavior of this driver and the usb-dmac driver will be mismatch, then sometimes receiving data will be corrupt. Fixes: de18757e272d ("usb: renesas_usbhs: add R-Car Gen3 power control") Cc: <stable@vger.kernel.org> # v4.5+ Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/renesas_usbhs/common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/renesas_usbhs/common.c b/drivers/usb/renesas_usbhs/common.c index 8fbbc2d32371a..ac67bab9124cc 100644 --- a/drivers/usb/renesas_usbhs/common.c +++ b/drivers/usb/renesas_usbhs/common.c @@ -514,7 +514,8 @@ static struct renesas_usbhs_platform_info *usbhs_parse_dt(struct device *dev) if (gpio > 0) dparam->enable_gpio = gpio; - if (dparam->type == USBHS_TYPE_RCAR_GEN2) + if (dparam->type == USBHS_TYPE_RCAR_GEN2 || + dparam->type == USBHS_TYPE_RCAR_GEN3) dparam->has_usb_dmac = 1; return info; From 9ab967e6db7412b675ecbff80d5371d53c82cb2e Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Date: Mon, 8 Aug 2016 21:50:52 +0900 Subject: [PATCH 161/513] usb: renesas_usbhs: clear the BRDYSTS in usbhsg_ep_enable() This patch fixes an issue that unexpected BRDY interruption happens when the usb_ep_{enable,disable}() are called with different direction. In this case, the driver will cause the following message: renesas_usbhs e6590000.usb: irq_ready run_error 1 : -16 This issue causes the followings: 1) A pipe is enabled as transmission 2) The pipe sent a data 3) The pipe is disabled and re-enabled as reception. 4) The pipe got a queue Since the driver doesn't clear the BRDYSTS flags after 2) above, the issue happens. If we add such clearing the flags into the driver, the code will become complicate. So, this patch clears the BRDYSTS flag of reception in usbhsg_ep_enable() to avoid complicate. Cc: <stable@vger.kernel.org> # v4.1+ (usbhs_xxxsts_clear() is needed) Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/renesas_usbhs/mod_gadget.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/mod_gadget.c b/drivers/usb/renesas_usbhs/mod_gadget.c index 50f3363cc382b..92bc83b92d10d 100644 --- a/drivers/usb/renesas_usbhs/mod_gadget.c +++ b/drivers/usb/renesas_usbhs/mod_gadget.c @@ -617,10 +617,13 @@ static int usbhsg_ep_enable(struct usb_ep *ep, * use dmaengine if possible. * It will use pio handler if impossible. */ - if (usb_endpoint_dir_in(desc)) + if (usb_endpoint_dir_in(desc)) { pipe->handler = &usbhs_fifo_dma_push_handler; - else + } else { pipe->handler = &usbhs_fifo_dma_pop_handler; + usbhs_xxxsts_clear(priv, BRDYSTS, + usbhs_pipe_number(pipe)); + } ret = 0; } From 700aa7ff8d2c2b9cc669c99375e2ccd06d3cd38d Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Date: Mon, 8 Aug 2016 21:50:53 +0900 Subject: [PATCH 162/513] usb: renesas_usbhs: Use dmac only if the pipe type is bulk This patch fixes an issue that isochronous transfer's data is possible to be lost as a workaround. Since this driver uses a workqueue to start the dmac, the transfer is possible to be delayed when system load is high. Fixes: 6e4b74e4690d ("usb: renesas: fix scheduling in atomic context bug") Cc: <stable@vger.kernel.org> # v3.4+ Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/renesas_usbhs/fifo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/renesas_usbhs/fifo.c b/drivers/usb/renesas_usbhs/fifo.c index 280ed5ff021bd..857e78337324b 100644 --- a/drivers/usb/renesas_usbhs/fifo.c +++ b/drivers/usb/renesas_usbhs/fifo.c @@ -871,7 +871,7 @@ static int usbhsf_dma_prepare_push(struct usbhs_pkt *pkt, int *is_done) /* use PIO if packet is less than pio_dma_border or pipe is DCP */ if ((len < usbhs_get_dparam(priv, pio_dma_border)) || - usbhs_pipe_is_dcp(pipe)) + usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC)) goto usbhsf_pio_prepare_push; /* check data length if this driver don't use USB-DMAC */ @@ -976,7 +976,7 @@ static int usbhsf_dma_prepare_pop_with_usb_dmac(struct usbhs_pkt *pkt, /* use PIO if packet is less than pio_dma_border or pipe is DCP */ if ((pkt->length < usbhs_get_dparam(priv, pio_dma_border)) || - usbhs_pipe_is_dcp(pipe)) + usbhs_pipe_type_is(pipe, USB_ENDPOINT_XFER_ISOC)) goto usbhsf_pio_prepare_pop; fifo = usbhsf_get_dma_fifo(priv, pkt); From c526c62d565ea5a5bba9433f28756079734f430d Mon Sep 17 00:00:00 2001 From: Peter Chen <peter.chen@nxp.com> Date: Fri, 1 Jul 2016 15:33:28 +0800 Subject: [PATCH 163/513] usb: gadget: composite: fix dereference after null check coverify warning cdev->config is checked for null pointer at above code, so cdev->config might be null, fix it by adding null pointer check. Signed-off-by: Peter Chen <peter.chen@nxp.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/composite.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index eb648485a58c5..a8ecc7a612b93 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -1913,6 +1913,8 @@ composite_setup(struct usb_gadget *gadget, const struct usb_ctrlrequest *ctrl) break; case USB_RECIP_ENDPOINT: + if (!cdev->config) + break; endp = ((w_index & 0x80) >> 3) | (w_index & 0x0f); list_for_each_entry(f, &cdev->config->functions, list) { if (test_bit(endp, f->endpoints)) From 88c09eacf560c3303d0ee8cf91b8b7ff7f000350 Mon Sep 17 00:00:00 2001 From: Peter Chen <peter.chen@nxp.com> Date: Fri, 1 Jul 2016 15:33:29 +0800 Subject: [PATCH 164/513] usb: gadget: u_ether: fix dereference after null check coverify warning dev->port_usb is checked for null pointer at above code, so dev->port_usb might be null, fix it by adding null pointer check. Signed-off-by: Peter Chen <peter.chen@nxp.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/function/u_ether.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c index a3f7e7c55ebb1..5f562c1ec7957 100644 --- a/drivers/usb/gadget/function/u_ether.c +++ b/drivers/usb/gadget/function/u_ether.c @@ -556,7 +556,8 @@ static netdev_tx_t eth_start_xmit(struct sk_buff *skb, /* Multi frame CDC protocols may store the frame for * later which is not a dropped frame. */ - if (dev->port_usb->supports_multi_frame) + if (dev->port_usb && + dev->port_usb->supports_multi_frame) goto multiframe; goto drop; } From cee51c33f52ebf673a088a428ac0fecc33ab77fa Mon Sep 17 00:00:00 2001 From: Winter Wang <wente.wang@nxp.com> Date: Wed, 27 Jul 2016 10:03:19 +0800 Subject: [PATCH 165/513] usb: gadget: configfs: add mutex lock before unregister gadget There may be a race condition if f_fs calls unregister_gadget_item in ffs_closed() when unregister_gadget is called by UDC store at the same time. this leads to a kernel NULL pointer dereference: [ 310.644928] Unable to handle kernel NULL pointer dereference at virtual address 00000004 [ 310.645053] init: Service 'adbd' is being killed... [ 310.658938] pgd = c9528000 [ 310.662515] [00000004] *pgd=19451831, *pte=00000000, *ppte=00000000 [ 310.669702] Internal error: Oops: 817 [#1] PREEMPT SMP ARM [ 310.675211] Modules linked in: [ 310.678294] CPU: 0 PID: 1537 Comm: ->transport Not tainted 4.1.15-03725-g793404c #2 [ 310.685958] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 310.692493] task: c8e24200 ti: c945e000 task.ti: c945e000 [ 310.697911] PC is at usb_gadget_unregister_driver+0xb4/0xd0 [ 310.703502] LR is at __mutex_lock_slowpath+0x10c/0x16c [ 310.708648] pc : [<c075efc0>] lr : [<c0bfb0bc>] psr: 600f0113 <snip..> [ 311.565585] [<c075efc0>] (usb_gadget_unregister_driver) from [<c075e2b8>] (unregister_gadget_item+0x1c/0x34) [ 311.575426] [<c075e2b8>] (unregister_gadget_item) from [<c076fcc8>] (ffs_closed+0x8c/0x9c) [ 311.583702] [<c076fcc8>] (ffs_closed) from [<c07736b8>] (ffs_data_reset+0xc/0xa0) [ 311.591194] [<c07736b8>] (ffs_data_reset) from [<c07738ac>] (ffs_data_closed+0x90/0xd0) [ 311.599208] [<c07738ac>] (ffs_data_closed) from [<c07738f8>] (ffs_ep0_release+0xc/0x14) [ 311.607224] [<c07738f8>] (ffs_ep0_release) from [<c023e030>] (__fput+0x80/0x1d0) [ 311.614635] [<c023e030>] (__fput) from [<c014e688>] (task_work_run+0xb0/0xe8) [ 311.621788] [<c014e688>] (task_work_run) from [<c010afdc>] (do_work_pending+0x7c/0xa4) [ 311.629718] [<c010afdc>] (do_work_pending) from [<c010770c>] (work_pending+0xc/0x20) for functions using functionFS, i.e. android adbd will close /dev/usb-ffs/adb/ep0 when usb IO thread fails, but switch adb from on to off also triggers write "none" > UDC. These 2 operations both call unregister_gadget, which will lead to the panic above. add a mutex before calling unregister_gadget for api used in f_fs. Signed-off-by: Winter Wang <wente.wang@nxp.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/configfs.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 70cf3477f951a..f9237fe2be056 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -1490,7 +1490,9 @@ void unregister_gadget_item(struct config_item *item) { struct gadget_info *gi = to_gadget_info(item); + mutex_lock(&gi->lock); unregister_gadget(gi); + mutex_unlock(&gi->lock); } EXPORT_SYMBOL_GPL(unregister_gadget_item); From ec57fcd042a9448096705847290d4e4e97fbb7e1 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Tue, 26 Jul 2016 14:48:39 +0000 Subject: [PATCH 166/513] usb: phy: omap-otg: Fix missing platform_set_drvdata() in omap_otg_probe() Add missing platform_set_drvdata() in omap_otg_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/phy/phy-omap-otg.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/phy/phy-omap-otg.c b/drivers/usb/phy/phy-omap-otg.c index 6f6d2a7fd5a07..6523af4f8f93f 100644 --- a/drivers/usb/phy/phy-omap-otg.c +++ b/drivers/usb/phy/phy-omap-otg.c @@ -140,6 +140,8 @@ static int omap_otg_probe(struct platform_device *pdev) (rev >> 4) & 0xf, rev & 0xf, config->extcon, otg_dev->id, otg_dev->vbus); + platform_set_drvdata(pdev, otg_dev); + return 0; } From 4c4f106c032ff32b89c98a4d819e68e6e643c14e Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Tue, 26 Jul 2016 14:47:00 +0000 Subject: [PATCH 167/513] usb: dwc3: fix missing platform_set_drvdata() in dwc3_of_simple_probe() Add missing platform_set_drvdata() in dwc3_of_simple_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. This is detected by Coccinelle semantic patch. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/dwc3-of-simple.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/dwc3/dwc3-of-simple.c b/drivers/usb/dwc3/dwc3-of-simple.c index 974335377d9f1..e56d59b19a0ec 100644 --- a/drivers/usb/dwc3/dwc3-of-simple.c +++ b/drivers/usb/dwc3/dwc3-of-simple.c @@ -61,6 +61,7 @@ static int dwc3_of_simple_probe(struct platform_device *pdev) if (!simple->clks) return -ENOMEM; + platform_set_drvdata(pdev, simple); simple->dev = dev; for (i = 0; i < simple->num_clocks; i++) { From 528d28138f91009f230903bd89ccd44719667831 Mon Sep 17 00:00:00 2001 From: Peter Chen <peter.chen@nxp.com> Date: Tue, 26 Jul 2016 16:01:30 +0800 Subject: [PATCH 168/513] usb: misc: usbtest: usbtest_do_ioctl may return positive integer For case 14 and case 21, their correct return value is the number of bytes transferred, so it is a positive integer. But in usbtest_ioctl, it takes non-zero as false return value for usbtest_do_ioctl, so it will treat the correct test as wrong test, then the time on tests will be the minus value. Signed-off-by: Peter Chen <peter.chen@nxp.com> Cc: stable <stable@vger.kernel.org> Fixes: 18fc4ebdc705 ("usb: misc: usbtest: Remove timeval usage") Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/misc/usbtest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 6b978f04b8d72..5e3464e8d4e5a 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -2602,7 +2602,7 @@ usbtest_ioctl(struct usb_interface *intf, unsigned int code, void *buf) ktime_get_ts64(&start); retval = usbtest_do_ioctl(intf, param_32); - if (retval) + if (retval < 0) goto free_mutex; ktime_get_ts64(&end); From 63196e989699ddffb4e3d30ca8c3567d408820f4 Mon Sep 17 00:00:00 2001 From: Binyamin Sharet <s.binyamin@gmail.com> Date: Thu, 7 Jul 2016 22:22:04 +0300 Subject: [PATCH 169/513] usb: gadget: fix check in sync read from ep in gadgetfs When reading synchronously from a non-zero endpoint, gadgetfs will return -EFAULT even if the read succeeds, due to a bad check of the copy_to_iter() return value. This fix compares the return value of copy_to_iter to the amount of bytes that was passed, and only fails if they are not the same. Signed-off-by: Binyamin Sharet <s.binyamin@gmail.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/legacy/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index aa3707bdebb4a..8560f2fe3af50 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -606,7 +606,7 @@ ep_read_iter(struct kiocb *iocb, struct iov_iter *to) } if (is_sync_kiocb(iocb)) { value = ep_io(epdata, buf, len); - if (value >= 0 && copy_to_iter(buf, value, to)) + if (value >= 0 && (copy_to_iter(buf, value, to) != value)) value = -EFAULT; } else { struct kiocb_priv *priv = kzalloc(sizeof *priv, GFP_KERNEL); From bd610c5aa9fcc9817d2629274a27aab81aa77cec Mon Sep 17 00:00:00 2001 From: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Date: Sat, 16 Jul 2016 09:04:40 +0200 Subject: [PATCH 170/513] usb: gadget: uvc: Fix return value in case of error If this memory allocation fail, we will return 0, which means success. Return -ENOMEM instead. Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/function/uvc_configfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c index 66753ba7a42eb..31125a4a26589 100644 --- a/drivers/usb/gadget/function/uvc_configfs.c +++ b/drivers/usb/gadget/function/uvc_configfs.c @@ -2023,7 +2023,7 @@ static int uvcg_streaming_class_allow_link(struct config_item *src, if (!data) { kfree(*class_array); *class_array = NULL; - ret = PTR_ERR(data); + ret = -ENOMEM; goto unlock; } cl_arr = *class_array; From 3887db5c2b6531c59e9649a4293071b575d6eb3b Mon Sep 17 00:00:00 2001 From: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Date: Sat, 16 Jul 2016 08:34:33 +0200 Subject: [PATCH 171/513] usb: gadget: composite: Fix return value in case of error In 'composite_os_desc_req_prepare', if one of the memory allocations fail, 0 will be returned, which means success. We should return -ENOMEM instead. Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/composite.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c index a8ecc7a612b93..5ebe6af7976ec 100644 --- a/drivers/usb/gadget/composite.c +++ b/drivers/usb/gadget/composite.c @@ -2126,14 +2126,14 @@ int composite_os_desc_req_prepare(struct usb_composite_dev *cdev, cdev->os_desc_req = usb_ep_alloc_request(ep0, GFP_KERNEL); if (!cdev->os_desc_req) { - ret = PTR_ERR(cdev->os_desc_req); + ret = -ENOMEM; goto end; } /* OS feature descriptor length <= 4kB */ cdev->os_desc_req->buf = kmalloc(4096, GFP_KERNEL); if (!cdev->os_desc_req->buf) { - ret = PTR_ERR(cdev->os_desc_req->buf); + ret = -ENOMEM; kfree(cdev->os_desc_req); goto end; } From 327b21da884fe1a29f733e41792ddd53e4a30379 Mon Sep 17 00:00:00 2001 From: Mathieu Laurendeau <mat.lau@laposte.net> Date: Fri, 15 Jul 2016 14:58:41 +0200 Subject: [PATCH 172/513] usb/gadget: fix gadgetfs aio support. Fix io submissions failing with ENODEV. Signed-off-by: Mathieu Laurendeau <mat.lau@laposte.net> Fixes: 7fe3976e0f3a ("gadget: switch ep_io_operations to ->read_iter/->write_iter") Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/legacy/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 8560f2fe3af50..16104b5ebdcb7 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -542,7 +542,7 @@ static ssize_t ep_aio(struct kiocb *iocb, */ spin_lock_irq(&epdata->dev->lock); value = -ENODEV; - if (unlikely(epdata->ep)) + if (unlikely(epdata->ep == NULL)) goto fail; req = usb_ep_alloc_request(epdata->ep, GFP_ATOMIC); From 7442e6db5bdd0dce4615205508301f9b22e502d6 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@oracle.com> Date: Wed, 13 Jul 2016 13:14:33 +0300 Subject: [PATCH 173/513] usb: gadget: fsl_qe_udc: off by one in setup_received_handle() The udc->eps[] array has USB_MAX_ENDPOINTS elements so > should be >=. Fixes: 3948f0e0c999 ('usb: add Freescale QE/CPM USB peripheral controller driver') Acked-by: Peter Chen <peter.chen@nxp.com> Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/udc/fsl_qe_udc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/fsl_qe_udc.c b/drivers/usb/gadget/udc/fsl_qe_udc.c index 93d28cb00b76f..cf8819a5c5b26 100644 --- a/drivers/usb/gadget/udc/fsl_qe_udc.c +++ b/drivers/usb/gadget/udc/fsl_qe_udc.c @@ -2053,7 +2053,7 @@ static void setup_received_handle(struct qe_udc *udc, struct qe_ep *ep; if (wValue != 0 || wLength != 0 - || pipe > USB_MAX_ENDPOINTS) + || pipe >= USB_MAX_ENDPOINTS) break; ep = &udc->eps[pipe]; From 17a1dc5e22d25ba374d8c8dd8d5bf10bd20d3265 Mon Sep 17 00:00:00 2001 From: Peter Chen <peter.chen@nxp.com> Date: Mon, 11 Jul 2016 09:58:16 +0800 Subject: [PATCH 174/513] usb: udc: core: fix error handling The udc device needs to be deleted if error occurs Fixes: 855ed04a3758 ("usb: gadget: udc-core: independent registration of gadgets and gadget drivers") Signed-off-by: Peter Chen <peter.chen@nxp.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/udc/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index ff8685ea72193..934f83881c307 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -1145,7 +1145,7 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, if (ret != -EPROBE_DEFER) list_del(&driver->pending); if (ret) - goto err4; + goto err5; break; } } @@ -1154,6 +1154,9 @@ int usb_add_gadget_udc_release(struct device *parent, struct usb_gadget *gadget, return 0; +err5: + device_del(&udc->dev); + err4: list_del(&udc->list); mutex_unlock(&udc_lock); From 207707d8fd48ebc977fb2b2794004a020e1ee08e Mon Sep 17 00:00:00 2001 From: Xerox Lin <xerox_lin@htc.com> Date: Wed, 29 Jun 2016 14:34:21 +0530 Subject: [PATCH 175/513] usb: gadget: rndis: free response queue during REMOTE_NDIS_RESET_MSG When rndis data transfer is in progress, some Windows7 Host PC is not sending the GET_ENCAPSULATED_RESPONSE command for receiving the response for the previous SEND_ENCAPSULATED_COMMAND processed. The rndis function driver appends each response for the SEND_ENCAPSULATED_COMMAND in a queue. As the above process got corrupted, the Host sends a REMOTE_NDIS_RESET_MSG command to do a soft-reset. As the rndis response queue is not freed, the previous response is sent as a part of this REMOTE_NDIS_RESET_MSG's reset response and the Host block any more Rndis transfers. Hence free the rndis response queue as a part of this soft-reset so that the correct response for REMOTE_NDIS_RESET_MSG is sent properly during the response command. Signed-off-by: Rajkumar Raghupathy <raghup@codeaurora.org> Signed-off-by: Xerox Lin <xerox_lin@htc.com> [AmitP: Cherry-picked this patch and folded other relevant fixes from Android common kernel android-4.4] Signed-off-by: Amit Pundir <amit.pundir@linaro.org> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/gadget/function/rndis.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/function/rndis.c b/drivers/usb/gadget/function/rndis.c index 943c21aafd3b5..ab6ac1b74ac0f 100644 --- a/drivers/usb/gadget/function/rndis.c +++ b/drivers/usb/gadget/function/rndis.c @@ -680,6 +680,12 @@ static int rndis_reset_response(struct rndis_params *params, { rndis_reset_cmplt_type *resp; rndis_resp_t *r; + u8 *xbuf; + u32 length; + + /* drain the response queue */ + while ((xbuf = rndis_get_next_response(params, &length))) + rndis_free_response(params, xbuf); r = rndis_add_response(params, sizeof(rndis_reset_cmplt_type)); if (!r) From 79d17482a4091056e128a5048e253f2ed53440cc Mon Sep 17 00:00:00 2001 From: Janusz Dziedzic <januszx.dziedzic@intel.com> Date: Thu, 7 Jul 2016 08:39:09 +0200 Subject: [PATCH 176/513] usb: dwc3: don't set last bit for ISOC endpoints According to Synopsys Databook 2.60a, section 8.3.4, it's stated that: The LST bit should be set to 0 (isochronous transfers normally continue until the endpoint is removed entirely, at which time an End Transfer command is used to stop the transfer). This patch makes sure that detail is observed and fixes a regression with Android Audio playback caused by recent changes to DWC3 gadget. Signed-off-by: Janusz Dziedzic <januszx.dziedzic@linux.intel.com> Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/gadget.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index eb820e4ab49d2..7a4d4d2534d49 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -829,7 +829,7 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep, if (!req->request.no_interrupt && !chain) trb->ctrl |= DWC3_TRB_CTRL_IOC | DWC3_TRB_CTRL_ISP_IMI; - if (last) + if (last && !usb_endpoint_xfer_isoc(dep->endpoint.desc)) trb->ctrl |= DWC3_TRB_CTRL_LST; if (chain) From a0ad85ae866f8a01f29a18ffa1e9b1aa8ca888bd Mon Sep 17 00:00:00 2001 From: Felipe Balbi <felipe.balbi@linux.intel.com> Date: Wed, 10 Aug 2016 18:07:46 +0300 Subject: [PATCH 177/513] usb: dwc3: gadget: stop processing on HWO set stop consuming TRBs when we reach one with HWO bit already set. This will prevent us from prematurely retiring a TRB. Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com> --- drivers/usb/dwc3/gadget.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index 7a4d4d2534d49..1f5597ef945d4 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -1979,16 +1979,7 @@ static int __dwc3_cleanup_done_trbs(struct dwc3 *dwc, struct dwc3_ep *dep, trb->ctrl &= ~DWC3_TRB_CTRL_HWO; if ((trb->ctrl & DWC3_TRB_CTRL_HWO) && status != -ESHUTDOWN) - /* - * We continue despite the error. There is not much we - * can do. If we don't clean it up we loop forever. If - * we skip the TRB then it gets overwritten after a - * while since we use them in a ring buffer. A BUG() - * would help. Lets hope that if this occurs, someone - * fixes the root cause instead of looking away :) - */ - dev_err(dwc->dev, "%s's TRB (%p) still owned by HW\n", - dep->name, trb); + return 1; count = trb->size & DWC3_TRB_SIZE_MASK; From 70fcad495b9903d362bdbf9ffba23259294064c8 Mon Sep 17 00:00:00 2001 From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com> Date: Thu, 11 Aug 2016 14:39:00 +0100 Subject: [PATCH 178/513] ASoC: Fix leak of rtd in soc_bind_dai_link If we fail to find a platform we simply return EPROBE_DEFER, but we have allocated the rtd pointer. All error paths before soc_add_pcm_runtime need to call soc_free_pcm_runtime first to avoid leaking the rtd pointer. A suitable error path already exists and is used else where in the function so simply use that here as well. Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/soc-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 16369cad48038..b0e23db836959 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -1056,7 +1056,7 @@ static int soc_bind_dai_link(struct snd_soc_card *card, if (!rtd->platform) { dev_err(card->dev, "ASoC: platform %s not registered\n", dai_link->platform_name); - return -EPROBE_DEFER; + goto _err_defer; } soc_add_pcm_runtime(card, rtd); From fa54aade5338937146bb6b8af93d2c27f6787ae1 Mon Sep 17 00:00:00 2001 From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com> Date: Thu, 11 Aug 2016 14:40:04 +0100 Subject: [PATCH 179/513] ASoC: wm2000: Fix return of uninitialised varible Anything that sets ret in wm2000_anc_transition will have immediately returned anyway as such we will always return an uninitialised ret at the bottom of the function. Simply replace the return with a return 0; Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/wm2000.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/soc/codecs/wm2000.c b/sound/soc/codecs/wm2000.c index a67ea10f41a1a..f2664396be6fd 100644 --- a/sound/soc/codecs/wm2000.c +++ b/sound/soc/codecs/wm2000.c @@ -581,7 +581,7 @@ static int wm2000_anc_transition(struct wm2000_priv *wm2000, if (anc_transitions[i].dest == ANC_OFF) clk_disable_unprepare(wm2000->mclk); - return ret; + return 0; } static int wm2000_anc_set_mode(struct wm2000_priv *wm2000) From 539587511835ea12d8daa444cbed766cf2bc3612 Mon Sep 17 00:00:00 2001 From: Lu Baolu <baolu.lu@linux.intel.com> Date: Thu, 11 Aug 2016 10:31:14 +0800 Subject: [PATCH 180/513] usb: misc: usbtest: add fix for driver hang In sg_timeout(), req->status is set to "-ETIMEDOUT" before calling into usb_sg_cancel(). usb_sg_cancel() will do nothing and return directly if req->status has been set to a non-zero value. This will cause driver hang whenever transfer time out is triggered. This patch fixes this issue. It could be backported to stable kernel with version later than v3.15. Cc: stable@vger.kernel.org # 3.15+ Cc: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com> Suggested-by: Alan Stern <stern@rowland.harvard.edu> Acked-by: Alan Stern <stern@rowland.harvard.edu> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/misc/usbtest.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c index 5e3464e8d4e5a..5c8210dc6fd9c 100644 --- a/drivers/usb/misc/usbtest.c +++ b/drivers/usb/misc/usbtest.c @@ -585,7 +585,6 @@ static void sg_timeout(unsigned long _req) { struct usb_sg_request *req = (struct usb_sg_request *) _req; - req->status = -ETIMEDOUT; usb_sg_cancel(req); } @@ -616,8 +615,10 @@ static int perform_sglist( mod_timer(&sg_timer, jiffies + msecs_to_jiffies(SIMPLE_IO_TIMEOUT)); usb_sg_wait(req); - del_timer_sync(&sg_timer); - retval = req->status; + if (!del_timer_sync(&sg_timer)) + retval = -ETIMEDOUT; + else + retval = req->status; /* FIXME check resulting data pattern */ From 104a493390940e85fb7c840a9fd5214aba5cb3bd Mon Sep 17 00:00:00 2001 From: Jason Wang <jasowang@redhat.com> Date: Thu, 11 Aug 2016 18:15:56 +0800 Subject: [PATCH 181/513] macvtap: fix use after free for skb_array during release We've clean skb_array in macvtap_put_queue() but still try to pop from it during macvtap_sock_destruct(). Fix this use after free by moving the skb array cleanup to macvtap_sock_destruct() instead. Fixes: 362899b8725b ("macvtap: switch to use skb array") Reported-by: Cornelia Huck <cornelia.huck@de.ibm.com> Tested-by: Cornelia Huck <cornelia.huck@de.ibm.com> Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/macvtap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a38c0dac514b8..070e3290aa6ef 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -275,7 +275,6 @@ static void macvtap_put_queue(struct macvtap_queue *q) rtnl_unlock(); synchronize_rcu(); - skb_array_cleanup(&q->skb_array); sock_put(&q->sk); } @@ -533,10 +532,8 @@ static void macvtap_sock_write_space(struct sock *sk) static void macvtap_sock_destruct(struct sock *sk) { struct macvtap_queue *q = container_of(sk, struct macvtap_queue, sk); - struct sk_buff *skb; - while ((skb = skb_array_consume(&q->skb_array)) != NULL) - kfree_skb(skb); + skb_array_cleanup(&q->skb_array); } static int macvtap_open(struct inode *inode, struct file *file) From bbe11fab0b6c1d113776b2898e085bf4d1fdc607 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Thu, 11 Aug 2016 15:24:27 +0200 Subject: [PATCH 182/513] macsec: use after free when deleting the underlying device macsec_notify() loops over the list of macsec devices configured on the underlying device when this device is being removed. This list is part of the rx_handler data. However, macsec_dellink unregisters the rx_handler and frees the rx_handler data when the last macsec device is removed from the underlying device. Add macsec_common_dellink() to delete macsec devices without unregistering the rx_handler and freeing the associated data. Fixes: 960d5848dbf1 ("macsec: fix memory leaks around rx_handler (un)registration") Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/macsec.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index d13e6e15d7b5e..dbd590a8177f7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3047,22 +3047,29 @@ static void macsec_del_dev(struct macsec_dev *macsec) } } +static void macsec_common_dellink(struct net_device *dev, struct list_head *head) +{ + struct macsec_dev *macsec = macsec_priv(dev); + + unregister_netdevice_queue(dev, head); + list_del_rcu(&macsec->secys); + macsec_del_dev(macsec); + + macsec_generation++; +} + static void macsec_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); struct net_device *real_dev = macsec->real_dev; struct macsec_rxh_data *rxd = macsec_data_rtnl(real_dev); - macsec_generation++; + macsec_common_dellink(dev, head); - unregister_netdevice_queue(dev, head); - list_del_rcu(&macsec->secys); if (list_empty(&rxd->secys)) { netdev_rx_handler_unregister(real_dev); kfree(rxd); } - - macsec_del_dev(macsec); } static int register_macsec_dev(struct net_device *real_dev, @@ -3382,8 +3389,12 @@ static int macsec_notify(struct notifier_block *this, unsigned long event, rxd = macsec_data_rtnl(real_dev); list_for_each_entry_safe(m, n, &rxd->secys, secys) { - macsec_dellink(m->secy.netdev, &head); + macsec_common_dellink(m->secy.netdev, &head); } + + netdev_rx_handler_unregister(real_dev); + kfree(rxd); + unregister_netdevice_many(&head); break; } From aa8b187eeab5e6757e959b81889ddbfda7d02ad1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck <linux@roeck-us.net> Date: Tue, 9 Aug 2016 22:09:19 -0700 Subject: [PATCH 183/513] hwmon: (it87) Features mask must be 32 bit wide Coverity reports: result_independent_of_operands: data->features & (65536UL /* 1UL << 16 */) is always 0 regardless of the values of its operands. This occurs as the logical operand of if. data->features needs to be 32 bit wide since there are more than 16 features. Fixes: cc18da79d9b7 ("hwmon: (it87) Support up to 6 temperature sensors ... "); Signed-off-by: Guenter Roeck <linux@roeck-us.net> --- drivers/hwmon/it87.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c index 730d840282603..d0203a115effd 100644 --- a/drivers/hwmon/it87.c +++ b/drivers/hwmon/it87.c @@ -491,7 +491,7 @@ struct it87_sio_data { struct it87_data { const struct attribute_group *groups[7]; enum chips type; - u16 features; + u32 features; u8 peci_mask; u8 old_peci_mask; From 979cf59acc9d634cc140aadd0d2915947ab303cc Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Fri, 12 Aug 2016 11:45:18 +0000 Subject: [PATCH 184/513] ASoC: Intel: Skylake: Fix error return code in skl_probe() Fix to return error code -ENODEV from the error handling case instead of 0, as done elsewhere in this function. Fixes: 87b2bdf02278 ("ASoC: Intel: Skylake: Initialize NHLT table") Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Acked-By: Vinod Koul <vinod.kou@intel.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/intel/skylake/skl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/intel/skylake/skl.c b/sound/soc/intel/skylake/skl.c index cd59536a761dd..e3e7641677657 100644 --- a/sound/soc/intel/skylake/skl.c +++ b/sound/soc/intel/skylake/skl.c @@ -672,8 +672,10 @@ static int skl_probe(struct pci_dev *pci, skl->nhlt = skl_nhlt_init(bus->dev); - if (skl->nhlt == NULL) + if (skl->nhlt == NULL) { + err = -ENODEV; goto out_free; + } skl_nhlt_update_topology_bin(skl); From 4a008c002133aba0276cbc5d116bbb774aaf1b0d Mon Sep 17 00:00:00 2001 From: Thilo Cestonaro <thilo@cestona.ro> Date: Fri, 12 Aug 2016 14:39:10 +0200 Subject: [PATCH 185/513] hwmon: (ftsteutates) Correct ftp urls in driver documentation Signed-off-by: Thilo Cestonaro <thilo@cestona.ro> Signed-off-by: Guenter Roeck <linux@roeck-us.net> --- Documentation/hwmon/ftsteutates | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/hwmon/ftsteutates b/Documentation/hwmon/ftsteutates index 2a1bf69c6a26f..8c10a916de20d 100644 --- a/Documentation/hwmon/ftsteutates +++ b/Documentation/hwmon/ftsteutates @@ -19,5 +19,5 @@ enhancements. It can monitor up to 4 voltages, 16 temperatures and implemented in this driver. Specification of the chip can be found here: -ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf -ftp:///pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf +ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/BMC-Teutates_Specification_V1.21.pdf +ftp://ftp.ts.fujitsu.com/pub/Mainboard-OEM-Sales/Services/Software&Tools/Linux_SystemMonitoring&Watchdog&GPIO/Fujitsu_mainboards-1-Sensors_HowTo-en-US.pdf From e1717e0485af4f47fc4da1e979ac817f9ad61b0f Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Wed, 20 Jul 2016 12:00:06 +0300 Subject: [PATCH 186/513] perf intel-pt: Fix ip compression The June 2015 Intel SDM introduced IP Compression types 4 and 6. Refer to section 36.4.2.2 Target IP (TIP) Packet - IP Compression. Existing Intel PT packet decoder did not support type 4, and got type 6 wrong. Because type 3 and type 4 have the same number of bytes, the packet 'count' has been changed from being the number of ip bytes to being the type code. That allows the Intel PT decoder to correctly decide whether to sign-extend or use the last ip. However that also meant the code had to be adjusted in a number of places. Currently hardware is not using the new compression types, so this fix has no effect on existing hardware. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Link: http://lkml.kernel.org/r/1469005206-3049-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- .../util/intel-pt-decoder/intel-pt-decoder.c | 44 ++++++++++--------- .../intel-pt-decoder/intel-pt-pkt-decoder.c | 24 +++++++--- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 9c8f15da86ce8..8ff6c6a61291f 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -123,8 +123,6 @@ struct intel_pt_decoder { bool have_calc_cyc_to_tsc; int exec_mode; unsigned int insn_bytes; - uint64_t sign_bit; - uint64_t sign_bits; uint64_t period; enum intel_pt_period_type period_type; uint64_t tot_insn_cnt; @@ -191,9 +189,6 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->data = params->data; decoder->return_compression = params->return_compression; - decoder->sign_bit = (uint64_t)1 << 47; - decoder->sign_bits = ~(((uint64_t)1 << 48) - 1); - decoder->period = params->period; decoder->period_type = params->period_type; @@ -362,21 +357,30 @@ int intel_pt__strerror(int code, char *buf, size_t buflen) return 0; } -static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, - const struct intel_pt_pkt *packet, +static uint64_t intel_pt_calc_ip(const struct intel_pt_pkt *packet, uint64_t last_ip) { uint64_t ip; switch (packet->count) { - case 2: + case 1: ip = (last_ip & (uint64_t)0xffffffffffff0000ULL) | packet->payload; break; - case 4: + case 2: ip = (last_ip & (uint64_t)0xffffffff00000000ULL) | packet->payload; break; + case 3: + ip = packet->payload; + /* Sign-extend 6-byte ip */ + if (ip & (uint64_t)0x800000000000ULL) + ip |= (uint64_t)0xffff000000000000ULL; + break; + case 4: + ip = (last_ip & (uint64_t)0xffff000000000000ULL) | + packet->payload; + break; case 6: ip = packet->payload; break; @@ -384,16 +388,12 @@ static uint64_t intel_pt_calc_ip(struct intel_pt_decoder *decoder, return 0; } - if (ip & decoder->sign_bit) - return ip | decoder->sign_bits; - return ip; } static inline void intel_pt_set_last_ip(struct intel_pt_decoder *decoder) { - decoder->last_ip = intel_pt_calc_ip(decoder, &decoder->packet, - decoder->last_ip); + decoder->last_ip = intel_pt_calc_ip(&decoder->packet, decoder->last_ip); } static inline void intel_pt_set_ip(struct intel_pt_decoder *decoder) @@ -1657,6 +1657,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) } } +static inline bool intel_pt_have_ip(struct intel_pt_decoder *decoder) +{ + return decoder->last_ip || decoder->packet.count == 0 || + decoder->packet.count == 3 || decoder->packet.count == 6; +} + /* Walk PSB+ packets to get in sync. */ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) { @@ -1677,8 +1683,7 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: decoder->pge = true; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) { + if (intel_pt_have_ip(decoder)) { uint64_t current_ip = decoder->ip; intel_pt_set_ip(decoder); @@ -1767,8 +1772,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_TIP_PGE: case INTEL_PT_TIP: decoder->pge = decoder->packet.type != INTEL_PT_TIP_PGD; - if (decoder->last_ip || decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; @@ -1776,9 +1780,7 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder) case INTEL_PT_FUP: if (decoder->overflow) { - if (decoder->last_ip || - decoder->packet.count == 6 || - decoder->packet.count == 0) + if (intel_pt_have_ip(decoder)) intel_pt_set_ip(decoder); if (decoder->ip) return 0; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index b1257c816310f..4f7b320204870 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -292,36 +292,46 @@ static int intel_pt_get_ip(enum intel_pt_pkt_type type, unsigned int byte, const unsigned char *buf, size_t len, struct intel_pt_pkt *packet) { - switch (byte >> 5) { + int ip_len; + + packet->count = byte >> 5; + + switch (packet->count) { case 0: - packet->count = 0; + ip_len = 0; break; case 1: if (len < 3) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 2; + ip_len = 2; packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); break; case 2: if (len < 5) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 4; + ip_len = 4; packet->payload = le32_to_cpu(*(uint32_t *)(buf + 1)); break; case 3: - case 6: + case 4: if (len < 7) return INTEL_PT_NEED_MORE_BYTES; - packet->count = 6; + ip_len = 6; memcpy_le64(&packet->payload, buf + 1, 6); break; + case 6: + if (len < 9) + return INTEL_PT_NEED_MORE_BYTES; + ip_len = 8; + packet->payload = le64_to_cpu(*(uint64_t *)(buf + 1)); + break; default: return INTEL_PT_BAD_PACKET; } packet->type = type; - return packet->count + 1; + return ip_len + 1; } static int intel_pt_get_mode(const unsigned char *buf, size_t len, From 33da54fa86e29b87fe1e83bd0f15b4ef2be53ecb Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@kernel.org> Date: Thu, 11 Aug 2016 10:50:57 +0200 Subject: [PATCH 187/513] perf tools mem: Fix -t store option for record command Michael reported 'perf mem -t store record' being broken. The reason is latest rework of this area: commit acbe613e0c03 ("perf tools: Add monitored events array") We don't mark perf_mem_events store record when -t store option is specified. Committer notes: Before: # perf mem -t store record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (7 samples) ] # perf evlist cycles:ppp # After: # perf mem -t store record usleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.020 MB perf.data (7 samples) ] # perf evlist cpu/mem-stores/P # Reported-by: Michael Petlan <mpetlan@redhat.com> Signed-off-by: Jiri Olsa <jolsa@redhat.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: David Ahern <dsahern@gmail.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Fixes: acbe613e0c03 ("perf tools: Add monitored events array") Link: http://lkml.kernel.org/r/1470905457-18311-1-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-mem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index d608a2c9e48cd..d1ce29be560e5 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -88,6 +88,9 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem) if (mem->operation & MEM_OPERATION_LOAD) perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + if (mem->operation & MEM_OPERATION_STORE) + perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) rec_argv[i++] = "-W"; From f046f3df665361d2f89e660f8c79ba164069f02a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> Date: Thu, 11 Aug 2016 14:43:59 -0300 Subject: [PATCH 188/513] perf ppc64le: Fix build failure when libelf is not present arch__post_process_probe_trace_events() calls get_target_map() to prepare symbol table. get_target_map() is defined inside util/probe-event.c. probe-event.c will only get included in perf binary if CONFIG_LIBELF is set. Hence arch__post_process_probe_trace_events() needs to be defined inside #ifdef HAVE_LIBELF_SUPPORT to solve compilation error. Reported-and-Tested-by: Anton Blanchard <anton@samba.org> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Balbir Singh <bsingharora@gmail.com> Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Cc: Namhyung Kim <namhyung@kernel.org> Link: http://lkml.kernel.org/r/57ABFF88.8030905@linux.vnet.ibm.com [ Thunderbird MUA mangled it, fix that ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/arch/powerpc/util/sym-handling.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 8d4dc97d80bae..35745a733100e 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -97,6 +97,7 @@ void arch__fix_tev_from_maps(struct perf_probe_event *pev, } } +#ifdef HAVE_LIBELF_SUPPORT void arch__post_process_probe_trace_events(struct perf_probe_event *pev, int ntevs) { @@ -118,5 +119,6 @@ void arch__post_process_probe_trace_events(struct perf_probe_event *pev, } } } +#endif /* HAVE_LIBELF_SUPPORT */ #endif From fc520f8b4fa35ceb36f0ad031c1a297968788236 Mon Sep 17 00:00:00 2001 From: Kevin Hao <haokexin@gmail.com> Date: Fri, 12 Aug 2016 13:49:34 +0800 Subject: [PATCH 189/513] of/platform: disable the of_platform_default_populate_init() for all the ppc boards With the commit 44a7185c2ae6 ("of/platform: Add common method to populate default bus"), a default function is introduced to populate the default bus and this function is invoked at the arch_initcall_sync level. But a lot of ppc boards use machine_device_initcall() to populate the default bus. This means that the default populate function has higher priority and would override the arch specific population of the bus. The side effect is that some arch specific bus are not probed, then cause various malfunction due to the miss of some devices. Since it is very possible to introduce bugs if we simply change the initcall level for all these boards(about 30+). This just disable this default function for all the ppc boards. Signed-off-by: Kevin Hao <haokexin@gmail.com> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/platform.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 8aa1976910743..f39ccd5aa7012 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -497,6 +497,7 @@ int of_platform_default_populate(struct device_node *root, } EXPORT_SYMBOL_GPL(of_platform_default_populate); +#ifndef CONFIG_PPC static int __init of_platform_default_populate_init(void) { struct device_node *node; @@ -521,6 +522,7 @@ static int __init of_platform_default_populate_init(void) return 0; } arch_initcall_sync(of_platform_default_populate_init); +#endif static int of_platform_device_destroy(struct device *dev, void *data) { From a7a0729c4532c94f4e8efb4faaf6ef00e5fe19ba Mon Sep 17 00:00:00 2001 From: Jonathan Corbet <corbet@lwn.net> Date: Fri, 12 Aug 2016 14:11:12 -0600 Subject: [PATCH 190/513] docs: Set the Sphinx default highlight language to "guess" This should eliminate a whole class of markup warnings, at the cost of occasionally amusing markup choices; we'll have to see if it works out. Suggested-by: Markus Heiser <markus.heiser@darmarIT.de> Signed-off-by: Jonathan Corbet <corbet@lwn.net> --- Documentation/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/conf.py b/Documentation/conf.py index 96b7aa66c89ca..106ae9c740b99 100644 --- a/Documentation/conf.py +++ b/Documentation/conf.py @@ -131,7 +131,7 @@ todo_include_todos = False primary_domain = 'C' -highlight_language = 'C' +highlight_language = 'guess' # -- Options for HTML output ---------------------------------------------- From ce7c6c9e1d997a2670aead3a7b87f4df32c11118 Mon Sep 17 00:00:00 2001 From: Greg Edwards <gedwards@fireweed.org> Date: Sat, 30 Jul 2016 10:06:26 -0600 Subject: [PATCH 191/513] mpt3sas: Fix resume on WarpDrive flash cards mpt3sas crashes on resume after suspend with WarpDrive flash cards. The reply_post_host_index array is not set back up after the resume, and we deference a stale pointer in _base_interrupt(). [ 47.309711] BUG: unable to handle kernel paging request at ffffc90001f8006c [ 47.318289] IP: [<ffffffffc00863ef>] _base_interrupt+0x49f/0xa30 [mpt3sas] [ 47.326749] PGD 41ccaa067 PUD 41ccab067 PMD 3466c067 PTE 0 [ 47.333848] Oops: 0002 [#1] SMP ... [ 47.452708] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.7.0 #6 [ 47.460506] Hardware name: Dell Inc. OptiPlex 990/06D7TR, BIOS A18 09/24/2013 [ 47.469629] task: ffffffff81c0d500 ti: ffffffff81c00000 task.ti: ffffffff81c00000 [ 47.479112] RIP: 0010:[<ffffffffc00863ef>] [<ffffffffc00863ef>] _base_interrupt+0x49f/0xa30 [mpt3sas] [ 47.490466] RSP: 0018:ffff88041d203e30 EFLAGS: 00010002 [ 47.497801] RAX: 0000000000000001 RBX: ffff880033f4c000 RCX: 0000000000000001 [ 47.506973] RDX: ffffc90001f8006c RSI: 0000000000000082 RDI: 0000000000000082 [ 47.516141] RBP: ffff88041d203eb0 R08: ffff8804118e2820 R09: 0000000000000001 [ 47.525300] R10: 0000000000000001 R11: 00000000100c0000 R12: 0000000000000000 [ 47.534457] R13: ffff880412c487e0 R14: ffff88041a8987d8 R15: 0000000000000001 [ 47.543632] FS: 0000000000000000(0000) GS:ffff88041d200000(0000) knlGS:0000000000000000 [ 47.553796] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 47.561632] CR2: ffffc90001f8006c CR3: 0000000001c06000 CR4: 00000000000406f0 [ 47.570883] Stack: [ 47.575015] 000000001d211228 ffff88041d2100c0 ffff8800c47d8130 0000000000000100 [ 47.584625] ffff8804100c0000 100c000000000000 ffff88041a8992a0 ffff88041a8987f8 [ 47.594230] ffff88041d203e00 ffffffff81111e55 000000000000038c ffff880414ad4280 [ 47.603862] Call Trace: [ 47.608474] <IRQ> [ 47.610413] [<ffffffff81111e55>] ? call_timer_fn+0x35/0x120 [ 47.620539] [<ffffffff81100a1f>] handle_irq_event_percpu+0x7f/0x1c0 [ 47.629061] [<ffffffff81100b8c>] handle_irq_event+0x2c/0x50 [ 47.636859] [<ffffffff81103fff>] handle_edge_irq+0x6f/0x130 [ 47.644654] [<ffffffff8102fbf3>] handle_irq+0x73/0x120 [ 47.652011] [<ffffffff810c6ada>] ? atomic_notifier_call_chain+0x1a/0x20 [ 47.660854] [<ffffffff817e374b>] do_IRQ+0x4b/0xd0 [ 47.667777] [<ffffffff817e160c>] common_interrupt+0x8c/0x8c [ 47.675635] <EOI> Move the reply_post_host_index array setup into mpt3sas_base_map_resources(), which is also in the resume path. Cc: stable@vger.kernel.org Signed-off-by: Greg Edwards <gedwards@fireweed.org> Acked-by: Chaitra P B <chaitra.basappa@broadcom.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/mpt3sas/mpt3sas_base.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 751f13edece01..750f82c339d4d 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2188,6 +2188,17 @@ mpt3sas_base_map_resources(struct MPT3SAS_ADAPTER *ioc) } else ioc->msix96_vector = 0; + if (ioc->is_warpdrive) { + ioc->reply_post_host_index[0] = (resource_size_t __iomem *) + &ioc->chip->ReplyPostHostIndex; + + for (i = 1; i < ioc->cpu_msix_table_sz; i++) + ioc->reply_post_host_index[i] = + (resource_size_t __iomem *) + ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1) + * 4))); + } + list_for_each_entry(reply_q, &ioc->reply_queue_list, list) pr_info(MPT3SAS_FMT "%s: IRQ %d\n", reply_q->name, ((ioc->msix_enable) ? "PCI-MSI-X enabled" : @@ -5280,17 +5291,6 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) if (r) goto out_free_resources; - if (ioc->is_warpdrive) { - ioc->reply_post_host_index[0] = (resource_size_t __iomem *) - &ioc->chip->ReplyPostHostIndex; - - for (i = 1; i < ioc->cpu_msix_table_sz; i++) - ioc->reply_post_host_index[i] = - (resource_size_t __iomem *) - ((u8 __iomem *)&ioc->chip->Doorbell + (0x4000 + ((i - 1) - * 4))); - } - pci_set_drvdata(ioc->pdev, ioc->shost); r = _base_get_ioc_facts(ioc, CAN_SLEEP); if (r) From 5381cfb6f0422da24cfa9da35b0433c0415830e0 Mon Sep 17 00:00:00 2001 From: Sven Van Asbroeck <thesven73@gmail.com> Date: Fri, 12 Aug 2016 09:10:27 -0400 Subject: [PATCH 192/513] power: supply: max17042_battery: fix model download bug. The device's model download function returns the model data as an array of u32s, which is later compared to the reference model data. However, since the latter is an array of u16s, the comparison does not happen correctly, and model verification fails. This in turn breaks the POR initialization sequence. Fixes: 39e7213edc4f3 ("max17042_battery: Support regmap to access device's registers") Reported-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: Sven Van Asbroeck <TheSven73@googlemail.com> Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com> Signed-off-by: Sebastian Reichel <sre@kernel.org> --- drivers/power/max17042_battery.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c index 9c65f134d4474..da7a75f824891 100644 --- a/drivers/power/max17042_battery.c +++ b/drivers/power/max17042_battery.c @@ -457,13 +457,16 @@ static inline void max17042_write_model_data(struct max17042_chip *chip, } static inline void max17042_read_model_data(struct max17042_chip *chip, - u8 addr, u32 *data, int size) + u8 addr, u16 *data, int size) { struct regmap *map = chip->regmap; int i; + u32 tmp; - for (i = 0; i < size; i++) - regmap_read(map, addr + i, &data[i]); + for (i = 0; i < size; i++) { + regmap_read(map, addr + i, &tmp); + data[i] = (u16)tmp; + } } static inline int max17042_model_data_compare(struct max17042_chip *chip, @@ -486,7 +489,7 @@ static int max17042_init_model(struct max17042_chip *chip) { int ret; int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl); - u32 *temp_data; + u16 *temp_data; temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL); if (!temp_data) @@ -501,7 +504,7 @@ static int max17042_init_model(struct max17042_chip *chip) ret = max17042_model_data_compare( chip, chip->pdata->config_data->cell_char_tbl, - (u16 *)temp_data, + temp_data, table_size); max10742_lock_model(chip); @@ -514,7 +517,7 @@ static int max17042_verify_model_lock(struct max17042_chip *chip) { int i; int table_size = ARRAY_SIZE(chip->pdata->config_data->cell_char_tbl); - u32 *temp_data; + u16 *temp_data; int ret = 0; temp_data = kcalloc(table_size, sizeof(*temp_data), GFP_KERNEL); From 7a4947cf6f26b7d0a5db260d212f6df41a563d23 Mon Sep 17 00:00:00 2001 From: Andy Yan <andy.yan@rock-chips.com> Date: Fri, 12 Aug 2016 18:01:50 +0800 Subject: [PATCH 193/513] power: reset: reboot-mode: fix build error of missing ioremap/iounmap on UM commit 4fcd504edbf7 ("power: reset: add reboot mode driver") uses api from syscon, and syscon uses ioremap/iounmap which depends on HAS_IOMEM, so let's depend on MFD_SYSCON instead of selecting it directly to avoid the um-allyesconfig like build error on archs that without iomem: drivers/mfd/syscon.c: In function 'of_syscon_register': drivers/mfd/syscon.c:67:9: error: implicit declaration of function 'ioremap' [-Werror=implicit-function-declaration] base = ioremap(res.start, resource_size(&res)); ^ drivers/mfd/syscon.c:67:7: warning: assignment makes pointer from integer without a cast [-Wint-conversion] base = ioremap(res.start, resource_size(&res)); ^ drivers/mfd/syscon.c:109:2: error: implicit declaration of function 'iounmap' [-Werror=implicit-function-declaration] iounmap(base); ^ Reported-by: Kbuild test robot <fengguang.wu@intel.com> Fixes: 4fcd504edbf7("power: reset: add reboot mode driver") Signed-off-by: Andy Yan <andy.yan@rock-chips.com> Reviewed-by: Krzysztof Kozlowski <k.kozlowski@samsung.com> Signed-off-by: Sebastian Reichel <sre@kernel.org> --- drivers/power/reset/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/power/reset/Kconfig b/drivers/power/reset/Kconfig index 3bfac539334b5..c74c3f67b8da0 100644 --- a/drivers/power/reset/Kconfig +++ b/drivers/power/reset/Kconfig @@ -200,8 +200,8 @@ config REBOOT_MODE config SYSCON_REBOOT_MODE tristate "Generic SYSCON regmap reboot mode driver" depends on OF + depends on MFD_SYSCON select REBOOT_MODE - select MFD_SYSCON help Say y here will enable reboot mode driver. This will get reboot mode arguments and store it in SYSCON mapped From bae170efd6c42bf116f513a1dd07639d68fa71b9 Mon Sep 17 00:00:00 2001 From: Arvind Yadav <arvind.yadav.cs@gmail.com> Date: Fri, 12 Aug 2016 20:49:18 +0530 Subject: [PATCH 194/513] power: reset: hisi-reboot: Unmap region obtained by of_iomap Free memory mapping, if probe is not successful. Fixes: 4a9b37371822 ("power: reset: move hisilicon reboot code") Signed-off-by: Arvind Yadav <arvind.yadav.cs@gmail.com> Signed-off-by: Sebastian Reichel <sre@kernel.org> --- drivers/power/reset/hisi-reboot.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/power/reset/hisi-reboot.c b/drivers/power/reset/hisi-reboot.c index 9ab7f562a83ba..f69387e12c1e5 100644 --- a/drivers/power/reset/hisi-reboot.c +++ b/drivers/power/reset/hisi-reboot.c @@ -53,13 +53,16 @@ static int hisi_reboot_probe(struct platform_device *pdev) if (of_property_read_u32(np, "reboot-offset", &reboot_offset) < 0) { pr_err("failed to find reboot-offset property\n"); + iounmap(base); return -EINVAL; } err = register_restart_handler(&hisi_restart_nb); - if (err) + if (err) { dev_err(&pdev->dev, "cannot register restart handler (err=%d)\n", err); + iounmap(base); + } return err; } From c2d5be14cb39da34b076b461d8aa2344611364c6 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet <corbet@lwn.net> Date: Fri, 12 Aug 2016 15:12:36 -0600 Subject: [PATCH 195/513] docs: kernel-documentation: remove some highlight directives With the conf.py change, we don't need them to avoid warnings anymore. Signed-off-by: Jonathan Corbet <corbet@lwn.net> --- Documentation/kernel-documentation.rst | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Documentation/kernel-documentation.rst b/Documentation/kernel-documentation.rst index c4eb5049da390..391decc66a18f 100644 --- a/Documentation/kernel-documentation.rst +++ b/Documentation/kernel-documentation.rst @@ -366,8 +366,6 @@ Domain`_ references. Cross-referencing from reStructuredText ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. highlight:: none - To cross-reference the functions and types defined in the kernel-doc comments from reStructuredText documents, please use the `Sphinx C Domain`_ references. For example:: @@ -390,8 +388,6 @@ For further details, please refer to the `Sphinx C Domain`_ documentation. Function documentation ---------------------- -.. highlight:: c - The general format of a function and function-like macro kernel-doc comment is:: /** @@ -572,8 +568,6 @@ DocBook XML [DEPRECATED] Converting DocBook to Sphinx ---------------------------- -.. highlight:: none - Over time, we expect all of the documents under ``Documentation/DocBook`` to be converted to Sphinx and reStructuredText. For most DocBook XML documents, a good enough solution is to use the simple ``Documentation/sphinx/tmplcvt`` script, From e120dcb6b2a11840b08f0fc0d2e90256b7b6e842 Mon Sep 17 00:00:00 2001 From: Calvin Owens <calvinowens@fb.com> Date: Fri, 13 May 2016 13:28:21 -0700 Subject: [PATCH 196/513] ses: Fix racy cleanup of /sys in remove_dev() Currently we free the resources backing the enclosure device before we call device_unregister(). This is racy: during rmmod of low-level SCSI drivers that hook into enclosure, we end up with a small window of time during which writing to /sys can OOPS. Example trace with mpt3sas: general protection fault: 0000 [#1] SMP KASAN Modules linked in: mpt3sas(-) <...> RIP: [<ffffffffa0388a98>] ses_get_page2_descriptor.isra.6+0x38/0x220 [ses] Call Trace: [<ffffffffa0389d14>] ses_set_fault+0xf4/0x400 [ses] [<ffffffffa0361069>] set_component_fault+0xa9/0xf0 [enclosure] [<ffffffff8205bffc>] dev_attr_store+0x3c/0x70 [<ffffffff81677df5>] sysfs_kf_write+0x115/0x180 [<ffffffff81675725>] kernfs_fop_write+0x275/0x3a0 [<ffffffff8151f810>] __vfs_write+0xe0/0x3e0 [<ffffffff8152281f>] vfs_write+0x13f/0x4a0 [<ffffffff81526731>] SyS_write+0x111/0x230 [<ffffffff828b401b>] entry_SYSCALL_64_fastpath+0x13/0x94 Fortunately the solution is extremely simple: call device_unregister() before we free the resources, and the race no longer exists. The driver core holds a reference over ->remove_dev(), so AFAICT this is safe. Signed-off-by: Calvin Owens <calvinowens@fb.com> Reviewed-by: James Bottomley <jejb@linux.vnet.ibm.com> Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> --- drivers/scsi/ses.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/ses.c b/drivers/scsi/ses.c index 53ef1cb6418e3..0e8601aa877a8 100644 --- a/drivers/scsi/ses.c +++ b/drivers/scsi/ses.c @@ -778,6 +778,8 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev) if (!edev) return; + enclosure_unregister(edev); + ses_dev = edev->scratch; edev->scratch = NULL; @@ -789,7 +791,6 @@ static void ses_intf_remove_enclosure(struct scsi_device *sdev) kfree(edev->component[0].scratch); put_device(&edev->edev); - enclosure_unregister(edev); } static void ses_intf_remove(struct device *cdev, From 601bbae0bc10d4306857b93d84240b039b3d9a6c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Wed, 10 Aug 2016 23:54:08 +0200 Subject: [PATCH 197/513] dsa: mv88e6xxx: hide unused functions When CONFIG_NET_DSA_HWMON is disabled, we get warnings about two unused functions whose only callers are all inside of an #ifdef: drivers/net/dsa/mv88e6xxx.c:3257:12: 'mv88e6xxx_mdio_page_write' defined but not used [-Werror=unused-function] drivers/net/dsa/mv88e6xxx.c:3244:12: 'mv88e6xxx_mdio_page_read' defined but not used [-Werror=unused-function] This adds another ifdef around the function definitions. The warnings appeared after the functions were marked 'static', but the problem was already there before that. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Fixes: 57d3231057e9 ("net: dsa: mv88e6xxx: fix style issues") Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/dsa/mv88e6xxx/chip.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index d36aedde8cb9b..d1d9d3cf9139f 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -3187,6 +3187,7 @@ static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) return err; } +#ifdef CONFIG_NET_DSA_HWMON static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page, int reg) { @@ -3212,6 +3213,7 @@ static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page, return ret; } +#endif static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port) { From 947d2c2cd3340d11a1737d3f4781170cc3a4b74f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com> Date: Sat, 13 Aug 2016 02:54:04 +0200 Subject: [PATCH 198/513] PM / sleep: Update some system sleep documentation Update some documentation related to system sleep to document new features and remove outdated information from it. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Pavel Machek <pavel@ucw.cz> Reviewed-by: Chen Yu <yu.c.chen@intel.com> --- Documentation/power/basic-pm-debugging.txt | 27 +++- Documentation/power/interface.txt | 151 +++++++++++---------- 2 files changed, 102 insertions(+), 76 deletions(-) diff --git a/Documentation/power/basic-pm-debugging.txt b/Documentation/power/basic-pm-debugging.txt index b96098ccfe692..708f87f78a756 100644 --- a/Documentation/power/basic-pm-debugging.txt +++ b/Documentation/power/basic-pm-debugging.txt @@ -164,7 +164,32 @@ load n/2 modules more and try again. Again, if you find the offending module(s), it(they) must be unloaded every time before hibernation, and please report the problem with it(them). -c) Advanced debugging +c) Using the "test_resume" hibernation option + +/sys/power/disk generally tells the kernel what to do after creating a +hibernation image. One of the available options is "test_resume" which +causes the just created image to be used for immediate restoration. Namely, +after doing: + +# echo test_resume > /sys/power/disk +# echo disk > /sys/power/state + +a hibernation image will be created and a resume from it will be triggered +immediately without involving the platform firmware in any way. + +That test can be used to check if failures to resume from hibernation are +related to bad interactions with the platform firmware. That is, if the above +works every time, but resume from actual hibernation does not work or is +unreliable, the platform firmware may be responsible for the failures. + +On architectures and platforms that support using different kernels to restore +hibernation images (that is, the kernel used to read the image from storage and +load it into memory is different from the one included in the image) or support +kernel address space randomization, it also can be used to check if failures +to resume may be related to the differences between the restore and image +kernels. + +d) Advanced debugging In case that hibernation does not work on your system even in the minimal configuration and compiling more drivers as modules is not practical or some diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index f1f0f59a7c47d..974916ff6608e 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -1,75 +1,76 @@ -Power Management Interface - - -The power management subsystem provides a unified sysfs interface to -userspace, regardless of what architecture or platform one is -running. The interface exists in /sys/power/ directory (assuming sysfs -is mounted at /sys). - -/sys/power/state controls system power state. Reading from this file -returns what states are supported, which is hard-coded to 'freeze', -'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and 'disk' -(Suspend-to-Disk). - -Writing to this file one of those strings causes the system to -transition into that state. Please see the file -Documentation/power/states.txt for a description of each of those -states. - - -/sys/power/disk controls the operating mode of the suspend-to-disk -mechanism. Suspend-to-disk can be handled in several ways. We have a -few options for putting the system to sleep - using the platform driver -(e.g. ACPI or other suspend_ops), powering off the system or rebooting the -system (for testing). - -Additionally, /sys/power/disk can be used to turn on one of the two testing -modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the -suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to -/sys/power/state will cause the kernel to disable nonboot CPUs and freeze -tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs. If it is -in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel -to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait -for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. Then, -we are able to look in the log messages and work out, for example, which code -is being slow and which device drivers are misbehaving. - -Reading from this file will display all supported modes and the currently -selected one in brackets, for example - - [shutdown] reboot test testproc - -Writing to this file will accept one of - - 'platform' (only if the platform supports it) - 'shutdown' - 'reboot' - 'testproc' - 'test' - -/sys/power/image_size controls the size of the image created by -the suspend-to-disk mechanism. It can be written a string -representing a non-negative integer that will be used as an upper -limit of the image size, in bytes. The suspend-to-disk mechanism will -do its best to ensure the image size will not exceed that number. However, -if this turns out to be impossible, it will try to suspend anyway using the -smallest image possible. In particular, if "0" is written to this file, the -suspend image will be as small as possible. - -Reading from this file will display the current image size limit, which -is set to 2/5 of available RAM by default. - -/sys/power/pm_trace controls the code which saves the last PM event point in -the RTC across reboots, so that you can debug a machine that just hangs -during suspend (or more commonly, during resume). Namely, the RTC is only -used to save the last PM event point if this file contains '1'. Initially it -contains '0' which may be changed to '1' by writing a string representing a -nonzero integer into it. - -To use this debugging feature you should attempt to suspend the machine, then -reboot it and run - - dmesg -s 1000000 | grep 'hash matches' - -CAUTION: Using it will cause your machine's real-time (CMOS) clock to be -set to a random invalid time after a resume. +Power Management Interface for System Sleep + +Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com> + +The power management subsystem provides userspace with a unified sysfs interface +for system sleep regardless of the underlying system architecture or platform. +The interface is located in the /sys/power/ directory (assuming that sysfs is +mounted at /sys). + +/sys/power/state is the system sleep state control file. + +Reading from it returns a list of supported sleep states, encoded as: + +'freeze' (Suspend-to-Idle) +'standby' (Power-On Suspend) +'mem' (Suspend-to-RAM) +'disk' (Suspend-to-Disk) + +Suspend-to-Idle is always supported. Suspend-to-Disk is always supported +too as long the kernel has been configured to support hibernation at all +(ie. CONFIG_HIBERNATION is set in the kernel configuration file). Support +for Suspend-to-RAM and Power-On Suspend depends on the capabilities of the +platform. + +If one of the strings listed in /sys/power/state is written to it, the system +will attempt to transition into the corresponding sleep state. Refer to +Documentation/power/states.txt for a description of each of those states. + +/sys/power/disk controls the operating mode of hibernation (Suspend-to-Disk). +Specifically, it tells the kernel what to do after creating a hibernation image. + +Reading from it returns a list of supported options encoded as: + +'platform' (put the system into sleep using a platform-provided method) +'shutdown' (shut the system down) +'reboot' (reboot the system) +'suspend' (trigger a Suspend-to-RAM transition) +'test_resume' (resume-after-hibernation test mode) + +The currently selected option is printed in square brackets. + +The 'platform' option is only available if the platform provides a special +mechanism to put the system to sleep after creating a hibernation image (ACPI +does that, for example). The 'suspend' option is available if Suspend-to-RAM +is supported. Refer to Documentation/power/basic_pm_debugging.txt for the +description of the 'test_resume' option. + +To select an option, write the string representing it to /sys/power/disk. + +/sys/power/image_size controls the size of hibernation images. + +It can be written a string representing a non-negative integer that will be +used as a best-effort upper limit of the image size, in bytes. The hibernation +core will do its best to ensure that the image size will not exceed that number. +However, if that turns out to be impossible to achieve, a hibernation image will +still be created and its size will be as small as possible. In particular, +writing '0' to this file will enforce hibernation images to be as small as +possible. + +Reading from this file returns the current image size limit, which is set to +around 2/5 of available RAM by default. + +/sys/power/pm_trace controls the PM trace mechanism saving the last suspend +or resume event point in the RTC across reboots. + +It helps to debug hard lockups or reboots due to device driver failures that +occur during system suspend or resume (which is more common) more effectively. + +If /sys/power/pm_trace contains '1', the fingerprint of each suspend/resume +event point in turn will be stored in the RTC memory (overwriting the actual +RTC information), so it will survive a system crash if one occurs right after +storing it and it can be used later to identify the driver that caused the crash +to happen (see Documentation/power/s2ram.txt for more information). + +Initially it contains '0' which may be changed to '1' by writing a string +representing a nonzero integer into it. From 747ea55e4f78fd980350c39570a986b8c1c3e4aa Mon Sep 17 00:00:00 2001 From: Daniel Borkmann <daniel@iogearbox.net> Date: Fri, 12 Aug 2016 22:17:17 +0200 Subject: [PATCH 199/513] bpf: fix bpf_skb_in_cgroup helper naming While hashing out BPF's current_task_under_cgroup helper bits, it came to discussion that the skb_in_cgroup helper name was suboptimally chosen. Tejun says: So, I think in_cgroup should mean that the object is in that particular cgroup while under_cgroup in the subhierarchy of that cgroup. Let's rename the other subhierarchy test to under too. I think that'd be a lot less confusing going forward. [...] It's more intuitive and gives us the room to implement the real "in" test if ever necessary in the future. Since this touches uapi bits, we need to change this as long as v4.8 is not yet officially released. Thus, change the helper enum and rename related bits. Fixes: 4a482f34afcc ("cgroup: bpf: Add bpf_skb_in_cgroup_proto") Reference: http://patchwork.ozlabs.org/patch/658500/ Suggested-by: Sargun Dhillon <sargun@sargun.me> Suggested-by: Tejun Heo <tj@kernel.org> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> --- include/uapi/linux/bpf.h | 4 ++-- kernel/bpf/verifier.c | 4 ++-- net/core/filter.c | 10 +++++----- samples/bpf/bpf_helpers.h | 4 ++-- samples/bpf/test_cgrp2_tc_kern.c | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index da218fec60565..9e5fc168c8a3d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -339,7 +339,7 @@ enum bpf_func_id { BPF_FUNC_skb_change_type, /** - * bpf_skb_in_cgroup(skb, map, index) - Check cgroup2 membership of skb + * bpf_skb_under_cgroup(skb, map, index) - Check cgroup2 membership of skb * @skb: pointer to skb * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type * @index: index of the cgroup in the bpf_map @@ -348,7 +348,7 @@ enum bpf_func_id { * == 1 skb succeeded the cgroup2 descendant test * < 0 error */ - BPF_FUNC_skb_in_cgroup, + BPF_FUNC_skb_under_cgroup, /** * bpf_get_hash_recalc(skb) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 7094c69ac1995..daea765d72e6f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1053,7 +1053,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_CGROUP_ARRAY: - if (func_id != BPF_FUNC_skb_in_cgroup) + if (func_id != BPF_FUNC_skb_under_cgroup) goto error; break; default: @@ -1075,7 +1075,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; - case BPF_FUNC_skb_in_cgroup: + case BPF_FUNC_skb_under_cgroup: if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; break; diff --git a/net/core/filter.c b/net/core/filter.c index b5add4ef0d1d2..bd9bf2e5fafaf 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2317,7 +2317,7 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } #ifdef CONFIG_SOCK_CGROUP_DATA -static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *)(long)r1; struct bpf_map *map = (struct bpf_map *)(long)r2; @@ -2340,8 +2340,8 @@ static u64 bpf_skb_in_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); } -static const struct bpf_func_proto bpf_skb_in_cgroup_proto = { - .func = bpf_skb_in_cgroup, +static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { + .func = bpf_skb_under_cgroup, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -2421,8 +2421,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; #ifdef CONFIG_SOCK_CGROUP_DATA - case BPF_FUNC_skb_in_cgroup: - return &bpf_skb_in_cgroup_proto; + case BPF_FUNC_skb_under_cgroup: + return &bpf_skb_under_cgroup_proto; #endif default: return sk_filter_func_proto(func_id); diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 217c8d507f2e8..7927a090fa0d0 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -72,8 +72,8 @@ static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flag (void *) BPF_FUNC_l3_csum_replace; static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) = (void *) BPF_FUNC_l4_csum_replace; -static int (*bpf_skb_in_cgroup)(void *ctx, void *map, int index) = - (void *) BPF_FUNC_skb_in_cgroup; +static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) = + (void *) BPF_FUNC_skb_under_cgroup; #if defined(__x86_64__) diff --git a/samples/bpf/test_cgrp2_tc_kern.c b/samples/bpf/test_cgrp2_tc_kern.c index 2732c37c8d5be..10ff73404e3a8 100644 --- a/samples/bpf/test_cgrp2_tc_kern.c +++ b/samples/bpf/test_cgrp2_tc_kern.c @@ -57,7 +57,7 @@ int handle_egress(struct __sk_buff *skb) bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg), eth->h_proto, ip6h->nexthdr); return TC_ACT_OK; - } else if (bpf_skb_in_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { + } else if (bpf_skb_under_cgroup(skb, &test_cgrp2_array_pin, 0) != 1) { bpf_trace_printk(pass_msg, sizeof(pass_msg)); return TC_ACT_OK; } else { From b4c0e0c61f81dedc82dda35c287ea149ff98b434 Mon Sep 17 00:00:00 2001 From: Colin Ian King <colin.king@canonical.com> Date: Thu, 11 Aug 2016 18:17:22 +0100 Subject: [PATCH 200/513] calipso: fix resource leak on calipso_genopt failure Currently, if calipso_genopt fails then the error exit path does not free the ipv6_opt_hdr new causing a memory leak. Fix this by kfree'ing new on the error exit path. Signed-off-by: Colin Ian King <colin.king@canonical.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv6/calipso.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index c53b92c617c54..37ac9de713c69 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -952,8 +952,10 @@ calipso_opt_insert(struct ipv6_opt_hdr *hop, memcpy(new, hop, start); ret_val = calipso_genopt((unsigned char *)new, start, buf_len, doi_def, secattr); - if (ret_val < 0) + if (ret_val < 0) { + kfree(new); return ERR_PTR(ret_val); + } buf_len = start + ret_val; /* At this point buf_len aligns to 4n, so (buf_len & 4) pads to 8n */ From d7005652cd31dfc5660e1e32bf7e53538ef14987 Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" <sean.wang@mediatek.com> Date: Sat, 13 Aug 2016 19:16:18 +0800 Subject: [PATCH 201/513] net: ethernet: mediatek: fixed that initializing u64_stats_sync is missing To fix runtime warning with lockdep is enabled due that u64_stats_sync is not initialized well, so add it. Signed-off-by: Sean Wang <sean.wang@mediatek.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index b57ae3afb994a..fe17f8cee16b1 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1751,6 +1751,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) goto free_netdev; } spin_lock_init(&mac->hw_stats->stats_lock); + u64_stats_init(&mac->hw_stats->syncp); mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; SET_NETDEV_DEV(eth->netdev[id], eth->dev); From e8c2993a4c9fdb0c9e6fc983edd5b52716ce7442 Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" <sean.wang@mediatek.com> Date: Sat, 13 Aug 2016 19:16:19 +0800 Subject: [PATCH 202/513] net: ethernet: mediatek: add the missing of_node_put() after node is used done This patch adds the missing of_node_put() after finishing the usage of of_parse_phandle() or of_node_get() used by fixed_phy. Signed-off-by: Sean Wang <sean.wang@mediatek.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index fe17f8cee16b1..003036114a1c2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -269,6 +269,8 @@ static int mtk_phy_connect(struct mtk_mac *mac) ADVERTISED_Autoneg; phy_start_aneg(mac->phy_dev); + of_node_put(np); + return 0; } From 0ed661d5a48fa6df0b50ae64d27fe759a3ce42cf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann <daniel@iogearbox.net> Date: Thu, 11 Aug 2016 21:38:37 +0200 Subject: [PATCH 203/513] bpf: fix write helpers with regards to non-linear parts Fix the bpf_try_make_writable() helper and all call sites we have in BPF, it's currently defect with regards to skbs when the write_len spans into non-linear parts, no matter if cloned or not. There are multiple issues at once. First, using skb_store_bits() is not correct since even if we have a cloned skb, page frags can still be shared. To really make them private, we need to pull them in via __pskb_pull_tail() first, which also gets us a private head via pskb_expand_head() implicitly. This is for helpers like bpf_skb_store_bytes(), bpf_l3_csum_replace(), bpf_l4_csum_replace(). Really, the only thing reasonable and working here is to call skb_ensure_writable() before any write operation. Meaning, via pskb_may_pull() it makes sure that parts we want to access are pulled in and if not does so plus unclones the skb implicitly. If our write_len still fits the headlen and we're cloned and our header of the clone is not writable, then we need to make a private copy via pskb_expand_head(). skb_store_bits() is a bit misleading and only safe to store into non-linear data in different contexts such as 357b40a18b04 ("[IPV6]: IPV6_CHECKSUM socket option can corrupt kernel memory"). For above BPF helper functions, it means after fixed bpf_try_make_writable(), we've pulled in enough, so that we operate always based on skb->data. Thus, the call to skb_header_pointer() and skb_store_bits() becomes superfluous. In bpf_skb_store_bytes(), the len check is unnecessary too since it can only pass in maximum of BPF stack size, so adding offset is guaranteed to never overflow. Also bpf_l3/4_csum_replace() helpers must test for proper offset alignment since they use __sum16 pointer for writing resulting csum. The remaining helpers that change skb data not discussed here yet are bpf_skb_vlan_push(), bpf_skb_vlan_pop() and bpf_skb_change_proto(). The vlan helpers internally call either skb_ensure_writable() (pop case) and skb_cow_head() (push case, for head expansion), respectively. Similarly, bpf_skb_proto_xlat() takes care to not mangle page frags. Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action") Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers") Fixes: 3697649ff29e ("bpf: try harder on clones when writing into skb") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/core/filter.c | 70 ++++++++++++----------------------------------- 1 file changed, 18 insertions(+), 52 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index bd9bf2e5fafaf..cb06aceb512ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1355,13 +1355,9 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, { int err; - if (!skb_cloned(skb)) - return 0; - if (skb_clone_writable(skb, write_len)) - return 0; - err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); - if (!err) - bpf_compute_data_end(skb); + err = skb_ensure_writable(skb, write_len); + bpf_compute_data_end(skb); + return err; } @@ -1379,42 +1375,25 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { - struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); struct sk_buff *skb = (struct sk_buff *) (long) r1; - int offset = (int) r2; + unsigned int offset = (unsigned int) r2; void *from = (void *) (long) r3; unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) return -EINVAL; - - /* bpf verifier guarantees that: - * 'from' pointer points to bpf program stack - * 'len' bytes of it were initialized - * 'len' > 0 - * 'skb' is a valid pointer to 'struct sk_buff' - * - * so check for invalid 'offset' and too large 'len' - */ - if (unlikely((u32) offset > 0xffff || len > sizeof(sp->buff))) + if (unlikely(offset > 0xffff)) return -EFAULT; if (unlikely(bpf_try_make_writable(skb, offset + len))) return -EFAULT; - ptr = skb_header_pointer(skb, offset, len, sp->buff); - if (unlikely(!ptr)) - return -EFAULT; - + ptr = skb->data + offset; if (flags & BPF_F_RECOMPUTE_CSUM) __skb_postpull_rcsum(skb, ptr, len, offset); memcpy(ptr, from, len); - if (ptr == sp->buff) - /* skb_store_bits cannot return -EFAULT here */ - skb_store_bits(skb, offset, ptr, len); - if (flags & BPF_F_RECOMPUTE_CSUM) __skb_postpush_rcsum(skb, ptr, len, offset); if (flags & BPF_F_INVALIDATE_HASH) @@ -1437,12 +1416,12 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) { const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; - int offset = (int) r2; + unsigned int offset = (unsigned int) r2; void *to = (void *)(unsigned long) r3; unsigned int len = (unsigned int) r4; void *ptr; - if (unlikely((u32) offset > 0xffff)) + if (unlikely(offset > 0xffff)) goto err_clear; ptr = skb_header_pointer(skb, offset, len, to); @@ -1470,20 +1449,17 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - int offset = (int) r2; - __sum16 sum, *ptr; + unsigned int offset = (unsigned int) r2; + __sum16 *ptr; if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) return -EINVAL; - if (unlikely((u32) offset > 0xffff)) - return -EFAULT; - if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) + if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; - - ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); - if (unlikely(!ptr)) + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) return -EFAULT; + ptr = (__sum16 *)(skb->data + offset); switch (flags & BPF_F_HDR_FIELD_MASK) { case 0: if (unlikely(from != 0)) @@ -1501,10 +1477,6 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) return -EINVAL; } - if (ptr == &sum) - /* skb_store_bits guaranteed to not return -EFAULT here */ - skb_store_bits(skb, offset, ptr, sizeof(sum)); - return 0; } @@ -1524,20 +1496,18 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) struct sk_buff *skb = (struct sk_buff *) (long) r1; bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; - int offset = (int) r2; - __sum16 sum, *ptr; + unsigned int offset = (unsigned int) r2; + __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) return -EINVAL; - if (unlikely((u32) offset > 0xffff)) + if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; - if (unlikely(bpf_try_make_writable(skb, offset + sizeof(sum)))) + if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr)))) return -EFAULT; - ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum); - if (unlikely(!ptr)) - return -EFAULT; + ptr = (__sum16 *)(skb->data + offset); if (is_mmzero && !*ptr) return 0; @@ -1560,10 +1530,6 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (is_mmzero && !*ptr) *ptr = CSUM_MANGLED_0; - if (ptr == &sum) - /* skb_store_bits guaranteed to not return -EFAULT here */ - skb_store_bits(skb, offset, ptr, sizeof(sum)); - return 0; } From c15c0ab12fd62f2b19181d05c62d24bc9fa55a42 Mon Sep 17 00:00:00 2001 From: Johannes Berg <johannes.berg@intel.com> Date: Fri, 12 Aug 2016 07:48:21 +0200 Subject: [PATCH 204/513] ipv6: suppress sparse warnings in IP6_ECN_set_ce() Pass the correct type __wsum to csum_sub() and csum_add(). This doesn't really change anything since __wsum really *is* __be32, but removes the address space warnings from sparse. Cc: Eric Dumazet <edumazet@google.com> Fixes: 34ae6a1aa054 ("ipv6: update skb->csum when CE mark is propagated") Signed-off-by: Johannes Berg <johannes.berg@intel.com> Acked-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/inet_ecn.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 0dc0a51da38fa..dce2d586d9cec 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -128,7 +128,8 @@ static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) to = from | htonl(INET_ECN_CE << 20); *(__be32 *)iph = to; if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->csum = csum_add(csum_sub(skb->csum, from), to); + skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), + (__force __wsum)to); return 1; } From 5ba092efc7ddff040777ae7162f1d195f513571b Mon Sep 17 00:00:00 2001 From: Vegard Nossum <vegard.nossum@oracle.com> Date: Fri, 12 Aug 2016 10:29:13 +0200 Subject: [PATCH 205/513] net/irda: handle iriap_register_lsap() allocation failure If iriap_register_lsap() fails to allocate memory, self->lsap is set to NULL. However, none of the callers handle the failure and irlmp_connect_request() will happily dereference it: iriap_register_lsap: Unable to allocated LSAP! ================================================================================ UBSAN: Undefined behaviour in net/irda/irlmp.c:378:2 member access within null pointer of type 'struct lsap_cb' CPU: 1 PID: 15403 Comm: trinity-c0 Not tainted 4.8.0-rc1+ #81 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 0000000000000000 ffff88010c7e78a8 ffffffff82344f40 0000000041b58ab3 ffffffff84f98000 ffffffff82344e94 ffff88010c7e78d0 ffff88010c7e7880 ffff88010630ad00 ffffffff84a5fae0 ffffffff84d3f5c0 000000000000017a Call Trace: [<ffffffff82344f40>] dump_stack+0xac/0xfc [<ffffffff8242f5a8>] ubsan_epilogue+0xd/0x8a [<ffffffff824302bf>] __ubsan_handle_type_mismatch+0x157/0x411 [<ffffffff83b7bdbc>] irlmp_connect_request+0x7ac/0x970 [<ffffffff83b77cc0>] iriap_connect_request+0xa0/0x160 [<ffffffff83b77f48>] state_s_disconnect+0x88/0xd0 [<ffffffff83b78904>] iriap_do_client_event+0x94/0x120 [<ffffffff83b77710>] iriap_getvaluebyclass_request+0x3e0/0x6d0 [<ffffffff83ba6ebb>] irda_find_lsap_sel+0x1eb/0x630 [<ffffffff83ba90c8>] irda_connect+0x828/0x12d0 [<ffffffff833c0dfb>] SYSC_connect+0x22b/0x340 [<ffffffff833c7e09>] SyS_connect+0x9/0x10 [<ffffffff81007bd3>] do_syscall_64+0x1b3/0x4b0 [<ffffffff845f946a>] entry_SYSCALL64_slow_path+0x25/0x25 ================================================================================ The bug seems to have been around since forever. There's more problems with missing error checks in iriap_init() (and indeed all of irda_init()), but that's a bigger problem that needs very careful review and testing. This patch will fix the most serious bug (as it's easily reached from unprivileged userspace). I have tested my patch with a reproducer. Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/irda/iriap.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 4a7ae32afa09b..1138eaf5c6829 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -185,8 +185,12 @@ struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, self->magic = IAS_MAGIC; self->mode = mode; - if (mode == IAS_CLIENT) - iriap_register_lsap(self, slsap_sel, mode); + if (mode == IAS_CLIENT) { + if (iriap_register_lsap(self, slsap_sel, mode)) { + kfree(self); + return NULL; + } + } self->confirm = callback; self->priv = priv; From 54236ab09e9696a27baaae693c288920a26e8588 Mon Sep 17 00:00:00 2001 From: Vegard Nossum <vegard.nossum@oracle.com> Date: Fri, 12 Aug 2016 09:50:51 +0200 Subject: [PATCH 206/513] net/sctp: always initialise sctp_ht_iter::start_fail sctp_transport_seq_start() does not currently clear iter->start_fail on success, but relies on it being zero when it is allocated (by seq_open_net()). This can be a problem in the following sequence: open() // allocates iter (and implicitly sets iter->start_fail = 0) read() - iter->start() // fails and sets iter->start_fail = 1 - iter->stop() // doesn't call sctp_transport_walk_stop() (correct) read() again - iter->start() // succeeds, but doesn't change iter->start_fail - iter->stop() // doesn't call sctp_transport_walk_stop() (wrong) We should initialize sctp_ht_iter::start_fail to zero if ->start() succeeds, otherwise it's possible that we leave an old value of 1 there, which will cause ->stop() to not call sctp_transport_walk_stop(), which causes all sorts of problems like not calling rcu_read_unlock() (and preempt_enable()), eventually leading to more warnings like this: BUG: sleeping function called from invalid context at mm/slab.h:388 in_atomic(): 0, irqs_disabled(): 0, pid: 16551, name: trinity-c2 Preemption disabled at:[<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150 [<ffffffff81149abb>] preempt_count_add+0x1fb/0x280 [<ffffffff83295892>] _raw_spin_lock+0x12/0x40 [<ffffffff819bceb6>] rhashtable_walk_start+0x46/0x150 [<ffffffff82ec665f>] sctp_transport_walk_start+0x2f/0x60 [<ffffffff82edda1d>] sctp_transport_seq_start+0x4d/0x150 [<ffffffff81439e50>] traverse+0x170/0x850 [<ffffffff8143aeec>] seq_read+0x7cc/0x1180 [<ffffffff814f996c>] proc_reg_read+0xbc/0x180 [<ffffffff813d0384>] do_loop_readv_writev+0x134/0x210 [<ffffffff813d2a95>] do_readv_writev+0x565/0x660 [<ffffffff813d6857>] vfs_readv+0x67/0xa0 [<ffffffff813d6c16>] do_preadv+0x126/0x170 [<ffffffff813d710c>] SyS_preadv+0xc/0x10 [<ffffffff8100334c>] do_syscall_64+0x19c/0x410 [<ffffffff83296225>] return_from_SYSCALL_64+0x0/0x6a [<ffffffffffffffff>] 0xffffffffffffffff Notice that this is a subtly different stacktrace from the one in commit 5fc382d875 ("net/sctp: terminate rhashtable walk correctly"). Cc: Xin Long <lucien.xin@gmail.com> Cc: Herbert Xu <herbert@gondor.apana.org.au> Cc: Eric W. Biederman <ebiederm@xmission.com> Cc: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Acked-By: Neil Horman <nhorman@tuxdriver.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sctp/proc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 4cb5aedfe3ee2..ef8ba77a5beac 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -293,6 +293,7 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos) return ERR_PTR(err); } + iter->start_fail = 0; return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos); } From bc561632dddd5af0c4444d919f01cbf6d553aa0a Mon Sep 17 00:00:00 2001 From: Mike Manning <mmanning@brocade.com> Date: Fri, 12 Aug 2016 12:02:38 +0100 Subject: [PATCH 207/513] net: ipv6: Do not keep IPv6 addresses when IPv6 is disabled If IPv6 is disabled when the option is set to keep IPv6 addresses on link down, userspace is unaware of this as there is no such indication via netlink. The solution is to remove the IPv6 addresses in this case, which results in netlink messages indicating removal of addresses in the usual manner. This fix also makes the behavior consistent with the case of having IPv6 disabled first, which stops IPv6 addresses from being added. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: Mike Manning <mmanning@brocade.com> Acked-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ab3e796596b1d..df8425fcbc2ca 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3543,7 +3543,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* combine the user config with event to determine if permanent * addresses are to be removed from address hash table */ - keep_addr = !(how || _keep_addr <= 0); + keep_addr = !(how || _keep_addr <= 0 || idev->cnf.disable_ipv6); /* Step 2: clear hash table */ for (i = 0; i < IN6_ADDR_HSIZE; i++) { @@ -3599,7 +3599,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) /* re-combine the user config with event to determine if permanent * addresses are to be removed from the interface list */ - keep_addr = (!how && _keep_addr > 0); + keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { From e20038724552cd05e351cd7d7526d646953d26b7 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Fri, 12 Aug 2016 16:10:32 +0200 Subject: [PATCH 208/513] macsec: fix lockdep splats when nesting devices Currently, trying to setup a vlan over a macsec device, or other combinations of devices, triggers a lockdep warning. Use netdev_lockdep_set_classes and ndo_get_lock_subclass, similar to what macvlan does. Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/macsec.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index dbd590a8177f7..2043e8c97a819 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -270,6 +270,7 @@ struct macsec_dev { struct pcpu_secy_stats __percpu *stats; struct list_head secys; struct gro_cells gro_cells; + unsigned int nest_level; }; /** @@ -2699,6 +2700,8 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb, #define MACSEC_FEATURES \ (NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST) +static struct lock_class_key macsec_netdev_addr_lock_key; + static int macsec_dev_init(struct net_device *dev) { struct macsec_dev *macsec = macsec_priv(dev); @@ -2910,6 +2913,13 @@ static int macsec_get_iflink(const struct net_device *dev) return macsec_priv(dev)->real_dev->ifindex; } + +static int macsec_get_nest_level(struct net_device *dev) +{ + return macsec_priv(dev)->nest_level; +} + + static const struct net_device_ops macsec_netdev_ops = { .ndo_init = macsec_dev_init, .ndo_uninit = macsec_dev_uninit, @@ -2923,6 +2933,7 @@ static const struct net_device_ops macsec_netdev_ops = { .ndo_start_xmit = macsec_start_xmit, .ndo_get_stats64 = macsec_get_stats64, .ndo_get_iflink = macsec_get_iflink, + .ndo_get_lock_subclass = macsec_get_nest_level, }; static const struct device_type macsec_type = { @@ -3050,10 +3061,12 @@ static void macsec_del_dev(struct macsec_dev *macsec) static void macsec_common_dellink(struct net_device *dev, struct list_head *head) { struct macsec_dev *macsec = macsec_priv(dev); + struct net_device *real_dev = macsec->real_dev; unregister_netdevice_queue(dev, head); list_del_rcu(&macsec->secys); macsec_del_dev(macsec); + netdev_upper_dev_unlink(real_dev, dev); macsec_generation++; } @@ -3188,6 +3201,16 @@ static int macsec_newlink(struct net *net, struct net_device *dev, dev_hold(real_dev); + macsec->nest_level = dev_get_nest_level(real_dev, netif_is_macsec) + 1; + netdev_lockdep_set_classes(dev); + lockdep_set_class_and_subclass(&dev->addr_list_lock, + &macsec_netdev_addr_lock_key, + macsec_get_nest_level(dev)); + + err = netdev_upper_dev_link(real_dev, dev); + if (err < 0) + goto unregister; + /* need to be already registered so that ->init has run and * the MAC addr is set */ @@ -3200,12 +3223,12 @@ static int macsec_newlink(struct net *net, struct net_device *dev, if (rx_handler && sci_exists(real_dev, sci)) { err = -EBUSY; - goto unregister; + goto unlink; } err = macsec_add_dev(dev, sci, icv_len); if (err) - goto unregister; + goto unlink; if (data) macsec_changelink_common(dev, data); @@ -3220,6 +3243,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev, del_dev: macsec_del_dev(macsec); +unlink: + netdev_upper_dev_unlink(real_dev, dev); unregister: unregister_netdevice(dev); return err; From 952fcfd08c8109951622579d0ae7b9cd6cafd688 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca <sd@queasysnail.net> Date: Fri, 12 Aug 2016 16:10:33 +0200 Subject: [PATCH 209/513] net: remove type_check from dev_get_nest_level() The idea for type_check in dev_get_nest_level() was to count the number of nested devices of the same type (currently, only macvlan or vlan devices). This prevented the false positive lockdep warning on configurations such as: eth0 <--- macvlan0 <--- vlan0 <--- macvlan1 However, this doesn't prevent a warning on a configuration such as: eth0 <--- macvlan0 <--- vlan0 eth1 <--- vlan1 <--- macvlan1 In this case, all the locks end up with a nesting subclass of 1, so lockdep thinks that there is still a deadlock: - in the first case we have (macvlan_netdev_addr_lock_key, 1) and then take (vlan_netdev_xmit_lock_key, 1) - in the second case, we have (vlan_netdev_xmit_lock_key, 1) and then take (macvlan_netdev_addr_lock_key, 1) By removing the linktype check in dev_get_nest_level() and always incrementing the nesting depth, lockdep considers this configuration valid. Signed-off-by: Sabrina Dubroca <sd@queasysnail.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/macsec.c | 2 +- drivers/net/macvlan.c | 2 +- include/linux/netdevice.h | 3 +-- net/8021q/vlan.c | 2 +- net/core/dev.c | 10 +++------- 5 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 2043e8c97a819..351e701eb043b 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3201,7 +3201,7 @@ static int macsec_newlink(struct net *net, struct net_device *dev, dev_hold(real_dev); - macsec->nest_level = dev_get_nest_level(real_dev, netif_is_macsec) + 1; + macsec->nest_level = dev_get_nest_level(real_dev) + 1; netdev_lockdep_set_classes(dev); lockdep_set_class_and_subclass(&dev->addr_list_lock, &macsec_netdev_addr_lock_key, diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index cd9b53834bf60..3234fcdea3174 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1315,7 +1315,7 @@ int macvlan_common_newlink(struct net *src_net, struct net_device *dev, vlan->dev = dev; vlan->port = port; vlan->set_features = MACVLAN_FEATURES; - vlan->nest_level = dev_get_nest_level(lowerdev, netif_is_macvlan) + 1; + vlan->nest_level = dev_get_nest_level(lowerdev) + 1; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 076df5360ba50..3a788bf0affdc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3891,8 +3891,7 @@ void netdev_default_l2upper_neigh_destroy(struct net_device *dev, extern u8 netdev_rss_key[NETDEV_RSS_KEY_LEN] __read_mostly; void netdev_rss_key_fill(void *buffer, size_t len); -int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(const struct net_device *dev)); +int dev_get_nest_level(struct net_device *dev); int skb_checksum_help(struct sk_buff *skb); struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 82a116ba590eb..8de138d3306bd 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -169,7 +169,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - vlan->nest_level = dev_get_nest_level(real_dev, is_vlan_dev) + 1; + vlan->nest_level = dev_get_nest_level(real_dev) + 1; err = register_netdevice(dev); if (err < 0) goto out_uninit_mvrp; diff --git a/net/core/dev.c b/net/core/dev.c index 4ce07dc25573e..dd6ce598de897 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6045,8 +6045,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev, EXPORT_SYMBOL(netdev_lower_dev_get_private); -int dev_get_nest_level(struct net_device *dev, - bool (*type_check)(const struct net_device *dev)) +int dev_get_nest_level(struct net_device *dev) { struct net_device *lower = NULL; struct list_head *iter; @@ -6056,15 +6055,12 @@ int dev_get_nest_level(struct net_device *dev, ASSERT_RTNL(); netdev_for_each_lower_dev(dev, lower, iter) { - nest = dev_get_nest_level(lower, type_check); + nest = dev_get_nest_level(lower); if (max_nest < nest) max_nest = nest; } - if (type_check(dev)) - max_nest++; - - return max_nest; + return max_nest + 1; } EXPORT_SYMBOL(dev_get_nest_level); From d1669c8288a2c86daa6fd59c7fb938c08589d053 Mon Sep 17 00:00:00 2001 From: Markus Heiser <markus.heiser@darmarIT.de> Date: Fri, 5 Aug 2016 11:19:43 +0200 Subject: [PATCH 210/513] doc-rst: customize RTD theme, drop padding of inline literal Remove the distracting (left/right) padding of inline literals. (HTML <code>). Requested and discussed in [1]. [1] http://www.spinics.net/lists/linux-media/msg103991.html Signed-off-by: Markus Heiser <markus.heiser@darmarIT.de> Acked-by: Hans Verkuil <hans.verkuil@cisco.com> Signed-off-by: Jonathan Corbet <corbet@lwn.net> --- Documentation/sphinx-static/theme_overrides.css | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/sphinx-static/theme_overrides.css b/Documentation/sphinx-static/theme_overrides.css index 3a2ac4bcfd789..e88461c4c1e69 100644 --- a/Documentation/sphinx-static/theme_overrides.css +++ b/Documentation/sphinx-static/theme_overrides.css @@ -42,11 +42,12 @@ caption a.headerlink { opacity: 0; } caption a.headerlink:hover { opacity: 1; } - /* inline literal: drop the borderbox and red color */ + /* inline literal: drop the borderbox, padding and red color */ code, .rst-content tt, .rst-content code { color: inherit; border: none; + padding: unset; background: inherit; font-size: 85%; } From ce8cb803d8b90458495f23606c706f0c0c857cdc Mon Sep 17 00:00:00 2001 From: Wolfram Sang <wsa+renesas@sang-engineering.com> Date: Fri, 12 Aug 2016 18:40:23 +0200 Subject: [PATCH 211/513] i2c: mux: demux-pinctrl: properly roll back when adding adapter fails We also need to revert the dynamic OF change, so we get a consistent state again. Otherwise, we might have two devices enabled e.g. after pinctrl setup fails. Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> Cc: stable@kernel.org --- drivers/i2c/muxes/i2c-demux-pinctrl.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/muxes/i2c-demux-pinctrl.c b/drivers/i2c/muxes/i2c-demux-pinctrl.c index 8de073aed0014..215ac87f606d2 100644 --- a/drivers/i2c/muxes/i2c-demux-pinctrl.c +++ b/drivers/i2c/muxes/i2c-demux-pinctrl.c @@ -68,7 +68,7 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne adap = of_find_i2c_adapter_by_node(priv->chan[new_chan].parent_np); if (!adap) { ret = -ENODEV; - goto err; + goto err_with_revert; } p = devm_pinctrl_get_select(adap->dev.parent, priv->bus_name); @@ -103,6 +103,8 @@ static int i2c_demux_activate_master(struct i2c_demux_pinctrl_priv *priv, u32 ne err_with_put: i2c_put_adapter(adap); + err_with_revert: + of_changeset_revert(&priv->chan[new_chan].chgset); err: dev_err(priv->dev, "failed to setup demux-adapter %d (%d)\n", new_chan, ret); return ret; From 4d01d88019261d05ec3bff5f1a6013393faa3b9e Mon Sep 17 00:00:00 2001 From: Brian Norris <briannorris@chromium.org> Date: Wed, 10 Aug 2016 13:37:18 -0700 Subject: [PATCH 212/513] i2c: cros-ec-tunnel: Fix usage of cros_ec_cmd_xfer() cros_ec_cmd_xfer returns success status if the command transport completes successfully, but the execution result is incorrectly ignored. In many cases, the execution result is assumed to be successful, leading to ignored errors and operating on uninitialized data. We've recently introduced the cros_ec_cmd_xfer_status() helper to avoid these problems. Let's use it. [Regarding the 'Fixes' tag; there is significant refactoring since the driver's introduction, but the underlying logical error exists throughout I believe] Fixes: 9d230c9e4f4e ("i2c: ChromeOS EC tunnel driver") Cc: <stable@vger.kernel.org> # 9798ac6d32c1 mfd: cros_ec: Add cros_ec_cmd_xfer_status() helper Signed-off-by: Brian Norris <briannorris@chromium.org> Reviewed-by: Javier Martinez Canillas <javier@osg.samsung.com> Reviewed-by: Guenter Roeck <linux@roeck-us.net> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-cros-ec-tunnel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-cros-ec-tunnel.c b/drivers/i2c/busses/i2c-cros-ec-tunnel.c index a0d95ff682ae1..2d5ff86398d09 100644 --- a/drivers/i2c/busses/i2c-cros-ec-tunnel.c +++ b/drivers/i2c/busses/i2c-cros-ec-tunnel.c @@ -215,7 +215,7 @@ static int ec_i2c_xfer(struct i2c_adapter *adap, struct i2c_msg i2c_msgs[], msg->outsize = request_len; msg->insize = response_len; - result = cros_ec_cmd_xfer(bus->ec, msg); + result = cros_ec_cmd_xfer_status(bus->ec, msg); if (result < 0) { dev_err(dev, "Error transferring EC i2c message %d\n", result); goto exit; From 97ccd4af120c6ee183f424f17672870809bd6efd Mon Sep 17 00:00:00 2001 From: Alexey Khoroshilov <khoroshilov@ispras.ru> Date: Thu, 4 Aug 2016 02:38:44 +0300 Subject: [PATCH 213/513] i2c: ocores: add missed clk_disable_unprepare() on failure paths clk_disable_unprepare() is missed on failure paths in ocores_i2c_probe(). Found by Linux Driver Verification project (linuxtesting.org). Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru> Acked-by: Peter Korsgaard <peter@korsgaard.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-ocores.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c index dfa7a4b4a91d7..ac88a524143e0 100644 --- a/drivers/i2c/busses/i2c-ocores.c +++ b/drivers/i2c/busses/i2c-ocores.c @@ -379,6 +379,7 @@ static int ocores_i2c_of_probe(struct platform_device *pdev, if (!clock_frequency_present) { dev_err(&pdev->dev, "Missing required parameter 'opencores,ip-clock-frequency'\n"); + clk_disable_unprepare(i2c->clk); return -ENODEV; } i2c->ip_clock_khz = clock_frequency / 1000; @@ -467,20 +468,21 @@ static int ocores_i2c_probe(struct platform_device *pdev) default: dev_err(&pdev->dev, "Unsupported I/O width (%d)\n", i2c->reg_io_width); - return -EINVAL; + ret = -EINVAL; + goto err_clk; } } ret = ocores_init(&pdev->dev, i2c); if (ret) - return ret; + goto err_clk; init_waitqueue_head(&i2c->wait); ret = devm_request_irq(&pdev->dev, irq, ocores_isr, 0, pdev->name, i2c); if (ret) { dev_err(&pdev->dev, "Cannot claim IRQ\n"); - return ret; + goto err_clk; } /* hook up driver to tree */ @@ -494,7 +496,7 @@ static int ocores_i2c_probe(struct platform_device *pdev) ret = i2c_add_adapter(&i2c->adap); if (ret) { dev_err(&pdev->dev, "Failed to add adapter\n"); - return ret; + goto err_clk; } /* add in known devices to the bus */ @@ -504,6 +506,10 @@ static int ocores_i2c_probe(struct platform_device *pdev) } return 0; + +err_clk: + clk_disable_unprepare(i2c->clk); + return ret; } static int ocores_i2c_remove(struct platform_device *pdev) From 434f14e745442a4230a40cf47f84deac1028f64f Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen <cyrille.pitchen@atmel.com> Date: Wed, 3 Aug 2016 16:58:26 +0200 Subject: [PATCH 214/513] i2c: at91: fix support of the "alternative command" feature The "alternative command" feature was introduced with sama5d2 SoCs. Its purpose is to let the hardware i2c controller automatically send the STOP condition on the i2c bus at the end of a data transfer. Without this feature, the i2c driver has to write the 'STOP' bit into the Control Register so the hardware i2c controller is triggered to send the STOP condition on the bus. Using the "alternative command" feature requires to set the transfer data length into the 8bit DATAL field of the Alternative Command Register. Hence only data transfers up to 255 bytes can take advantage of the "alternative command" feature. For greater data transfer sizes, the driver should use the previous implementation, when the "alternative command" support was not implemented yet. Signed-off-by: Cyrille Pitchen <cyrille.pitchen@atmel.com> Signed-off-by: Ludovic Desroches <ludovic.desroches@atmel.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-at91.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/drivers/i2c/busses/i2c-at91.c b/drivers/i2c/busses/i2c-at91.c index f23372669f770..1bb97f658b47a 100644 --- a/drivers/i2c/busses/i2c-at91.c +++ b/drivers/i2c/busses/i2c-at91.c @@ -38,6 +38,7 @@ #define AT91_I2C_TIMEOUT msecs_to_jiffies(100) /* transfer timeout */ #define AT91_I2C_DMA_THRESHOLD 8 /* enable DMA if transfer size is bigger than this threshold */ #define AUTOSUSPEND_TIMEOUT 2000 +#define AT91_I2C_MAX_ALT_CMD_DATA_SIZE 256 /* AT91 TWI register definitions */ #define AT91_TWI_CR 0x0000 /* Control Register */ @@ -141,6 +142,7 @@ struct at91_twi_dev { unsigned twi_cwgr_reg; struct at91_twi_pdata *pdata; bool use_dma; + bool use_alt_cmd; bool recv_len_abort; u32 fifo_size; struct at91_twi_dma dma; @@ -269,7 +271,7 @@ static void at91_twi_write_next_byte(struct at91_twi_dev *dev) /* send stop when last byte has been written */ if (--dev->buf_len == 0) - if (!dev->pdata->has_alt_cmd) + if (!dev->use_alt_cmd) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); dev_dbg(dev->dev, "wrote 0x%x, to go %d\n", *dev->buf, dev->buf_len); @@ -292,7 +294,7 @@ static void at91_twi_write_data_dma_callback(void *data) * we just have to enable TXCOMP one. */ at91_twi_write(dev, AT91_TWI_IER, AT91_TWI_TXCOMP); - if (!dev->pdata->has_alt_cmd) + if (!dev->use_alt_cmd) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); } @@ -410,7 +412,7 @@ static void at91_twi_read_next_byte(struct at91_twi_dev *dev) } /* send stop if second but last byte has been read */ - if (!dev->pdata->has_alt_cmd && dev->buf_len == 1) + if (!dev->use_alt_cmd && dev->buf_len == 1) at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_STOP); dev_dbg(dev->dev, "read 0x%x, to go %d\n", *dev->buf, dev->buf_len); @@ -426,7 +428,7 @@ static void at91_twi_read_data_dma_callback(void *data) dma_unmap_single(dev->dev, sg_dma_address(&dev->dma.sg[0]), dev->buf_len, DMA_FROM_DEVICE); - if (!dev->pdata->has_alt_cmd) { + if (!dev->use_alt_cmd) { /* The last two bytes have to be read without using dma */ dev->buf += dev->buf_len - 2; dev->buf_len = 2; @@ -443,7 +445,7 @@ static void at91_twi_read_data_dma(struct at91_twi_dev *dev) struct dma_chan *chan_rx = dma->chan_rx; size_t buf_len; - buf_len = (dev->pdata->has_alt_cmd) ? dev->buf_len : dev->buf_len - 2; + buf_len = (dev->use_alt_cmd) ? dev->buf_len : dev->buf_len - 2; dma->direction = DMA_FROM_DEVICE; /* Keep in mind that we won't use dma to read the last two bytes */ @@ -651,7 +653,7 @@ static int at91_do_twi_transfer(struct at91_twi_dev *dev) unsigned start_flags = AT91_TWI_START; /* if only one byte is to be read, immediately stop transfer */ - if (!has_alt_cmd && dev->buf_len <= 1 && + if (!dev->use_alt_cmd && dev->buf_len <= 1 && !(dev->msg->flags & I2C_M_RECV_LEN)) start_flags |= AT91_TWI_STOP; at91_twi_write(dev, AT91_TWI_CR, start_flags); @@ -745,7 +747,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) int ret; unsigned int_addr_flag = 0; struct i2c_msg *m_start = msg; - bool is_read, use_alt_cmd = false; + bool is_read; dev_dbg(&adap->dev, "at91_xfer: processing %d messages:\n", num); @@ -768,14 +770,16 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) at91_twi_write(dev, AT91_TWI_IADR, internal_address); } + dev->use_alt_cmd = false; is_read = (m_start->flags & I2C_M_RD); if (dev->pdata->has_alt_cmd) { - if (m_start->len > 0) { + if (m_start->len > 0 && + m_start->len < AT91_I2C_MAX_ALT_CMD_DATA_SIZE) { at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMEN); at91_twi_write(dev, AT91_TWI_ACR, AT91_TWI_ACR_DATAL(m_start->len) | ((is_read) ? AT91_TWI_ACR_DIR : 0)); - use_alt_cmd = true; + dev->use_alt_cmd = true; } else { at91_twi_write(dev, AT91_TWI_CR, AT91_TWI_ACMDIS); } @@ -784,7 +788,7 @@ static int at91_twi_xfer(struct i2c_adapter *adap, struct i2c_msg *msg, int num) at91_twi_write(dev, AT91_TWI_MMR, (m_start->addr << 16) | int_addr_flag | - ((!use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0)); + ((!dev->use_alt_cmd && is_read) ? AT91_TWI_MREAD : 0)); dev->buf_len = m_start->len; dev->buf = m_start->buf; From 8d263be8716da67bd8746e266c1934cb9c9933cc Mon Sep 17 00:00:00 2001 From: Daniel Wagner <daniel.wagner@bmw-carit.de> Date: Wed, 3 Aug 2016 14:03:08 +0200 Subject: [PATCH 215/513] i2c: bcm-iproc: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: bcm_iproc_i2c_xfer_single_msg() reinit_completion() ... (activate the transfer) ... wait_for_completion_timeout() Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Acked-by: Ray Jui <ray.jui@broadcom.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-bcm-iproc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index 19c843828fe2c..95f7cac76f89b 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -158,7 +158,7 @@ static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data) if (status & BIT(IS_M_START_BUSY_SHIFT)) { iproc_i2c->xfer_is_done = 1; - complete_all(&iproc_i2c->done); + complete(&iproc_i2c->done); } writel(status, iproc_i2c->base + IS_OFFSET); From 752c3899f97c469efea42b1ad6de66d18a21ce09 Mon Sep 17 00:00:00 2001 From: Daniel Wagner <daniel.wagner@bmw-carit.de> Date: Wed, 3 Aug 2016 14:03:09 +0200 Subject: [PATCH 216/513] i2c: bcm-kona: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: bcm_kona_send_i2c_cmd() reinit_completion() ... bcm_kona_i2c_send_cmd_to_ctrl() ... wait_for_completion_timeout() Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Acked-by: Ray Jui <ray.jui@broadcom.com> Reviewed-by: Tim Kryger <tim.kryger@gmail.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-bcm-kona.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-bcm-kona.c b/drivers/i2c/busses/i2c-bcm-kona.c index ac9f47679c3a4..f98743277e3c7 100644 --- a/drivers/i2c/busses/i2c-bcm-kona.c +++ b/drivers/i2c/busses/i2c-bcm-kona.c @@ -229,7 +229,7 @@ static irqreturn_t bcm_kona_i2c_isr(int irq, void *devid) dev->base + TXFCR_OFFSET); writel(status & ~ISR_RESERVED_MASK, dev->base + ISR_OFFSET); - complete_all(&dev->done); + complete(&dev->done); return IRQ_HANDLED; } From fea03a6ab1d8e456c0319c194150621629504f80 Mon Sep 17 00:00:00 2001 From: Daniel Wagner <daniel.wagner@bmw-carit.de> Date: Wed, 3 Aug 2016 14:03:10 +0200 Subject: [PATCH 217/513] i2c: brcmstb: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: brcmstb_send_i2c_cmd() reinit_completion() ... /* initiate transfer by setting iic_enable */ ... brcmstb_i2c_wait_for_completion() Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Reviewed-by: Kamal Dasu <kdasu.kdev@gmail.com> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-brcmstb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index 3f5a4d71d3bf3..385b57bfcb386 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -228,7 +228,7 @@ static irqreturn_t brcmstb_i2c_isr(int irq, void *devid) return IRQ_NONE; brcmstb_i2c_enable_disable_irq(dev, INT_DISABLE); - complete_all(&dev->done); + complete(&dev->done); dev_dbg(dev->device, "isr handled"); return IRQ_HANDLED; From 0268263f0cf2239eb3a747936d8d36ef5f5e1688 Mon Sep 17 00:00:00 2001 From: Daniel Wagner <daniel.wagner@bmw-carit.de> Date: Wed, 3 Aug 2016 14:03:11 +0200 Subject: [PATCH 218/513] i2c: meson: Use complete() instead of complete_all() There is only one waiter for the completion, therefore there is no need to use complete_all(). Let's make that clear by using complete() instead of complete_all(). The usage pattern of the completion is: meson_i2c_xfer_msg() reinit_completion() ... /* Start the transfer */ ... wait_for_completion_timeout() Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Signed-off-by: Wolfram Sang <wsa@the-dreams.de> --- drivers/i2c/busses/i2c-meson.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/i2c/busses/i2c-meson.c b/drivers/i2c/busses/i2c-meson.c index 71d3929adf54e..76e28980904f0 100644 --- a/drivers/i2c/busses/i2c-meson.c +++ b/drivers/i2c/busses/i2c-meson.c @@ -211,7 +211,7 @@ static void meson_i2c_stop(struct meson_i2c *i2c) meson_i2c_add_token(i2c, TOKEN_STOP); } else { i2c->state = STATE_IDLE; - complete_all(&i2c->done); + complete(&i2c->done); } } @@ -238,7 +238,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id) dev_dbg(i2c->dev, "error bit set\n"); i2c->error = -ENXIO; i2c->state = STATE_IDLE; - complete_all(&i2c->done); + complete(&i2c->done); goto out; } @@ -269,7 +269,7 @@ static irqreturn_t meson_i2c_irq(int irqno, void *dev_id) break; case STATE_STOP: i2c->state = STATE_IDLE; - complete_all(&i2c->done); + complete(&i2c->done); break; case STATE_IDLE: break; From 52012619e5a2ca0491426c3712fb9054692d4a3c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" <mst@redhat.com> Date: Sun, 14 Aug 2016 23:44:21 +0300 Subject: [PATCH 219/513] ringtest: test build fix Recent changes to ptr_ring broke the ringtest which lacks a likely() stub. Fix it up. Fixes: 982fb490c298896d15e9323a882f34a57c11ff56 ("ptr_ring: support zero length ring") Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- tools/virtio/ringtest/ptr_ring.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c index 68e4f9f0da3ab..bd2ad1d3b7a9e 100644 --- a/tools/virtio/ringtest/ptr_ring.c +++ b/tools/virtio/ringtest/ptr_ring.c @@ -13,6 +13,7 @@ #define cache_line_size() SMP_CACHE_BYTES #define ____cacheline_aligned_in_smp __attribute__ ((aligned (SMP_CACHE_BYTES))) #define unlikely(x) (__builtin_expect(!!(x), 0)) +#define likely(x) (__builtin_expect(!!(x), 1)) #define ALIGN(x, a) (((x) + (a) - 1) / (a) * (a)) typedef pthread_spinlock_t spinlock_t; From 21bc54fc0cdc31de72b57d2b3c79cf9c2b83cf39 Mon Sep 17 00:00:00 2001 From: Gerard Garcia <ggarcia@deic.uab.cat> Date: Wed, 10 Aug 2016 17:24:34 +0200 Subject: [PATCH 220/513] vhost/vsock: drop space available check for TX vq Remove unnecessary use of enable/disable callback notifications and the incorrect more space available check. The virtio_transport_tx_work handles when the TX virtqueue has more buffers available. Signed-off-by: Gerard Garcia <ggarcia@deic.uab.cat> Acked-by: Stefan Hajnoczi <stefanha@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- net/vmw_vsock/virtio_transport.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 699dfabdbccd5..936d7eee62d03 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -87,9 +87,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) vq = vsock->vqs[VSOCK_VQ_TX]; - /* Avoid unnecessary interrupts while we're processing the ring */ - virtqueue_disable_cb(vq); - for (;;) { struct virtio_vsock_pkt *pkt; struct scatterlist hdr, buf, *sgs[2]; @@ -99,7 +96,6 @@ virtio_transport_send_pkt_work(struct work_struct *work) spin_lock_bh(&vsock->send_pkt_list_lock); if (list_empty(&vsock->send_pkt_list)) { spin_unlock_bh(&vsock->send_pkt_list_lock); - virtqueue_enable_cb(vq); break; } @@ -118,13 +114,13 @@ virtio_transport_send_pkt_work(struct work_struct *work) } ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL); + /* Usually this means that there is no more space available in + * the vq + */ if (ret < 0) { spin_lock_bh(&vsock->send_pkt_list_lock); list_add(&pkt->list, &vsock->send_pkt_list); spin_unlock_bh(&vsock->send_pkt_list_lock); - - if (!virtqueue_enable_cb(vq) && ret == -ENOSPC) - continue; /* retry now that we have more space */ break; } From 446374d7c7f89603d8151b56824e2cac85ed8e0d Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" <mst@redhat.com> Date: Mon, 15 Aug 2016 04:28:12 +0300 Subject: [PATCH 221/513] vhost/test: fix after swiotlb changes Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- drivers/vhost/test.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 388eec4e1a90d..97fb2f8fa9304 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -220,20 +220,20 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; - struct vhost_memory *memory; + struct vhost_umem *umem; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; - memory = vhost_dev_reset_owner_prepare(); - if (!memory) { + umem = vhost_dev_reset_owner_prepare(); + if (!umem) { err = -ENOMEM; goto done; } vhost_test_stop(n, &priv); vhost_test_flush(n); - vhost_dev_reset_owner(&n->dev, memory); + vhost_dev_reset_owner(&n->dev, umem); done: mutex_unlock(&n->dev.mutex); return err; From 6be3ffaa0e15c64f560904b025f5c50bef5886f9 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" <mst@redhat.com> Date: Mon, 15 Aug 2016 04:50:55 +0300 Subject: [PATCH 222/513] tools/virtio: add dma stubs Fixes build after recent IOMMU-related changes, mustly by adding more stubs. Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- tools/virtio/linux/dma-mapping.h | 16 ++++++++++++++++ tools/virtio/linux/kernel.h | 14 ++++++++++++++ tools/virtio/linux/slab.h | 4 ++++ tools/virtio/linux/virtio.h | 6 +++++- tools/virtio/linux/virtio_config.h | 13 +++++++++++++ 5 files changed, 52 insertions(+), 1 deletion(-) diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 4f93af89ae165..18601f6689b9e 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -14,4 +14,20 @@ enum dma_data_direction { DMA_NONE = 3, }; +#define dma_alloc_coherent(d, s, hp, f) ({ \ + void *__dma_alloc_coherent_p = kmalloc((s), (f)); \ + *(hp) = (unsigned long)__dma_alloc_coherent_p; \ + __dma_alloc_coherent_p; \ +}) + +#define dma_free_coherent(d, s, p, h) kfree(p) + +#define dma_map_page(d, p, o, s, dir) (page_to_phys(p) + (o)) + +#define dma_map_single(d, p, s, dir) (virt_to_phys(p)) +#define dma_mapping_error(...) (0) + +#define dma_unmap_single(...) do { } while (0) +#define dma_unmap_page(...) do { } while (0) + #endif diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 0338499482159..d9554fc3f3403 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -20,7 +20,9 @@ #define PAGE_SIZE getpagesize() #define PAGE_MASK (~(PAGE_SIZE-1)) +#define PAGE_ALIGN(x) ((x + PAGE_SIZE - 1) & PAGE_MASK) +typedef unsigned long long phys_addr_t; typedef unsigned long long dma_addr_t; typedef size_t __kernel_size_t; typedef unsigned int __wsum; @@ -57,6 +59,11 @@ static inline void *kzalloc(size_t s, gfp_t gfp) return p; } +static inline void *alloc_pages_exact(size_t s, gfp_t gfp) +{ + return kmalloc(s, gfp); +} + static inline void kfree(void *p) { if (p >= __kfree_ignore_start && p < __kfree_ignore_end) @@ -64,6 +71,11 @@ static inline void kfree(void *p) free(p); } +static inline void free_pages_exact(void *p, size_t s) +{ + kfree(p); +} + static inline void *krealloc(void *p, size_t s, gfp_t gfp) { return realloc(p, s); @@ -105,6 +117,8 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) +#define WARN_ON_ONCE(cond) ((cond) && fprintf (stderr, "WARNING\n")) + #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ diff --git a/tools/virtio/linux/slab.h b/tools/virtio/linux/slab.h index 81baeac8ae402..7e1c1197d4390 100644 --- a/tools/virtio/linux/slab.h +++ b/tools/virtio/linux/slab.h @@ -1,2 +1,6 @@ #ifndef LINUX_SLAB_H +#define GFP_KERNEL 0 +#define GFP_ATOMIC 0 +#define __GFP_NOWARN 0 +#define __GFP_ZERO 0 #endif diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index ee125e714053a..9377c8b4ac167 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h @@ -3,8 +3,12 @@ #include <linux/scatterlist.h> #include <linux/kernel.h> +struct device { + void *parent; +}; + struct virtio_device { - void *dev; + struct device dev; u64 features; }; diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h index 57a6964a1e355..9ba11815e0a16 100644 --- a/tools/virtio/linux/virtio_config.h +++ b/tools/virtio/linux/virtio_config.h @@ -40,6 +40,19 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev, #define virtio_has_feature(dev, feature) \ (__virtio_test_bit((dev), feature)) +/** + * virtio_has_iommu_quirk - determine whether this device has the iommu quirk + * @vdev: the device + */ +static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev) +{ + /* + * Note the reverse polarity of the quirk feature (compared to most + * other features), this is for compatibility with legacy systems. + */ + return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM); +} + static inline bool virtio_is_little_endian(struct virtio_device *vdev) { return virtio_has_feature(vdev, VIRTIO_F_VERSION_1) || From 4cf0b354d92ee2c642532ee39e330f8f580fd985 Mon Sep 17 00:00:00 2001 From: Florian Westphal <fw@strlen.de> Date: Fri, 12 Aug 2016 12:03:52 +0200 Subject: [PATCH 223/513] rhashtable: avoid large lock-array allocations Sander reports following splat after netfilter nat bysrc table got converted to rhashtable: swapper/0: page allocation failure: order:3, mode:0x2084020(GFP_ATOMIC|__GFP_COMP) CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.8.0-rc1 [..] [<ffffffff811633ed>] warn_alloc_failed+0xdd/0x140 [<ffffffff811638b1>] __alloc_pages_nodemask+0x3e1/0xcf0 [<ffffffff811a72ed>] alloc_pages_current+0x8d/0x110 [<ffffffff8117cb7f>] kmalloc_order+0x1f/0x70 [<ffffffff811aec19>] __kmalloc+0x129/0x140 [<ffffffff8146d561>] bucket_table_alloc+0xc1/0x1d0 [<ffffffff8146da1d>] rhashtable_insert_rehash+0x5d/0xe0 [<ffffffff819fcfff>] nf_nat_setup_info+0x2ef/0x400 The failure happens when allocating the spinlock array. Even with GFP_KERNEL its unlikely for such a large allocation to succeed. Thomas Graf pointed me at inet_ehash_locks_alloc(), so in addition to adding NOWARN for atomic allocations this also makes the bucket-array sizing more conservative. In commit 095dc8e0c3686 ("tcp: fix/cleanup inet_ehash_locks_alloc()"), Eric Dumazet says: "Budget 2 cache lines per cpu worth of 'spinlocks'". IOW, consider size needed by a single spinlock when determining number of locks per cpu. So with 64 byte per cacheline and 4 byte per spinlock this gives 32 locks per cpu. Resulting size of the lock-array (sizeof(spinlock) == 4): cpus: 1 2 4 8 16 32 64 old: 1k 1k 4k 8k 16k 16k 16k new: 128 256 512 1k 2k 4k 8k 8k allocation should have decent chance of success even with GFP_ATOMIC, and should not fail with GFP_KERNEL. With 72-byte spinlock (LOCKDEP): cpus : 1 2 old: 9k 18k new: ~2k ~4k Reported-by: Sander Eikelenboom <linux@eikelenboom.it> Suggested-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: Florian Westphal <fw@strlen.de> Signed-off-by: David S. Miller <davem@davemloft.net> --- lib/rhashtable.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 5d845ffd79827..42acd81f10dbc 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -30,7 +30,7 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U -#define BUCKET_LOCKS_PER_CPU 128UL +#define BUCKET_LOCKS_PER_CPU 32UL static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, @@ -70,7 +70,7 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, unsigned int nr_pcpus = num_possible_cpus(); #endif - nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL); size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); /* Never allocate more than 0.5 locks per bucket */ @@ -83,6 +83,9 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, tbl->locks = vmalloc(size * sizeof(spinlock_t)); else #endif + if (gfp != GFP_KERNEL) + gfp |= __GFP_NOWARN | __GFP_NORETRY; + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), gfp); if (!tbl->locks) From eb8fc32354aa77678dc6e7950a8f0c79cace204f Mon Sep 17 00:00:00 2001 From: Vincent <vincent.stehle@laposte.net> Date: Sun, 14 Aug 2016 15:38:29 +0200 Subject: [PATCH 224/513] mlxsw: spectrum_router: Fix use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In mlxsw_sp_router_fib4_add_info_destroy(), the fib_entry pointer is used after it has been freed by mlxsw_sp_fib_entry_destroy(). Use a temporary variable to fix this. Fixes: 61c503f976b5449e ("mlxsw: spectrum_router: Implement fib4 add/del switchdev obj ops") Signed-off-by: Vincent Stehlé <vincent.stehle@laposte.net> Cc: Jiri Pirko <jiri@mellanox.com> Acked-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 81418d6292316..90bb93b037ecc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -1651,9 +1651,10 @@ static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) const struct mlxsw_sp_router_fib4_add_info *info = data; struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; + struct mlxsw_sp_vr *vr = fib_entry->vr; mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, fib_entry->vr); + mlxsw_sp_vr_put(mlxsw_sp, vr); kfree(info); } From d9853490176c88fff71d03ec9a174b77011f5026 Mon Sep 17 00:00:00 2001 From: Lucas Stach <l.stach@pengutronix.de> Date: Thu, 28 Jul 2016 11:50:48 +0200 Subject: [PATCH 225/513] drm/etnaviv: take GPU lock later in the submit process Both the fence and event alloc are safe to be done without holding the GPU lock, as they either don't need any locking (fences) or are protected by their own lock (events). This solves a bad locking interaction between the submit path and the recover worker. If userspace manages to exhaust all available events while the GPU is hung, the submit will wait for events to become available holding the GPU lock. The recover worker waits for this lock to become available before trying to recover the GPU which frees up the allocated events. Essentially both paths are deadlocked until the submit path times out waiting for available events, failing the submit that could otherwise be handled just fine if the recover worker had the chance to bring the GPU back in a working state. Signed-off-by: Lucas Stach <l.stach@pengutronix.de> Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com> --- drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c index 87ef34150d466..b382cf505262b 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c @@ -1333,8 +1333,6 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, if (ret < 0) return ret; - mutex_lock(&gpu->lock); - /* * TODO * @@ -1348,16 +1346,18 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, if (unlikely(event == ~0U)) { DRM_ERROR("no free event\n"); ret = -EBUSY; - goto out_unlock; + goto out_pm_put; } fence = etnaviv_gpu_fence_alloc(gpu); if (!fence) { event_free(gpu, event); ret = -ENOMEM; - goto out_unlock; + goto out_pm_put; } + mutex_lock(&gpu->lock); + gpu->event[event].fence = fence; submit->fence = fence->seqno; gpu->active_fence = submit->fence; @@ -1395,9 +1395,9 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, hangcheck_timer_reset(gpu); ret = 0; -out_unlock: mutex_unlock(&gpu->lock); +out_pm_put: etnaviv_gpu_pm_put(gpu); return ret; From 802934b2cfde463b72cc1b9bc1c081895a90be53 Mon Sep 17 00:00:00 2001 From: Mike Snitzer <snitzer@redhat.com> Date: Fri, 5 Aug 2016 12:29:06 -0400 Subject: [PATCH 226/513] dm round robin: do not use this_cpu_ptr() without having preemption disabled Use local_irq_save() to disable preemption before calling this_cpu_ptr(). Reported-by: Benjamin Block <bblock@linux.vnet.ibm.com> Fixes: b0b477c7e0dd ("dm round robin: use percpu 'repeat_count' and 'current_path'") Cc: stable@vger.kernel.org # 4.6+ Suggested-by: Jens Axboe <axboe@kernel.dk> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-round-robin.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/md/dm-round-robin.c b/drivers/md/dm-round-robin.c index 4ace1da17db8f..6c25213ab38c8 100644 --- a/drivers/md/dm-round-robin.c +++ b/drivers/md/dm-round-robin.c @@ -210,14 +210,17 @@ static struct dm_path *rr_select_path(struct path_selector *ps, size_t nr_bytes) struct path_info *pi = NULL; struct dm_path *current_path = NULL; + local_irq_save(flags); current_path = *this_cpu_ptr(s->current_path); if (current_path) { percpu_counter_dec(&s->repeat_count); - if (percpu_counter_read_positive(&s->repeat_count) > 0) + if (percpu_counter_read_positive(&s->repeat_count) > 0) { + local_irq_restore(flags); return current_path; + } } - spin_lock_irqsave(&s->lock, flags); + spin_lock(&s->lock); if (!list_empty(&s->valid_paths)) { pi = list_entry(s->valid_paths.next, struct path_info, list); list_move_tail(&pi->list, &s->valid_paths); From 0a83df6c8cacafbefc5b56b2fbcb92b0d75b744b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka <mpatocka@redhat.com> Date: Fri, 15 Jul 2016 17:30:20 -0400 Subject: [PATCH 227/513] dm crypt: increase mempool reserve to better support swapping Increase mempool size from 16 to 64 entries. This increase improves swap on dm-crypt performance. When swapping to dm-crypt, all available memory is temporarily exhausted and dm-crypt can only use the mempool reserve. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-crypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 4e9784b4e0ac6..eedba67b0e3ef 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -181,7 +181,7 @@ struct crypt_config { u8 key[0]; }; -#define MIN_IOS 16 +#define MIN_IOS 64 static void clone_init(struct dm_crypt_io *, struct bio *); static void kcryptd_queue_crypt(struct dm_crypt_io *io); From add125054b8727103631dce116361668436ef6a7 Mon Sep 17 00:00:00 2001 From: Gavin Li <git@thegavinli.com> Date: Fri, 12 Aug 2016 00:52:56 -0700 Subject: [PATCH 228/513] cdc-acm: fix wrong pipe type on rx interrupt xfers This fixes the "BOGUS urb xfer" warning logged by usb_submit_urb(). Signed-off-by: Gavin Li <git@thegavinli.com> Acked-by: Oliver Neukum <oneukum@suse.com> Cc: stable <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/class/cdc-acm.c | 5 ++--- drivers/usb/class/cdc-acm.h | 1 - 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 71912301ef7f8..0f3f62e81e5b2 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1354,7 +1354,6 @@ static int acm_probe(struct usb_interface *intf, spin_lock_init(&acm->write_lock); spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); - acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); acm->is_int_ep = usb_endpoint_xfer_int(epread); if (acm->is_int_ep) acm->bInterval = epread->bInterval; @@ -1394,14 +1393,14 @@ static int acm_probe(struct usb_interface *intf, urb->transfer_dma = rb->dma; if (acm->is_int_ep) { usb_fill_int_urb(urb, acm->dev, - acm->rx_endpoint, + usb_rcvintpipe(usb_dev, epread->bEndpointAddress), rb->base, acm->readsize, acm_read_bulk_callback, rb, acm->bInterval); } else { usb_fill_bulk_urb(urb, acm->dev, - acm->rx_endpoint, + usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress), rb->base, acm->readsize, acm_read_bulk_callback, rb); diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h index 05ce308d5d2af..1f1eabfd84628 100644 --- a/drivers/usb/class/cdc-acm.h +++ b/drivers/usb/class/cdc-acm.h @@ -96,7 +96,6 @@ struct acm { struct acm_rb read_buffers[ACM_NR]; struct acm_wb *putbuffer; /* for acm_tty_put_char() */ int rx_buflimit; - int rx_endpoint; spinlock_t read_lock; int write_used; /* number of non-empty write buffers */ int transmitting; From f6b6a28e2dbc401416ff12f775d75281c9b41918 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com> Date: Fri, 29 Jul 2016 16:15:18 -0300 Subject: [PATCH 229/513] nvme: Prevent controller state invalid transition Acquiring the nvme_ctrl lock before reading ctrl->state in nvme_change_ctrl_state() should prevent a theoretical invalid state transition, in the event of two threads racing inside that function. I haven't been able to observe this happening with the current code, and the current state machine seems to be simple enough to not be affected by these invalid transitions, but future modifications could make it more likely to happen. Signed-off-by: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com> Reviewed-by: Sagi Grimberg <sag@grimberg.me> Reviewed-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Jens Axboe <axboe@fb.com> --- drivers/nvme/host/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7ff2e820bbf47..7f75d661237f6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -81,10 +81,12 @@ EXPORT_SYMBOL_GPL(nvme_cancel_request); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { - enum nvme_ctrl_state old_state = ctrl->state; + enum nvme_ctrl_state old_state; bool changed = false; spin_lock_irq(&ctrl->lock); + + old_state = ctrl->state; switch (new_state) { case NVME_CTRL_LIVE: switch (old_state) { @@ -140,11 +142,12 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, default: break; } - spin_unlock_irq(&ctrl->lock); if (changed) ctrl->state = new_state; + spin_unlock_irq(&ctrl->lock); + return changed; } EXPORT_SYMBOL_GPL(nvme_change_ctrl_state); From 49a7f01064a94c4fd6afb6b4f7e3ccc37d0edf99 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Sat, 13 Aug 2016 01:12:10 -0300 Subject: [PATCH 230/513] perf jitdump: Add the right header to get the major()/minor() definitions Noticed on Fedora Rawhide: $ gcc --version gcc (GCC) 6.1.1 20160721 (Red Hat 6.1.1-4) $ rpm -q glibc glibc-2.24.90-1.fc26.x86_64 $ CC /tmp/build/perf/util/jitdump.o util/jitdump.c: In function 'jit_repipe_code_load': util/jitdump.c:428:2: error: '__major_from_sys_types' is deprecated: In the GNU C Library, `major' is defined by <sys/sysmacros.h>. For historical compatibility, it is currently defined by <sys/types.h> as well, but we plan to remove this soon. To use `major', include <sys/sysmacros.h> directly. If you did not intend to use a system-defined macro `major', you should #undef it after including <sys/types.h>. [-Werror=deprecated-declarations] event->mmap2.maj = major(st.st_dev); ^~~~~ In file included from /usr/include/features.h:397:0, from /usr/include/sys/types.h:25, from util/jitdump.c:1: /usr/include/sys/sysmacros.h:87:1: note: declared here __SYSMACROS_DEFINE_MAJOR (__SYSMACROS_FST_IMPL_TEMPL) Fix it following that recomendation. Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Stephane Eranian <eranian@google.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/n/tip-3majvd0adhfr25rvx4v5e9te@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/jitdump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c index 9f3305f6b6d58..95f0884aae028 100644 --- a/tools/perf/util/jitdump.c +++ b/tools/perf/util/jitdump.c @@ -1,3 +1,4 @@ +#include <sys/sysmacros.h> #include <sys/types.h> #include <stdio.h> #include <stdlib.h> From 34276bb062b8449b3b0a208c9b848a1a27920075 Mon Sep 17 00:00:00 2001 From: Lucas Stach <l.stach@pengutronix.de> Date: Mon, 15 Aug 2016 14:58:43 +0200 Subject: [PATCH 231/513] of: fix reference counting in of_graph_get_endpoint_by_regs The called of_graph_get_next_endpoint() already decrements the refcount of the prev node, so it is wrong to do it again in the calling function. Use the for_each_endpoint_of_node() helper to interate through the endpoint OF nodes, which already does the right thing and simplifies the code a bit. Fixes: 8ccd0d0ca041 (of: add helper for getting endpoint node of specific identifiers) Cc: stable@vger.kernel.org Reported-by: David Jander <david@protonic.nl> Signed-off-by: Lucas Stach <l.stach@pengutronix.de> Acked-by: Philipp Zabel <p.zabel@pengutronix.de> Signed-off-by: Rob Herring <robh@kernel.org> --- drivers/of/base.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/of/base.c b/drivers/of/base.c index cb255a07baa0f..3ce69536a7b3c 100644 --- a/drivers/of/base.c +++ b/drivers/of/base.c @@ -2342,20 +2342,13 @@ struct device_node *of_graph_get_endpoint_by_regs( const struct device_node *parent, int port_reg, int reg) { struct of_endpoint endpoint; - struct device_node *node, *prev_node = NULL; - - while (1) { - node = of_graph_get_next_endpoint(parent, prev_node); - of_node_put(prev_node); - if (!node) - break; + struct device_node *node = NULL; + for_each_endpoint_of_node(parent, node) { of_graph_parse_endpoint(node, &endpoint); if (((port_reg == -1) || (endpoint.port == port_reg)) && ((reg == -1) || (endpoint.id == reg))) return node; - - prev_node = node; } return NULL; From 88ded4d8d94a550624e1827478e13fecf97a7b0a Mon Sep 17 00:00:00 2001 From: He Kuang <hekuang@huawei.com> Date: Thu, 4 Aug 2016 11:25:42 +0000 Subject: [PATCH 232/513] perf script: Show proper message when failed list scripts Perf shows the usage message when perf scripts folder failed to open, which misleads users to let them think the command is being mistyped. This patch shows a proper message and guides users to check the PERF_EXEC_PATH environment variable in that case. Before: $ perf script --list Usage: perf script [<options>] or: perf script [<options>] record <script> [<record-options>] <command> or: perf script [<options>] report <script> [script-args] or: perf script [<options>] <script> [<record-options>] <command> or: perf script [<options>] <top-script> [script-args] -l, --list list available scripts After: $ perf script --list open(/home/user/perf-core/scripts) failed. Check for "PERF_EXEC_PATH" env to set scripts dir. Signed-off-by: He Kuang <hekuang@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1470309943-153909-1-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-script.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9c640a8081c70..45c51eb6cab47 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1690,8 +1690,13 @@ static int list_available_scripts(const struct option *opt __maybe_unused, snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); scripts_dir = opendir(scripts_path); - if (!scripts_dir) - return -1; + if (!scripts_dir) { + fprintf(stdout, + "open(%s) failed.\n" + "Check \"PERF_EXEC_PATH\" env to set scripts dir.\n", + scripts_path); + exit(-1); + } for_each_lang(scripts_path, scripts_dir, lang_dirent) { snprintf(lang_path, MAXPATHLEN, "%s/%s/bin", scripts_path, From 71ac899b5ed7edfd8fa2a4e075194380b1de2d7f Mon Sep 17 00:00:00 2001 From: He Kuang <hekuang@huawei.com> Date: Thu, 4 Aug 2016 11:25:43 +0000 Subject: [PATCH 233/513] perf script: Don't disable use_callchain if input is pipe Because perf data from pipe do not have a header with evsel attr, we should not check that and disable symbol_conf.use_callchain. Otherwise, perf script won't show callchains even if the data stream contains callchain. Before: $ perf record -g -o - uname |perf script Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] uname 1828 182630.186578: 250000 cpu-clock: ..b9499 setup_arg_pages uname 1828 182630.186850: 250000 cpu-clock: ..83b20 ___might_sleep uname 1828 182630.187153: 250000 cpu-clock: ..4b6be file_map_prot_ch ... After: $ perf record -g -o - uname |perf script Linux [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.000 MB - ] uname 1833 182675.927099: 250000 cpu-clock: ba5520 _raw_spin_lock+0xfe200040 ([kernel.kallsyms]) 389dd4 expand_downwards+0xfe200154 ([kernel.kallsyms]) 389f34 expand_stack+0xfe200024 ([kernel.kallsyms]) 3b957e setup_arg_pages+0xfe20019e ([kernel.kallsyms]) 40c80f load_elf_binary+0xfe20042f ([kernel.kallsyms]) ... Signed-off-by: He Kuang <hekuang@huawei.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1470309943-153909-2-git-send-email-hekuang@huawei.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/builtin-script.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 45c51eb6cab47..c859e59dfe3e7 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -371,14 +371,16 @@ static int perf_session__check_output_opt(struct perf_session *session) if (!no_callchain) { bool use_callchain = false; + bool not_pipe = false; evlist__for_each_entry(session->evlist, evsel) { + not_pipe = true; if (evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) { use_callchain = true; break; } } - if (!use_callchain) + if (not_pipe && !use_callchain) symbol_conf.use_callchain = false; } From 12311959ecf8a3a64676c01b62ce67a0c5f0fd49 Mon Sep 17 00:00:00 2001 From: Vegard Nossum <vegard.nossum@oracle.com> Date: Fri, 12 Aug 2016 20:10:44 +0200 Subject: [PATCH 234/513] rhashtable: fix shift by 64 when shrinking I got this: ================================================================================ UBSAN: Undefined behaviour in ./include/linux/log2.h:63:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 1 PID: 721 Comm: kworker/1:1 Not tainted 4.8.0-rc1+ #87 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Workqueue: events rht_deferred_worker 0000000000000000 ffff88011661f8d8 ffffffff82344f50 0000000041b58ab3 ffffffff84f98000 ffffffff82344ea4 ffff88011661f900 ffff88011661f8b0 0000000000000001 ffff88011661f6b8 dffffc0000000000 ffffffff867f7640 Call Trace: [<ffffffff82344f50>] dump_stack+0xac/0xfc [<ffffffff82344ea4>] ? _atomic_dec_and_lock+0xc4/0xc4 [<ffffffff8242f5b8>] ubsan_epilogue+0xd/0x8a [<ffffffff82430c41>] __ubsan_handle_shift_out_of_bounds+0x255/0x29a [<ffffffff824309ec>] ? __ubsan_handle_out_of_bounds+0x180/0x180 [<ffffffff84003436>] ? nl80211_req_set_reg+0x256/0x2f0 [<ffffffff812112ba>] ? print_context_stack+0x8a/0x160 [<ffffffff81200031>] ? amd_pmu_reset+0x341/0x380 [<ffffffff823af808>] rht_deferred_worker+0x1618/0x1790 [<ffffffff823af808>] ? rht_deferred_worker+0x1618/0x1790 [<ffffffff823ae1f0>] ? rhashtable_jhash2+0x370/0x370 [<ffffffff8134c12d>] ? process_one_work+0x6fd/0x1970 [<ffffffff8134c1cf>] process_one_work+0x79f/0x1970 [<ffffffff8134c12d>] ? process_one_work+0x6fd/0x1970 [<ffffffff8134ba30>] ? try_to_grab_pending+0x4c0/0x4c0 [<ffffffff8134d564>] ? worker_thread+0x1c4/0x1340 [<ffffffff8134d8ff>] worker_thread+0x55f/0x1340 [<ffffffff845e904f>] ? __schedule+0x4df/0x1d40 [<ffffffff8134d3a0>] ? process_one_work+0x1970/0x1970 [<ffffffff8134d3a0>] ? process_one_work+0x1970/0x1970 [<ffffffff813642f7>] kthread+0x237/0x390 [<ffffffff813640c0>] ? __kthread_parkme+0x280/0x280 [<ffffffff845f8c93>] ? _raw_spin_unlock_irq+0x33/0x50 [<ffffffff845f95df>] ret_from_fork+0x1f/0x40 [<ffffffff813640c0>] ? __kthread_parkme+0x280/0x280 ================================================================================ roundup_pow_of_two() is undefined when called with an argument of 0, so let's avoid the call and just fall back to ht->p.min_size (which should never be smaller than HASH_MIN_SIZE). Cc: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Acked-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net> --- lib/rhashtable.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 42acd81f10dbc..5ba520b544d73 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -324,12 +324,14 @@ static int rhashtable_expand(struct rhashtable *ht) static int rhashtable_shrink(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); - unsigned int size; + unsigned int nelems = atomic_read(&ht->nelems); + unsigned int size = 0; int err; ASSERT_RHT_MUTEX(ht); - size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + if (nelems) + size = roundup_pow_of_two(nelems * 3 / 2); if (size < ht->p.min_size) size = ht->p.min_size; From 5e457896986e16c440c97bb94b9ccd95dd157292 Mon Sep 17 00:00:00 2001 From: Lorenzo Colitti <lorenzo@google.com> Date: Sat, 13 Aug 2016 01:13:38 +0900 Subject: [PATCH 235/513] net: ipv6: Fix ping to link-local addresses. ping_v6_sendmsg does not set flowi6_oif in response to sin6_scope_id or sk_bound_dev_if, so it is not possible to use these APIs to ping an IPv6 address on a different interface. Instead, it sets flowi6_iif, which is incorrect but harmless. Stop setting flowi6_iif, and support various ways of setting oif in the same priority order used by udpv6_sendmsg. Tested: https://android-review.googlesource.com/#/c/254470/ Signed-off-by: Lorenzo Colitti <lorenzo@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/ipv6/ping.c | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index fed40d1ec29b1..0900352c924c1 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -55,7 +55,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) struct icmp6hdr user_icmph; int addr_type; struct in6_addr *daddr; - int iif = 0; + int oif = 0; struct flowi6 fl6; int err; struct dst_entry *dst; @@ -78,25 +78,30 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (u->sin6_family != AF_INET6) { return -EAFNOSUPPORT; } - if (sk->sk_bound_dev_if && - sk->sk_bound_dev_if != u->sin6_scope_id) { - return -EINVAL; - } daddr = &(u->sin6_addr); - iif = u->sin6_scope_id; + if (__ipv6_addr_needs_scope_id(ipv6_addr_type(daddr))) + oif = u->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = &sk->sk_v6_daddr; } - if (!iif) - iif = sk->sk_bound_dev_if; + if (!oif) + oif = sk->sk_bound_dev_if; + + if (!oif) + oif = np->sticky_pktinfo.ipi6_ifindex; + + if (!oif && ipv6_addr_is_multicast(daddr)) + oif = np->mcast_oif; + else if (!oif) + oif = np->ucast_oif; addr_type = ipv6_addr_type(daddr); - if (__ipv6_addr_needs_scope_id(addr_type) && !iif) - return -EINVAL; - if (addr_type & IPV6_ADDR_MAPPED) + if ((__ipv6_addr_needs_scope_id(addr_type) && !oif) || + (addr_type & IPV6_ADDR_MAPPED) || + (oif && sk->sk_bound_dev_if && oif != sk->sk_bound_dev_if)) return -EINVAL; /* TODO: use ip6_datagram_send_ctl to get options from cmsg */ @@ -106,16 +111,12 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_oif = oif; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) - fl6.flowi6_oif = np->mcast_oif; - else if (!fl6.flowi6_oif) - fl6.flowi6_oif = np->ucast_oif; - ipc6.tclass = np->tclass; fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel); From 50de1a0c54cdbc69a6dbcbc323f53daf95a4050e Mon Sep 17 00:00:00 2001 From: Anton Blanchard <anton@samba.org> Date: Sat, 13 Aug 2016 11:55:33 +1000 Subject: [PATCH 236/513] perf symbols: Fix annotation of objects with debuginfo files Commit 73cdf0c6ea9c ("perf symbols: Record text offset in dso to calculate objdump address") started storing the offset of the text section for all DSOs: if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; Unfortunately this breaks debuginfo files, because we need to calculate the offset of the text section in the associated executable file. As a result perf annotate returns junk for all debuginfo files. Fix this by using runtime_ss->elf which should point at the executable when parsing a debuginfo file. Signed-off-by: Anton Blanchard <anton@samba.org> Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Tested-by: Wang Nan <wangnan0@huawei.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> Cc: stable@vger.kernel.org # v4.6+ Fixes: 73cdf0c6ea9c ("perf symbols: Record text offset in dso to calculate objdump address") Link: http://lkml.kernel.org/r/20160813115533.6de17912@kryten Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/symbol-elf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index a34321e9b44d8..a811c13a74d66 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -837,7 +837,8 @@ int dso__load_sym(struct dso *dso, struct map *map, sec = syms_ss->symtab; shdr = syms_ss->symshdr; - if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL)) + if (elf_section_by_name(runtime_ss->elf, &runtime_ss->ehdr, &tshdr, + ".text", NULL)) dso->text_offset = tshdr.sh_addr - tshdr.sh_offset; if (runtime_ss->opdsec) From 0325862dc364d8af524bf2db53ef4360ed55b989 Mon Sep 17 00:00:00 2001 From: Colin Ian King <colin.king@canonical.com> Date: Fri, 12 Aug 2016 22:44:56 +0100 Subject: [PATCH 237/513] perf probe: Check for dup and fdopen failures dup and fdopen can potentially fail, so add some extra error handling checks rather than assuming they always work. Signed-off-by: Colin King <colin.king@canonical.com> Acked-by: Masami Hiramatsu <mhiramat@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/r/1471038296-12956-1-git-send-email-colin.king@canonical.com [ Free resources when those functions (now being verified) fail ] Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/probe-file.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 9aed9c332da65..a8e76233c0883 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -133,7 +133,7 @@ int probe_file__open_both(int *kfd, int *ufd, int flag) /* Get raw string list of current kprobe_events or uprobe_events */ struct strlist *probe_file__get_rawlist(int fd) { - int ret, idx; + int ret, idx, fddup; FILE *fp; char buf[MAX_CMDLEN]; char *p; @@ -144,7 +144,14 @@ struct strlist *probe_file__get_rawlist(int fd) sl = strlist__new(NULL, NULL); - fp = fdopen(dup(fd), "r"); + fddup = dup(fd); + if (fddup < 0) + goto out_free_sl; + + fp = fdopen(fddup, "r"); + if (!fp) + goto out_close_fddup; + while (!feof(fp)) { p = fgets(buf, MAX_CMDLEN, fp); if (!p) @@ -163,6 +170,12 @@ struct strlist *probe_file__get_rawlist(int fd) fclose(fp); return sl; + +out_close_fddup: + close(fddup); +out_free_sl: + strlist__delete(sl); + return NULL; } static struct strlist *__probe_file__get_namelist(int fd, bool include_group) @@ -447,10 +460,13 @@ static int probe_cache__load(struct probe_cache *pcache) { struct probe_cache_entry *entry = NULL; char buf[MAX_CMDLEN], *p; - int ret = 0; + int ret = 0, fddup; FILE *fp; - fp = fdopen(dup(pcache->fd), "r"); + fddup = dup(pcache->fd); + if (fddup < 0) + return -errno; + fp = fdopen(fddup, "r"); if (!fp) return -EINVAL; From 3d7b33209201cbfa090d614db993571ca3c6b090 Mon Sep 17 00:00:00 2001 From: Simon Horman <simon.horman@netronome.com> Date: Mon, 15 Aug 2016 13:06:24 +0200 Subject: [PATCH 238/513] gre: set inner_protocol on xmit Ensure that the inner_protocol is set on transmit so that GSO segmentation, which relies on that field, works correctly. This is achieved by setting the inner_protocol in gre_build_header rather than each caller of that function. It ensures that the inner_protocol is set when gre_fb_xmit() is used to transmit GRE which was not previously the case. I have observed this is not the case when OvS transmits GRE using lwtunnel metadata (which it always does). Fixes: 38720352412a ("gre: Use inner_proto to obtain inner header protocol") Cc: Pravin Shelar <pshelar@ovn.org> Acked-by: Alexander Duyck <alexander.h.duyck@intel.com> Signed-off-by: Simon Horman <simon.horman@netronome.com> Acked-by: Pravin B Shelar <pshelar@ovn.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/gre.h | 1 + net/ipv4/ip_gre.c | 1 - net/ipv6/ip6_gre.c | 2 -- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/include/net/gre.h b/include/net/gre.h index 7a54a31d1d4cf..73ea256eb7d79 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -104,6 +104,7 @@ static inline void gre_build_header(struct sk_buff *skb, int hdr_len, skb_push(skb, hdr_len); + skb_set_inner_protocol(skb, proto); skb_reset_transport_header(skb); greh = (struct gre_base_hdr *)skb->data; greh->flags = gre_tnl_flags_to_gre_flags(flags); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 5b1481be02821..113cc43df789a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -370,7 +370,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, tunnel->parms.o_flags, proto, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - skb_set_inner_protocol(skb, proto); ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 776d145113e13..704274cbd4958 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -519,8 +519,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); - skb_set_inner_protocol(skb, protocol); - return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu, NEXTHDR_GRE); } From f9a7da9130ef0143eb900794c7863dc5c9051fbc Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 15 Aug 2016 17:48:39 +0200 Subject: [PATCH 239/513] hv_netvsc: don't lose VF information struct netvsc_device is not suitable for storing VF information as this structure is being destroyed on MTU change / set channel operation (see rndis_filter_device_remove()). Move all VF related stuff to struct net_device_context which is persistent. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Acked-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/hyperv/hyperv_net.h | 19 +++++++------ drivers/net/hyperv/netvsc.c | 19 ++++++------- drivers/net/hyperv/netvsc_drv.c | 49 ++++++++++++++++++--------------- 3 files changed, 45 insertions(+), 42 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 467fb8b4d0838..3b3ecf237a125 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -647,7 +647,7 @@ struct netvsc_reconfig { struct garp_wrk { struct work_struct dwrk; struct net_device *netdev; - struct netvsc_device *netvsc_dev; + struct net_device_context *net_device_ctx; }; /* The context of the netvsc device */ @@ -678,6 +678,15 @@ struct net_device_context { /* the device is going away */ bool start_remove; + + /* State to manage the associated VF interface. */ + struct net_device *vf_netdev; + bool vf_inject; + atomic_t vf_use_cnt; + /* 1: allocated, serial number is valid. 0: not allocated */ + u32 vf_alloc; + /* Serial number of the VF to team with */ + u32 vf_serial; }; /* Per netvsc device */ @@ -733,15 +742,7 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ - /* 1: allocated, serial number is valid. 0: not allocated */ - u32 vf_alloc; - /* Serial number of the VF to team with */ - u32 vf_serial; atomic_t open_cnt; - /* State to manage the associated VF interface. */ - bool vf_inject; - struct net_device *vf_netdev; - atomic_t vf_use_cnt; }; static inline struct netvsc_device * diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 20e09174ff624..410fb8e81376f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -77,13 +77,9 @@ static struct netvsc_device *alloc_net_device(void) init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); - atomic_set(&net_device->vf_use_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; - net_device->vf_netdev = NULL; - net_device->vf_inject = false; - return net_device; } @@ -1106,16 +1102,16 @@ static void netvsc_send_table(struct hv_device *hdev, nvscdev->send_table[i] = tab[i]; } -static void netvsc_send_vf(struct netvsc_device *nvdev, +static void netvsc_send_vf(struct net_device_context *net_device_ctx, struct nvsp_message *nvmsg) { - nvdev->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; - nvdev->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; + net_device_ctx->vf_alloc = nvmsg->msg.v4_msg.vf_assoc.allocated; + net_device_ctx->vf_serial = nvmsg->msg.v4_msg.vf_assoc.serial; } static inline void netvsc_receive_inband(struct hv_device *hdev, - struct netvsc_device *nvdev, - struct nvsp_message *nvmsg) + struct net_device_context *net_device_ctx, + struct nvsp_message *nvmsg) { switch (nvmsg->hdr.msg_type) { case NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE: @@ -1123,7 +1119,7 @@ static inline void netvsc_receive_inband(struct hv_device *hdev, break; case NVSP_MSG4_TYPE_SEND_VF_ASSOCIATION: - netvsc_send_vf(nvdev, nvmsg); + netvsc_send_vf(net_device_ctx, nvmsg); break; } } @@ -1136,6 +1132,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, struct vmpacket_descriptor *desc) { struct nvsp_message *nvmsg; + struct net_device_context *net_device_ctx = netdev_priv(ndev); nvmsg = (struct nvsp_message *)((unsigned long) desc + (desc->offset8 << 3)); @@ -1150,7 +1147,7 @@ static void netvsc_process_raw_pkt(struct hv_device *device, break; case VM_PKT_DATA_INBAND: - netvsc_receive_inband(device, net_device, nvmsg); + netvsc_receive_inband(device, net_device_ctx, nvmsg); break; default: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 41bd952cc28d3..794139ba31ab9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -658,20 +658,19 @@ int netvsc_recv_callback(struct hv_device *device_obj, struct sk_buff *skb; struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; - struct netvsc_device *netvsc_dev = net_device_ctx->nvdev; u32 bytes_recvd = packet->total_data_buflen; int ret = 0; if (!net || net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(netvsc_dev->vf_inject)) { - atomic_inc(&netvsc_dev->vf_use_cnt); - if (!READ_ONCE(netvsc_dev->vf_inject)) { + if (READ_ONCE(net_device_ctx->vf_inject)) { + atomic_inc(&net_device_ctx->vf_use_cnt); + if (!READ_ONCE(net_device_ctx->vf_inject)) { /* * We raced; just move on. */ - atomic_dec(&netvsc_dev->vf_use_cnt); + atomic_dec(&net_device_ctx->vf_use_cnt); goto vf_injection_done; } @@ -683,17 +682,19 @@ int netvsc_recv_callback(struct hv_device *device_obj, * the host). Deliver these via the VF interface * in the guest. */ - vf_skb = netvsc_alloc_recv_skb(netvsc_dev->vf_netdev, packet, - csum_info, *data, vlan_tci); + vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, + packet, csum_info, *data, + vlan_tci); if (vf_skb != NULL) { - ++netvsc_dev->vf_netdev->stats.rx_packets; - netvsc_dev->vf_netdev->stats.rx_bytes += bytes_recvd; + ++net_device_ctx->vf_netdev->stats.rx_packets; + net_device_ctx->vf_netdev->stats.rx_bytes += + bytes_recvd; netif_receive_skb(vf_skb); } else { ++net->stats.rx_dropped; ret = NVSP_STAT_FAIL; } - atomic_dec(&netvsc_dev->vf_use_cnt); + atomic_dec(&net_device_ctx->vf_use_cnt); return ret; } @@ -1158,7 +1159,7 @@ static void netvsc_notify_peers(struct work_struct *wrk) netdev_notify_peers(gwrk->netdev); - atomic_dec(&gwrk->netvsc_dev->vf_use_cnt); + atomic_dec(&gwrk->net_device_ctx->vf_use_cnt); } static struct net_device *get_netvsc_net_device(char *mac) @@ -1211,7 +1212,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. */ try_module_get(THIS_MODULE); - netvsc_dev->vf_netdev = vf_netdev; + net_device_ctx->vf_netdev = vf_netdev; return NOTIFY_OK; } @@ -1233,11 +1234,11 @@ static int netvsc_vf_up(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_dev->vf_inject = true; + net_device_ctx->vf_inject = true; /* * Open the device before switching data path. @@ -1257,9 +1258,9 @@ static int netvsc_vf_up(struct net_device *vf_netdev) * notify peers; take a reference to prevent * the VF interface from vanishing. */ - atomic_inc(&netvsc_dev->vf_use_cnt); + atomic_inc(&net_device_ctx->vf_use_cnt); net_device_ctx->gwrk.netdev = vf_netdev; - net_device_ctx->gwrk.netvsc_dev = netvsc_dev; + net_device_ctx->gwrk.net_device_ctx = net_device_ctx; schedule_work(&net_device_ctx->gwrk.dwrk); return NOTIFY_OK; @@ -1283,17 +1284,17 @@ static int netvsc_vf_down(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if ((netvsc_dev == NULL) || (netvsc_dev->vf_netdev == NULL)) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_dev->vf_inject = false; + net_device_ctx->vf_inject = false; /* * Wait for currently active users to * drain out. */ - while (atomic_read(&netvsc_dev->vf_use_cnt) != 0) + while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) udelay(50); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); @@ -1302,9 +1303,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev) /* * Notify peers. */ - atomic_inc(&netvsc_dev->vf_use_cnt); + atomic_inc(&net_device_ctx->vf_use_cnt); net_device_ctx->gwrk.netdev = ndev; - net_device_ctx->gwrk.netvsc_dev = netvsc_dev; + net_device_ctx->gwrk.net_device_ctx = net_device_ctx; schedule_work(&net_device_ctx->gwrk.dwrk); return NOTIFY_OK; @@ -1331,7 +1332,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_dev->vf_netdev = NULL; + net_device_ctx->vf_netdev = NULL; module_put(THIS_MODULE); return NOTIFY_OK; } @@ -1382,6 +1383,10 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); + atomic_set(&net_device_ctx->vf_use_cnt, 0); + net_device_ctx->vf_netdev = NULL; + net_device_ctx->vf_inject = false; + net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; From d072218f214929194db06069564495b6b9fff34a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 15 Aug 2016 17:48:40 +0200 Subject: [PATCH 240/513] hv_netvsc: avoid deadlocks between rtnl lock and vf_use_cnt wait Here is a deadlock scenario: - netvsc_vf_up() schedules netvsc_notify_peers() work and quits. - netvsc_vf_down() runs before netvsc_notify_peers() gets executed. As it is being executed from netdev notifier chain we hold rtnl lock when we get here. - we enter while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) loop and wait till netvsc_notify_peers() drops vf_use_cnt. - netvsc_notify_peers() starts on some other CPU but netdev_notify_peers() will hang on rtnl_lock(). - deadlock! Instead of introducing additional synchronization I suggest we drop gwrk.dwrk completely and call NETDEV_NOTIFY_PEERS directly. As we're acting under rtnl lock this is legitimate. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Acked-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/hyperv/hyperv_net.h | 7 ------- drivers/net/hyperv/netvsc_drv.c | 33 +++++---------------------------- 2 files changed, 5 insertions(+), 35 deletions(-) diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 3b3ecf237a125..591af71eae568 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -644,12 +644,6 @@ struct netvsc_reconfig { u32 event; }; -struct garp_wrk { - struct work_struct dwrk; - struct net_device *netdev; - struct net_device_context *net_device_ctx; -}; - /* The context of the netvsc device */ struct net_device_context { /* point back to our device context */ @@ -667,7 +661,6 @@ struct net_device_context { struct work_struct work; u32 msg_enable; /* debug level */ - struct garp_wrk gwrk; struct netvsc_stats __percpu *tx_stats; struct netvsc_stats __percpu *rx_stats; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 794139ba31ab9..70317fa24cde8 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1151,17 +1151,6 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static void netvsc_notify_peers(struct work_struct *wrk) -{ - struct garp_wrk *gwrk; - - gwrk = container_of(wrk, struct garp_wrk, dwrk); - - netdev_notify_peers(gwrk->netdev); - - atomic_dec(&gwrk->net_device_ctx->vf_use_cnt); -} - static struct net_device *get_netvsc_net_device(char *mac) { struct net_device *dev, *found = NULL; @@ -1253,15 +1242,8 @@ static int netvsc_vf_up(struct net_device *vf_netdev) netif_carrier_off(ndev); - /* - * Now notify peers. We are scheduling work to - * notify peers; take a reference to prevent - * the VF interface from vanishing. - */ - atomic_inc(&net_device_ctx->vf_use_cnt); - net_device_ctx->gwrk.netdev = vf_netdev; - net_device_ctx->gwrk.net_device_ctx = net_device_ctx; - schedule_work(&net_device_ctx->gwrk.dwrk); + /* Now notify peers through VF device. */ + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, vf_netdev); return NOTIFY_OK; } @@ -1300,13 +1282,9 @@ static int netvsc_vf_down(struct net_device *vf_netdev) netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); netif_carrier_on(ndev); - /* - * Notify peers. - */ - atomic_inc(&net_device_ctx->vf_use_cnt); - net_device_ctx->gwrk.netdev = ndev; - net_device_ctx->gwrk.net_device_ctx = net_device_ctx; - schedule_work(&net_device_ctx->gwrk.dwrk); + + /* Now notify peers through netvsc device. */ + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, ndev); return NOTIFY_OK; } @@ -1378,7 +1356,6 @@ static int netvsc_probe(struct hv_device *dev, INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change); INIT_WORK(&net_device_ctx->work, do_set_multicast); - INIT_WORK(&net_device_ctx->gwrk.dwrk, netvsc_notify_peers); spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); From 57c1826b991244d2144eb6e3d5d1b13a53cbea63 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 15 Aug 2016 17:48:41 +0200 Subject: [PATCH 241/513] hv_netvsc: reset vf_inject on VF removal We reset vf_inject on VF going down (netvsc_vf_down()) but we don't on VF removal (netvsc_unregister_vf()) so vf_inject stays 'true' while vf_netdev is already NULL and we're trying to inject packets into NULL net device in netvsc_recv_callback() causing kernel to crash. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Acked-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/hyperv/netvsc_drv.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 70317fa24cde8..2c90883d089e9 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1205,6 +1205,19 @@ static int netvsc_register_vf(struct net_device *vf_netdev) return NOTIFY_OK; } +static void netvsc_inject_enable(struct net_device_context *net_device_ctx) +{ + net_device_ctx->vf_inject = true; +} + +static void netvsc_inject_disable(struct net_device_context *net_device_ctx) +{ + net_device_ctx->vf_inject = false; + + /* Wait for currently active users to drain out. */ + while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) + udelay(50); +} static int netvsc_vf_up(struct net_device *vf_netdev) { @@ -1227,7 +1240,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - net_device_ctx->vf_inject = true; + netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1270,14 +1283,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - net_device_ctx->vf_inject = false; - /* - * Wait for currently active users to - * drain out. - */ - - while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) - udelay(50); + netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); @@ -1309,7 +1315,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) if (netvsc_dev == NULL) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - + netvsc_inject_disable(net_device_ctx); net_device_ctx->vf_netdev = NULL; module_put(THIS_MODULE); return NOTIFY_OK; From 0f20d795f78d182c4b743d880a5e8dc4d39892fe Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 15 Aug 2016 17:48:42 +0200 Subject: [PATCH 242/513] hv_netvsc: protect module refcount by checking net_device_ctx->vf_netdev We're not guaranteed to see NETDEV_REGISTER/NETDEV_UNREGISTER notifications only once per VF but we increase/decrease module refcount unconditionally. Check vf_netdev to make sure we don't take/release it twice. We presume that only one VF per netvsc device may exist. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Acked-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/hyperv/netvsc_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 2c90883d089e9..62a4e6e3c63ed 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1193,7 +1193,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (netvsc_dev == NULL) + if (!netvsc_dev || net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1312,7 +1312,7 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev) net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (netvsc_dev == NULL) + if (!netvsc_dev || !net_device_ctx->vf_netdev) return NOTIFY_DONE; netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); netvsc_inject_disable(net_device_ctx); From 0dbff144a1e7310e2f8b7a957352c4be9aeb38e4 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Mon, 15 Aug 2016 17:48:43 +0200 Subject: [PATCH 243/513] hv_netvsc: fix bonding devices check in netvsc_netdev_event() Bonding driver sets IFF_BONDING on both master (the bonding device) and slave (the real NIC) devices and in netvsc_netdev_event() we want to skip master devices only. Currently, there is an uncertainty when a slave interface is removed: if bonding module comes first in netdev_chain it clears IFF_BONDING flag on the netdev and netvsc_netdev_event() correctly handles NETDEV_UNREGISTER event, but in case netvsc comes first on the chain it sees the device with IFF_BONDING still attached and skips it. As we still hold vf_netdev pointer to the device we crash on the next inject. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Acked-by: Haiyang Zhang <haiyangz@microsoft.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/hyperv/netvsc_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 62a4e6e3c63ed..3ba29fc80d057 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1482,8 +1482,13 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); - /* Avoid Vlan, Bonding dev with same MAC registering as VF */ - if (event_dev->priv_flags & (IFF_802_1Q_VLAN | IFF_BONDING)) + /* Avoid Vlan dev with same MAC registering as VF */ + if (event_dev->priv_flags & IFF_802_1Q_VLAN) + return NOTIFY_DONE; + + /* Avoid Bonding master dev with same MAC registering as VF */ + if (event_dev->priv_flags & IFF_BONDING && + event_dev->flags & IFF_MASTER) return NOTIFY_DONE; switch (event) { From d2fbdf76b85bcdfe57b8ef2ba09d20e8ada79abd Mon Sep 17 00:00:00 2001 From: Vegard Nossum <vegard.nossum@oracle.com> Date: Sat, 23 Jul 2016 08:15:04 +0200 Subject: [PATCH 244/513] tipc: fix NULL pointer dereference in shutdown() tipc_msg_create() can return a NULL skb and if so, we shouldn't try to call tipc_node_xmit_skb() on it. general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 3 PID: 30298 Comm: trinity-c0 Not tainted 4.7.0-rc7+ #19 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014 task: ffff8800baf09980 ti: ffff8800595b8000 task.ti: ffff8800595b8000 RIP: 0010:[<ffffffff830bb46b>] [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140 RSP: 0018:ffff8800595bfce8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 000000003023b0e0 RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffffff83d12580 RBP: ffff8800595bfd78 R08: ffffed000b2b7f32 R09: 0000000000000000 R10: fffffbfff0759725 R11: 0000000000000000 R12: 1ffff1000b2b7f9f R13: ffff8800595bfd58 R14: ffffffff83d12580 R15: dffffc0000000000 FS: 00007fcdde242700(0000) GS:ffff88011af80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fcddde1db10 CR3: 000000006874b000 CR4: 00000000000006e0 DR0: 00007fcdde248000 DR1: 00007fcddd73d000 DR2: 00007fcdde248000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000090602 Stack: 0000000000000018 0000000000000018 0000000041b58ab3 ffffffff83954208 ffffffff830bb400 ffff8800595bfd30 ffffffff8309d767 0000000000000018 0000000000000018 ffff8800595bfd78 ffffffff8309da1a 00000000810ee611 Call Trace: [<ffffffff830c84a3>] tipc_shutdown+0x553/0x880 [<ffffffff825b4a3b>] SyS_shutdown+0x14b/0x170 [<ffffffff8100334c>] do_syscall_64+0x19c/0x410 [<ffffffff83295ca5>] entry_SYSCALL64_slow_path+0x25/0x25 Code: 90 00 b4 0b 83 c7 00 f1 f1 f1 f1 4c 8d 6d e0 c7 40 04 00 00 00 f4 c7 40 08 f3 f3 f3 f3 48 89 d8 48 c1 e8 03 c7 45 b4 00 00 00 00 <80> 3c 30 00 75 78 48 8d 7b 08 49 8d 75 c0 48 b8 00 00 00 00 00 RIP [<ffffffff830bb46b>] tipc_node_xmit_skb+0x6b/0x140 RSP <ffff8800595bfce8> ---[ end trace 57b0484e351e71f1 ]--- I feel like we should maybe return -ENOMEM or -ENOBUFS, but I'm not sure userspace is equipped to handle that. Anyway, this is better than a GPF and looks somewhat consistent with other tipc_msg_create() callers. Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Acked-by: Ying Xue <ying.xue@windriver.com> Acked-by: Jon Maloy <jon.maloy@ericsson.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/tipc/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c49b8df438cbe..f9f5f3c3dab53 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2180,7 +2180,8 @@ static int tipc_shutdown(struct socket *sock, int how) TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, onode, dport, oport, TIPC_CONN_SHUTDOWN); - tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); } tsk->connected = 0; sock->state = SS_DISCONNECTING; From d9ae449b3d14d7c55f69af329972f175d180e68d Mon Sep 17 00:00:00 2001 From: Christoffer Dall <christoffer.dall@linaro.org> Date: Wed, 3 Aug 2016 18:03:44 +0200 Subject: [PATCH 245/513] KVM: arm64: vgic-its: Make updates to propbaser/pendbaser atomic There are two problems with the current implementation of the MMIO handlers for the propbaser and pendbaser: First, the write to the value itself is not guaranteed to be an atomic 64-bit write so two concurrent writes to the structure field could be intermixed. Second, because we do a read-modify-update operation without any synchronization, if we have two 32-bit accesses to separate parts of the register, we can loose one of them. By using the atomic cmpxchg64 we should cover both issues above. Reviewed-by: Andre Przywara <andre.przywara@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index ff668e0dd586d..90d81811fdda0 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -306,16 +306,19 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u64 propbaser = dist->propbaser; + u64 old_propbaser, propbaser; /* Storing a value with LPIs already enabled is undefined */ if (vgic_cpu->lpis_enabled) return; - propbaser = update_64bit_reg(propbaser, addr & 4, len, val); - propbaser = vgic_sanitise_propbaser(propbaser); - - dist->propbaser = propbaser; + do { + old_propbaser = dist->propbaser; + propbaser = old_propbaser; + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + } while (cmpxchg64(&dist->propbaser, old_propbaser, + propbaser) != old_propbaser); } static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, @@ -331,16 +334,19 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, unsigned long val) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - u64 pendbaser = vgic_cpu->pendbaser; + u64 old_pendbaser, pendbaser; /* Storing a value with LPIs already enabled is undefined */ if (vgic_cpu->lpis_enabled) return; - pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); - pendbaser = vgic_sanitise_pendbaser(pendbaser); - - vgic_cpu->pendbaser = pendbaser; + do { + old_pendbaser = vgic_cpu->pendbaser; + pendbaser = old_pendbaser; + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, + pendbaser) != old_pendbaser); } /* From c7735769d5dd79afb07254532fabd9ccbd85b1fa Mon Sep 17 00:00:00 2001 From: Andre Przywara <andre.przywara@arm.com> Date: Mon, 8 Aug 2016 16:45:43 +0100 Subject: [PATCH 246/513] KVM: arm64: ITS: move ITS registration into first VCPU run Currently we register an ITS device upon userland issuing the CTLR_INIT ioctl to mark initialization of the ITS as done. This deviates from the initialization sequence of the existing GIC devices and does not play well with the way QEMU handles things. To be more in line with what we are used to, register the ITS(es) just before the first VCPU is about to run, so in the map_resources() call. This involves iterating through the list of KVM devices and map each ITS that we find. Signed-off-by: Andre Przywara <andre.przywara@arm.com> Reviewed-by: Eric Auger <eric.auger@redhat.com> Tested-by: Eric Auger <eric.auger@redhat.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/vgic/vgic-its.c | 43 +++++++++++++++++++++++++++--------- virt/kvm/arm/vgic/vgic-v3.c | 8 +++++++ virt/kvm/arm/vgic/vgic.h | 6 +++++ 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index d06330abd5e86..1cf9f598c72a2 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1319,13 +1319,13 @@ void vgic_enable_lpis(struct kvm_vcpu *vcpu) its_sync_lpi_pending_table(vcpu); } -static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) +static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its) { struct vgic_io_device *iodev = &its->iodev; int ret; - if (its->initialized) - return 0; + if (!its->initialized) + return -EBUSY; if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) return -ENXIO; @@ -1342,9 +1342,6 @@ static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) KVM_VGIC_V3_ITS_SIZE, &iodev->dev); mutex_unlock(&kvm->slots_lock); - if (!ret) - its->initialized = true; - return ret; } @@ -1466,9 +1463,6 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (type != KVM_VGIC_ITS_ADDR_TYPE) return -ENODEV; - if (its->initialized) - return -EBUSY; - if (copy_from_user(&addr, uaddr, sizeof(addr))) return -EFAULT; @@ -1484,7 +1478,9 @@ static int vgic_its_set_attr(struct kvm_device *dev, case KVM_DEV_ARM_VGIC_GRP_CTRL: switch (attr->attr) { case KVM_DEV_ARM_VGIC_CTRL_INIT: - return vgic_its_init_its(dev->kvm, its); + its->initialized = true; + + return 0; } break; } @@ -1529,3 +1525,30 @@ int kvm_vgic_register_its_device(void) return kvm_register_device_ops(&kvm_arm_vgic_its_ops, KVM_DEV_TYPE_ARM_VGIC_ITS); } + +/* + * Registers all ITSes with the kvm_io_bus framework. + * To follow the existing VGIC initialization sequence, this has to be + * done as late as possible, just before the first VCPU runs. + */ +int vgic_register_its_iodevs(struct kvm *kvm) +{ + struct kvm_device *dev; + int ret = 0; + + list_for_each_entry(dev, &kvm->devices, vm_node) { + if (dev->ops != &kvm_arm_vgic_its_ops) + continue; + + ret = vgic_register_its_iodev(kvm, dev->private); + if (ret) + return ret; + /* + * We don't need to care about tearing down previously + * registered ITSes, as the kvm_io_bus framework removes + * them for us if the VM gets destroyed. + */ + } + + return ret; +} diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 0506543df38a7..9f0dae397d9c8 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -289,6 +289,14 @@ int vgic_v3_map_resources(struct kvm *kvm) goto out; } + if (vgic_has_its(kvm)) { + ret = vgic_register_its_iodevs(kvm); + if (ret) { + kvm_err("Unable to register VGIC ITS MMIO regions\n"); + goto out; + } + } + dist->ready = true; out: diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 1d8e21d5c13f5..6c4625c463684 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -84,6 +84,7 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); +int vgic_register_its_iodevs(struct kvm *kvm); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); @@ -140,6 +141,11 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm, return -ENODEV; } +static inline int vgic_register_its_iodevs(struct kvm *kvm) +{ + return -ENODEV; +} + static inline bool vgic_has_its(struct kvm *kvm) { return false; From 505a19eec49ab36b314a05bc062749ebdfb0aa90 Mon Sep 17 00:00:00 2001 From: Andre Przywara <andre.przywara@arm.com> Date: Tue, 9 Aug 2016 10:54:29 +0100 Subject: [PATCH 247/513] KVM: arm64: check for ITS device on MSI injection When userspace provides the doorbell address for an MSI to be injected into the guest, we find a KVM device which feels responsible. Lets check that this device is really an emulated ITS before we make real use of the container_of-ed pointer. [ Moved NULL-pointer check to caller of static function - Christoffer ] Signed-off-by: Andre Przywara <andre.przywara@arm.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/vgic/vgic-its.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 1cf9f598c72a2..9533080b47d3b 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -471,6 +471,21 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, return 0; } +static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev) +{ + struct vgic_io_device *iodev; + + if (dev->ops != &kvm_io_gic_ops) + return NULL; + + iodev = container_of(dev, struct vgic_io_device, dev); + + if (iodev->iodev_type != IODEV_ITS) + return NULL; + + return iodev; +} + /* * Queries the KVM IO bus framework to get the ITS pointer from the given * doorbell address. @@ -494,9 +509,11 @@ int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); if (!kvm_io_dev) - return -ENODEV; + return -EINVAL; - iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + iodev = vgic_get_its_iodev(kvm_io_dev); + if (!iodev) + return -EINVAL; mutex_lock(&iodev->its->its_lock); ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); From 33e7664a0af6e9a516f01014f39737aaa119b6d9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Tue, 26 Jul 2016 14:49:04 +0000 Subject: [PATCH 248/513] power_supply: tps65217-charger: fix missing platform_set_drvdata() Add missing platform_set_drvdata() in tps65217_charger_probe(), otherwise calling platform_get_drvdata() in remove returns NULL. This is detected by Coccinelle semantic patch. Fixes: 3636859b280c ("power_supply: Add support for tps65217-charger") Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Signed-off-by: Sebastian Reichel <sre@kernel.org> --- drivers/power/tps65217_charger.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/power/tps65217_charger.c b/drivers/power/tps65217_charger.c index 73dfae41def8a..4c56e54af6ace 100644 --- a/drivers/power/tps65217_charger.c +++ b/drivers/power/tps65217_charger.c @@ -206,6 +206,7 @@ static int tps65217_charger_probe(struct platform_device *pdev) if (!charger) return -ENOMEM; + platform_set_drvdata(pdev, charger); charger->tps = tps; charger->dev = &pdev->dev; From 60ebc159817fef86171616510b1228476d979556 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 15 Aug 2016 17:06:47 -0300 Subject: [PATCH 249/513] perf probe: Release resources on error when handling exit paths Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Colin King <colin.king@canonical.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Masami Hiramatsu <mhiramat@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/n/tip-zh2j4iqimralugke5qq7dn6d@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/probe-file.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index a8e76233c0883..9c3b9ed5b3c3e 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -143,6 +143,8 @@ struct strlist *probe_file__get_rawlist(int fd) return NULL; sl = strlist__new(NULL, NULL); + if (sl == NULL) + return NULL; fddup = dup(fd); if (fddup < 0) @@ -163,14 +165,16 @@ struct strlist *probe_file__get_rawlist(int fd) ret = strlist__add(sl, buf); if (ret < 0) { pr_debug("strlist__add failed (%d)\n", ret); - strlist__delete(sl); - return NULL; + goto out_close_fp; } } fclose(fp); return sl; +out_close_fp: + fclose(fp); + goto out_free_sl; out_close_fddup: close(fddup); out_free_sl: @@ -467,8 +471,10 @@ static int probe_cache__load(struct probe_cache *pcache) if (fddup < 0) return -errno; fp = fdopen(fddup, "r"); - if (!fp) + if (!fp) { + close(fddup); return -EINVAL; + } while (!feof(fp)) { if (!fgets(buf, MAX_CMDLEN, fp)) From 9f3156903df3f7d9bba9acde810d78bca94305d5 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Mon, 15 Aug 2016 17:42:30 -0300 Subject: [PATCH 250/513] tools: Sync kvm related header files for arm64 and s390 From a quick look nothing stands out as requiring changes to kvm tools such as tools/perf/arch/s390/util/kvm-stat.c. Silences these header checking warnings: $ make -C tools/perf make: Entering directory '/home/acme/git/linux/tools/perf' BUILD: Doing 'make -j4' parallel build Warning: tools/arch/s390/include/uapi/asm/kvm.h differs from kernel Warning: tools/arch/s390/include/uapi/asm/sie.h differs from kernel Warning: tools/arch/arm64/include/uapi/asm/kvm.h differs from kernel <SNIP> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexander Yarygin <yarygin@linux.vnet.ibm.com> Cc: David Ahern <dsahern@gmail.com> Cc: Hemant Kumar <hemant@linux.vnet.ibm.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com> Cc: Scott Wood <scottwood@freescale.com> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Wang Nan <wangnan0@huawei.com> Link: http://lkml.kernel.org/n/tip-btutge414g516qmh6r5ienlj@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/arch/arm64/include/uapi/asm/kvm.h | 2 ++ tools/arch/s390/include/uapi/asm/kvm.h | 41 +++++++++++++++++++++++++ tools/arch/s390/include/uapi/asm/sie.h | 1 + 3 files changed, 44 insertions(+) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index f209ea151dca8..3051f86a9b5f4 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 3b8e99ef9d58d..a2ffec4139ad1 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -93,6 +93,47 @@ struct kvm_s390_vm_cpu_machine { __u64 fac_list[256]; }; +#define KVM_S390_VM_CPU_PROCESSOR_FEAT 2 +#define KVM_S390_VM_CPU_MACHINE_FEAT 3 + +#define KVM_S390_VM_CPU_FEAT_NR_BITS 1024 +#define KVM_S390_VM_CPU_FEAT_ESOP 0 +#define KVM_S390_VM_CPU_FEAT_SIEF2 1 +#define KVM_S390_VM_CPU_FEAT_64BSCAO 2 +#define KVM_S390_VM_CPU_FEAT_SIIF 3 +#define KVM_S390_VM_CPU_FEAT_GPERE 4 +#define KVM_S390_VM_CPU_FEAT_GSLS 5 +#define KVM_S390_VM_CPU_FEAT_IB 6 +#define KVM_S390_VM_CPU_FEAT_CEI 7 +#define KVM_S390_VM_CPU_FEAT_IBS 8 +#define KVM_S390_VM_CPU_FEAT_SKEY 9 +#define KVM_S390_VM_CPU_FEAT_CMMA 10 +#define KVM_S390_VM_CPU_FEAT_PFMFI 11 +#define KVM_S390_VM_CPU_FEAT_SIGPIF 12 +struct kvm_s390_vm_cpu_feat { + __u64 feat[16]; +}; + +#define KVM_S390_VM_CPU_PROCESSOR_SUBFUNC 4 +#define KVM_S390_VM_CPU_MACHINE_SUBFUNC 5 +/* for "test bit" instructions MSB 0 bit ordering, for "query" raw blocks */ +struct kvm_s390_vm_cpu_subfunc { + __u8 plo[32]; /* always */ + __u8 ptff[16]; /* with TOD-clock steering */ + __u8 kmac[16]; /* with MSA */ + __u8 kmc[16]; /* with MSA */ + __u8 km[16]; /* with MSA */ + __u8 kimd[16]; /* with MSA */ + __u8 klmd[16]; /* with MSA */ + __u8 pckmo[16]; /* with MSA3 */ + __u8 kmctr[16]; /* with MSA4 */ + __u8 kmf[16]; /* with MSA4 */ + __u8 kmo[16]; /* with MSA4 */ + __u8 pcc[16]; /* with MSA4 */ + __u8 ppno[16]; /* with MSA5 */ + __u8 reserved[1824]; +}; + /* kvm attributes for crypto */ #define KVM_S390_VM_CRYPTO_ENABLE_AES_KW 0 #define KVM_S390_VM_CRYPTO_ENABLE_DEA_KW 1 diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h index 8fb5d4a6dd25b..3ac6343689394 100644 --- a/tools/arch/s390/include/uapi/asm/sie.h +++ b/tools/arch/s390/include/uapi/asm/sie.h @@ -140,6 +140,7 @@ exit_code_ipa0(0xB2, 0x4c, "TAR"), \ exit_code_ipa0(0xB2, 0x50, "CSP"), \ exit_code_ipa0(0xB2, 0x54, "MVPG"), \ + exit_code_ipa0(0xB2, 0x56, "STHYI"), \ exit_code_ipa0(0xB2, 0x58, "BSG"), \ exit_code_ipa0(0xB2, 0x5a, "BSA"), \ exit_code_ipa0(0xB2, 0x5f, "CHSC"), \ From 3d918fb13abdbeca7947578f5d7e426eafad7f5e Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Mon, 15 Aug 2016 10:23:04 +0300 Subject: [PATCH 251/513] perf intel-pt: Fix occasional decoding errors when tracing system-wide In order to successfully decode Intel PT traces, context switch events are needed from the moment the trace starts. Currently that is ensured by using the 'immediate' flag which enables the switch event when it is opened. However, since commit 86c2786994bd ("perf intel-pt: Add support for PERF_RECORD_SWITCH") that might not always happen. When tracing system-wide the context switch event is added to the tracking event which was not set as 'immediate'. Change that so it is. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: stable@vger.kernel.org # v4.4+ Fixes: 86c2786994bd ("perf intel-pt: Add support for PERF_RECORD_SWITCH") Link: http://lkml.kernel.org/r/1471245784-22580-1-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/arch/x86/util/intel-pt.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index fb51457ba338f..a2412e9d883b5 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -501,7 +501,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, struct intel_pt_recording *ptr = container_of(itr, struct intel_pt_recording, itr); struct perf_pmu *intel_pt_pmu = ptr->intel_pt_pmu; - bool have_timing_info; + bool have_timing_info, need_immediate = false; struct perf_evsel *evsel, *intel_pt_evsel = NULL; const struct cpu_map *cpus = evlist->cpus; bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; @@ -655,6 +655,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, ptr->have_sched_switch = 3; } else { opts->record_switch_events = true; + need_immediate = true; if (cpu_wide) ptr->have_sched_switch = 3; else @@ -700,6 +701,9 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, tracking_evsel->attr.freq = 0; tracking_evsel->attr.sample_period = 1; + if (need_immediate) + tracking_evsel->immediate = true; + /* In per-cpu case, always need the time of mmap events etc */ if (!cpu_map__empty(cpus)) { perf_evsel__set_sample_bit(tracking_evsel, TIME); From 5d87f493ddb1b86a0569fa3c4037fa9efc0c7183 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com> Date: Sun, 14 Aug 2016 04:07:32 +0200 Subject: [PATCH 252/513] x86/power/64: Use __pa() for physical address computation The value of temp_level4_pgt is the physical address of the top-level page directory, so use __pa() to compute it. Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Acked-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/power/hibernate_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index a3e3ccc87138a..9634557a54447 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -113,7 +113,7 @@ static int set_up_temporary_mappings(void) return result; } - temp_level4_pgt = (unsigned long)pgd - __PAGE_OFFSET; + temp_level4_pgt = __pa(pgd); return 0; } From 8ca7f4fe0733342c862b8585dd6eb6521b9bf533 Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" <sean.wang@mediatek.com> Date: Tue, 16 Aug 2016 13:55:13 +0800 Subject: [PATCH 253/513] net: ethernet: mediatek: fix RMII mode and add REVMII supported by GMAC The patch fixes up the incorrect setup of reduced MII (RMII) on GMAC and adds the supplement for the setup of reverse MII (REVMII) on GMAC , and rearranges the error handling for invalid PHY argument. Signed-off-by: Sean Wang <sean.wang@mediatek.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 003036114a1c2..bd0ea056405f7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -245,12 +245,16 @@ static int mtk_phy_connect(struct mtk_mac *mac) case PHY_INTERFACE_MODE_MII: ge_mode = 1; break; - case PHY_INTERFACE_MODE_RMII: + case PHY_INTERFACE_MODE_REVMII: ge_mode = 2; break; + case PHY_INTERFACE_MODE_RMII: + if (!mac->id) + goto err_phy; + ge_mode = 3; + break; default: - dev_err(eth->dev, "invalid phy_mode\n"); - return -1; + goto err_phy; } /* put the gmac into the right mode */ @@ -272,6 +276,11 @@ static int mtk_phy_connect(struct mtk_mac *mac) of_node_put(np); return 0; + +err_phy: + of_node_put(np); + dev_err(eth->dev, "invalid phy_mode\n"); + return -EINVAL; } static int mtk_mdio_init(struct mtk_eth *eth) From b2025c7cc92d5bfc8c5ce756c8d8a6f57c776fbd Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" <sean.wang@mediatek.com> Date: Tue, 16 Aug 2016 13:55:14 +0800 Subject: [PATCH 254/513] net: ethernet: mediatek: fix flow control settings on GMAC0 is not being enabled properly Commit 08ef55c6f257acf3bdc6940813f80e8f0f5d90ec ("net-next: mediatek: fix gigabit and flow control advertisement") had supported proper flow control settings for GMAC1. But for GMAC0, 1.GMAC0 shares the common logic with GMAC1 inside mtk_phy_link_adjust() to adapt various settings for the target phy. 2.GMAC0 uses fixed-phy to connect to a builtin gigabit switch with fixed link speed as commit 0c72c50f6f93b0c3daa9ea35d89ab3a933c7b5a0 ("net-next: mediatek: add fixed-phy support") describes. 3.However, fixed-phy doesn't enable SUPPORTED_Pause & SUPPORTED_Asym_Pause supported flag on default that would cause mtk_phy_link_adjust() not to enable flow control setting on GMAC0 properly and cause packet dropped when high traffic. Due to these reasons, the patch adds SUPPORTED_Pause & SUPPORTED_Asym_Pause supported flags on fixed-phy used by the driver to have proper handling on the both GMAC with the shared common logic. Signed-off-by: Sean Wang <sean.wang@mediatek.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index bd0ea056405f7..9901527a39df0 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -267,6 +267,11 @@ static int mtk_phy_connect(struct mtk_mac *mac) mac->phy_dev->autoneg = AUTONEG_ENABLE; mac->phy_dev->speed = 0; mac->phy_dev->duplex = 0; + + if (of_phy_is_fixed_link(mac->of_node)) + mac->phy_dev->supported |= + SUPPORTED_Pause | SUPPORTED_Asym_Pause; + mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; mac->phy_dev->advertising = mac->phy_dev->supported | From 55a4e778191cfcf675aa1f9716edb71a3014d5fb Mon Sep 17 00:00:00 2001 From: "sean.wang@mediatek.com" <sean.wang@mediatek.com> Date: Tue, 16 Aug 2016 13:55:15 +0800 Subject: [PATCH 255/513] net: ethernet: mediatek: fix runtime warning raised by inconsistent struct device pointers passed to DMA API Runtime warning occurs if DMA-API debug feature is enabled that would be raised by pointers passed to DMA API as arguments to inconsistent struct device objects, so that the patch makes them usage aligned between DMA operations such as dma_map_*() and dma_unmap_*() to eliminate the warning. Signed-off-by: Sean Wang <sean.wang@mediatek.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 26 ++++++++++----------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 9901527a39df0..f1609542adf19 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -558,15 +558,15 @@ static inline struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring, return &ring->buf[idx]; } -static void mtk_tx_unmap(struct device *dev, struct mtk_tx_buf *tx_buf) +static void mtk_tx_unmap(struct mtk_eth *eth, struct mtk_tx_buf *tx_buf) { if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { - dma_unmap_single(dev, + dma_unmap_single(eth->dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) { - dma_unmap_page(dev, + dma_unmap_page(eth->dev, dma_unmap_addr(tx_buf, dma_addr0), dma_unmap_len(tx_buf, dma_len0), DMA_TO_DEVICE); @@ -611,9 +611,9 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, if (skb_vlan_tag_present(skb)) txd4 |= TX_DMA_INS_VLAN | skb_vlan_tag_get(skb); - mapped_addr = dma_map_single(&dev->dev, skb->data, + mapped_addr = dma_map_single(eth->dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) return -ENOMEM; WRITE_ONCE(itxd->txd1, mapped_addr); @@ -639,10 +639,10 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, n_desc++; frag_map_size = min(frag_size, MTK_TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset, + mapped_addr = skb_frag_dma_map(eth->dev, frag, offset, frag_map_size, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) + if (unlikely(dma_mapping_error(eth->dev, mapped_addr))) goto err_dma; if (i == nr_frags - 1 && @@ -695,7 +695,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ - mtk_tx_unmap(&dev->dev, tx_buf); + mtk_tx_unmap(eth, tx_buf); itxd->txd3 = TX_DMA_LS0 | TX_DMA_OWNER_CPU; itxd = mtk_qdma_phys_to_virt(ring, itxd->txd2); @@ -852,11 +852,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev->stats.rx_dropped++; goto release_desc; } - dma_addr = dma_map_single(ð->netdev[mac]->dev, + dma_addr = dma_map_single(eth->dev, new_data + NET_SKB_PAD, ring->buf_size, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { + if (unlikely(dma_mapping_error(eth->dev, dma_addr))) { skb_free_frag(new_data); netdev->stats.rx_dropped++; goto release_desc; @@ -871,7 +871,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - dma_unmap_single(&netdev->dev, trxd.rxd1, + dma_unmap_single(eth->dev, trxd.rxd1, ring->buf_size, DMA_FROM_DEVICE); pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); skb->dev = netdev; @@ -953,7 +953,7 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget) done[mac]++; budget--; } - mtk_tx_unmap(eth->dev, tx_buf); + mtk_tx_unmap(eth, tx_buf); ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1108,7 +1108,7 @@ static void mtk_tx_clean(struct mtk_eth *eth) if (ring->buf) { for (i = 0; i < MTK_DMA_SIZE; i++) - mtk_tx_unmap(eth->dev, &ring->buf[i]); + mtk_tx_unmap(eth, &ring->buf[i]); kfree(ring->buf); ring->buf = NULL; } From 33be126510974e2eb9679f1ca9bca4f67ee4c4c7 Mon Sep 17 00:00:00 2001 From: Mathias Nyman <mathias.nyman@linux.intel.com> Date: Tue, 16 Aug 2016 10:18:03 +0300 Subject: [PATCH 256/513] xhci: always handle "Command Ring Stopped" events Fix "Command completion event does not match command" errors by always handling the command ring stopped events. The command ring stopped event is generated as a result of aborting or stopping the command ring with a register write. It is not caused by a command in the command queue, and thus won't have a matching command in the comman list. Solve it by handling the command ring stopped event before checking for a matching command. In most command time out cases we abort the command ring, and get a command ring stopped event. The events command pointer will point at the current command ring dequeue, which in most cases matches the timed out command in the command list, and no error messages are seen. If we instead get a command aborted event before the command ring stopped event, the abort event will increse the command ring dequeue pointer, and the following command ring stopped events command pointer will point at the next, not yet queued command. This case triggered the error message Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> CC: <stable@vger.kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/host/xhci-ring.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 918e0c739b795..fe5b70be61baa 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1334,12 +1334,6 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, cmd = list_entry(xhci->cmd_list.next, struct xhci_command, cmd_list); - if (cmd->command_trb != xhci->cmd_ring->dequeue) { - xhci_err(xhci, - "Command completion event does not match command\n"); - return; - } - del_timer(&xhci->cmd_timer); trace_xhci_cmd_completion(cmd_trb, (struct xhci_generic_trb *) event); @@ -1351,6 +1345,13 @@ static void handle_cmd_completion(struct xhci_hcd *xhci, xhci_handle_stopped_cmd_ring(xhci, cmd); return; } + + if (cmd->command_trb != xhci->cmd_ring->dequeue) { + xhci_err(xhci, + "Command completion event does not match command\n"); + return; + } + /* * Host aborted the command ring, check if the current command was * supposed to be aborted, otherwise continue normally. From 0d2daaded82565f807a4435d678343f437b8b848 Mon Sep 17 00:00:00 2001 From: Alban Browaeys <alban.browaeys@gmail.com> Date: Tue, 16 Aug 2016 10:18:04 +0300 Subject: [PATCH 257/513] xhci: really enqueue zero length TRBs. Enqueue the first TRB even if full_len is zero. Without this "adb install <apk>" freezes the system. Signed-off-by: Alban Browaeys <alban.browaeys@gmail.com> Fixes: 86065c2719a5 ("xhci: don't rely on precalculated value of needed trbs in the enqueue loop") Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/host/xhci-ring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index fe5b70be61baa..fd9fd12e48617 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -3244,7 +3244,8 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags, send_addr = addr; /* Queue the TRBs, even if they are zero-length */ - for (enqd_len = 0; enqd_len < full_len; enqd_len += trb_buff_len) { + for (enqd_len = 0; first_trb || enqd_len < full_len; + enqd_len += trb_buff_len) { field = TRB_TYPE(TRB_NORMAL); /* TRB buffer should not cross 64KB boundaries */ From 88716a93766b8f095cdef37a8e8f2c93aa233b21 Mon Sep 17 00:00:00 2001 From: Jim Lin <jilin@nvidia.com> Date: Tue, 16 Aug 2016 10:18:05 +0300 Subject: [PATCH 258/513] usb: xhci: Fix panic if disconnect After a device is disconnected, xhci_stop_device() will be invoked in xhci_bus_suspend(). Also the "disconnect" IRQ will have ISR to invoke xhci_free_virt_device() in this sequence. xhci_irq -> xhci_handle_event -> handle_cmd_completion -> xhci_handle_cmd_disable_slot -> xhci_free_virt_device If xhci->devs[slot_id] has been assigned to NULL in xhci_free_virt_device(), then virt_dev->eps[i].ring in xhci_stop_device() may point to an invlid address to cause kernel panic. virt_dev = xhci->devs[slot_id]; : if (virt_dev->eps[i].ring && virt_dev->eps[i].ring->dequeue) [] Unable to handle kernel paging request at virtual address 00001a68 [] pgd=ffffffc001430000 [] [00001a68] *pgd=000000013c807003, *pud=000000013c807003, *pmd=000000013c808003, *pte=0000000000000000 [] Internal error: Oops: 96000006 [#1] PREEMPT SMP [] CPU: 0 PID: 39 Comm: kworker/0:1 Tainted: G U [] Workqueue: pm pm_runtime_work [] task: ffffffc0bc0e0bc0 ti: ffffffc0bc0ec000 task.ti: ffffffc0bc0ec000 [] PC is at xhci_stop_device.constprop.11+0xb4/0x1a4 This issue is found when running with realtek ethernet device (0bda:8153). Signed-off-by: Jim Lin <jilin@nvidia.com> Cc: <stable@vger.kernel.org> Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/host/xhci-hub.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index d61fcc48099ed..730b9fd266852 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -386,6 +386,9 @@ static int xhci_stop_device(struct xhci_hcd *xhci, int slot_id, int suspend) ret = 0; virt_dev = xhci->devs[slot_id]; + if (!virt_dev) + return -ENODEV; + cmd = xhci_alloc_command(xhci, false, true, GFP_NOIO); if (!cmd) { xhci_dbg(xhci, "Couldn't allocate command structure.\n"); From f1f6d9a8b540df22b87a5bf6bc104edaade81f47 Mon Sep 17 00:00:00 2001 From: Mathias Nyman <mathias.nyman@linux.intel.com> Date: Tue, 16 Aug 2016 10:18:06 +0300 Subject: [PATCH 259/513] xhci: don't dereference a xhci member after removing xhci Remove the hcd after checking for the xhci last quirks, not before. This caused a hang on a Alpine Ridge xhci based maching which remove the whole xhci controller when unplugging the last usb device CC: <stable@vger.kernel.org> Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- drivers/usb/host/xhci-pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 4fd041bec332c..d7b0f97abbad6 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -314,11 +314,12 @@ static void xhci_pci_remove(struct pci_dev *dev) usb_remove_hcd(xhci->shared_hcd); usb_put_hcd(xhci->shared_hcd); } - usb_hcd_pci_remove(dev); /* Workaround for spurious wakeups at shutdown with HSW */ if (xhci->quirks & XHCI_SPURIOUS_WAKEUP) pci_set_power_state(dev, PCI_D3hot); + + usb_hcd_pci_remove(dev); } #ifdef CONFIG_PM From 172b1d6b5a9337eb8c1ec294b80e448e03a9ac17 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu <xiaodong.liu@intel.com> Date: Fri, 12 Aug 2016 06:24:42 -0400 Subject: [PATCH 260/513] crypto: sha256-mb - fix ctx pointer and digest copy 1. fix ctx pointer Use req_ctx which is the ctx for the next job that have been completed in the lanes instead of the first completed job rctx, whose completion could have been called and released. 2. fix digest copy Use XMM register to copy another 16 bytes sha256 digest instead of a regular register. Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- arch/x86/crypto/sha256-mb/sha256_mb.c | 4 ++-- arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/sha256-mb/sha256_mb.c b/arch/x86/crypto/sha256-mb/sha256_mb.c index 89fa85e8b10ca..6f97fb33ae216 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb.c +++ b/arch/x86/crypto/sha256-mb/sha256_mb.c @@ -485,10 +485,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index b691da981cd9c..a78a0694ddef3 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -265,13 +265,14 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) vpinsrd $1, _args_digest+1*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $2, _args_digest+2*32(state, idx, 4), %xmm0, %xmm0 vpinsrd $3, _args_digest+3*32(state, idx, 4), %xmm0, %xmm0 - movl _args_digest+4*32(state, idx, 4), tmp2_w + vmovd _args_digest(state , idx, 4) , %xmm0 vpinsrd $1, _args_digest+5*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $2, _args_digest+6*32(state, idx, 4), %xmm1, %xmm1 vpinsrd $3, _args_digest+7*32(state, idx, 4), %xmm1, %xmm1 - vmovdqu %xmm0, _result_digest(job_rax) - movl tmp2_w, _result_digest+1*16(job_rax) + vmovdqu %xmm0, _result_digest(job_rax) + offset = (_result_digest + 1*16) + vmovdqu %xmm1, offset(job_rax) pop %rbx From e67479b13ede47cc2f5beb5b51e67fdb30778ee8 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu <xiaodong.liu@intel.com> Date: Fri, 12 Aug 2016 06:28:31 -0400 Subject: [PATCH 261/513] crypto: sha512-mb - fix ctx pointer 1. fix ctx pointer Use req_ctx which is the ctx for the next job that have been completed in the lanes instead of the first completed job rctx, whose completion could have been called and released. Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- arch/x86/crypto/sha512-mb/sha512_mb.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index f4cf5b78fd360..d210174a52b0e 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -497,10 +497,10 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); else { local_bh_disable(); - rctx->complete(&req->base, ret); + req_ctx->complete(&req->base, ret); local_bh_enable(); } } From b0f12c61de013ecb0abe19d5e64bdab45989de5a Mon Sep 17 00:00:00 2001 From: Charles Keepax <ckeepax@opensource.wolfsonmicro.com> Date: Tue, 16 Aug 2016 11:24:46 +0100 Subject: [PATCH 262/513] ASoC: compress: Fix leak of a widget list in soc_compr_open_fe After we have called dpcm_path_get we should make sure to call dpcm_path_put on all error paths. This was not happening causing the allocated widget list to be leaked, this patch corrects this by adding a dpcm_path_put to the error path. Signed-off-by: Charles Keepax <ckeepax@opensource.wolfsonmicro.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/soc-compress.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sound/soc/soc-compress.c b/sound/soc/soc-compress.c index d2df46c14c682..bf7b52fce5974 100644 --- a/sound/soc/soc-compress.c +++ b/sound/soc/soc-compress.c @@ -121,7 +121,7 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream) dpcm_be_disconnect(fe, stream); fe->dpcm[stream].runtime = NULL; - goto fe_err; + goto path_err; } dpcm_clear_pending_state(fe, stream); @@ -136,6 +136,8 @@ static int soc_compr_open_fe(struct snd_compr_stream *cstream) return 0; +path_err: + dpcm_path_put(&list); fe_err: if (fe->dai_link->compr_ops && fe->dai_link->compr_ops->shutdown) fe->dai_link->compr_ops->shutdown(cstream); From 924d8696751c4b9e58263bc82efdafcf875596a6 Mon Sep 17 00:00:00 2001 From: James Morse <james.morse@arm.com> Date: Tue, 16 Aug 2016 10:46:38 +0100 Subject: [PATCH 263/513] PM / hibernate: Fix rtree_next_node() to avoid walking off list ends rtree_next_node() walks the linked list of leaf nodes to find the next block of pages in the struct memory_bitmap. If it walks off the end of the list of nodes, it walks the list of memory zones to find the next region of memory. If it walks off the end of the list of zones, it returns false. This leaves the struct bm_position's node and zone pointers pointing at their respective struct list_heads in struct mem_zone_bm_rtree. memory_bm_find_bit() uses struct bm_position's node and zone pointers to avoid walking lists and trees if the next bit appears in the same node/zone. It handles these values being stale. Swap rtree_next_node()s 'step then test' to 'test-next then step', this means if we reach the end of memory we return false and leave the node and zone pointers as they were. This fixes a panic on resume using AMD Seattle with 64K pages: [ 6.868732] Freezing user space processes ... (elapsed 0.000 seconds) done. [ 6.875753] Double checking all user space processes after OOM killer disable... (elapsed 0.000 seconds) [ 6.896453] PM: Using 3 thread(s) for decompression. [ 6.896453] PM: Loading and decompressing image data (5339 pages)... [ 7.318890] PM: Image loading progress: 0% [ 7.323395] Unable to handle kernel paging request at virtual address 00800040 [ 7.330611] pgd = ffff000008df0000 [ 7.334003] [00800040] *pgd=00000083fffe0003, *pud=00000083fffe0003, *pmd=00000083fffd0003, *pte=0000000000000000 [ 7.344266] Internal error: Oops: 96000005 [#1] PREEMPT SMP [ 7.349825] Modules linked in: [ 7.352871] CPU: 2 PID: 1 Comm: swapper/0 Tainted: G W I 4.8.0-rc1 #4737 [ 7.360512] Hardware name: AMD Overdrive/Supercharger/Default string, BIOS ROD1002C 04/08/2016 [ 7.369109] task: ffff8003c0220000 task.stack: ffff8003c0280000 [ 7.375020] PC is at set_bit+0x18/0x30 [ 7.378758] LR is at memory_bm_set_bit+0x24/0x30 [ 7.383362] pc : [<ffff00000835bbc8>] lr : [<ffff0000080faf18>] pstate: 60000045 [ 7.390743] sp : ffff8003c0283b00 [ 7.473551] [ 7.475031] Process swapper/0 (pid: 1, stack limit = 0xffff8003c0280020) [ 7.481718] Stack: (0xffff8003c0283b00 to 0xffff8003c0284000) [ 7.800075] Call trace: [ 7.887097] [<ffff00000835bbc8>] set_bit+0x18/0x30 [ 7.891876] [<ffff0000080fb038>] duplicate_memory_bitmap.constprop.38+0x54/0x70 [ 7.899172] [<ffff0000080fcc40>] snapshot_write_next+0x22c/0x47c [ 7.905166] [<ffff0000080fe1b4>] load_image_lzo+0x754/0xa88 [ 7.910725] [<ffff0000080ff0a8>] swsusp_read+0x144/0x230 [ 7.916025] [<ffff0000080fa338>] load_image_and_restore+0x58/0x90 [ 7.922105] [<ffff0000080fa660>] software_resume+0x2f0/0x338 [ 7.927752] [<ffff000008083350>] do_one_initcall+0x38/0x11c [ 7.933314] [<ffff000008b40cc0>] kernel_init_freeable+0x14c/0x1ec [ 7.939395] [<ffff0000087ce564>] kernel_init+0x10/0xfc [ 7.944520] [<ffff000008082e90>] ret_from_fork+0x10/0x40 [ 7.949820] Code: d2800022 8b400c21 f9800031 9ac32043 (c85f7c22) [ 7.955909] ---[ end trace 0024a5986e6ff323 ]--- [ 7.960529] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b Here struct mem_zone_bm_rtree's start_pfn has been returned instead of struct rtree_node's addr as the node/zone pointers are corrupt after we walked off the end of the lists during mark_unsafe_pages(). This behaviour was exposed by commit 6dbecfd345a6 ("PM / hibernate: Simplify mark_unsafe_pages()"), which caused mark_unsafe_pages() to call duplicate_memory_bitmap(), which uses memory_bm_find_bit() after walking off the end of the memory bitmap. Fixes: 3a20cb177961 (PM / Hibernate: Implement position keeping in radix tree) Signed-off-by: James Morse <james.morse@arm.com> [ rjw: Subject ] Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- kernel/power/snapshot.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d90df926b59fb..2ba18691fe765 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -835,9 +835,9 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) */ static bool rtree_next_node(struct memory_bitmap *bm) { - bm->cur.node = list_entry(bm->cur.node->list.next, - struct rtree_node, list); - if (&bm->cur.node->list != &bm->cur.zone->leaves) { + if (!list_is_last(&bm->cur.node->list, &bm->cur.zone->leaves)) { + bm->cur.node = list_entry(bm->cur.node->list.next, + struct rtree_node, list); bm->cur.node_pfn += BM_BITS_PER_BLOCK; bm->cur.node_bit = 0; touch_softlockup_watchdog(); @@ -845,9 +845,9 @@ static bool rtree_next_node(struct memory_bitmap *bm) } /* No more nodes, goto next zone */ - bm->cur.zone = list_entry(bm->cur.zone->list.next, + if (!list_is_last(&bm->cur.zone->list, &bm->zones)) { + bm->cur.zone = list_entry(bm->cur.zone->list.next, struct mem_zone_bm_rtree, list); - if (&bm->cur.zone->list != &bm->zones) { bm->cur.node = list_entry(bm->cur.zone->leaves.next, struct rtree_node, list); bm->cur.node_pfn = 0; From 3e103a65514c2947e53f3171b21255fbde8b60c6 Mon Sep 17 00:00:00 2001 From: Christoph Huber <c.huber@bct-electronic.com> Date: Mon, 15 Aug 2016 18:59:25 +0200 Subject: [PATCH 264/513] ASoC: atmel_ssc_dai: Don't unconditionally reset SSC on stream startup commit cbaadf0f90d6 ("ASoC: atmel_ssc_dai: refactor the startup and shutdown") refactored code such that the SSC is reset on every startup; this breaks duplex audio (e.g. first start audio playback, then start record, causing the playback to stop/hang) Fixes: cbaadf0f90d6 (ASoC: atmel_ssc_dai: refactor the startup and shutdown) Signed-off-by: Christoph Huber <c.huber@bct-electronic.com> Signed-off-by: Peter Meerwald-Stadler <p.meerwald@bct-electronic.com> Signed-off-by: Mark Brown <broonie@kernel.org> Cc: stable@vger.kernel.org --- sound/soc/atmel/atmel_ssc_dai.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c index 54c09acd3fed0..16e459aedffe4 100644 --- a/sound/soc/atmel/atmel_ssc_dai.c +++ b/sound/soc/atmel/atmel_ssc_dai.c @@ -299,8 +299,9 @@ static int atmel_ssc_startup(struct snd_pcm_substream *substream, clk_enable(ssc_p->ssc->clk); ssc_p->mck_rate = clk_get_rate(ssc_p->ssc->clk); - /* Reset the SSC to keep it at a clean status */ - ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST)); + /* Reset the SSC unless initialized to keep it in a clean state */ + if (!ssc_p->initialized) + ssc_writel(ssc_p->ssc->regs, CR, SSC_BIT(CR_SWRST)); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) { dir = 0; From cab0b8d50e9bbef62c04067072c953433a87a9ff Mon Sep 17 00:00:00 2001 From: Felix Kuehling <Felix.Kuehling@amd.com> Date: Fri, 12 Aug 2016 19:25:21 -0400 Subject: [PATCH 265/513] drm/amdgpu: Change GART offset to 64-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GART aperture size can be bigger than 4GB. Therefore the offset used in amdgpu_gart_bind and amdgpu_gart_unbind must be 64-bit. Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8ebc5f1eb4c0f..8c704c86597b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -646,9 +646,9 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev); void amdgpu_gart_fini(struct amdgpu_device *adev); -void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, +void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages); -int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, +int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint32_t flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 921bce2df0b07..0feea347f680b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -221,7 +221,7 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) * Unbinds the requested pages from the gart page table and * replaces them with the dummy page (all asics). */ -void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, +void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int pages) { unsigned t; @@ -268,7 +268,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, unsigned offset, * (all asics). * Returns 0 for success, -EINVAL for failure. */ -int amdgpu_gart_bind(struct amdgpu_device *adev, unsigned offset, +int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int pages, struct page **pagelist, dma_addr_t *dma_addr, uint32_t flags) { From 7afafc8a44bf0ab841b17d450b02aedb3a138985 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Tue, 16 Aug 2016 10:59:35 +0300 Subject: [PATCH 266/513] block: Fix secure erase Commit 288dab8a35a0 ("block: add a separate operation type for secure erase") split REQ_OP_SECURE_ERASE from REQ_OP_DISCARD without considering all the places REQ_OP_DISCARD was being used to mean either. Fix those. Signed-off-by: Adrian Hunter <adrian.hunter@intel.com> Fixes: 288dab8a35a0 ("block: add a separate operation type for secure erase") Signed-off-by: Jens Axboe <axboe@fb.com> --- block/bio.c | 21 +++++++++++---------- block/blk-merge.c | 33 +++++++++++++++++++-------------- block/elevator.c | 2 +- drivers/mmc/card/block.c | 1 + drivers/mmc/card/queue.c | 3 ++- drivers/mmc/card/queue.h | 4 +++- include/linux/bio.h | 10 ++++++++-- include/linux/blkdev.h | 6 ++++-- kernel/trace/blktrace.c | 2 +- 9 files changed, 50 insertions(+), 32 deletions(-) diff --git a/block/bio.c b/block/bio.c index f39477538fef0..aa7354088008b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -667,18 +667,19 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - if (bio_op(bio) == REQ_OP_DISCARD) - goto integrity_clone; - - if (bio_op(bio) == REQ_OP_WRITE_SAME) { + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: + break; + case REQ_OP_WRITE_SAME: bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; - goto integrity_clone; + break; + default: + bio_for_each_segment(bv, bio_src, iter) + bio->bi_io_vec[bio->bi_vcnt++] = bv; + break; } - bio_for_each_segment(bv, bio_src, iter) - bio->bi_io_vec[bio->bi_vcnt++] = bv; - -integrity_clone: if (bio_integrity(bio_src)) { int ret; @@ -1788,7 +1789,7 @@ struct bio *bio_split(struct bio *bio, int sectors, * Discards need a mutable bio_vec to accommodate the payload * required by the DSM TRIM and UNMAP commands. */ - if (bio_op(bio) == REQ_OP_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE) split = bio_clone_bioset(bio, gfp, bs); else split = bio_clone_fast(bio, gfp, bs); diff --git a/block/blk-merge.c b/block/blk-merge.c index 3eec75a9e91d8..72627e3cf91e1 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -172,12 +172,18 @@ void blk_queue_split(struct request_queue *q, struct bio **bio, struct bio *split, *res; unsigned nsegs; - if (bio_op(*bio) == REQ_OP_DISCARD) + switch (bio_op(*bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: split = blk_bio_discard_split(q, *bio, bs, &nsegs); - else if (bio_op(*bio) == REQ_OP_WRITE_SAME) + break; + case REQ_OP_WRITE_SAME: split = blk_bio_write_same_split(q, *bio, bs, &nsegs); - else + break; + default: split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs); + break; + } /* physical segments can be figured out during splitting */ res = split ? split : *bio; @@ -213,7 +219,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, * This should probably be returning 0, but blk_add_request_payload() * (Christoph!!!!) */ - if (bio_op(bio) == REQ_OP_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE) return 1; if (bio_op(bio) == REQ_OP_WRITE_SAME) @@ -385,7 +391,9 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, nsegs = 0; cluster = blk_queue_cluster(q); - if (bio_op(bio) == REQ_OP_DISCARD) { + switch (bio_op(bio)) { + case REQ_OP_DISCARD: + case REQ_OP_SECURE_ERASE: /* * This is a hack - drivers should be neither modifying the * biovec, nor relying on bi_vcnt - but because of @@ -393,19 +401,16 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, * a payload we need to set up here (thank you Christoph) and * bi_vcnt is really the only way of telling if we need to. */ - - if (bio->bi_vcnt) - goto single_segment; - - return 0; - } - - if (bio_op(bio) == REQ_OP_WRITE_SAME) { -single_segment: + if (!bio->bi_vcnt) + return 0; + /* Fall through */ + case REQ_OP_WRITE_SAME: *sg = sglist; bvec = bio_iovec(bio); sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); return 1; + default: + break; } for_each_bio(bio) diff --git a/block/elevator.c b/block/elevator.c index 7096c22041e7e..f7d973a56fd75 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -366,7 +366,7 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if ((req_op(rq) == REQ_OP_DISCARD) != (req_op(pos) == REQ_OP_DISCARD)) + if (req_op(rq) != req_op(pos)) break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 48a5dd740f3ba..82503e6f04b31 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1726,6 +1726,7 @@ static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req) break; if (req_op(next) == REQ_OP_DISCARD || + req_op(next) == REQ_OP_SECURE_ERASE || req_op(next) == REQ_OP_FLUSH) break; diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index bf14642a576a5..29578e98603d6 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -33,7 +33,8 @@ static int mmc_prep_request(struct request_queue *q, struct request *req) /* * We only like normal block requests and discards. */ - if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD) { + if (req->cmd_type != REQ_TYPE_FS && req_op(req) != REQ_OP_DISCARD && + req_op(req) != REQ_OP_SECURE_ERASE) { blk_dump_rq_flags(req, "MMC bad request"); return BLKPREP_KILL; } diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h index d62531124d542..fee5e12714651 100644 --- a/drivers/mmc/card/queue.h +++ b/drivers/mmc/card/queue.h @@ -4,7 +4,9 @@ static inline bool mmc_req_is_special(struct request *req) { return req && - (req_op(req) == REQ_OP_FLUSH || req_op(req) == REQ_OP_DISCARD); + (req_op(req) == REQ_OP_FLUSH || + req_op(req) == REQ_OP_DISCARD || + req_op(req) == REQ_OP_SECURE_ERASE); } struct request; diff --git a/include/linux/bio.h b/include/linux/bio.h index 59ffaa68b11bb..23ddf4b46a9b0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -71,7 +71,8 @@ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && - bio_op(bio) != REQ_OP_DISCARD) + bio_op(bio) != REQ_OP_DISCARD && + bio_op(bio) != REQ_OP_SECURE_ERASE) return true; return false; @@ -79,7 +80,9 @@ static inline bool bio_has_data(struct bio *bio) static inline bool bio_no_advance_iter(struct bio *bio) { - return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME; + return bio_op(bio) == REQ_OP_DISCARD || + bio_op(bio) == REQ_OP_SECURE_ERASE || + bio_op(bio) == REQ_OP_WRITE_SAME; } static inline bool bio_is_rw(struct bio *bio) @@ -199,6 +202,9 @@ static inline unsigned bio_segments(struct bio *bio) if (bio_op(bio) == REQ_OP_DISCARD) return 1; + if (bio_op(bio) == REQ_OP_SECURE_ERASE) + return 1; + if (bio_op(bio) == REQ_OP_WRITE_SAME) return 1; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c210b6a7bcf8..e79055c8b5779 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -882,7 +882,7 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { - if (unlikely(op == REQ_OP_DISCARD)) + if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE)) return min(q->limits.max_discard_sectors, UINT_MAX >> 9); if (unlikely(op == REQ_OP_WRITE_SAME)) @@ -913,7 +913,9 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq, if (unlikely(rq->cmd_type != REQ_TYPE_FS)) return q->limits.max_hw_sectors; - if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD)) + if (!q->limits.chunk_sectors || + req_op(rq) == REQ_OP_DISCARD || + req_op(rq) == REQ_OP_SECURE_ERASE) return blk_queue_get_max_sectors(q, req_op(rq)); return min(blk_max_size_offset(q, offset), diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 7598e6ca817a8..dbafc5df03f3f 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -223,7 +223,7 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= MASK_TC_BIT(op_flags, META); what |= MASK_TC_BIT(op_flags, PREFLUSH); what |= MASK_TC_BIT(op_flags, FUA); - if (op == REQ_OP_DISCARD) + if (op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE) what |= BLK_TC_ACT(BLK_TC_DISCARD); if (op == REQ_OP_FLUSH) what |= BLK_TC_ACT(BLK_TC_FLUSH); From 286054a7a8674b256da16564d96772b88896eb35 Mon Sep 17 00:00:00 2001 From: Andre Przywara <andre.przywara@arm.com> Date: Tue, 16 Aug 2016 17:51:06 +0100 Subject: [PATCH 267/513] KVM: arm64: ITS: avoid re-mapping LPIs When a guest wants to map a device-ID/event-ID combination that is already mapped, we may end up in a situation where an LPI is never "put", thus never being freed. Since the GICv3 spec says that mapping an already mapped LPI is UNPREDICTABLE, lets just bail out early in this situation to avoid any potential leaks. Signed-off-by: Andre Przywara <andre.przywara@arm.com> Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/vgic/vgic-its.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index 9533080b47d3b..4660a7d04eeaf 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -731,7 +731,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, u32 device_id = its_cmd_get_deviceid(its_cmd); u32 event_id = its_cmd_get_id(its_cmd); u32 coll_id = its_cmd_get_collection(its_cmd); - struct its_itte *itte, *new_itte = NULL; + struct its_itte *itte; struct its_device *device; struct its_collection *collection, *new_coll = NULL; int lpi_nr; @@ -749,6 +749,10 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) return E_ITS_MAPTI_PHYSICALID_OOR; + /* If there is an existing mapping, behavior is UNPREDICTABLE. */ + if (find_itte(its, device_id, event_id)) + return 0; + collection = find_collection(its, coll_id); if (!collection) { int ret = vgic_its_alloc_collection(its, &collection, coll_id); @@ -757,20 +761,16 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, new_coll = collection; } - itte = find_itte(its, device_id, event_id); + itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); if (!itte) { - itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); - if (!itte) { - if (new_coll) - vgic_its_free_collection(its, coll_id); - return -ENOMEM; - } - - new_itte = itte; - itte->event_id = event_id; - list_add_tail(&itte->itte_list, &device->itt_head); + if (new_coll) + vgic_its_free_collection(its, coll_id); + return -ENOMEM; } + itte->event_id = event_id; + list_add_tail(&itte->itte_list, &device->itt_head); + itte->collection = collection; itte->lpi = lpi_nr; @@ -778,8 +778,7 @@ static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, if (IS_ERR(irq)) { if (new_coll) vgic_its_free_collection(its, coll_id); - if (new_itte) - its_free_itte(kvm, new_itte); + its_free_itte(kvm, itte); return PTR_ERR(irq); } itte->irq = irq; From 0066c8b6f4050d7c57f6379d6fd4535e2f267f17 Mon Sep 17 00:00:00 2001 From: Kshitiz Gupta <kshitiz.gupta@ni.com> Date: Sat, 16 Jul 2016 02:23:45 -0500 Subject: [PATCH 268/513] igb: fix adjusting PTP timestamps for Tx/Rx latency Fix PHY delay compensation math in igb_ptp_tx_hwtstamp() and igb_ptp_rx_rgtstamp. Add PHY delay compensation in igb_ptp_rx_pktstamp(). In the IGB driver, there are two functions that retrieve timestamps received by the PHY - igb_ptp_rx_rgtstamp() and igb_ptp_rx_pktstamp(). The previous commit only changed igb_ptp_rx_rgtstamp(), and the change was incorrect. There are two instances in which PHY delay compensations should be made: - Before the packet transmission over the PHY, the latency between when the packet is timestamped and transmission of the packets, should be an add operation, but it is currently a subtract. - After the packets are received from the PHY, the latency between the receiving and timestamping of the packets should be a subtract operation, but it is currently an add. Signed-off-by: Kshitiz Gupta <kshitiz.gupta@ni.com> Fixes: 3f544d2 (igb: adjust ptp timestamps for tx/rx latency) Tested-by: Aaron Brown <aaron.f.brown@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/igb/igb_ptp.c | 26 +++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index e61b647f5f2a8..336c103ae374e 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -744,7 +744,8 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) } } - shhwtstamps.hwtstamp = ktime_sub_ns(shhwtstamps.hwtstamp, adjust); + shhwtstamps.hwtstamp = + ktime_add_ns(shhwtstamps.hwtstamp, adjust); skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); dev_kfree_skb_any(adapter->ptp_tx_skb); @@ -767,13 +768,32 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { __le64 *regval = (__le64 *)va; + struct igb_adapter *adapter = q_vector->adapter; + int adjust = 0; /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ - igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1])); + + /* adjust timestamp for the RX latency based on link speed */ + if (adapter->hw.mac.type == e1000_i210) { + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGB_I210_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGB_I210_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGB_I210_RX_LATENCY_1000; + break; + } + } + skb_hwtstamps(skb)->hwtstamp = + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); } /** @@ -825,7 +845,7 @@ void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, } } skb_hwtstamps(skb)->hwtstamp = - ktime_add_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); /* Update the last_rx_timestamp timer in order to enable watchdog check * for error case of latched timestamp on a dropped packet. From 0be5b96cd8400aeb4bf3f8c5e7f5efaa38ae5055 Mon Sep 17 00:00:00 2001 From: Jarod Wilson <jarod@redhat.com> Date: Tue, 26 Jul 2016 14:25:34 -0400 Subject: [PATCH 269/513] e1000e: factor out systim sanitization This is prepatory work for an expanding list of adapter families that have occasional ~10 hour clock jumps when being used for PTP. Factor out the sanitization function and convert to using a feature (bug) flag, per suggestion from Jesse Brandeburg. Littering functional code with device-specific checks is much messier than simply checking a flag, and having device-specific init set flags as needed. There are probably a number of other cases in the e1000e code that could/should be converted similarly. Suggested-by: Jesse Brandeburg <jesse.brandeburg@intel.com> Signed-off-by: Jarod Wilson <jarod@redhat.com> Tested-by: Aaron Brown <aaron.f.brown@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/e1000e/82571.c | 6 +- drivers/net/ethernet/intel/e1000e/e1000.h | 1 + drivers/net/ethernet/intel/e1000e/netdev.c | 66 +++++++++++++--------- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 7fd4d54599e45..6b03c8553e597 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -2032,7 +2032,8 @@ const struct e1000_info e1000_82574_info = { | FLAG2_DISABLE_ASPM_L0S | FLAG2_DISABLE_ASPM_L1 | FLAG2_NO_DISABLE_RX - | FLAG2_DMA_BURST, + | FLAG2_DMA_BURST + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, @@ -2053,7 +2054,8 @@ const struct e1000_info e1000_82583_info = { | FLAG_HAS_CTRLEXT_ON_LOAD, .flags2 = FLAG2_DISABLE_ASPM_L0S | FLAG2_DISABLE_ASPM_L1 - | FLAG2_NO_DISABLE_RX, + | FLAG2_NO_DISABLE_RX + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 32, .max_hw_frame_size = DEFAULT_JUMBO, .get_variants = e1000_get_variants_82571, diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index ef96cd11d6d2c..879cca47b0214 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -452,6 +452,7 @@ s32 e1000e_get_base_timinca(struct e1000_adapter *adapter, u32 *timinca); #define FLAG2_PCIM2PCI_ARBITER_WA BIT(11) #define FLAG2_DFLT_CRC_STRIPPING BIT(12) #define FLAG2_CHECK_RX_HWTSTAMP BIT(13) +#define FLAG2_CHECK_SYSTIM_OVERFLOW BIT(14) #define E1000_RX_DESC_PS(R, i) \ (&(((union e1000_rx_desc_packet_split *)((R).desc))[i])) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 02f443958f319..7017281ba2dc6 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4302,6 +4302,42 @@ void e1000e_reinit_locked(struct e1000_adapter *adapter) clear_bit(__E1000_RESETTING, &adapter->state); } +/** + * e1000e_sanitize_systim - sanitize raw cycle counter reads + * @hw: pointer to the HW structure + * @systim: cycle_t value read, sanitized and returned + * + * Errata for 82574/82583 possible bad bits read from SYSTIMH/L: + * check to see that the time is incrementing at a reasonable + * rate and is a multiple of incvalue. + **/ +static cycle_t e1000e_sanitize_systim(struct e1000_hw *hw, cycle_t systim) +{ + u64 time_delta, rem, temp; + cycle_t systim_next; + u32 incvalue; + int i; + + incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK; + for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) { + /* latch SYSTIMH on read of SYSTIML */ + systim_next = (cycle_t)er32(SYSTIML); + systim_next |= (cycle_t)er32(SYSTIMH) << 32; + + time_delta = systim_next - systim; + temp = time_delta; + /* VMWare users have seen incvalue of zero, don't div / 0 */ + rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); + + systim = systim_next; + + if ((time_delta < E1000_82574_SYSTIM_EPSILON) && (rem == 0)) + break; + } + + return systim; +} + /** * e1000e_cyclecounter_read - read raw cycle counter (used by time counter) * @cc: cyclecounter structure @@ -4312,7 +4348,7 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) cc); struct e1000_hw *hw = &adapter->hw; u32 systimel, systimeh; - cycle_t systim, systim_next; + cycle_t systim; /* SYSTIMH latching upon SYSTIML read does not work well. * This means that if SYSTIML overflows after we read it but before * we read SYSTIMH, the value of SYSTIMH has been incremented and we @@ -4335,33 +4371,9 @@ static cycle_t e1000e_cyclecounter_read(const struct cyclecounter *cc) systim = (cycle_t)systimel; systim |= (cycle_t)systimeh << 32; - if ((hw->mac.type == e1000_82574) || (hw->mac.type == e1000_82583)) { - u64 time_delta, rem, temp; - u32 incvalue; - int i; - - /* errata for 82574/82583 possible bad bits read from SYSTIMH/L - * check to see that the time is incrementing at a reasonable - * rate and is a multiple of incvalue - */ - incvalue = er32(TIMINCA) & E1000_TIMINCA_INCVALUE_MASK; - for (i = 0; i < E1000_MAX_82574_SYSTIM_REREADS; i++) { - /* latch SYSTIMH on read of SYSTIML */ - systim_next = (cycle_t)er32(SYSTIML); - systim_next |= (cycle_t)er32(SYSTIMH) << 32; - - time_delta = systim_next - systim; - temp = time_delta; - /* VMWare users have seen incvalue of zero, don't div / 0 */ - rem = incvalue ? do_div(temp, incvalue) : (time_delta != 0); - - systim = systim_next; + if (adapter->flags2 & FLAG2_CHECK_SYSTIM_OVERFLOW) + systim = e1000e_sanitize_systim(hw, systim); - if ((time_delta < E1000_82574_SYSTIM_EPSILON) && - (rem == 0)) - break; - } - } return systim; } From 8037dd60f45264c3fbbea4cc0cea5f2f0a774b5e Mon Sep 17 00:00:00 2001 From: Jarod Wilson <jarod@redhat.com> Date: Tue, 26 Jul 2016 14:25:35 -0400 Subject: [PATCH 270/513] e1000e: fix PTP on e1000_pch_lpt variants I've got reports that the Intel I-218V NIC in Intel NUC5i5RYH systems used as a PTP slave experiences random ~10 hour clock jumps, which are resolved if the same workaround for the 82574 and 82583 is employed, so set the appropriate flag2 in e1000_pch_lpt_info too. Reported-by: Rupesh Patel <rupatel@redhat.com> Signed-off-by: Jarod Wilson <jarod@redhat.com> Tested-by: Aaron Brown <aaron.f.brown@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index 3e11322d8d586..f3aaca743ea3f 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -5885,7 +5885,8 @@ const struct e1000_info e1000_pch_lpt_info = { | FLAG_HAS_JUMBO_FRAMES | FLAG_APME_IN_WUC, .flags2 = FLAG2_HAS_PHY_STATS - | FLAG2_HAS_EEE, + | FLAG2_HAS_EEE + | FLAG2_CHECK_SYSTIM_OVERFLOW, .pba = 26, .max_hw_frame_size = 9022, .get_variants = e1000_get_variants_ich8lan, From 67540759151aefafddade3e27c4671ab7b3d230f Mon Sep 17 00:00:00 2001 From: Milian Wolff <milian.wolff@kdab.com> Date: Tue, 16 Aug 2016 17:39:26 +0200 Subject: [PATCH 271/513] perf unwind: Use addr_location::addr instead of ip for entries This fixes the srcline translation for call chains of user space applications. Before we got: perf report --stdio --no-children -s sym,srcline -g address 8.92% [.] main mandelbrot.h:41 | |--3.70%--main +8390240 | __libc_start_main +139950056726769 | _start +8388650 | |--2.74%--main +8390189 | --2.08%--main +8390296 __libc_start_main +139950056726769 _start +8388650 7.59% [.] main complex:1326 | |--4.79%--main +8390203 | __libc_start_main +139950056726769 | _start +8388650 | --2.80%--main +8390219 7.12% [.] __muldc3 libgcc2.c:1945 | |--3.76%--__muldc3 +139950060519490 | main +8390224 | __libc_start_main +139950056726769 | _start +8388650 | --3.32%--__muldc3 +139950060519512 main +8390224 With this patch applied, we instead get: perf report --stdio --no-children -s sym,srcline -g address 8.92% [.] main mandelbrot.h:41 | |--3.70%--main mandelbrot.h:41 | __libc_start_main +241 | _start +4194346 | |--2.74%--main mandelbrot.h:41 | --2.08%--main mandelbrot.h:41 __libc_start_main +241 _start +4194346 7.59% [.] main complex:1326 | |--4.79%--main complex:1326 | __libc_start_main +241 | _start +4194346 | --2.80%--main complex:1326 7.12% [.] __muldc3 libgcc2.c:1945 | |--3.76%--__muldc3 libgcc2.c:1945 | main mandelbrot.h:39 | __libc_start_main +241 | _start +4194346 | --3.32%--__muldc3 libgcc2.c:1945 main mandelbrot.h:39 Suggested-and-Acked-by: Namhyung Kim <namhyung@kernel.org> Signed-off-by: Milian Wolff <milian.wolff@kdab.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> LPU-Reference: 20160816153926.11288-1-milian.wolff@kdab.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/unwind-libdw.c | 2 +- tools/perf/util/unwind-libunwind-local.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index cf5e250bc78e1..783a53fb7a4ed 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -66,7 +66,7 @@ static int entry(u64 ip, struct unwind_info *ui) if (__report_module(&al, ip, ui)) return -1; - e->ip = ip; + e->ip = al.addr; e->map = al.map; e->sym = al.sym; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 97c0f8fc55615..20c2e57439038 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -542,7 +542,7 @@ static int entry(u64 ip, struct thread *thread, thread__find_addr_location(thread, PERF_RECORD_MISC_USER, MAP__FUNCTION, ip, &al); - e.ip = ip; + e.ip = al.addr; e.map = al.map; e.sym = al.sym; From 4fe0d154880bb6eb833cbe84fa6f385f400f0b9c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Thu, 11 Aug 2016 07:11:04 -0700 Subject: [PATCH 272/513] PCI: Use positive flags in pci_alloc_irq_vectors() Instead of passing negative flags like PCI_IRQ_NOMSI to prevent use of certain interrupt types, pass positive flags like PCI_IRQ_LEGACY, PCI_IRQ_MSI, etc., to specify the acceptable interrupt types. This is based on a number of pending driver conversions that just happend to be a whole more obvious to read this way, and given that we have no users in the tree yet it can still easily be done. I've also added a PCI_IRQ_ALL_TYPES catchall to keep the case of accepting all interrupt types very simple. [bhelgaas: changelog, fix PCI_IRQ_AFFINITY doc typo, remove mention of PCI_IRQ_NOLEGACY] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Alexander Gordeev <agordeev@redhat.com> --- Documentation/PCI/MSI-HOWTO.txt | 24 ++++++++++-------------- drivers/pci/msi.c | 15 +++++++-------- include/linux/pci.h | 10 ++++++---- 3 files changed, 23 insertions(+), 26 deletions(-) diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt index c55df2911136c..cd9c9f6a7cd9e 100644 --- a/Documentation/PCI/MSI-HOWTO.txt +++ b/Documentation/PCI/MSI-HOWTO.txt @@ -94,14 +94,11 @@ has a requirements for a minimum number of vectors the driver can pass a min_vecs argument set to this limit, and the PCI core will return -ENOSPC if it can't meet the minimum number of vectors. -The flags argument should normally be set to 0, but can be used to pass the -PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support -MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in -case the device does not support legacy interrupt lines. - -By default this function will spread the interrupts around the available -CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY -flag. +The flags argument is used to specify which type of interrupt can be used +by the device and the driver (PCI_IRQ_LEGACY, PCI_IRQ_MSI, PCI_IRQ_MSIX). +A convenient short-hand (PCI_IRQ_ALL_TYPES) is also available to ask for +any possible kind of interrupt. If the PCI_IRQ_AFFINITY flag is set, +pci_alloc_irq_vectors() will spread the interrupts around the available CPUs. To get the Linux IRQ numbers passed to request_irq() and free_irq() and the vectors, use the following function: @@ -131,7 +128,7 @@ larger than the number supported by the device it will automatically be capped to the supported limit, so there is no need to query the number of vectors supported beforehand: - nvec = pci_alloc_irq_vectors(pdev, 1, nvec, 0); + nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_ALL_TYPES) if (nvec < 0) goto out_err; @@ -140,7 +137,7 @@ interrupts it can request a particular number of interrupts by passing that number to pci_alloc_irq_vectors() function as both 'min_vecs' and 'max_vecs' parameters: - ret = pci_alloc_irq_vectors(pdev, nvec, nvec, 0); + ret = pci_alloc_irq_vectors(pdev, nvec, nvec, PCI_IRQ_ALL_TYPES); if (ret < 0) goto out_err; @@ -148,15 +145,14 @@ The most notorious example of the request type described above is enabling the single MSI mode for a device. It could be done by passing two 1s as 'min_vecs' and 'max_vecs': - ret = pci_alloc_irq_vectors(pdev, 1, 1, 0); + ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); if (ret < 0) goto out_err; Some devices might not support using legacy line interrupts, in which case -the PCI_IRQ_NOLEGACY flag can be used to fail the request if the platform -can't provide MSI or MSI-X interrupts: +the driver can specify that only MSI or MSI-X is acceptable: - nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_NOLEGACY); + nvec = pci_alloc_irq_vectors(pdev, 1, nvec, PCI_IRQ_MSI | PCI_IRQ_MSIX); if (nvec < 0) goto out_err; diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index a02981efdad57..9233e7f62f474 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1069,7 +1069,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, nvec = maxvec; for (;;) { - if (!(flags & PCI_IRQ_NOAFFINITY)) { + if (flags & PCI_IRQ_AFFINITY) { dev->irq_affinity = irq_create_affinity_mask(&nvec); if (nvec < minvec) return -ENOSPC; @@ -1105,7 +1105,7 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, **/ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) { - return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY); + return __pci_enable_msi_range(dev, minvec, maxvec, 0); } EXPORT_SYMBOL(pci_enable_msi_range); @@ -1120,7 +1120,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, return -ERANGE; for (;;) { - if (!(flags & PCI_IRQ_NOAFFINITY)) { + if (flags & PCI_IRQ_AFFINITY) { dev->irq_affinity = irq_create_affinity_mask(&nvec); if (nvec < minvec) return -ENOSPC; @@ -1160,8 +1160,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec) { - return __pci_enable_msix_range(dev, entries, minvec, maxvec, - PCI_IRQ_NOAFFINITY); + return __pci_enable_msix_range(dev, entries, minvec, maxvec, 0); } EXPORT_SYMBOL(pci_enable_msix_range); @@ -1187,21 +1186,21 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, { int vecs = -ENOSPC; - if (!(flags & PCI_IRQ_NOMSIX)) { + if (flags & PCI_IRQ_MSIX) { vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, flags); if (vecs > 0) return vecs; } - if (!(flags & PCI_IRQ_NOMSI)) { + if (flags & PCI_IRQ_MSI) { vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags); if (vecs > 0) return vecs; } /* use legacy irq if allowed */ - if (!(flags & PCI_IRQ_NOLEGACY) && min_vecs == 1) + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) return 1; return vecs; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 2599a980340f4..fbc1fa625c3ec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1251,10 +1251,12 @@ resource_size_t pcibios_iov_resource_alignment(struct pci_dev *dev, int resno); int pci_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags); -#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ -#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ -#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ -#define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */ +#define PCI_IRQ_LEGACY (1 << 0) /* allow legacy interrupts */ +#define PCI_IRQ_MSI (1 << 1) /* allow MSI interrupts */ +#define PCI_IRQ_MSIX (1 << 2) /* allow MSI-X interrupts */ +#define PCI_IRQ_AFFINITY (1 << 3) /* auto-assign affinity */ +#define PCI_IRQ_ALL_TYPES \ + (PCI_IRQ_LEGACY | PCI_IRQ_MSI | PCI_IRQ_MSIX) /* kmem_cache style wrapper around pci_alloc_consistent() */ From a4423287ec16e74c25de8ee3f261b1ea18c0af67 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen <heinzm@redhat.com> Date: Tue, 9 Aug 2016 14:56:14 +0200 Subject: [PATCH 273/513] dm raid: fix frozen recovery regression On LVM2 conversions via lvconvert(8), the target keeps mapped devices in frozen state when requesting RAID devices be resynchronized. This applies to e.g. adding legs to a raid1 device or taking over from raid0 to raid4 when the rebuild flag's set on the new raid1 legs or the added dedicated parity stripe. Also, fix frozen recovery for reshaping as well. Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-raid.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 1b9795d75ef89..07d17287fa4a9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -191,7 +191,6 @@ struct raid_dev { #define RT_FLAG_RS_BITMAP_LOADED 2 #define RT_FLAG_UPDATE_SBS 3 #define RT_FLAG_RESHAPE_RS 4 -#define RT_FLAG_KEEP_RS_FROZEN 5 /* Array elements of 64 bit needed for rebuild/failed disk bits */ #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -2579,7 +2578,6 @@ static int rs_prepare_reshape(struct raid_set *rs) } else { /* Process raid1 without delta_disks */ mddev->raid_disks = rs->raid_disks; - set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); reshape = false; } } else { @@ -2590,7 +2588,6 @@ static int rs_prepare_reshape(struct raid_set *rs) if (reshape) { set_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags); set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); - set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); } else if (mddev->raid_disks < rs->raid_disks) /* Create new superblocks and bitmaps, if any new disks */ set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); @@ -2902,7 +2899,6 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); - set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags); /* Takeover ain't recovery, so disable recovery */ rs_setup_recovery(rs, MaxSector); rs_set_new(rs); @@ -3624,8 +3620,7 @@ static void raid_resume(struct dm_target *ti) * retrieved from the superblock by the ctr because * the ongoing recovery/reshape will change it after read. */ - if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags)) - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->suspended) mddev_resume(mddev); From f60439bc21e3337429838e477903214f5bd8277f Mon Sep 17 00:00:00 2001 From: Alexander Duyck <alexander.h.duyck@intel.com> Date: Thu, 11 Aug 2016 14:51:56 -0700 Subject: [PATCH 274/513] ixgbe: Force VLNCTRL.VFE to be set in all VMDq paths When I was adding the code for enabling VLAN promiscuous mode with SR-IOV enabled I had inadvertently left the VLNCTRL.VFE bit unchanged as I has assumed there was code in another path that was setting it when we enabled SR-IOV. This wasn't the case and as a result we were just disabling VLAN filtering for all the VFs apparently. Also the previous patches were always clearing CFIEN which was always set to 0 by the hardware anyway so I am dropping the redundant bit clearing. Fixes: 16369564915a ("ixgbe: Add support for VLAN promiscuous with SR-IOV") Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 5418c69a74630..e0fdef85b6802 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4100,6 +4100,8 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; u32 vlnctrl, i; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: @@ -4112,8 +4114,7 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* fall through */ case ixgbe_mac_82598EB: /* legacy case, we can just disable VLAN filtering */ - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } @@ -4125,6 +4126,10 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4191,6 +4196,11 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) struct ixgbe_hw *hw = &adapter->hw; u32 vlnctrl, i; + /* Set VLAN filtering to enabled */ + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: @@ -4202,10 +4212,6 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) break; /* fall through */ case ixgbe_mac_82598EB: - vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - vlnctrl &= ~IXGBE_VLNCTRL_CFIEN; - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } From 31e10a41203dbc95e0c1e81ef49ad1773a50d4f9 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen <heinzm@redhat.com> Date: Wed, 10 Aug 2016 02:45:59 +0200 Subject: [PATCH 275/513] dm raid: fix restoring of failed devices regression 'lvchange --refresh RaidLV' causes a mapped device suspend/resume cycle aiming at device restore and resync after transient device failures. This failed because flag RT_FLAG_RS_RESUMED was always cleared in the suspend path, thus the device restore wasn't performed in the resume path. Solve by removing RT_FLAG_RS_RESUMED from the suspend path and resume unconditionally. Also, remove superfluous comment from raid_resume(). Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-raid.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 07d17287fa4a9..81ec772b1cc91 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3382,11 +3382,10 @@ static void raid_postsuspend(struct dm_target *ti) { struct raid_set *rs = ti->private; - if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) { - if (!rs->md.suspended) - mddev_suspend(&rs->md); - rs->md.ro = 1; - } + if (!rs->md.suspended) + mddev_suspend(&rs->md); + + rs->md.ro = 1; } static void attempt_restore_of_faulty_devices(struct raid_set *rs) @@ -3606,25 +3605,15 @@ static void raid_resume(struct dm_target *ti) * devices are reachable again. */ attempt_restore_of_faulty_devices(rs); - } else { - mddev->ro = 0; - mddev->in_sync = 0; + } - /* - * When passing in flags to the ctr, we expect userspace - * to reset them because they made it to the superblocks - * and reload the mapping anyway. - * - * -> only unfreeze recovery in case of a table reload or - * we'll have a bogus recovery/reshape position - * retrieved from the superblock by the ctr because - * the ongoing recovery/reshape will change it after read. - */ - clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + mddev->ro = 0; + mddev->in_sync = 0; - if (mddev->suspended) - mddev_resume(mddev); - } + clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); + + if (mddev->suspended) + mddev_resume(mddev); } static struct target_type raid_target = { From 3d951822be216d8c6fcfc8abf75e5ed307eeb646 Mon Sep 17 00:00:00 2001 From: Alexander Duyck <alexander.h.duyck@intel.com> Date: Fri, 12 Aug 2016 09:53:39 -0700 Subject: [PATCH 276/513] ixgbe: Re-enable ability to toggle VLAN filtering Back when I submitted the GSO code I messed up and dropped the support for disabling the VLAN tag filtering via the feature bit. This patch re-enables the use of the NETIF_F_HW_VLAN_CTAG_FILTER to enable/disable the VLAN filtering independent of toggling promiscuous mode. Fixes: b83e30104b ("ixgbe/ixgbevf: Add support for GSO partial") Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e0fdef85b6802..ee57a89252bbc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9523,6 +9523,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /* copy netdev features into list of user selectable features */ netdev->hw_features |= netdev->features | + NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_RXALL | From a3c06a389751192fdcbcdd8bba57bdb856eafe68 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen <heinzm@redhat.com> Date: Tue, 9 Aug 2016 14:55:35 +0200 Subject: [PATCH 277/513] dm raid: enhance attempt_restore_of_faulty_devices() to support more devices attempt_restore_of_faulty_devices() is limited to 64 when it should support the new maximum of 253 when identifying any failed devices. It clears any revivable devices via an MD personality hot remove and add cylce to allow for their recovery. Address by using existing functions to retrieve and update all failed devices' bitfield members in the dm raid superblocks on all RAID devices and check for any devices to clear in it. Whilst on it, don't call attempt_restore_of_faulty_devices() for any MD personality not providing disk hot add/remove methods (i.e. raid0 now), because such personalities don't support reviving of failed disks. Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-raid.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 81ec772b1cc91..b1c251872800e 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3391,11 +3391,19 @@ static void raid_postsuspend(struct dm_target *ti) static void attempt_restore_of_faulty_devices(struct raid_set *rs) { int i; - uint64_t failed_devices, cleared_failed_devices = 0; + uint64_t cleared_failed_devices[DISKS_ARRAY_ELEMS]; unsigned long flags; + bool cleared = false; struct dm_raid_superblock *sb; + struct mddev *mddev = &rs->md; struct md_rdev *r; + /* RAID personalities have to provide hot add/remove methods or we need to bail out. */ + if (!mddev->pers || !mddev->pers->hot_add_disk || !mddev->pers->hot_remove_disk) + return; + + memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices)); + for (i = 0; i < rs->md.raid_disks; i++) { r = &rs->dev[i].rdev; if (test_bit(Faulty, &r->flags) && r->sb_page && @@ -3415,7 +3423,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) * ourselves. */ if ((r->raid_disk >= 0) && - (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0)) + (mddev->pers->hot_remove_disk(mddev, r) != 0)) /* Failed to revive this device, try next */ continue; @@ -3425,22 +3433,30 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs) clear_bit(Faulty, &r->flags); clear_bit(WriteErrorSeen, &r->flags); clear_bit(In_sync, &r->flags); - if (r->mddev->pers->hot_add_disk(r->mddev, r)) { + if (mddev->pers->hot_add_disk(mddev, r)) { r->raid_disk = -1; r->saved_raid_disk = -1; r->flags = flags; } else { r->recovery_offset = 0; - cleared_failed_devices |= 1 << i; + set_bit(i, (void *) cleared_failed_devices); + cleared = true; } } } - if (cleared_failed_devices) { + + /* If any failed devices could be cleared, update all sbs failed_devices bits */ + if (cleared) { + uint64_t failed_devices[DISKS_ARRAY_ELEMS]; + rdev_for_each(r, &rs->md) { sb = page_address(r->sb_page); - failed_devices = le64_to_cpu(sb->failed_devices); - failed_devices &= ~cleared_failed_devices; - sb->failed_devices = cpu_to_le64(failed_devices); + sb_retrieve_failed_devices(sb, failed_devices); + + for (i = 0; i < DISKS_ARRAY_ELEMS; i++) + failed_devices[i] &= ~cleared_failed_devices[i]; + + sb_update_failed_devices(sb, failed_devices); } } } From fbfe12c64f9650aa22f434dd9dd22df7ddf63221 Mon Sep 17 00:00:00 2001 From: Dave Ertman <david.m.ertman@intel.com> Date: Fri, 12 Aug 2016 09:56:32 -0700 Subject: [PATCH 278/513] i40e: check for and deal with non-contiguous TCs The i40e driver was causing a kernel panic when non-contiguous Traffic Classes, or Traffic Classes not starting with TC0, were configured on a link partner switch. i40e does not support non-contiguous TCs. To fix this, the patch changes the logic when determining the total number of TCs enabled. Before, this would use the highest TC number enabled and assume that all TCs below it were also enabled. Now, we create a bitmask of enabled TCs and scan it to determine not only the number of TCs, but also if the set of enabled TCs starts at zero and is contiguous. If not, then DCB is disabled by only returning one TC. Signed-off-by: Dave Ertman <david.m.ertman@intel.com> Tested-by: Andrew Bowers <andrewx.bowers@intel.com> Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com> --- drivers/net/ethernet/intel/i40e/i40e_main.c | 35 +++++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 81c99e1be708d..c6ac7a61812fb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4554,23 +4554,38 @@ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf) **/ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg) { + int i, tc_unused = 0; u8 num_tc = 0; - int i; + u8 ret = 0; /* Scan the ETS Config Priority Table to find * traffic class enabled for a given priority - * and use the traffic class index to get the - * number of traffic classes enabled + * and create a bitmask of enabled TCs */ - for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { - if (dcbcfg->etscfg.prioritytable[i] > num_tc) - num_tc = dcbcfg->etscfg.prioritytable[i]; - } + for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) + num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]); - /* Traffic class index starts from zero so - * increment to return the actual count + /* Now scan the bitmask to check for + * contiguous TCs starting with TC0 */ - return num_tc + 1; + for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { + if (num_tc & BIT(i)) { + if (!tc_unused) { + ret++; + } else { + pr_err("Non-contiguous TC - Disabling DCB\n"); + return 1; + } + } else { + tc_unused = 1; + } + } + + /* There is always at least TC0 */ + if (!ret) + ret = 1; + + return ret; } /** From 4dd3fd7197303739094183b139bae3142a3d55e6 Mon Sep 17 00:00:00 2001 From: Brian Foster <bfoster@redhat.com> Date: Wed, 17 Aug 2016 08:30:28 +1000 Subject: [PATCH 279/513] xfs: don't assert fail on non-async buffers on ioacct decrement The buffer I/O accounting mechanism tracks async buffers under I/O. As an optimization, the buffer I/O count is incremented only once on the first async I/O for a given hold cycle of a buffer and decremented once the buffer is released to the LRU (or freed). xfs_buf_ioacct_dec() has an ASSERT() check for an XBF_ASYNC buffer, but we have one or two corner cases where a buffer can be submitted for I/O multiple times via different methods in a single hold cycle. If an async I/O occurs first, the I/O count is incremented. If a sync I/O occurs before the hold count drops, XBF_ASYNC is cleared by the time the I/O count is decremented. Remove the async assert check from xfs_buf_ioacct_dec() as this is a perfectly valid scenario. For the purposes of I/O accounting, we really only care about the buffer async state at I/O submission time. Discovered-and-analyzed-by: Dave Chinner <david@fromorbit.com> Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/xfs_buf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 47a318ce82e0a..607cc29bba21e 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -115,7 +115,6 @@ xfs_buf_ioacct_dec( if (!(bp->b_flags & _XBF_IN_FLIGHT)) return; - ASSERT(bp->b_flags & XBF_ASYNC); bp->b_flags &= ~_XBF_IN_FLIGHT; percpu_counter_dec(&bp->b_target->bt_io_count); } From 0af32fb468b4a4434dd759d68611763658650b59 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 17 Aug 2016 08:30:28 +1000 Subject: [PATCH 280/513] xfs: fix bogus space reservation in xfs_iomap_write_allocate The space reservations was without an explaination in commit "Add error reporting calls in error paths that return EFSCORRUPTED" back in 2003. There is no reason to reserve disk blocks in the transaction when allocating blocks for delalloc space as we already reserved the space when creating the delalloc extent. With this fix we stop running out of the reserved pool in generic/229, which has happened for long time with small blocksize file systems, and has increased in severity with the new buffered write path. [ dchinner: we still need to pass the block reservation into xfs_bmapi_write() to ensure we don't deadlock during AG selection. See commit dbd5c8c ("xfs: pass total block res. as total xfs_bmapi_write() parameter") for more details on why this is necessary. ] Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/xfs_iomap.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2114d53df4331..4398932a7d1b3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -715,12 +715,16 @@ xfs_iomap_write_allocate( * is in the delayed allocation extent on which we sit * but before our buffer starts. */ - nimaps = 0; while (nimaps == 0) { nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK); - - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, nres, + /* + * We have already reserved space for the extent and any + * indirect blocks when creating the delalloc extent, + * there is no need to reserve space in this transaction + * again. + */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, XFS_TRANS_RESERVE, &tp); if (error) return error; From 8b2180b3bf0338625cab07da6543acb436df9c40 Mon Sep 17 00:00:00 2001 From: Dave Chinner <dchinner@redhat.com> Date: Wed, 17 Aug 2016 08:31:33 +1000 Subject: [PATCH 281/513] xfs: don't invalidate whole file on DAX read/write When we do DAX IO, we try to invalidate the entire page cache held on the file. This is incorrect as it will trash the entire mapping tree that now tracks dirty state in exceptional entries in the radix tree slots. What we are trying to do is remove cached pages (e.g from reads into holes) that sit in the radix tree over the range we are about to write to. Hence we should just limit the invalidation to the range we are about to overwrite. Reported-by: Jan Kara <jack@suse.cz> Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/xfs_file.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index ed95e5bb04e69..e612a02337108 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -741,9 +741,20 @@ xfs_file_dax_write( * page is inserted into the pagecache when we have to serve a write * fault on a hole. It should never be dirtied and can simply be * dropped from the pagecache once we get real data for the page. + * + * XXX: This is racy against mmap, and there's nothing we can do about + * it. dax_do_io() should really do this invalidation internally as + * it will know if we've allocated over a holei for this specific IO and + * if so it needs to update the mapping tree and invalidate existing + * PTEs over the newly allocated range. Remove this invalidation when + * dax_do_io() is fixed up. */ if (mapping->nrpages) { - ret = invalidate_inode_pages2(mapping); + loff_t end = iocb->ki_pos + iov_iter_count(from) - 1; + + ret = invalidate_inode_pages2_range(mapping, + iocb->ki_pos >> PAGE_SHIFT, + end >> PAGE_SHIFT); WARN_ON_ONCE(ret); } From f32866fdc9ed077c3bc7160f4f53d6a479201d46 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" <darrick.wong@oracle.com> Date: Wed, 17 Aug 2016 08:31:49 +1000 Subject: [PATCH 282/513] xfs: store rmapbt block count in the AGF Track the number of blocks used for the rmapbt in the AGF. When we get to the AG reservation code we need this counter to quickly make our reservation during mount. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/libxfs/xfs_alloc.c | 1 + fs/xfs/libxfs/xfs_format.h | 11 ++++++++--- fs/xfs/libxfs/xfs_rmap_btree.c | 6 ++++++ fs/xfs/xfs_fsops.c | 1 + 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 776ae2f325d1e..af1a72efa64cf 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2264,6 +2264,7 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_longest), offsetof(xfs_agf_t, agf_btreeblks), offsetof(xfs_agf_t, agf_uuid), + offsetof(xfs_agf_t, agf_rmap_blocks), sizeof(xfs_agf_t) }; diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index f814d42c73b2f..e6a8bea0f7bad 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -640,12 +640,15 @@ typedef struct xfs_agf { __be32 agf_btreeblks; /* # of blocks held in AGF btrees */ uuid_t agf_uuid; /* uuid of filesystem */ + __be32 agf_rmap_blocks; /* rmapbt blocks used */ + __be32 agf_padding; /* padding */ + /* * reserve some contiguous space for future logged fields before we add * the unlogged fields. This makes the range logging via flags and * structure offsets much simpler. */ - __be64 agf_spare64[16]; + __be64 agf_spare64[15]; /* unlogged fields, written during buffer writeback. */ __be64 agf_lsn; /* last write sequence */ @@ -670,7 +673,8 @@ typedef struct xfs_agf { #define XFS_AGF_LONGEST 0x00000400 #define XFS_AGF_BTREEBLKS 0x00000800 #define XFS_AGF_UUID 0x00001000 -#define XFS_AGF_NUM_BITS 13 +#define XFS_AGF_RMAP_BLOCKS 0x00002000 +#define XFS_AGF_NUM_BITS 14 #define XFS_AGF_ALL_BITS ((1 << XFS_AGF_NUM_BITS) - 1) #define XFS_AGF_FLAGS \ @@ -686,7 +690,8 @@ typedef struct xfs_agf { { XFS_AGF_FREEBLKS, "FREEBLKS" }, \ { XFS_AGF_LONGEST, "LONGEST" }, \ { XFS_AGF_BTREEBLKS, "BTREEBLKS" }, \ - { XFS_AGF_UUID, "UUID" } + { XFS_AGF_UUID, "UUID" }, \ + { XFS_AGF_RMAP_BLOCKS, "RMAP_BLOCKS" } /* disk block (xfs_daddr_t) in the AG */ #define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log)) diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index bc1faebc84ecc..17b8eeb34ac89 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -98,6 +98,8 @@ xfs_rmapbt_alloc_block( union xfs_btree_ptr *new, int *stat) { + struct xfs_buf *agbp = cur->bc_private.a.agbp; + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); int error; xfs_agblock_t bno; @@ -124,6 +126,8 @@ xfs_rmapbt_alloc_block( xfs_trans_agbtree_delta(cur->bc_tp, 1); new->s = cpu_to_be32(bno); + be32_add_cpu(&agf->agf_rmap_blocks, 1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 1; @@ -143,6 +147,8 @@ xfs_rmapbt_free_block( bno = xfs_daddr_to_agbno(cur->bc_mp, XFS_BUF_ADDR(bp)); trace_xfs_rmapbt_free_block(cur->bc_mp, cur->bc_private.a.agno, bno, 1); + be32_add_cpu(&agf->agf_rmap_blocks, -1); + xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); if (error) return error; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0f96847b90e11..0b7f986745c17 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -248,6 +248,7 @@ xfs_growfs_data_private( agf->agf_roots[XFS_BTNUM_RMAPi] = cpu_to_be32(XFS_RMAP_BLOCK(mp)); agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); + agf->agf_rmap_blocks = cpu_to_be32(1); } agf->agf_flfirst = cpu_to_be32(1); From 97dd8c9ee60c98f09be86a2e7e61b18f8d8ed4e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 17 Aug 2016 08:39:47 +1000 Subject: [PATCH 283/513] iomap: remove superflous mark_page_accessed from iomap_write_actor This catches up with commit 2457ae ("mm: non-atomically mark page accessed during page cache allocation where possible"), which moved the initial access marking into the pagecache allocator. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/iomap.c | 1 - 1 file changed, 1 deletion(-) diff --git a/fs/iomap.c b/fs/iomap.c index 48141b8eff5f4..f39c3181fa108 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -199,7 +199,6 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, pagefault_enable(); flush_dcache_page(page); - mark_page_accessed(page); status = iomap_write_end(inode, pos, bytes, copied, page); if (unlikely(status < 0)) From 274c887494cb248eb05f8180bda8298942d98625 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 17 Aug 2016 08:40:18 +1000 Subject: [PATCH 284/513] iomap: remove superflous pagefault_disable from iomap_write_actor iov_iter_copy_from_user_atomic disables page faults internally, no need to do it around the call. This also brings the iomap code in line with the original filemap version. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/iomap.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index f39c3181fa108..74712e25184ba 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -194,9 +194,7 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (mapping_writably_mapped(inode->i_mapping)) flush_dcache_page(page); - pagefault_disable(); copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes); - pagefault_enable(); flush_dcache_page(page); From 8896b8f60951ff0a8b7092f495a445714a1bddc4 Mon Sep 17 00:00:00 2001 From: Dave Chinner <dchinner@redhat.com> Date: Wed, 17 Aug 2016 08:41:10 +1000 Subject: [PATCH 285/513] iomap: fiemap should honor the FIEMAP_FLAG_SYNC flag The flag is checked as supported, but then we do an unconditional sync of the file, regardless of whether the flag is set or not. Make the sync conditional on having the FIEMAP_FLAG_SYNC flag set. Signed-off-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/iomap.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index 74712e25184ba..56c19e617a265 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -467,9 +467,11 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, if (ret) return ret; - ret = filemap_write_and_wait(inode->i_mapping); - if (ret) - return ret; + if (fi->fi_flags & FIEMAP_FLAG_SYNC) { + ret = filemap_write_and_wait(inode->i_mapping); + if (ret) + return ret; + } while (len > 0) { ret = iomap_apply(inode, start, len, 0, ops, &ctx, From ac2dc058bce81b83631ad5accb90b8f4abe613b7 Mon Sep 17 00:00:00 2001 From: Dave Chinner <dchinner@redhat.com> Date: Wed, 17 Aug 2016 08:41:34 +1000 Subject: [PATCH 286/513] iomap: prepare iomap_fiemap for attribute mappings By bassing through an -ENOENT, similar to the old XFS implementation of FIEMAP_FLAG_XATTR. Signed-off-by: Dave Chinner <dchinner@redhat.com> [hch: split from a larger patch] Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/iomap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/iomap.c b/fs/iomap.c index 56c19e617a265..d9d1f50c36aaa 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -476,6 +476,9 @@ int iomap_fiemap(struct inode *inode, struct fiemap_extent_info *fi, while (len > 0) { ret = iomap_apply(inode, start, len, 0, ops, &ctx, iomap_fiemap_actor); + /* inode with no (attribute) mapping will give ENOENT */ + if (ret == -ENOENT) + break; if (ret < 0) return ret; if (ret == 0) From f20ac7ab17fcf7046bd9bc4166faf9580f713afd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 17 Aug 2016 08:42:34 +1000 Subject: [PATCH 287/513] iomap: mark ->iomap_end as optional No need to implement it for read-only mappings. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/iomap.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/iomap.c b/fs/iomap.c index d9d1f50c36aaa..0342254646e35 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -84,8 +84,11 @@ iomap_apply(struct inode *inode, loff_t pos, loff_t length, unsigned flags, * Now the data has been copied, commit the range we've copied. This * should not fail unless the filesystem has had a fatal error. */ - ret = ops->iomap_end(inode, pos, length, written > 0 ? written : 0, - flags, &iomap); + if (ops->iomap_end) { + ret = ops->iomap_end(inode, pos, length, + written > 0 ? written : 0, + flags, &iomap); + } return written ? written : ret; } From b95a21271b30544a9fb992269d79ed1e1978e023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 17 Aug 2016 08:44:52 +1000 Subject: [PATCH 288/513] xfs: simplify xfs_file_iomap_begin We'll never get nimap == 0 for a successful return from xfs_bmapi_read, so don't try to handle it. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/xfs_iomap.c | 14 ++++---------- fs/xfs/xfs_trace.h | 1 - 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 2114d53df4331..1cce760399a2b 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1037,20 +1037,14 @@ xfs_file_iomap_begin( return error; trace_xfs_iomap_alloc(ip, offset, length, 0, &imap); - xfs_bmbt_to_iomap(ip, iomap, &imap); - } else if (nimaps) { - xfs_iunlock(ip, XFS_ILOCK_EXCL); - trace_xfs_iomap_found(ip, offset, length, 0, &imap); - xfs_bmbt_to_iomap(ip, iomap, &imap); } else { + ASSERT(nimaps); + xfs_iunlock(ip, XFS_ILOCK_EXCL); - trace_xfs_iomap_not_found(ip, offset, length, 0, &imap); - iomap->blkno = IOMAP_NULL_BLOCK; - iomap->type = IOMAP_HOLE; - iomap->offset = offset; - iomap->length = length; + trace_xfs_iomap_found(ip, offset, length, 0, &imap); } + xfs_bmbt_to_iomap(ip, iomap, &imap); return 0; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 551b7e26980c5..7e88bec3f3596 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1298,7 +1298,6 @@ DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); DEFINE_IOMAP_EVENT(xfs_iomap_alloc); DEFINE_IOMAP_EVENT(xfs_iomap_found); -DEFINE_IOMAP_EVENT(xfs_iomap_not_found); DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), From 1d4795e7bde075588c90df2175349bb2251802d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Wed, 17 Aug 2016 08:45:30 +1000 Subject: [PATCH 289/513] xfs: (re-)implement FIEMAP_FLAG_XATTR Use a special read-only iomap_ops implementation to support fiemap on the attr fork. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/xfs_iomap.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_iomap.h | 1 + fs/xfs/xfs_iops.c | 9 ++++++++- 3 files changed, 54 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 1cce760399a2b..697c8fd4200fc 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1106,3 +1106,48 @@ struct iomap_ops xfs_iomap_ops = { .iomap_begin = xfs_file_iomap_begin, .iomap_end = xfs_file_iomap_end, }; + +static int +xfs_xattr_iomap_begin( + struct inode *inode, + loff_t offset, + loff_t length, + unsigned flags, + struct iomap *iomap) +{ + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); + xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + length); + struct xfs_bmbt_irec imap; + int nimaps = 1, error = 0; + unsigned lockmode; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + lockmode = xfs_ilock_data_map_shared(ip); + + /* if there are no attribute fork or extents, return ENOENT */ + if (XFS_IFORK_Q(ip) || !ip->i_d.di_anextents) { + error = -ENOENT; + goto out_unlock; + } + + ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); + error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, + &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); +out_unlock: + xfs_iunlock(ip, lockmode); + + if (!error) { + ASSERT(nimaps); + xfs_bmbt_to_iomap(ip, iomap, &imap); + } + + return error; +} + +struct iomap_ops xfs_xattr_iomap_ops = { + .iomap_begin = xfs_xattr_iomap_begin, +}; diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index e066d045e2ffe..fb8aca3d69ab3 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -35,5 +35,6 @@ void xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *, struct xfs_bmbt_irec *); extern struct iomap_ops xfs_iomap_ops; +extern struct iomap_ops xfs_xattr_iomap_ops; #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ab820f84ed507..b24c3102fa93f 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -1009,7 +1009,14 @@ xfs_vn_fiemap( int error; xfs_ilock(XFS_I(inode), XFS_IOLOCK_SHARED); - error = iomap_fiemap(inode, fieinfo, start, length, &xfs_iomap_ops); + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { + fieinfo->fi_flags &= ~FIEMAP_FLAG_XATTR; + error = iomap_fiemap(inode, fieinfo, start, length, + &xfs_xattr_iomap_ops); + } else { + error = iomap_fiemap(inode, fieinfo, start, length, + &xfs_iomap_ops); + } xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); return error; From a03f1a6633144300ef4a3a33e95dfa11866f1299 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" <darrick.wong@oracle.com> Date: Wed, 17 Aug 2016 11:12:57 +1000 Subject: [PATCH 290/513] xfs: remove OWN_AG rmap when allocating a block from the AGFL When we're really tight on space, xfs_alloc_ag_vextent_small() can allocate a block from the AGFL and give it to the caller. Since the caller is never the AGFL-fixing method, we must remove the OWN_AG reverse mapping because it will clash with whatever rmap the caller wants to set up. This bug was discovered by running generic/299 repeatedly. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Reviewed-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Dave Chinner <david@fromorbit.com> --- fs/xfs/libxfs/xfs_alloc.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index af1a72efa64cf..3dd8f1d544987 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1582,6 +1582,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t *flenp, /* result length */ int *stat) /* status: 0-freelist, 1-normal/none */ { + struct xfs_owner_info oinfo; int error; xfs_agblock_t fbno; xfs_extlen_t flen; @@ -1624,6 +1625,18 @@ xfs_alloc_ag_vextent_small( error0); args->wasfromfl = 1; trace_xfs_alloc_small_freelist(args); + + /* + * If we're feeding an AGFL block to something that + * doesn't live in the free space, we need to clear + * out the OWN_AG rmap. + */ + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_AG); + error = xfs_rmap_free(args->tp, args->agbp, args->agno, + fbno, 1, &oinfo); + if (error) + goto error0; + *stat = 0; return 0; } From 1b856086813be9371929b6cc62045f9fd470f5a0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche <bart.vanassche@sandisk.com> Date: Tue, 16 Aug 2016 16:48:36 -0700 Subject: [PATCH 291/513] block: Fix race triggered by blk_set_queue_dying() blk_set_queue_dying() can be called while another thread is submitting I/O or changing queue flags, e.g. through dm_stop_queue(). Hence protect the QUEUE_FLAG_DYING flag change with locking. Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Mike Snitzer <snitzer@redhat.com> Cc: stable <stable@vger.kernel.org> Signed-off-by: Jens Axboe <axboe@fb.com> --- block/blk-core.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/block/blk-core.c b/block/blk-core.c index 999442ec46014..36c7ac328d8c1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -515,7 +515,9 @@ EXPORT_SYMBOL_GPL(blk_queue_bypass_end); void blk_set_queue_dying(struct request_queue *q) { - queue_flag_set_unlocked(QUEUE_FLAG_DYING, q); + spin_lock_irq(q->queue_lock); + queue_flag_set(QUEUE_FLAG_DYING, q); + spin_unlock_irq(q->queue_lock); if (q->mq_ops) blk_mq_wake_waiters(q); From 9ac715954682b23d293d910ad2697554171035e7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall <christoffer.dall@linaro.org> Date: Wed, 17 Aug 2016 10:46:10 +0200 Subject: [PATCH 292/513] KVM: arm/arm64: Change misleading use of is_error_pfn When converting a gfn to a pfn, we call gfn_to_pfn_prot, which returns various kinds of error values. It turns out that is_error_pfn() only returns true when the gfn was found in a memory slot and could somehow not be used, but it does not return true if the gfn does not belong to any memory slot. Change use to is_error_noslot_pfn() which covers both cases. Note: Since we already check for kvm_is_error_hva(hva) explicitly in the caller of this function while holding the kvm->srcu lock protecting the memory slots, this should never be a problem, but nevertheless this change is warranted as it shows the intention of the code. Reported-by: James Hogan <james.hogan@imgtec.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- arch/arm/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index bda27b6b1aa2b..29d0b23af2a9d 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1309,7 +1309,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, smp_rmb(); pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); - if (is_error_pfn(pfn)) + if (is_error_noslot_pfn(pfn)) return -EFAULT; if (kvm_is_device_pfn(pfn)) { From 674e70127069f3fd3c58fb0f94c60eb0f6567d78 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <marc.zyngier@arm.com> Date: Tue, 16 Aug 2016 15:03:01 +0100 Subject: [PATCH 293/513] arm64: Document workaround for Cortex-A72 erratum #853709 We already have a workaround for Cortex-A57 erratum #852523, but Cortex-A72 r0p0 to r0p2 do suffer from the same issue (known as erratum #853709). Let's document the fact that we already handle this. Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/kvm/hyp/switch.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 4da60b4639954..ccc60324e7388 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -53,6 +53,7 @@ stable kernels. | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | | ARM | Cortex-A57 | #852523 | N/A | | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| ARM | Cortex-A72 | #853709 | N/A | | ARM | MMU-500 | #841119,#826419 | N/A | | | | | | | Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 | diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ae7855f16ec29..5a84b45626032 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -256,7 +256,7 @@ static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) /* * We must restore the 32-bit state before the sysregs, thanks - * to Cortex-A57 erratum #852523. + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). */ __sysreg32_restore_state(vcpu); __sysreg_restore_guest_state(guest_ctxt); From cabdc5c59ab46a1ec5ea98c5ac4022111fbfd63a Mon Sep 17 00:00:00 2001 From: Marc Zyngier <marc.zyngier@arm.com> Date: Tue, 16 Aug 2016 15:03:02 +0100 Subject: [PATCH 294/513] KVM: arm/arm64: timer: Workaround misconfigured timer interrupt Similarily to f005bd7e3b84 ("clocksource/arm_arch_timer: Force per-CPU interrupt to be level-triggered"), make sure we can survive an interrupt that has been misconfigured as edge-triggered by forcing it to be level-triggered (active low is assumed, but the GIC doesn't really care whether this is high or low). Hopefully, the amount of shouting in the kernel log will convince the user to do something about their firmware. Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- virt/kvm/arm/arch_timer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4fde8c7dfcfe1..77e6ccf149011 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -33,6 +33,7 @@ static struct timecounter *timecounter; static struct workqueue_struct *wqueue; static unsigned int host_vtimer_irq; +static u32 host_vtimer_irq_flags; void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { @@ -365,7 +366,7 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) static void kvm_timer_init_interrupt(void *info) { - enable_percpu_irq(host_vtimer_irq, 0); + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); } int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) @@ -432,6 +433,14 @@ int kvm_timer_hyp_init(void) } host_vtimer_irq = info->virtual_irq; + host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); + if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && + host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for IRQ%d, assuming level low\n", + host_vtimer_irq); + host_vtimer_irq_flags = IRQF_TRIGGER_LOW; + } + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, "kvm guest timer", kvm_get_running_vcpus()); if (err) { From 1e12c4a9393b75a744aada2c8115434572698bc3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <marc.zyngier@arm.com> Date: Thu, 11 Aug 2016 14:19:42 +0100 Subject: [PATCH 295/513] genirq: Correctly configure the trigger on chained interrupts Commit 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") moved the trigger configuration call from the irqdomain mapping to the interrupt being actually requested. This patch failed to handle the case where we configure a chained interrupt, which doesn't get requested through the usual path. In order to solve this, let's call __irq_set_trigger just before starting the cascade interrupt. Special care must be taken to make the flow handler stick, as the .irq_set_type method could have reset it (it doesn't know we're dealing with a chained interrupt). Based on an initial patch by Jon Hunter. Fixes: 1e2a7d78499e ("irqdomain: Don't set type when mapping an IRQ") Reported-by: John Stultz <john.stultz@linaro.org> Reported-by: Linus Walleij <linus.walleij@linaro.org> Tested-by: John Stultz <john.stultz@linaro.org> Acked-by: Jon Hunter <jonathanh@nvidia.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> --- kernel/irq/chip.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index b4c1bc7c9ca20..637389088b3f9 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -820,6 +820,17 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle, desc->name = name; if (handle != handle_bad_irq && is_chained) { + /* + * We're about to start this interrupt immediately, + * hence the need to set the trigger configuration. + * But the .set_type callback may have overridden the + * flow handler, ignoring that we're dealing with a + * chained interrupt. Reset it immediately because we + * do know better. + */ + __irq_set_trigger(desc, irqd_get_trigger_type(&desc->irq_data)); + desc->handle_irq = handle; + irq_settings_set_noprobe(desc); irq_settings_set_norequest(desc); irq_settings_set_nothread(desc); From b63bebe2355cf2632a2979fd2982c88d080c44b6 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin <vladimir.murzin@arm.com> Date: Wed, 10 Aug 2016 10:49:42 +0100 Subject: [PATCH 296/513] arm64: KVM: remove misleading comment on pmu status Comment about how PMU access is handled is not relavant since v4.6 where proper PMU support was added in. Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- arch/arm64/kvm/sys_regs.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index b0b225ceca18f..af5ea86d1c195 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -823,14 +823,6 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, * Architected system registers. * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 * - * We could trap ID_DFR0 and tell the guest we don't support performance - * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was - * NAKed, so it will read the PMCR anyway. - * - * Therefore we tell the guest we have 0 counters. Unfortunately, we - * must always support PMCCNTR (the cycle counter): we just RAZ/WI for - * all PM registers, which doesn't crash the guest kernel at least. - * * Debug handling: We do trap most, if not all debug related system * registers. The implementation is good enough to ensure that a guest * can use these with minimal performance degradation. The drawback is From f7f6f2d94f0027242ddfd665289b107a873fde43 Mon Sep 17 00:00:00 2001 From: Vladimir Murzin <vladimir.murzin@arm.com> Date: Wed, 10 Aug 2016 10:49:43 +0100 Subject: [PATCH 297/513] arm64: KVM: report configured SRE value to 32-bit world After commit b34f2bc ("arm64: KVM: Make ICC_SRE_EL1 access return the configured SRE value") we report SRE value to 64-bit guest, but 32-bit one still handled as RAZ/WI what leads to funny promise we do not keep: "GICv3: GIC: unable to set SRE (disabled at EL2), panic ahead" Instead, return the actual value of the ICC_SRE_EL1 register that the guest should see. [ Tweaked commit message - Christoffer ] Signed-off-by: Vladimir Murzin <vladimir.murzin@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org> --- arch/arm64/kvm/sys_regs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index af5ea86d1c195..e51367d159d02 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1352,7 +1352,7 @@ static const struct sys_reg_desc cp15_regs[] = { { Op1( 0), CRn(10), CRm( 3), Op2( 1), access_vm_reg, NULL, c10_AMAIR1 }, /* ICC_SRE */ - { Op1( 0), CRn(12), CRm(12), Op2( 5), trap_raz_wi }, + { Op1( 0), CRn(12), CRm(12), Op2( 5), access_gic_sre }, { Op1( 0), CRn(13), CRm( 0), Op2( 1), access_vm_reg, NULL, c13_CID }, From 0e62fd836e4c2908cc1e32c68806529b4f859955 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT <gregory.clement@free-electrons.com> Date: Wed, 17 Aug 2016 12:21:33 +0200 Subject: [PATCH 298/513] clocksource/drivers/time-armada-370-xp: Fix the clock reference While converting the init function to return an error, the wrong clock was get. This leads to the wrong clock rate and slows down the kernel. For example, it affects typical boot time: - without fix: over 1 minute - with fix: 15 seconds Tested-by: Stefan Roese <sr@denx.de> Tested-by: Ralph Sennhauser <ralph.sennhauser@gmail.com> Signed-off-by: Gregory CLEMENT <gregory.clement@free-electrons.com> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Fixes: 12549e27c63c ("clocksource/drivers/time-armada-370-xp: Convert init function to return error") Link: http://lkml.kernel.org/r/1471429296-9053-1-git-send-email-daniel.lezcano@linaro.org [ Refined the changelog. ] Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/clocksource/time-armada-370-xp.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/clocksource/time-armada-370-xp.c b/drivers/clocksource/time-armada-370-xp.c index 719b478d136e7..3c39e6f459714 100644 --- a/drivers/clocksource/time-armada-370-xp.c +++ b/drivers/clocksource/time-armada-370-xp.c @@ -338,7 +338,6 @@ static int __init armada_xp_timer_init(struct device_node *np) struct clk *clk = of_clk_get_by_name(np, "fixed"); int ret; - clk = of_clk_get(np, 0); if (IS_ERR(clk)) { pr_err("Failed to get clock"); return PTR_ERR(clk); From 16c8eba0fe01e03317f48868105103a8f5938e85 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Wed, 17 Aug 2016 12:21:34 +0200 Subject: [PATCH 299/513] clocksource/drivers/kona: Fix get_counter() error handling I could not figure out why, but GCC cannot prove that the kona_timer_init() function always initializes its two outputs, and we get a warning for the use of the 'lsw' variable later, which is obviously correct. drivers/clocksource/bcm_kona_timer.c: In function 'kona_timer_init': drivers/clocksource/bcm_kona_timer.c:119:13: error: 'lsw' may be used uninitialized in this function [-Werror=maybe-uninitialized] Slightly reordering the loop makes the warning disappear, after it becomes more obvious to the compiler that the loop is always entered on the first iteration. As pointed out by Ray Jui, there is a related problem in the way we deal with the loop running into the limit, as we just keep going there with an invalid counter data, so instead we now propagate a -ETIMEDOUT result to the caller. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Ray Jui <ray.jui@broadcom.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: bcm-kernel-feedback-list@broadcom.com Link: http://lkml.kernel.org/r/1471429296-9053-2-git-send-email-daniel.lezcano@linaro.org Link: https://patchwork.kernel.org/patch/9174261/ Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/clocksource/bcm_kona_timer.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/clocksource/bcm_kona_timer.c b/drivers/clocksource/bcm_kona_timer.c index 7e3fd375a6278..92f6e4deee74a 100644 --- a/drivers/clocksource/bcm_kona_timer.c +++ b/drivers/clocksource/bcm_kona_timer.c @@ -66,10 +66,10 @@ static void kona_timer_disable_and_clear(void __iomem *base) } -static void +static int kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw) { - int loop_limit = 4; + int loop_limit = 3; /* * Read 64-bit free running counter @@ -83,18 +83,19 @@ kona_timer_get_counter(void __iomem *timer_base, uint32_t *msw, uint32_t *lsw) * if new hi-word is equal to previously read hi-word then stop. */ - while (--loop_limit) { + do { *msw = readl(timer_base + KONA_GPTIMER_STCHI_OFFSET); *lsw = readl(timer_base + KONA_GPTIMER_STCLO_OFFSET); if (*msw == readl(timer_base + KONA_GPTIMER_STCHI_OFFSET)) break; - } + } while (--loop_limit); if (!loop_limit) { pr_err("bcm_kona_timer: getting counter failed.\n"); pr_err(" Timer will be impacted\n"); + return -ETIMEDOUT; } - return; + return 0; } static int kona_timer_set_next_event(unsigned long clc, @@ -112,8 +113,11 @@ static int kona_timer_set_next_event(unsigned long clc, uint32_t lsw, msw; uint32_t reg; + int ret; - kona_timer_get_counter(timers.tmr_regs, &msw, &lsw); + ret = kona_timer_get_counter(timers.tmr_regs, &msw, &lsw); + if (ret) + return ret; /* Load the "next" event tick value */ writel(lsw + clc, timers.tmr_regs + KONA_GPTIMER_STCM0_OFFSET); From be5769e2061ac40b32daa83e28e1c4aac7133511 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker <paul.gortmaker@windriver.com> Date: Wed, 17 Aug 2016 12:21:35 +0200 Subject: [PATCH 300/513] clocksource/drivers/mips-gic-timer: Make gic_clocksource_of_init() return int In commit: d8152bf85d2c0 ("clocksource/drivers/mips-gic-timer: Convert init function to return error") several return values were added to a void function resulting in the following warnings: clocksource/mips-gic-timer.c: In function 'gic_clocksource_of_init': clocksource/mips-gic-timer.c:175:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:183:4: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:190:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:195:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:200:3: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c:211:2: warning: 'return' with a value, in function returning void [enabled by default] clocksource/mips-gic-timer.c: At top level: clocksource/mips-gic-timer.c:213:1: warning: comparison of distinct pointer types lacks a cast [enabled by default] clocksource/mips-gic-timer.c: In function 'gic_clocksource_of_init': clocksource/mips-gic-timer.c:183:18: warning: ignoring return value of 'PTR_ERR', declared with attribute warn_unused_result [-Wunused-result] Given that the addition of the return values was intentional, it seems that the conversion of the containing function from void to int was simply overlooked. Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-mips@linux-mips.org Fixes: d8152bf85d2c ("clocksource/drivers/mips-gic-timer: Convert init function to return error") Link: http://lkml.kernel.org/r/1471429296-9053-3-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/clocksource/mips-gic-timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index d91e8725917c6..b4b3ab5a11ad0 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -164,7 +164,7 @@ void __init gic_clocksource_init(unsigned int frequency) gic_start_count(); } -static void __init gic_clocksource_of_init(struct device_node *node) +static int __init gic_clocksource_of_init(struct device_node *node) { struct clk *clk; int ret; From 9e7d9367e6a29e4cac85aa6df199f760dfa39c8a Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen <heinzm@redhat.com> Date: Wed, 17 Aug 2016 15:36:44 +0200 Subject: [PATCH 301/513] dm raid: support raid0 with missing metadata devices The raid0 MD personality does not start a raid0 array with any of its data devices missing. dm-raid was removing data/metadata device pairs unconditionally if it failed to read a superblock off the respective metadata device of such pair, resulting in failure to start arrays with the raid0 personality. Avoid removing any data/metadata device pairs in case of raid0 (e.g. lvm2 segment type 'raid0_meta') thus allowing MD to start the array. Also, avoid region size validation for raid0. Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-raid.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index b1c251872800e..8abde6b8cedc4 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -860,6 +860,9 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size) { unsigned long min_region_size = rs->ti->len / (1 << 21); + if (rs_is_raid0(rs)) + return 0; + if (!region_size) { /* * Choose a reasonable default. All figures in sectors. @@ -929,6 +932,8 @@ static int validate_raid_redundancy(struct raid_set *rs) rebuild_cnt++; switch (rs->raid_type->level) { + case 0: + break; case 1: if (rebuild_cnt >= rs->md.raid_disks) goto too_many; @@ -2334,6 +2339,13 @@ static int analyse_superblocks(struct dm_target *ti, struct raid_set *rs) case 0: break; default: + /* + * We have to keep any raid0 data/metadata device pairs or + * the MD raid0 personality will fail to start the array. + */ + if (rs_is_raid0(rs)) + continue; + dev = container_of(rdev, struct raid_dev, rdev); if (dev->meta_dev) dm_put_device(ti, dev->meta_dev); From bfe6c8a89e03f52bccf922f74ced8fe959e7fd36 Mon Sep 17 00:00:00 2001 From: Catalin Marinas <catalin.marinas@arm.com> Date: Mon, 15 Aug 2016 16:33:10 +0100 Subject: [PATCH 302/513] arm64: Fix NUMA build error when !CONFIG_ACPI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since asm/acpi.h is only included by linux/acpi.h when CONFIG_ACPI is enabled, disabling the latter leads to the following build error on arm64: arch/arm64/mm/numa.c: In function ‘arm64_numa_init’: arch/arm64/mm/numa.c:395:24: error: ‘arm64_acpi_numa_init’ undeclared (first use in this function) if (!acpi_disabled && !numa_init(arm64_acpi_numa_init)) This patch include the asm/acpi.h explicitly in arch/arm64/mm/numa.c for the arm64_acpi_numa_init() definition. Fixes: d8b47fca8c23 ("arm64, ACPI, NUMA: NUMA support based on SRAT and SLIT") Reviewed-by: Hanjun Guo <hanjun.guo@linaro.org> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/mm/numa.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index c7fe3ec70774a..5bb15eab6f00e 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -23,6 +23,8 @@ #include <linux/module.h> #include <linux/of.h> +#include <asm/acpi.h> + struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; EXPORT_SYMBOL(node_data); nodemask_t numa_nodes_parsed __initdata; From bc9f3d7788a88d080a30599bde68f383daf8f8a5 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel <ard.biesheuvel@linaro.org> Date: Wed, 17 Aug 2016 17:54:41 +0200 Subject: [PATCH 303/513] arm64: kernel: avoid literal load of virtual address with MMU off Literal loads of virtual addresses are subject to runtime relocation when CONFIG_RELOCATABLE=y, and given that the relocation routines run with the MMU and caches enabled, literal loads of relocated values performed with the MMU off are not guaranteed to return the latest value unless the memory covering the literal is cleaned to the PoC explicitly. So defer the literal load until after the MMU has been enabled, just like we do for primary_switch() and secondary_switch() in head.S. Fixes: 1e48ef7fcc37 ("arm64: add support for building vmlinux as a relocatable PIE binary") Cc: <stable@vger.kernel.org> # 4.6+ Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/kernel/sleep.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 9a3aec97ac091..ccf79d849e0a8 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -101,12 +101,20 @@ ENTRY(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly /* enable the MMU early - so we can access sleep_save_stash by va */ adr_l lr, __enable_mmu /* __cpu_setup will return here */ - ldr x27, =_cpu_resume /* __enable_mmu will branch here */ + adr_l x27, _resume_switched /* __enable_mmu will branch here */ adrp x25, idmap_pg_dir adrp x26, swapper_pg_dir b __cpu_setup ENDPROC(cpu_resume) + .pushsection ".idmap.text", "ax" +_resume_switched: + ldr x8, =_cpu_resume + br x8 +ENDPROC(_resume_switched) + .ltorg + .popsection + ENTRY(_cpu_resume) mrs x1, mpidr_el1 adrp x8, mpidr_hash From 059e232089e45b0befc9933d31209c225e08b426 Mon Sep 17 00:00:00 2001 From: Marc Zyngier <marc.zyngier@arm.com> Date: Tue, 9 Aug 2016 07:50:44 +0100 Subject: [PATCH 304/513] irqchip/gic: Allow self-SGIs for SMP on UP configurations On systems where a single CPU is present, the GIC may not support having SGIs delivered to a target list. In that case, we use the self-SGI mechanism to allow the interrupt to be delivered locally. Tested-by: Fabio Estevam <fabio.estevam@nxp.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> --- drivers/irqchip/irq-gic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index c2cab572c5111..390fac59c6bca 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -769,6 +769,13 @@ static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq) int cpu; unsigned long flags, map = 0; + if (unlikely(nr_cpu_ids == 1)) { + /* Only one CPU? let's do a self-IPI... */ + writel_relaxed(2 << 24 | irq, + gic_data_dist_base(&gic_data[0]) + GIC_DIST_SOFTINT); + return; + } + raw_spin_lock_irqsave(&irq_controller_lock, flags); /* Convert our logical CPU mask into a physical one. */ From ccd9432a5c85f35df7b491a1b701560d247466a5 Mon Sep 17 00:00:00 2001 From: Sudeep Holla <sudeep.holla@arm.com> Date: Wed, 17 Aug 2016 13:49:19 +0100 Subject: [PATCH 305/513] irqchip/gicv3: Remove disabling redistributor and group1 non-secure interrupts As per the GICv3 specification, to power down a processor using GICv3 and allow automatic power-on if an interrupt must be sent to a processor, software must set Enable to zero for all interrupt groups(by writing to GICC_CTLR or ICC_IGRPEN{0,1}_EL1/3 as appropriate. When commit 3708d52fc6bb ("irqchip: gic-v3: Implement CPU PM notifier") was introduced there were no firmware implementations(in particular PSCI) handling this. Linux kernel may not be aware of the CPU power state details and might fail to identify the power states that require quiescing the CPU interface. Even if it can be aware of those details, it can't determine which CPU power state have been triggered at the platform level and how the power control is implemented. This patch make disabling redistributor and group1 non-secure interrupts in the power down path and re-enabling of redistributor in the power-up path conditional. It will be handled in the kernel if and only if the non-secure accesses are permitted to access and modify control registers. It is left to the platform implementation otherwise. Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Jason Cooper <jason@lakedaemon.net> Tested-by: Christopher Covington <cov@codeaurora.org> Signed-off-by: Sudeep Holla <sudeep.holla@arm.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> --- drivers/irqchip/irq-gic-v3.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 6fc56c3466b00..ede5672ab34d4 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -667,13 +667,20 @@ static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, #endif #ifdef CONFIG_CPU_PM +/* Check whether it's single security state view */ +static bool gic_dist_security_disabled(void) +{ + return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; +} + static int gic_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd, void *v) { if (cmd == CPU_PM_EXIT) { - gic_enable_redist(true); + if (gic_dist_security_disabled()) + gic_enable_redist(true); gic_cpu_sys_reg_init(); - } else if (cmd == CPU_PM_ENTER) { + } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { gic_write_grpen1(0); gic_enable_redist(false); } From 7a35583ec5b64f17559c9de8d7c47f7360e40362 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:28 +0200 Subject: [PATCH 306/513] mlxsw: spectrum: Don't return upon error in removal path When removing a VLAN filter from the device we shouldn't return upon the first error we encounter, as otherwise we'll have resources that will never be freed nor used. Instead, we should keep trying to free as much resources as possible in a best effort mode. Remove the error message as well, since we already get these from the EMAD transaction code. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index e1b8f62ccaedd..76b53ed9e8e71 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1010,7 +1010,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; struct mlxsw_sp_fid *f; - int err; /* VLAN 0 is removed from HW filter when device goes down, but * it is reserved in our case, so simply return. @@ -1019,23 +1018,12 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, return 0; mlxsw_sp_vport = mlxsw_sp_port_vport_find(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_warn(dev, "VID=%d does not exist\n", vid); + if (WARN_ON(!mlxsw_sp_vport)) return 0; - } - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); - return err; - } + mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - if (err) { - netdev_err(dev, "Failed to enable learning for VID=%d\n", vid); - return err; - } + mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); /* Drop FID reference. If this was the last reference the * resources will be freed. @@ -1048,13 +1036,8 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, * transition all active 802.1Q bridge VLANs to use VID to FID * mappings and set port's mode to VLAN mode. */ - if (list_is_singular(&mlxsw_sp_port->vports_list)) { - err = mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to VLAN mode\n"); - return err; - } - } + if (list_is_singular(&mlxsw_sp_port->vports_list)) + mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); mlxsw_sp_port_vport_destroy(mlxsw_sp_vport); From fa66d7e3fea7504e241e9004998af2c71814da18 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:29 +0200 Subject: [PATCH 307/513] mlxsw: spectrum: Remove redundant errors from the code Currently, when device configuration fails we emit errors to the kernel log despite the fact we already get these from the EMAD transaction layer, so remove them. In addition to being unnecessary, removing these error messages will allow us to reuse mlxsw_sp_port_add_vid() to create the PVID vPort before registering the netdevice. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 21 +++++-------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 76b53ed9e8e71..a9281afce0442 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -956,16 +956,12 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, if (!vid) return 0; - if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) { - netdev_warn(dev, "VID=%d already configured\n", vid); + if (mlxsw_sp_port_vport_find(mlxsw_sp_port, vid)) return 0; - } mlxsw_sp_vport = mlxsw_sp_port_vport_create(mlxsw_sp_port, vid); - if (!mlxsw_sp_vport) { - netdev_err(dev, "Failed to create vPort for VID=%d\n", vid); + if (!mlxsw_sp_vport) return -ENOMEM; - } /* When adding the first VLAN interface on a bridged port we need to * transition all the active 802.1Q bridge VLANs to use explicit @@ -973,24 +969,17 @@ int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, */ if (list_is_singular(&mlxsw_sp_port->vports_list)) { err = mlxsw_sp_port_vp_mode_trans(mlxsw_sp_port); - if (err) { - netdev_err(dev, "Failed to set to Virtual mode\n"); + if (err) goto err_port_vp_mode_trans; - } } err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) { - netdev_err(dev, "Failed to disable learning for VID=%d\n", vid); + if (err) goto err_port_vid_learning_set; - } err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); - if (err) { - netdev_err(dev, "Failed to set VLAN membership for VID=%d\n", - vid); + if (err) goto err_port_add_vid; - } return 0; From 05978481e77e47b0bcb1767d3783fa0e5a18f399 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:30 +0200 Subject: [PATCH 308/513] mlxsw: spectrum: Create PVID vPort before registering netdevice After registering a netdevice it's possible for user space applications to configure an IP address on it. From the driver's perspective, this means a router interface (RIF) should be created for the PVID vPort. Therefore, we must create the PVID vPort before registering the netdevice. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- .../net/ethernet/mellanox/mlxsw/spectrum.c | 33 ++++++++++----- .../net/ethernet/mellanox/mlxsw/spectrum.h | 2 - .../mellanox/mlxsw/spectrum_switchdev.c | 40 +++---------------- 3 files changed, 29 insertions(+), 46 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index a9281afce0442..0677f3f8e274b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -942,8 +942,8 @@ static void mlxsw_sp_port_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_vport) kfree(mlxsw_sp_vport); } -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid) +static int mlxsw_sp_port_add_vid(struct net_device *dev, + __be16 __always_unused proto, u16 vid) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port *mlxsw_sp_vport; @@ -2048,6 +2048,18 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port) return 0; } +static int mlxsw_sp_port_pvid_vport_create(struct mlxsw_sp_port *mlxsw_sp_port) +{ + mlxsw_sp_port->pvid = 1; + + return mlxsw_sp_port_add_vid(mlxsw_sp_port->dev, 0, 1); +} + +static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port) +{ + return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); +} + static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, bool split, u8 module, u8 width, u8 lane) { @@ -2163,6 +2175,13 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_port_dcb_init; } + err = mlxsw_sp_port_pvid_vport_create(mlxsw_sp_port); + if (err) { + dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to create PVID vPort\n", + mlxsw_sp_port->local_port); + goto err_port_pvid_vport_create; + } + mlxsw_sp_port_switchdev_init(mlxsw_sp_port); err = register_netdev(dev); if (err) { @@ -2180,18 +2199,14 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } - err = mlxsw_sp_port_vlan_init(mlxsw_sp_port); - if (err) - goto err_port_vlan_init; - mlxsw_sp->ports[local_port] = mlxsw_sp_port; return 0; -err_port_vlan_init: - mlxsw_core_port_fini(&mlxsw_sp_port->core_port); err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); +err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); err_port_dcb_init: err_port_ets_init: @@ -2221,8 +2236,8 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp->ports[local_port] = NULL; mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index f69aa37d15218..ab3feb81bd432 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -536,8 +536,6 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid); int mlxsw_sp_port_vlan_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool is_member, bool untagged); -int mlxsw_sp_port_add_vid(struct net_device *dev, __be16 __always_unused proto, - u16 vid); int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, bool set); void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index a1ad5e6bdfa8a..b5e864d2c554b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -997,13 +997,13 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, } static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid_begin, u16 vid_end, bool init) + u16 vid_begin, u16 vid_end) { struct net_device *dev = mlxsw_sp_port->dev; u16 vid, pvid; int err; - if (!init && !mlxsw_sp_port->bridged) + if (!mlxsw_sp_port->bridged) return -EINVAL; err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, @@ -1014,9 +1014,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, return err; } - if (init) - goto out; - pvid = mlxsw_sp_port->pvid; if (pvid >= vid_begin && pvid <= vid_end) { err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); @@ -1028,7 +1025,6 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); -out: /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); @@ -1039,8 +1035,8 @@ static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, static int mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, const struct switchdev_obj_port_vlan *vlan) { - return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, - vlan->vid_begin, vlan->vid_end, false); + return __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vlan->vid_begin, + vlan->vid_end); } void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) @@ -1048,7 +1044,7 @@ void mlxsw_sp_port_active_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port) u16 vid; for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) - __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid, false); + __mlxsw_sp_port_vlans_del(mlxsw_sp_port, vid, vid); } static int @@ -1546,32 +1542,6 @@ void mlxsw_sp_switchdev_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_fdb_fini(mlxsw_sp); } -int mlxsw_sp_port_vlan_init(struct mlxsw_sp_port *mlxsw_sp_port) -{ - struct net_device *dev = mlxsw_sp_port->dev; - int err; - - /* Allow only untagged packets to ingress and tag them internally - * with VID 1. - */ - mlxsw_sp_port->pvid = 1; - err = __mlxsw_sp_port_vlans_del(mlxsw_sp_port, 0, VLAN_N_VID - 1, - true); - if (err) { - netdev_err(dev, "Unable to init VLANs\n"); - return err; - } - - /* Add implicit VLAN interface in the device, so that untagged - * packets will be classified to the default vFID. - */ - err = mlxsw_sp_port_add_vid(dev, 0, 1); - if (err) - netdev_err(dev, "Failed to configure default vFID\n"); - - return err; -} - void mlxsw_sp_port_switchdev_init(struct mlxsw_sp_port *mlxsw_sp_port) { mlxsw_sp_port->dev->switchdev_ops = &mlxsw_sp_port_switchdev_ops; From 2f25844c233650b2abb92b66b3d0af7d73b5f88f Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:31 +0200 Subject: [PATCH 309/513] mlxsw: spectrum: Mark port as active before registering it Commit bbf2a4757b30 ("mlxsw: spectrum: Initialize ports at the end of init sequence") moved ports initialization to the end of the init sequence, which means ports are the first to be removed during fini. Since the FDB delayed work is still active when ports are removed it's possible for it to process FDB notifications of inactive ports, resulting in a warning message. Fix that by marking ports as inactive only after unregistering them. The NETDEV_UNREGISTER event will invoke bridge's driver port removal sequence that will cause the FDB (and FDB notifications) to be flushed. Fixes: bbf2a4757b30 ("mlxsw: spectrum: Initialize ports at the end of init sequence") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 0677f3f8e274b..12681db79a688 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2183,6 +2183,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, } mlxsw_sp_port_switchdev_init(mlxsw_sp_port); + mlxsw_sp->ports[local_port] = mlxsw_sp_port; err = register_netdev(dev); if (err) { dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to register netdev\n", @@ -2199,12 +2200,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } - mlxsw_sp->ports[local_port] = mlxsw_sp_port; return 0; err_core_port_init: unregister_netdev(dev); err_register_netdev: + mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -2233,9 +2234,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; - mlxsw_sp->ports[local_port] = NULL; mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ + mlxsw_sp->ports[local_port] = NULL; mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); From c20b80187a93b4fcc1c5c46fc8a436df1f17636d Mon Sep 17 00:00:00 2001 From: Elad Raz <eladr@mellanox.com> Date: Wed, 17 Aug 2016 16:39:32 +0200 Subject: [PATCH 310/513] mlxsw: spectrum: Add missing packet traps Add the following traps: 1) MTU Error: Trap packets whose size is bigger than the egress RIF's MTU. If DF bit isn't set, traffic will continue to be routed in slow path. 2) TTL Error: Trap packets whose TTL expired. This allows traceroute to work properly. 3) OSPF packets. Fixes: 7b27ce7bb9cd ("mlxsw: spectrum: Add traps needed for router implementation") Signed-off-by: Elad Raz <eladr@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 15 +++++++++++++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 3 +++ 2 files changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 12681db79a688..6b69c8ac7b881 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2648,6 +2648,21 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_ARPUC, }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_MTUERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_TTLERROR, + }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_OSPF, + }, { .func = mlxsw_sp_rx_listener_func, .local_port = MLXSW_PORT_DONT_CARE, diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 470d7696e9fed..9508e0a49e885 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -56,6 +56,9 @@ enum { MLXSW_TRAP_ID_IGMP_V3_REPORT = 0x34, MLXSW_TRAP_ID_ARPBC = 0x50, MLXSW_TRAP_ID_ARPUC = 0x51, + MLXSW_TRAP_ID_MTUERROR = 0x52, + MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90, From a94a614fa2bd32848a67f8261228e193beb826ca Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:33 +0200 Subject: [PATCH 311/513] mlxsw: spectrum: Trap loop-backed packets One of the conditions to generate an ICMP Redirect Message is that "the packet is being forwarded out the same physical interface that it was received from" (RFC 1812). Therefore, we need to be able to trap such packets and let the kernel decide what to do with them. For each RIF, enable the loop-back filter, which will raise the LBERROR trap whenever the ingress RIF equals the egress RIF. Fixes: 99724c18fc66 ("mlxsw: spectrum: Introduce support for router interfaces") Reported-by: Ilan Tayari <ilant@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 10 ++++++++++ drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 5 +++++ drivers/net/ethernet/mellanox/mlxsw/trap.h | 1 + 3 files changed, 16 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 7ca9201f7dcbf..a1bd36cc9488a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -3383,6 +3383,15 @@ MLXSW_ITEM32(reg, ritr, ipv4_fe, 0x04, 29, 1); */ MLXSW_ITEM32(reg, ritr, ipv6_fe, 0x04, 28, 1); +/* reg_ritr_lb_en + * Loop-back filter enable for unicast packets. + * If the flag is set then loop-back filter for unicast packets is + * implemented on the RIF. Multicast packets are always subject to + * loop-back filtering. + * Access: RW + */ +MLXSW_ITEM32(reg, ritr, lb_en, 0x04, 24, 1); + /* reg_ritr_virtual_router * Virtual router ID associated with the router interface. * Access: RW @@ -3484,6 +3493,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable, mlxsw_reg_ritr_op_set(payload, op); mlxsw_reg_ritr_rif_set(payload, rif); mlxsw_reg_ritr_ipv4_fe_set(payload, 1); + mlxsw_reg_ritr_lb_en_set(payload, 1); mlxsw_reg_ritr_mtu_set(payload, mtu); mlxsw_reg_ritr_if_mac_memcpy_to(payload, mac); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6b69c8ac7b881..8137daadb25b5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2658,6 +2658,11 @@ static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { .local_port = MLXSW_PORT_DONT_CARE, .trap_id = MLXSW_TRAP_ID_TTLERROR, }, + { + .func = mlxsw_sp_rx_listener_func, + .local_port = MLXSW_PORT_DONT_CARE, + .trap_id = MLXSW_TRAP_ID_LBERROR, + }, { .func = mlxsw_sp_rx_listener_func, .local_port = MLXSW_PORT_DONT_CARE, diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h index 9508e0a49e885..ed8e301864004 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/trap.h +++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h @@ -58,6 +58,7 @@ enum { MLXSW_TRAP_ID_ARPUC = 0x51, MLXSW_TRAP_ID_MTUERROR = 0x52, MLXSW_TRAP_ID_TTLERROR = 0x53, + MLXSW_TRAP_ID_LBERROR = 0x54, MLXSW_TRAP_ID_OSPF = 0x55, MLXSW_TRAP_ID_IP2ME = 0x5F, MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70, From 0e7df1a290abbcf3ecf697bbbbd4549c9a113db0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko <jiri@mellanox.com> Date: Wed, 17 Aug 2016 16:39:34 +0200 Subject: [PATCH 312/513] mlxsw: reg: Fix missing op field fill-up Ralue pack function needs to set op, otherwise it is 0 for add always. Fixes: d5a1c749d22 ("mlxsw: reg: Add Router Algorithmic LPM Unicast Entry Register definition") Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index a1bd36cc9488a..1721098eef131 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4010,6 +4010,7 @@ static inline void mlxsw_reg_ralue_pack(char *payload, { MLXSW_REG_ZERO(ralue, payload); mlxsw_reg_ralue_protocol_set(payload, protocol); + mlxsw_reg_ralue_op_set(payload, op); mlxsw_reg_ralue_virtual_router_set(payload, virtual_router); mlxsw_reg_ralue_prefix_len_set(payload, prefix_len); mlxsw_reg_ralue_entry_type_set(payload, From 0583272d91f0f4e21f1eb666786286863185be7e Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:35 +0200 Subject: [PATCH 313/513] mlxsw: spectrum: Add missing rollbacks in error path While going over the code I noticed we are missing two rollbacks in the port's creation error path. Add them and adjust the place of one of them in the port's removal sequence so that both are symmetric. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 8137daadb25b5..1fe9fbdc91021 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -2206,6 +2206,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, unregister_netdev(dev); err_register_netdev: mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); err_port_pvid_vport_create: mlxsw_sp_port_dcb_fini(mlxsw_sp_port); @@ -2215,6 +2216,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, err_port_admin_status_set: err_port_mtu_set: err_port_speed_by_width_set: + mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: err_port_system_port_mapping_set: err_dev_addr_init: @@ -2237,9 +2239,9 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; + mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_pvid_vport_destroy(mlxsw_sp_port); mlxsw_sp_port_dcb_fini(mlxsw_sp_port); - mlxsw_sp_port_switchdev_fini(mlxsw_sp_port); mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); From 8168287b5dfac9227a549ed87f5e111b7005e8a4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:36 +0200 Subject: [PATCH 314/513] mlxsw: spectrum: Unmap 802.1Q FID before destroying it Before destroying the 802.1Q FID we should first remove the VID-to-FID mapping. This makes mlxsw_sp_fid_destroy() symmetric with regards to mlxsw_sp_fid_create(). Fixes: 14d39461b3f4 ("mlxsw: spectrum: Use per-FID struct for the VLAN-aware bridge") Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index b5e864d2c554b..d1b59cdfacc13 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -450,6 +450,8 @@ void mlxsw_sp_fid_destroy(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fid *f) kfree(f); + mlxsw_sp_fid_map(mlxsw_sp, fid, false); + mlxsw_sp_fid_op(mlxsw_sp, fid, false); } From 9ffcc3725f096e9f0d985f738b0e44214cd72d93 Mon Sep 17 00:00:00 2001 From: Ido Schimmel <idosch@mellanox.com> Date: Wed, 17 Aug 2016 16:39:37 +0200 Subject: [PATCH 315/513] mlxsw: spectrum: Allow packets to be trapped from any PG When packets enter the device they are classified to a priority group (PG) buffer based on their PCP value. After their egress port and traffic class are determined they are moved to the switch's shared buffer and await transmission, if: (Ingress{Port}.Usage < Thres && Ingress{Port,PG}.Usage < Thres && Egress{Port}.Usage < Thres && Egress{Port,TC}.Usage < Thres) || (Ingress{Port}.Usage < Min || Ingress{Port,PG} < Min || Egress{Port}.Usage < Min || Egress{Port,TC}.Usage < Min) Packets scheduled to transmission through CPU port (trapped to CPU) use traffic class 7, which has a zero maximum and minimum quotas. However, when such packets arrive from PG 0 they are admitted to the shared buffer as PG 0 has a non-zero minimum quota. Allow all packets to be trapped to the CPU - regardless of the PG they were classified to - by assigning a 10KB minimum quota for CPU port and TC7. Fixes: 8e8dfe9fdf06 ("mlxsw: spectrum: Add IEEE 802.1Qaz ETS support") Reported-by: Tamir Winetroub <tamirw@mellanox.com> Tested-by: Tamir Winetroub <tamirw@mellanox.com> Signed-off-by: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Jiri Pirko <jiri@mellanox.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 074cdda7b6f33..237418a0e6e08 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -330,7 +330,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = { MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, - MLXSW_SP_CPU_PORT_SB_CM, + MLXSW_SP_SB_CM(MLXSW_SP_BYTES_TO_CELLS(10000), 0, 0), MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, MLXSW_SP_CPU_PORT_SB_CM, From f07fed82ad7994cc4d779ee79bdf7a46848c4b8f Mon Sep 17 00:00:00 2001 From: WANG Cong <xiyou.wangcong@gmail.com> Date: Sat, 13 Aug 2016 22:34:56 -0700 Subject: [PATCH 316/513] net_sched: remove the leftover cleanup_a() After refactoring tc_action into tcf_common, we no longer need to cleanup temporary "actions" in list, they are permanently stored in the hashtable. Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common") Reported-by: Jamal Hadi Salim <jhs@mojatatu.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_api.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e4a5f2607ffa2..cce6986d5bc29 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -754,16 +754,6 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, return ERR_PTR(err); } -static void cleanup_a(struct list_head *actions) -{ - struct tc_action *a, *tmp; - - list_for_each_entry_safe(a, tmp, actions, list) { - list_del(&a->list); - kfree(a); - } -} - static int tca_action_flush(struct net *net, struct nlattr *nla, struct nlmsghdr *n, u32 portid) { @@ -905,7 +895,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, return ret; } err: - cleanup_a(&actions); + tcf_action_destroy(&actions, 0); return ret; } @@ -942,15 +932,9 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, ret = tcf_action_init(net, nla, NULL, NULL, ovr, 0, &actions); if (ret) - goto done; + return ret; - /* dump then free all the actions after update; inserted policy - * stays intact - */ - ret = tcf_add_notify(net, n, &actions, portid); - cleanup_a(&actions); -done: - return ret; + return tcf_add_notify(net, n, &actions, portid); } static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n) From 824a7e8863b3eb283343f891b11a782b4ec0d0de Mon Sep 17 00:00:00 2001 From: WANG Cong <xiyou.wangcong@gmail.com> Date: Sat, 13 Aug 2016 22:34:57 -0700 Subject: [PATCH 317/513] net_sched: remove an unnecessary list_del() This list_del() for tc action is not needed actually, because we only use this list to chain bulk operations, therefore should not be carried for latter operations. Fixes: ec0595cc4495 ("net_sched: get rid of struct tcf_common") Cc: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_api.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cce6986d5bc29..b4c7be38b6320 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -64,7 +64,6 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); - list_del(&p->list); tcf_hash_destroy(p->hinfo, p); ret = ACT_P_DELETED; } From 0c23c3e705691cfb99c94f2760df2b456fe45194 Mon Sep 17 00:00:00 2001 From: WANG Cong <xiyou.wangcong@gmail.com> Date: Sat, 13 Aug 2016 22:34:58 -0700 Subject: [PATCH 318/513] net_sched: fix a typo in tc_for_each_action() It is harmless because all users pass 'a' to this macro. Fixes: 00175aec941e ("net/sched: Macro instead of CONFIG_NET_CLS_ACT ifdef") Cc: Amir Vadai <amir@vadai.me> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/act_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 41e6a24a44b9b..f53ee9d315c15 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -193,7 +193,7 @@ int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); (list_empty(&(_exts)->actions)) #define tc_for_each_action(_a, _exts) \ - list_for_each_entry(a, &(_exts)->actions, list) + list_for_each_entry(_a, &(_exts)->actions, list) #define tc_single_action(_exts) \ (list_is_singular(&(_exts)->actions)) From 2734437ef3c2943090d0914bf91caa6b30451615 Mon Sep 17 00:00:00 2001 From: WANG Cong <xiyou.wangcong@gmail.com> Date: Sat, 13 Aug 2016 22:34:59 -0700 Subject: [PATCH 319/513] net_sched: move tc offload macros to pkt_cls.h struct tcf_exts belongs to filters, should not be visible to plain tc actions. Cc: Ido Schimmel <idosch@mellanox.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/act_api.h | 19 +++---------------- include/net/pkt_cls.h | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index f53ee9d315c15..870332ff61eb0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -189,30 +189,17 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); -#define tc_no_actions(_exts) \ - (list_empty(&(_exts)->actions)) - -#define tc_for_each_action(_a, _exts) \ - list_for_each_entry(_a, &(_exts)->actions, list) - -#define tc_single_action(_exts) \ - (list_is_singular(&(_exts)->actions)) +#endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 lastuse) { +#ifdef CONFIG_NET_CLS_ACT if (!a->ops->stats_update) return; a->ops->stats_update(a, bytes, packets, lastuse); +#endif } -#else /* CONFIG_NET_CLS_ACT */ - -#define tc_no_actions(_exts) true -#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) -#define tc_single_action(_exts) false -#define tcf_action_stats_update(a, bytes, packets, lastuse) - -#endif /* CONFIG_NET_CLS_ACT */ #endif diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 6f8d65342d3ad..00dd5c4c1d0a7 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -130,6 +130,25 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, return 0; } +#ifdef CONFIG_NET_CLS_ACT + +#define tc_no_actions(_exts) \ + (list_empty(&(_exts)->actions)) + +#define tc_for_each_action(_a, _exts) \ + list_for_each_entry(_a, &(_exts)->actions, list) + +#define tc_single_action(_exts) \ + (list_is_singular(&(_exts)->actions)) + +#else /* CONFIG_NET_CLS_ACT */ + +#define tc_no_actions(_exts) true +#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) +#define tc_single_action(_exts) false + +#endif /* CONFIG_NET_CLS_ACT */ + int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr); From 22dc13c837c33207548c8ee5116b64e2930a6e23 Mon Sep 17 00:00:00 2001 From: WANG Cong <xiyou.wangcong@gmail.com> Date: Sat, 13 Aug 2016 22:35:00 -0700 Subject: [PATCH 320/513] net_sched: convert tcf_exts from list to pointer array As pointed out by Jamal, an action could be shared by multiple filters, so we can't use list to chain them any more after we get rid of the original tc_action. Instead, we could just save pointers to these actions in tcf_exts, since they are refcount'ed, so convert the list to an array of pointers. The "ugly" part is the action API still accepts list as a parameter, I just introduce a helper function to convert the array of pointers to a list, instead of relying on the C99 feature to iterate the array. Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common") Reported-by: Jamal Hadi Salim <jhs@mojatatu.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +- .../net/ethernet/mellanox/mlx5/core/en_tc.c | 12 +++-- .../net/ethernet/mellanox/mlxsw/spectrum.c | 4 +- include/net/act_api.h | 4 +- include/net/pkt_cls.h | 40 ++++++++++----- net/sched/act_api.c | 11 ++-- net/sched/cls_api.c | 51 +++++++++++++------ 7 files changed, 85 insertions(+), 41 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ee57a89252bbc..b4f03748adc02 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -8396,12 +8396,14 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter, struct tcf_exts *exts, u64 *action, u8 *queue) { const struct tc_action *a; + LIST_HEAD(actions); int err; if (tc_no_actions(exts)) return -EINVAL; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Drop action */ if (is_tcf_gact_shot(a)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 0f19b01e3fffa..dc8b1cb0fdc85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -318,6 +318,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 *action, u32 *flow_tag) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; @@ -325,7 +326,8 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, *flow_tag = MLX5_FS_DEFAULT_FLOW_TAG; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; @@ -362,13 +364,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, u32 *action, u32 *dest_vport) { const struct tc_action *a; + LIST_HEAD(actions); if (tc_no_actions(exts)) return -EINVAL; *action = 0; - tc_for_each_action(a, exts) { + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { /* Only support a single action per rule */ if (*action) return -EINVAL; @@ -503,6 +507,7 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow; struct tc_action *a; struct mlx5_fc *counter; + LIST_HEAD(actions); u64 bytes; u64 packets; u64 lastuse; @@ -518,7 +523,8 @@ int mlx5e_stats_flower(struct mlx5e_priv *priv, mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse); - tc_for_each_action(a, f->exts) + tcf_exts_to_list(f->exts, &actions); + list_for_each_entry(a, &actions, list) tcf_action_stats_update(a, bytes, packets, lastuse); return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 1fe9fbdc91021..1f81689068110 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1121,6 +1121,7 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, bool ingress) { const struct tc_action *a; + LIST_HEAD(actions); int err; if (!tc_single_action(cls->exts)) { @@ -1128,7 +1129,8 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port, return -ENOTSUPP; } - tc_for_each_action(a, cls->exts) { + tcf_exts_to_list(cls->exts, &actions); + list_for_each_entry(a, &actions, list) { if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL)) return -ENOTSUPP; diff --git a/include/net/act_api.h b/include/net/act_api.h index 870332ff61eb0..82f3c912a5b17 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -176,8 +176,8 @@ int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops); int tcf_unregister_action(struct tc_action_ops *a, struct pernet_operations *ops); int tcf_action_destroy(struct list_head *actions, int bind); -int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, - struct tcf_result *res); +int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, + int nr_actions, struct tcf_result *res); int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, char *n, int ovr, int bind, struct list_head *); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 00dd5c4c1d0a7..c99508d426ccf 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -59,7 +59,8 @@ tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r) struct tcf_exts { #ifdef CONFIG_NET_CLS_ACT __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - struct list_head actions; + int nr_actions; + struct tc_action **actions; #endif /* Map to export classifier specific extension TLV types to the * generic extensions API. Unsupported extensions must be set to 0. @@ -72,7 +73,10 @@ static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police) { #ifdef CONFIG_NET_CLS_ACT exts->type = 0; - INIT_LIST_HEAD(&exts->actions); + exts->nr_actions = 0; + exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), + GFP_KERNEL); + WARN_ON(!exts->actions); /* TODO: propagate the error to callers */ #endif exts->action = action; exts->police = police; @@ -89,7 +93,7 @@ static inline int tcf_exts_is_predicative(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - return !list_empty(&exts->actions); + return exts->nr_actions; #else return 0; #endif @@ -108,6 +112,20 @@ tcf_exts_is_available(struct tcf_exts *exts) return tcf_exts_is_predicative(exts); } +static inline void tcf_exts_to_list(const struct tcf_exts *exts, + struct list_head *actions) +{ +#ifdef CONFIG_NET_CLS_ACT + int i; + + for (i = 0; i < exts->nr_actions; i++) { + struct tc_action *a = exts->actions[i]; + + list_add(&a->list, actions); + } +#endif +} + /** * tcf_exts_exec - execute tc filter extensions * @skb: socket buffer @@ -124,27 +142,21 @@ tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts, struct tcf_result *res) { #ifdef CONFIG_NET_CLS_ACT - if (!list_empty(&exts->actions)) - return tcf_action_exec(skb, &exts->actions, res); + if (exts->nr_actions) + return tcf_action_exec(skb, exts->actions, exts->nr_actions, + res); #endif return 0; } #ifdef CONFIG_NET_CLS_ACT -#define tc_no_actions(_exts) \ - (list_empty(&(_exts)->actions)) - -#define tc_for_each_action(_a, _exts) \ - list_for_each_entry(_a, &(_exts)->actions, list) - -#define tc_single_action(_exts) \ - (list_is_singular(&(_exts)->actions)) +#define tc_no_actions(_exts) ((_exts)->nr_actions == 0) +#define tc_single_action(_exts) ((_exts)->nr_actions == 1) #else /* CONFIG_NET_CLS_ACT */ #define tc_no_actions(_exts) true -#define tc_for_each_action(_a, _exts) while ((void)(_a), 0) #define tc_single_action(_exts) false #endif /* CONFIG_NET_CLS_ACT */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index b4c7be38b6320..d09d0687594b0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -420,18 +420,19 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) return res; } -int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions, - struct tcf_result *res) +int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, + int nr_actions, struct tcf_result *res) { - const struct tc_action *a; - int ret = -1; + int ret = -1, i; if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); ret = TC_ACT_OK; goto exec_done; } - list_for_each_entry(a, actions, list) { + for (i = 0; i < nr_actions; i++) { + const struct tc_action *a = actions[i]; + repeat: ret = a->ops->act(skb, a, res); if (ret == TC_ACT_REPEAT) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 843a716a4303e..a7c5645373afb 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -541,8 +541,12 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) void tcf_exts_destroy(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - tcf_action_destroy(&exts->actions, TCA_ACT_UNBIND); - INIT_LIST_HEAD(&exts->actions); + LIST_HEAD(actions); + + tcf_exts_to_list(exts, &actions); + tcf_action_destroy(&actions, TCA_ACT_UNBIND); + kfree(exts->actions); + exts->nr_actions = 0; #endif } EXPORT_SYMBOL(tcf_exts_destroy); @@ -554,7 +558,6 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, { struct tc_action *act; - INIT_LIST_HEAD(&exts->actions); if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, "police", ovr, @@ -563,14 +566,20 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, return PTR_ERR(act); act->type = exts->type = TCA_OLD_COMPAT; - list_add(&act->list, &exts->actions); + exts->actions[0] = act; + exts->nr_actions = 1; } else if (exts->action && tb[exts->action]) { - int err; + LIST_HEAD(actions); + int err, i = 0; + err = tcf_action_init(net, tb[exts->action], rate_tlv, NULL, ovr, - TCA_ACT_BIND, &exts->actions); + TCA_ACT_BIND, &actions); if (err) return err; + list_for_each_entry(act, &actions, list) + exts->actions[i++] = act; + exts->nr_actions = i; } } #else @@ -587,37 +596,49 @@ void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT - LIST_HEAD(tmp); + struct tcf_exts old = *dst; + tcf_tree_lock(tp); - list_splice_init(&dst->actions, &tmp); - list_splice(&src->actions, &dst->actions); + dst->nr_actions = src->nr_actions; + dst->actions = src->actions; dst->type = src->type; tcf_tree_unlock(tp); - tcf_action_destroy(&tmp, TCA_ACT_UNBIND); + + tcf_exts_destroy(&old); #endif } EXPORT_SYMBOL(tcf_exts_change); -#define tcf_exts_first_act(ext) \ - list_first_entry_or_null(&(exts)->actions, \ - struct tc_action, list) +#ifdef CONFIG_NET_CLS_ACT +static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts) +{ + if (exts->nr_actions == 0) + return NULL; + else + return exts->actions[0]; +} +#endif int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; - if (exts->action && !list_empty(&exts->actions)) { + if (exts->action && exts->nr_actions) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering * tc data even if iproute2 was newer - jhs */ if (exts->type != TCA_OLD_COMPAT) { + LIST_HEAD(actions); + nest = nla_nest_start(skb, exts->action); if (nest == NULL) goto nla_put_failure; - if (tcf_action_dump(skb, &exts->actions, 0, 0) < 0) + + tcf_exts_to_list(exts, &actions); + if (tcf_action_dump(skb, &actions, 0, 0) < 0) goto nla_put_failure; nla_nest_end(skb, nest); } else if (exts->police) { From 0852e455238f8550fa92b1e40355eb2c6805787e Mon Sep 17 00:00:00 2001 From: WANG Cong <xiyou.wangcong@gmail.com> Date: Sat, 13 Aug 2016 22:35:01 -0700 Subject: [PATCH 321/513] net_sched: unify the init logic for act_police Jamal reported a crash when we create a police action with a specific index, this is because the init logic is not correct, we should always create one for this case. Just unify the logic with other tc actions. Fixes: a03e6fe56971 ("act_police: fix a crash during removal") Reported-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_police.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b3c7e975fc9e2..259352d978df6 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -125,6 +125,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); + bool exists = false; int size; if (nla == NULL) @@ -139,24 +140,24 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, size = nla_len(tb[TCA_POLICE_TBF]); if (size != sizeof(*parm) && size != sizeof(struct tc_police_compat)) return -EINVAL; + parm = nla_data(tb[TCA_POLICE_TBF]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; - if (parm->index) { - if (tcf_hash_check(tn, parm->index, a, bind)) { - if (ovr) - goto override; - /* not replacing */ - return -EEXIST; - } - } else { + if (!exists) { ret = tcf_hash_create(tn, parm->index, NULL, a, &act_police_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; } -override: police = to_police(*a); if (parm->rate.rate) { err = -ENOMEM; From b5ac851885accffe0485aea2805df8f2d49c95a8 Mon Sep 17 00:00:00 2001 From: Roman Mashak <mrv@mojatatu.com> Date: Sat, 13 Aug 2016 22:35:02 -0700 Subject: [PATCH 322/513] net_sched: allow flushing tc police actions The act_police uses its own code to walk the action hashtable, which leads to that we could not flush standalone tc police actions, so just switch to tcf_generic_walker() like other actions. (Joint work from Roman and Cong.) Signed-off-by: Roman Mashak <mrv@mojatatu.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Acked-by: Jamal Hadi Salim <jhs@mojatatu.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- net/sched/act_police.c | 43 +----------------------------------------- 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 259352d978df6..8a3be1d99775b 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -63,49 +63,8 @@ static int tcf_act_police_walker(struct net *net, struct sk_buff *skb, const struct tc_action_ops *ops) { struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; - int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; - struct nlattr *nest; - - spin_lock_bh(&hinfo->lock); - - s_i = cb->args[0]; - - for (i = 0; i < (POL_TAB_MASK + 1); i++) { - struct hlist_head *head; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, POL_TAB_MASK)]; - - hlist_for_each_entry_rcu(p, head, tcfa_head) { - index++; - if (index < s_i) - continue; - nest = nla_nest_start(skb, index); - if (nest == NULL) - goto nla_put_failure; - if (type == RTM_DELACTION) - err = tcf_action_dump_1(skb, p, 0, 1); - else - err = tcf_action_dump_1(skb, p, 0, 0); - if (err < 0) { - index--; - nla_nest_cancel(skb, nest); - goto done; - } - nla_nest_end(skb, nest); - n_i++; - } - } -done: - spin_unlock_bh(&hinfo->lock); - if (n_i) - cb->args[0] += n_i; - return n_i; -nla_put_failure: - nla_nest_cancel(skb, nest); - goto done; + return tcf_generic_walker(tn, skb, cb, type, ops); } static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { From 6c4687cc17a788a6dd8de3e27dbeabb7cbd3e066 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov <oleg@redhat.com> Date: Wed, 17 Aug 2016 17:36:29 +0200 Subject: [PATCH 323/513] uprobes: Fix the memcg accounting __replace_page() wronlgy calls mem_cgroup_cancel_charge() in "success" path, it should only do this if page_check_address() fails. This means that every enable/disable leads to unbalanced mem_cgroup_uncharge() from put_page(old_page), it is trivial to underflow the page_counter->count and trigger OOM. Reported-and-tested-by: Brenden Blanco <bblanco@plumgrid.com> Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Johannes Weiner <hannes@cmpxchg.org> Acked-by: Michal Hocko <mhocko@kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vladimir Davydov <vdavydov@virtuozzo.com> Cc: stable@vger.kernel.org # 3.17+ Fixes: 00501b531c47 ("mm: memcontrol: rewrite charge API") Link: http://lkml.kernel.org/r/20160817153629.GB29724@redhat.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/uprobes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index b7a525ab20837..8c50276b60d1c 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -172,8 +172,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); err = -EAGAIN; ptep = page_check_address(page, mm, addr, &ptl, 0); - if (!ptep) + if (!ptep) { + mem_cgroup_cancel_charge(kpage, memcg, false); goto unlock; + } get_page(kpage); page_add_new_anon_rmap(kpage, vma, addr, false); @@ -200,7 +202,6 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, err = 0; unlock: - mem_cgroup_cancel_charge(kpage, memcg, false); mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); unlock_page(page); return err; From 88b2f634028f1f38dcc3d412e10ff1f224976daa Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@alien8.de> Date: Wed, 17 Aug 2016 13:33:14 +0200 Subject: [PATCH 324/513] x86/microcode/AMD: Fix initrd loading with CONFIG_RANDOMIZE_MEMORY=y Similar to: efaad554b4ff ("x86/microcode/intel: Fix initrd loading with CONFIG_RANDOMIZE_MEMORY=y") ... fix microcode loading from the initrd on AMD by adding the randomization offset to the microcode patch container within the initrd. Reported-and-tested-by: Brian Gerst <brgerst@gmail.com> Signed-off-by: Borislav Petkov <bp@suse.de> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Kees Cook <keescook@chromium.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: linux-tip-commits@vger.kernel.org Link: http://lkml.kernel.org/r/20160817113314.GA19221@nazgul.tnic Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/cpu/microcode/amd.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 27a0228c9cae0..b816971f5da4c 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -355,6 +355,7 @@ void load_ucode_amd_ap(void) unsigned int cpu = smp_processor_id(); struct equiv_cpu_entry *eq; struct microcode_amd *mc; + u8 *cont = container; u32 rev, eax; u16 eq_id; @@ -371,8 +372,11 @@ void load_ucode_amd_ap(void) if (check_current_patch_level(&rev, false)) return; + /* Add CONFIG_RANDOMIZE_MEMORY offset. */ + cont += PAGE_OFFSET - __PAGE_OFFSET_BASE; + eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); + eq = (struct equiv_cpu_entry *)(cont + CONTAINER_HDR_SZ); eq_id = find_equiv_id(eq, eax); if (!eq_id) @@ -434,6 +438,9 @@ int __init save_microcode_in_initrd_amd(void) else container = cont_va; + /* Add CONFIG_RANDOMIZE_MEMORY offset. */ + container += PAGE_OFFSET - __PAGE_OFFSET_BASE; + eax = cpuid_eax(0x00000001); eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); From 7b0501b1e7cddd32b265178e32d332bdfbb532d4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa <jolsa@redhat.com> Date: Mon, 15 Aug 2016 12:17:00 +0200 Subject: [PATCH 325/513] x86/smp: Fix __max_logical_packages value setup Frank reported kernel panic when he disabled several cores in BIOS via following option: Core Disable Bitmap(Hex) [0] with number 0xFFE, which leaves 16 CPUs in system (out of 48). The kernel panic below goes along with following messages: smpboot: Max logical packages: 2^M smpboot: APIC(0) Converting physical 0 to logical package 0^M smpboot: APIC(20) Converting physical 1 to logical package 1^M smpboot: APIC(40) Package 2 exceeds logical package map^M smpboot: CPU 8 APICId 40 disabled^M smpboot: APIC(60) Package 3 exceeds logical package map^M smpboot: CPU 12 APICId 60 disabled^M ... general protection fault: 0000 [#1] SMP^M Modules linked in:^M CPU: 15 PID: 1 Comm: swapper/0 Not tainted 4.7.0-rc5+ #1^M Hardware name: SGI UV300/UV300, BIOS SGI UV 300 series BIOS 05/25/2016^M task: ffff8801673e0000 ti: ffff8801673ac000 task.ti: ffff8801673ac000^M RIP: 0010:[<ffffffff81014d54>] [<ffffffff81014d54>] uncore_change_context+0xd4/0x180^M ... [<ffffffff810158ac>] uncore_event_init_cpu+0x6c/0x70^M [<ffffffff81d8c91c>] intel_uncore_init+0x1c2/0x2dd^M [<ffffffff81d8c75a>] ? uncore_cpu_setup+0x17/0x17^M [<ffffffff81002190>] do_one_initcall+0x50/0x190^M [<ffffffff810ab193>] ? parse_args+0x293/0x480^M [<ffffffff81d87365>] kernel_init_freeable+0x1a5/0x249^M [<ffffffff81d86a35>] ? set_debug_rodata+0x12/0x12^M [<ffffffff816dc19e>] kernel_init+0xe/0x110^M [<ffffffff816e93bf>] ret_from_fork+0x1f/0x40^M [<ffffffff816dc190>] ? rest_init+0x80/0x80^M The reason for the panic is wrong value of __max_logical_packages, which lets logical_package_map uninitialized and the uncore code relying on this map being properly initialized (maybe we should add some safety checks there as well). The __max_logical_packages is computed as: DIV_ROUND_UP(total_cpus, ncpus); - ncpus being number of cores With above BIOS setup we get total_cpus == 16 which set __max_logical_packages to 2 (ncpus is 12). Once topology_update_package_map processes CPU with logical pkg over 2 we display above messages and fail to initialize the physical_to_logical_pkg map, which makes the uncore code crash. The fix is to remove logical_package_map bitmap completely and keep and update the logical_packages number instead. After we enumerate all the present CPUs, we check if the enumerated logical packages count is within its computed maximum from BIOS data. If it's not the case, we set this maximum to the new enumerated value and freeze any new addition of logical packages. The freeze is because lot of init code like uncore/rapl/cqm depends on having maximum logical package value set to allocate their data, so we can't change it later on. Prarit Bhargava tested the patch and confirms that it solves the problem: From dmidecode: Core Count: 24 Core Enabled: 24 Thread Count: 48 Orig kernel boot log: [ 0.464981] smpboot: Max logical packages: 19 [ 0.469861] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.477261] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.484760] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.492258] smpboot: APIC(c0) Converting physical 3 to logical package 3 1. nr_cpus=8, should stop enumerating in package 0: [ 0.533664] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.539596] smpboot: Max logical packages: 19 2. max_cpus=8, should still enumerate all packages: [ 0.526494] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.532428] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.538456] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.544486] smpboot: APIC(c0) Converting physical 3 to logical package 3 [ 0.550524] smpboot: Max logical packages: 19 3. nr_cpus=49 ( 2 socket + 1 core on 3rd socket), should stop enumerating in package 2: [ 0.521378] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.527314] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.533345] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.539368] smpboot: Max logical packages: 19 4. maxcpus=49, should still enumerate all packages: [ 0.525591] smpboot: APIC(0) Converting physical 0 to logical package 0 [ 0.531525] smpboot: APIC(40) Converting physical 1 to logical package 1 [ 0.537547] smpboot: APIC(80) Converting physical 2 to logical package 2 [ 0.543579] smpboot: APIC(c0) Converting physical 3 to logical package 3 [ 0.549624] smpboot: Max logical packages: 19 5. kdump (nr_cpus=1) works as well. Reported-by: Frank Ramsay <framsay@redhat.com> Tested-by: Prarit Bhargava <prarit@redhat.com> Signed-off-by: Jiri Olsa <jolsa@kernel.org> Reviewed-by: Prarit Bhargava <prarit@redhat.com> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20160815101700.GA30090@krava Signed-off-by: Ingo Molnar <mingo@kernel.org> --- arch/x86/kernel/smpboot.c | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2a6e84a30a546..4296beb8fdd3b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -100,10 +100,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info); /* Logical package management. We might want to allocate that dynamically */ static int *physical_to_logical_pkg __read_mostly; static unsigned long *physical_package_map __read_mostly;; -static unsigned long *logical_package_map __read_mostly; static unsigned int max_physical_pkg_id __read_mostly; unsigned int __max_logical_packages __read_mostly; EXPORT_SYMBOL(__max_logical_packages); +static unsigned int logical_packages __read_mostly; +static bool logical_packages_frozen __read_mostly; /* Maximum number of SMT threads on any online core */ int __max_smt_threads __read_mostly; @@ -277,14 +278,14 @@ int topology_update_package_map(unsigned int apicid, unsigned int cpu) if (test_and_set_bit(pkg, physical_package_map)) goto found; - new = find_first_zero_bit(logical_package_map, __max_logical_packages); - if (new >= __max_logical_packages) { + if (logical_packages_frozen) { physical_to_logical_pkg[pkg] = -1; - pr_warn("APIC(%x) Package %u exceeds logical package map\n", + pr_warn("APIC(%x) Package %u exceeds logical package max\n", apicid, pkg); return -ENOSPC; } - set_bit(new, logical_package_map); + + new = logical_packages++; pr_info("APIC(%x) Converting physical %u to logical package %u\n", apicid, pkg, new); physical_to_logical_pkg[pkg] = new; @@ -341,6 +342,7 @@ static void __init smp_init_package_map(void) } __max_logical_packages = DIV_ROUND_UP(total_cpus, ncpus); + logical_packages = 0; /* * Possibly larger than what we need as the number of apic ids per @@ -352,10 +354,6 @@ static void __init smp_init_package_map(void) memset(physical_to_logical_pkg, 0xff, size); size = BITS_TO_LONGS(max_physical_pkg_id) * sizeof(unsigned long); physical_package_map = kzalloc(size, GFP_KERNEL); - size = BITS_TO_LONGS(__max_logical_packages) * sizeof(unsigned long); - logical_package_map = kzalloc(size, GFP_KERNEL); - - pr_info("Max logical packages: %u\n", __max_logical_packages); for_each_present_cpu(cpu) { unsigned int apicid = apic->cpu_present_to_apicid(cpu); @@ -369,6 +367,15 @@ static void __init smp_init_package_map(void) set_cpu_possible(cpu, false); set_cpu_present(cpu, false); } + + if (logical_packages > __max_logical_packages) { + pr_warn("Detected more packages (%u), then computed by BIOS data (%u).\n", + logical_packages, __max_logical_packages); + logical_packages_frozen = true; + __max_logical_packages = logical_packages; + } + + pr_info("Max logical packages: %u\n", __max_logical_packages); } void __init smp_store_boot_cpu_info(void) From cca2094605efe6ccf43ff2876dd5bccc799202d8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Tue, 16 Aug 2016 13:33:26 +0200 Subject: [PATCH 326/513] perf/core: Fix event_function_local() Vincent reported triggering the WARN_ON_ONCE() in event_function_local(). While thinking through cases I noticed that by using event_function() directly, we miss the inactive case usually handled by event_function_call(). Therefore construct a blend of event_function_call() and event_function() that handles the cases relevant to event_function_local(). Reported-by: Vince Weaver <vincent.weaver@maine.edu> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: stable@vger.kernel.org # 4.5+ Fixes: fae3fde65138 ("perf: Collapse and fix event_function_call() users") Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/core.c | 60 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1903b8f3a7057..6e454bfd514f3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -242,18 +242,6 @@ static int event_function(void *info) return ret; } -static void event_function_local(struct perf_event *event, event_f func, void *data) -{ - struct event_function_struct efs = { - .event = event, - .func = func, - .data = data, - }; - - int ret = event_function(&efs); - WARN_ON_ONCE(ret); -} - static void event_function_call(struct perf_event *event, event_f func, void *data) { struct perf_event_context *ctx = event->ctx; @@ -303,6 +291,54 @@ static void event_function_call(struct perf_event *event, event_f func, void *da raw_spin_unlock_irq(&ctx->lock); } +/* + * Similar to event_function_call() + event_function(), but hard assumes IRQs + * are already disabled and we're on the right CPU. + */ +static void event_function_local(struct perf_event *event, event_f func, void *data) +{ + struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); + struct task_struct *task = READ_ONCE(ctx->task); + struct perf_event_context *task_ctx = NULL; + + WARN_ON_ONCE(!irqs_disabled()); + + if (task) { + if (task == TASK_TOMBSTONE) + return; + + task_ctx = ctx; + } + + perf_ctx_lock(cpuctx, task_ctx); + + task = ctx->task; + if (task == TASK_TOMBSTONE) + goto unlock; + + if (task) { + /* + * We must be either inactive or active and the right task, + * otherwise we're screwed, since we cannot IPI to somewhere + * else. + */ + if (ctx->is_active) { + if (WARN_ON_ONCE(task != current)) + goto unlock; + + if (WARN_ON_ONCE(cpuctx->task_ctx != ctx)) + goto unlock; + } + } else { + WARN_ON_ONCE(&cpuctx->ctx != ctx); + } + + func(event, cpuctx, ctx, data); +unlock: + perf_ctx_unlock(cpuctx, task_ctx); +} + #define PERF_FLAG_ALL (PERF_FLAG_FD_NO_GROUP |\ PERF_FLAG_FD_OUTPUT |\ PERF_FLAG_PID_CGROUP |\ From 4059ffd09d694f704e18a4baf97fc0016c32e9ad Mon Sep 17 00:00:00 2001 From: Mathieu Poirier <mathieu.poirier@linaro.org> Date: Mon, 18 Jul 2016 10:43:05 -0600 Subject: [PATCH 327/513] perf/core: Fix file name handling for start/stop filters Binary file names have to be supplied for both range and start/stop filters but the current code only processes the filename if an address range filter is specified. This code adds processing of the filename for start/stop filters. Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1468860187-318-2-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/core.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6e454bfd514f3..52780ef941d30 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7972,8 +7972,10 @@ perf_event_parse_addr_filter(struct perf_event *event, char *fstr, goto fail; } - if (token == IF_SRC_FILE) { - filename = match_strdup(&args[2]); + if (token == IF_SRC_FILE || token == IF_SRC_FILEADDR) { + int fpos = filter->range ? 2 : 1; + + filename = match_strdup(&args[fpos]); if (!filename) { ret = -ENOMEM; goto fail; From 12b40a2393719a37ff86a0b43bece6d28a75cbfc Mon Sep 17 00:00:00 2001 From: Mathieu Poirier <mathieu.poirier@linaro.org> Date: Mon, 18 Jul 2016 10:43:06 -0600 Subject: [PATCH 328/513] perf/core: Update filters only on executable mmap Function perf_event_mmap() is called by the MM subsystem each time part of a binary is loaded in memory. There can be several mapping for a binary, many times unrelated to the code section. Each time a section of a binary is mapped address filters are updated, event when the map doesn't pertain to the code section. The end result is that filters are configured based on the last map event that was received rather than the last mapping of the code segment. For example if we have an executable 'main' that calls library 'libcstest.so.1.0', and that we want to collect traces on code that is in that library. The perf cmd line for this scenario would be: perf record -e cs_etm// --filter 'filter 0x72c/0x40@/opt/lib/libcstest.so.1.0' --per-thread ./main Resulting in binaries being mapped this way: root@linaro-nano:~# cat /proc/1950/maps 00400000-00401000 r-xp 00000000 08:02 33169 /home/linaro/main 00410000-00411000 r--p 00000000 08:02 33169 /home/linaro/main 00411000-00412000 rw-p 00001000 08:02 33169 /home/linaro/main 7fa2464000-7fa2474000 rw-p 00000000 00:00 0 7fa2474000-7fa25a4000 r-xp 00000000 08:02 543 /lib/aarch64-linux-gnu/libc-2.21.so 7fa25a4000-7fa25b3000 ---p 00130000 08:02 543 /lib/aarch64-linux-gnu/libc-2.21.so 7fa25b3000-7fa25b7000 r--p 0012f000 08:02 543 /lib/aarch64-linux-gnu/libc-2.21.so 7fa25b7000-7fa25b9000 rw-p 00133000 08:02 543 /lib/aarch64-linux-gnu/libc-2.21.so 7fa25b9000-7fa25bd000 rw-p 00000000 00:00 0 7fa25bd000-7fa25be000 r-xp 00000000 08:02 38308 /opt/lib/libcstest.so.1.0 7fa25be000-7fa25cd000 ---p 00001000 08:02 38308 /opt/lib/libcstest.so.1.0 7fa25cd000-7fa25ce000 r--p 00000000 08:02 38308 /opt/lib/libcstest.so.1.0 7fa25ce000-7fa25cf000 rw-p 00001000 08:02 38308 /opt/lib/libcstest.so.1.0 7fa25cf000-7fa25eb000 r-xp 00000000 08:02 574 /lib/aarch64-linux-gnu/ld-2.21.so 7fa25ef000-7fa25f2000 rw-p 00000000 00:00 0 7fa25f7000-7fa25f9000 rw-p 00000000 00:00 0 7fa25f9000-7fa25fa000 r--p 00000000 00:00 0 [vvar] 7fa25fa000-7fa25fb000 r-xp 00000000 00:00 0 [vdso] 7fa25fb000-7fa25fc000 r--p 0001c000 08:02 574 /lib/aarch64-linux-gnu/ld-2.21.so 7fa25fc000-7fa25fe000 rw-p 0001d000 08:02 574 /lib/aarch64-linux-gnu/ld-2.21.so 7ff2ea8000-7ff2ec9000 rw-p 00000000 00:00 0 [stack] root@linaro-nano:~# Before 'main()' can execute 'libcstest.so.1.0' has to be loaded in memory. Once that has been done perf_event_mmap() has been called 4 times, with the last map starting at address 0x7fa25ce000 and the address filter configured to start filtering when the IP has passed over address 0x0x7fa25ce72c (0x7fa25ce000 + 0x72c). But that is wrong since the code segment for library 'libcstest.so.1.0' as been mapped at 0x7fa25bd000, resulting in traces not being collected. This patch corrects the situation by requesting that address filters be updated only if the mapped event is for a code segment. Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1468860187-318-3-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 52780ef941d30..9a030a96bc1fb 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6689,6 +6689,13 @@ static void perf_addr_filters_adjust(struct vm_area_struct *vma) struct perf_event_context *ctx; int ctxn; + /* + * Data tracing isn't supported yet and as such there is no need + * to keep track of anything that isn't related to executable code: + */ + if (!(vma->vm_flags & VM_EXEC)) + return; + rcu_read_lock(); for_each_task_context_nr(ctxn) { ctx = rcu_dereference(current->perf_event_ctxp[ctxn]); From 99f5bc9bfa9094e7c264a8e09f9507b391a3d1d1 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier <mathieu.poirier@linaro.org> Date: Mon, 18 Jul 2016 10:43:07 -0600 Subject: [PATCH 329/513] perf/core: Enable mapping of the stop filters At this time the perf_addr_filter_needs_mmap() function will _not_ return true on a user space 'stop' filter. But stop filters need exactly the same kind of mapping that range and start filters get. Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1468860187-318-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/core.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 9a030a96bc1fb..a5fc5c8cdfb0d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6619,15 +6619,6 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) kfree(buf); } -/* - * Whether this @filter depends on a dynamic object which is not loaded - * yet or its load addresses are not known. - */ -static bool perf_addr_filter_needs_mmap(struct perf_addr_filter *filter) -{ - return filter->filter && filter->inode; -} - /* * Check whether inode and address range match filter criteria. */ @@ -7848,7 +7839,11 @@ static void perf_event_addr_filters_apply(struct perf_event *event) list_for_each_entry(filter, &ifh->list, entry) { event->addr_filters_offs[count] = 0; - if (perf_addr_filter_needs_mmap(filter)) + /* + * Adjust base offset if the filter is associated to a binary + * that needs to be mapped: + */ + if (filter->inode) event->addr_filters_offs[count] = perf_addr_filter_apply(filter, mm); From 71e7bc2bab77e64882c031c2af943c3256c1adb0 Mon Sep 17 00:00:00 2001 From: David Carrillo-Cisneros <davidcc@google.com> Date: Wed, 17 Aug 2016 13:55:04 -0700 Subject: [PATCH 330/513] perf/core: Check return value of the perf_event_read() IPI The call to smp_call_function_single in perf_event_read() may fail if an invalid or not online CPU index is passed. Warn user if such bug is present and return error. Signed-off-by: David Carrillo-Cisneros <davidcc@google.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Kan Liang <kan.liang@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paul Turner <pjt@google.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vegard Nossum <vegard.nossum@gmail.com> Cc: Vince Weaver <vincent.weaver@maine.edu> Link: http://lkml.kernel.org/r/1471467307-61171-2-git-send-email-davidcc@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index a5fc5c8cdfb0d..5650f5317e0c3 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3549,9 +3549,10 @@ static int perf_event_read(struct perf_event *event, bool group) .group = group, .ret = 0, }; - smp_call_function_single(event->oncpu, - __perf_event_read, &data, 1); - ret = data.ret; + ret = smp_call_function_single(event->oncpu, __perf_event_read, &data, 1); + /* The event must have been read from an online CPU: */ + WARN_ON_ONCE(ret); + ret = ret ? : data.ret; } else if (event->state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; From 173be9a14f7b2e901cf77c18b1aafd4d672e9d9e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <peterz@infradead.org> Date: Mon, 15 Aug 2016 18:38:42 +0200 Subject: [PATCH 331/513] sched/cputime: Fix NO_HZ_FULL getrusage() monotonicity regression Mike reports: Roughly 10% of the time, ltp testcase getrusage04 fails: getrusage04 0 TINFO : Expected timers granularity is 4000 us getrusage04 0 TINFO : Using 1 as multiply factor for max [us]time increment (1000+4000us)! getrusage04 0 TINFO : utime: 0us; stime: 179us getrusage04 0 TINFO : utime: 3751us; stime: 0us getrusage04 1 TFAIL : getrusage04.c:133: stime increased > 5000us: And tracked it down to the case where the task simply doesn't get _any_ [us]time ticks. Update the code to assume all rtime is utime when we lack information, thus ensuring a task that elides the tick gets time accounted. Reported-by: Mike Galbraith <umgwanakikbuti@gmail.com> Tested-by: Mike Galbraith <umgwanakikbuti@gmail.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Fredrik Markstrom <fredrik.markstrom@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim <rkrcmar@redhat.com> Cc: Rik van Riel <riel@redhat.com> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Cc: Wanpeng Li <wanpeng.li@hotmail.com> Cc: stable@vger.kernel.org # 4.3+ Fixes: 9d7fb0427648 ("sched/cputime: Guarantee stime + utime == rtime") Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/sched/cputime.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 9858266fb0b32..2ee83b2005043 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -614,19 +614,25 @@ static void cputime_adjust(struct task_cputime *curr, stime = curr->stime; utime = curr->utime; - if (utime == 0) { - stime = rtime; + /* + * If either stime or both stime and utime are 0, assume all runtime is + * userspace. Once a task gets some ticks, the monotonicy code at + * 'update' will ensure things converge to the observed ratio. + */ + if (stime == 0) { + utime = rtime; goto update; } - if (stime == 0) { - utime = rtime; + if (utime == 0) { + stime = rtime; goto update; } stime = scale_stime((__force u64)stime, (__force u64)rtime, (__force u64)(stime + utime)); +update: /* * Make sure stime doesn't go backwards; this preserves monotonicity * for utime because rtime is monotonic. @@ -649,7 +655,6 @@ static void cputime_adjust(struct task_cputime *curr, stime = rtime - utime; } -update: prev->stime = stime; prev->utime = utime; out: From 03cbc732639ddcad15218c4b2046d255851ff1e3 Mon Sep 17 00:00:00 2001 From: Wanpeng Li <wanpeng.li@hotmail.com> Date: Wed, 17 Aug 2016 10:05:46 +0800 Subject: [PATCH 332/513] sched/cputime: Resync steal time when guest & host lose sync Commit: 57430218317e ("sched/cputime: Count actually elapsed irq & softirq time") ... fixed a bug but also triggered a regression: On an i5 laptop, 4 pCPUs, 4vCPUs for one full dynticks guest, there are four CPU hog processes(for loop) running in the guest, I hot-unplug the pCPUs on host one by one until there is only one left, then observe CPU utilization via 'top' in the guest, it shows: 100% st for cpu0(housekeeping) 75% st for other CPUs (nohz full mode) However, w/o this commit it shows the correct 75% for all four CPUs. When a guest is interrupted for a longer amount of time, missed clock ticks are not redelivered later. Because of that, we should not limit the amount of steal time accounted to the amount of time that the calling functions think have passed. However, the interval returned by account_other_time() is NOT rounded down to the nearest jiffy, while the base interval in get_vtime_delta() it is subtracted from is, so the max cputime limit is required to avoid underflow. This patch fixes the regression by limiting the account_other_time() from get_vtime_delta() to avoid underflow, and lets the other three call sites (in account_other_time() and steal_account_process_time()) account however much steal time the host told us elapsed. Suggested-by: Rik van Riel <riel@redhat.com> Suggested-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> Reviewed-by: Rik van Riel <riel@redhat.com> Cc: Frederic Weisbecker <fweisbec@gmail.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mike Galbraith <efault@gmx.de> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Radim Krcmar <rkrcmar@redhat.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: kvm@vger.kernel.org Link: http://lkml.kernel.org/r/1471399546-4069-1-git-send-email-wanpeng.li@hotmail.com [ Improved the changelog. ] Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/sched/cputime.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 2ee83b2005043..a846cf89eb961 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -263,6 +263,11 @@ void account_idle_time(cputime_t cputime) cpustat[CPUTIME_IDLE] += (__force u64) cputime; } +/* + * When a guest is interrupted for a longer amount of time, missed clock + * ticks are not redelivered later. Due to that, this function may on + * occasion account more time than the calling functions think elapsed. + */ static __always_inline cputime_t steal_account_process_time(cputime_t maxtime) { #ifdef CONFIG_PARAVIRT @@ -371,7 +376,7 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick, * idle, or potentially user or system time. Due to rounding, * other time can exceed ticks occasionally. */ - other = account_other_time(cputime); + other = account_other_time(ULONG_MAX); if (other >= cputime) return; cputime -= other; @@ -486,7 +491,7 @@ void account_process_tick(struct task_struct *p, int user_tick) } cputime = cputime_one_jiffy; - steal = steal_account_process_time(cputime); + steal = steal_account_process_time(ULONG_MAX); if (steal >= cputime) return; @@ -516,7 +521,7 @@ void account_idle_ticks(unsigned long ticks) } cputime = jiffies_to_cputime(ticks); - steal = steal_account_process_time(cputime); + steal = steal_account_process_time(ULONG_MAX); if (steal >= cputime) return; @@ -699,6 +704,13 @@ static cputime_t get_vtime_delta(struct task_struct *tsk) unsigned long now = READ_ONCE(jiffies); cputime_t delta, other; + /* + * Unlike tick based timing, vtime based timing never has lost + * ticks, and no need for steal time accounting to make up for + * lost ticks. Vtime accounts a rounded version of actual + * elapsed time. Limit account_other_time to prevent rounding + * errors from causing elapsed vtime to go negative. + */ delta = jiffies_to_cputime(now - tsk->vtime_snap); other = account_other_time(delta); WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_INACTIVE); From d048c098218e91ed0e10dfa1f0f80e2567fe4ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com> Date: Mon, 8 Aug 2016 20:16:22 +0200 Subject: [PATCH 333/513] KVM: nVMX: fix msr bitmaps to prevent L2 from accessing L0 x2APIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit msr bitmap can be used to avoid a VM exit (interception) on guest MSR accesses. In some configurations of VMX controls, the guest can even directly access host's x2APIC MSRs. See SDM 29.5 VIRTUALIZING MSR-BASED APIC ACCESSES. L2 could read all L0's x2APIC MSRs and write TPR, EOI, and SELF_IPI. To do so, L1 would first trick KVM to disable all possible interceptions by enabling APICv features and then would turn those features off; nested_vmx_merge_msr_bitmap() only disabled interceptions, so VMX would not intercept previously enabled MSRs even though they were not safe with the new configuration. Correctly re-enabling interceptions is not enough as a second bug would still allow L1+L2 to access host's MSRs: msr bitmap was shared for all VMCSs, so L1 could trigger a race to get the desired combination of msr bitmap and VMX controls. This fix allocates a msr bitmap for every L1 VCPU, allows only safe x2APIC MSRs from L1's msr bitmap, and disables msr bitmaps if they would have to intercept everything anyway. Fixes: 3af18d9c5fe9 ("KVM: nVMX: Prepare for using hardware MSR bitmap") Reported-by: Jim Mattson <jmattson@google.com> Suggested-by: Wincy Van <fanwenyi0529@gmail.com> Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> --- arch/x86/kvm/vmx.c | 107 +++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 63 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a45d8580f91e7..c66ac2c70d228 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -435,6 +435,8 @@ struct nested_vmx { bool pi_pending; u16 posted_intr_nv; + unsigned long *msr_bitmap; + struct hrtimer preemption_timer; bool preemption_timer_expired; @@ -924,7 +926,6 @@ static unsigned long *vmx_msr_bitmap_legacy; static unsigned long *vmx_msr_bitmap_longmode; static unsigned long *vmx_msr_bitmap_legacy_x2apic; static unsigned long *vmx_msr_bitmap_longmode_x2apic; -static unsigned long *vmx_msr_bitmap_nested; static unsigned long *vmx_vmread_bitmap; static unsigned long *vmx_vmwrite_bitmap; @@ -2508,7 +2509,7 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu) unsigned long *msr_bitmap; if (is_guest_mode(vcpu)) - msr_bitmap = vmx_msr_bitmap_nested; + msr_bitmap = to_vmx(vcpu)->nested.msr_bitmap; else if (cpu_has_secondary_exec_ctrls() && (vmcs_read32(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) { @@ -6363,13 +6364,6 @@ static __init int hardware_setup(void) if (!vmx_msr_bitmap_longmode_x2apic) goto out4; - if (nested) { - vmx_msr_bitmap_nested = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx_msr_bitmap_nested) - goto out5; - } - vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); if (!vmx_vmread_bitmap) goto out6; @@ -6392,8 +6386,6 @@ static __init int hardware_setup(void) memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); - if (nested) - memset(vmx_msr_bitmap_nested, 0xff, PAGE_SIZE); if (setup_vmcs_config(&vmcs_config) < 0) { r = -EIO; @@ -6529,9 +6521,6 @@ static __init int hardware_setup(void) out7: free_page((unsigned long)vmx_vmread_bitmap); out6: - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); -out5: free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic); out4: free_page((unsigned long)vmx_msr_bitmap_longmode); @@ -6557,8 +6546,6 @@ static __exit void hardware_unsetup(void) free_page((unsigned long)vmx_io_bitmap_a); free_page((unsigned long)vmx_vmwrite_bitmap); free_page((unsigned long)vmx_vmread_bitmap); - if (nested) - free_page((unsigned long)vmx_msr_bitmap_nested); free_kvm_area(); } @@ -6995,16 +6982,21 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx->nested.msr_bitmap) + goto out_msr_bitmap; + } + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); if (!vmx->nested.cached_vmcs12) - return -ENOMEM; + goto out_cached_vmcs12; if (enable_shadow_vmcs) { shadow_vmcs = alloc_vmcs(); - if (!shadow_vmcs) { - kfree(vmx->nested.cached_vmcs12); - return -ENOMEM; - } + if (!shadow_vmcs) + goto out_shadow_vmcs; /* mark vmcs as shadow */ shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ @@ -7024,6 +7016,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu) skip_emulated_instruction(vcpu); nested_vmx_succeed(vcpu); return 1; + +out_shadow_vmcs: + kfree(vmx->nested.cached_vmcs12); + +out_cached_vmcs12: + free_page((unsigned long)vmx->nested.msr_bitmap); + +out_msr_bitmap: + return -ENOMEM; } /* @@ -7098,6 +7099,10 @@ static void free_nested(struct vcpu_vmx *vmx) vmx->nested.vmxon = false; free_vpid(vmx->nested.vpid02); nested_release_vmcs12(vmx); + if (vmx->nested.msr_bitmap) { + free_page((unsigned long)vmx->nested.msr_bitmap); + vmx->nested.msr_bitmap = NULL; + } if (enable_shadow_vmcs) free_vmcs(vmx->nested.current_shadow_vmcs); kfree(vmx->nested.cached_vmcs12); @@ -9472,8 +9477,10 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, { int msr; struct page *page; - unsigned long *msr_bitmap; + unsigned long *msr_bitmap_l1; + unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.msr_bitmap; + /* This shortcut is ok because we support only x2APIC MSRs so far. */ if (!nested_cpu_has_virt_x2apic_mode(vmcs12)) return false; @@ -9482,63 +9489,37 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, WARN_ON(1); return false; } - msr_bitmap = (unsigned long *)kmap(page); - if (!msr_bitmap) { + msr_bitmap_l1 = (unsigned long *)kmap(page); + if (!msr_bitmap_l1) { nested_release_page_clean(page); WARN_ON(1); return false; } + memset(msr_bitmap_l0, 0xff, PAGE_SIZE); + if (nested_cpu_has_virt_x2apic_mode(vmcs12)) { if (nested_cpu_has_apic_reg_virt(vmcs12)) for (msr = 0x800; msr <= 0x8ff; msr++) nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, msr, MSR_TYPE_R); - /* TPR is allowed */ - nested_vmx_disable_intercept_for_msr(msr_bitmap, - vmx_msr_bitmap_nested, + + nested_vmx_disable_intercept_for_msr( + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_TASKPRI >> 4), MSR_TYPE_R | MSR_TYPE_W); + if (nested_cpu_has_vid(vmcs12)) { - /* EOI and self-IPI are allowed */ nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_EOI >> 4), MSR_TYPE_W); nested_vmx_disable_intercept_for_msr( - msr_bitmap, - vmx_msr_bitmap_nested, + msr_bitmap_l1, msr_bitmap_l0, APIC_BASE_MSR + (APIC_SELF_IPI >> 4), MSR_TYPE_W); } - } else { - /* - * Enable reading intercept of all the x2apic - * MSRs. We should not rely on vmcs12 to do any - * optimizations here, it may have been modified - * by L1. - */ - for (msr = 0x800; msr <= 0x8ff; msr++) - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - msr, - MSR_TYPE_R); - - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_TASKPRI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_EOI >> 4), - MSR_TYPE_W); - __vmx_enable_intercept_for_msr( - vmx_msr_bitmap_nested, - APIC_BASE_MSR + (APIC_SELF_IPI >> 4), - MSR_TYPE_W); } kunmap(page); nested_release_page_clean(page); @@ -9957,10 +9938,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) } if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS) { - nested_vmx_merge_msr_bitmap(vcpu, vmcs12); - /* MSR_BITMAP will be set by following vmx_set_efer. */ - } else + exec_control & CPU_BASED_USE_MSR_BITMAPS && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; /* MSR_BITMAP will be set by following vmx_set_efer. */ + else exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; /* From dccbfcf52cebb8963246eba5b177b77f26b34da0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= <rkrcmar@redhat.com> Date: Mon, 8 Aug 2016 20:16:23 +0200 Subject: [PATCH 334/513] KVM: nVMX: postpone VMCS changes on MSR_IA32_APICBASE write MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If vmcs12 does not intercept APIC_BASE writes, then KVM will handle the write with vmcs02 as the current VMCS. This will incorrectly apply modifications intended for vmcs01 to vmcs02 and L2 can use it to gain access to L0's x2APIC registers by disabling virtualized x2APIC while using msr bitmap that assumes enabled. Postpone execution of vmx_set_virtual_x2apic_mode until vmcs01 is the current VMCS. An alternative solution would temporarily make vmcs01 the current VMCS, but it requires more care. Fixes: 8d14695f9542 ("x86, apicv: add virtual x2apic support") Reported-by: Jim Mattson <jmattson@google.com> Reviewed-by: Wanpeng Li <wanpeng.li@hotmail.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com> --- arch/x86/kvm/vmx.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66ac2c70d228..ae111a07acc48 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -422,6 +422,7 @@ struct nested_vmx { struct list_head vmcs02_pool; int vmcs02_num; u64 vmcs01_tsc_offset; + bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. */ bool nested_run_pending; /* @@ -8424,6 +8425,12 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) { u32 sec_exec_control; + /* Postpone execution until vmcs01 is the current VMCS. */ + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.change_vmcs01_virtual_x2apic_mode = true; + return; + } + /* * There is not point to enable virtualize x2apic without enable * apicv @@ -10749,6 +10756,12 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); + if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { + vmx->nested.change_vmcs01_virtual_x2apic_mode = false; + vmx_set_virtual_x2apic_mode(vcpu, + vcpu->arch.apic_base & X2APIC_ENABLE); + } + /* This is needed for same reason as it was needed in prepare_vmcs02 */ vmx->host_rsp = 0; From c95ba92afb238ac565c68968fc72e38ca8d1b6e8 Mon Sep 17 00:00:00 2001 From: Peter Feiner <pfeiner@google.com> Date: Wed, 17 Aug 2016 09:36:47 -0700 Subject: [PATCH 335/513] kvm: nVMX: fix nested tsc scaling When the host supported TSC scaling, L2 would use a TSC multiplier of 0, which causes a VM entry failure. Now L2's TSC uses the same multiplier as L1. Signed-off-by: Peter Feiner <pfeiner@google.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/x86/kvm/vmx.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ae111a07acc48..5cede40e25524 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2200,6 +2200,12 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu) new.control) != old.control); } +static void decache_tsc_multiplier(struct vcpu_vmx *vmx) +{ + vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; + vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); +} + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -2258,10 +2264,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* Setup TSC multiplier */ if (kvm_has_tsc_control && - vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) { - vmx->current_tsc_ratio = vcpu->arch.tsc_scaling_ratio; - vmcs_write64(TSC_MULTIPLIER, vmx->current_tsc_ratio); - } + vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio) + decache_tsc_multiplier(vmx); vmx_vcpu_pi_load(vcpu, cpu); vmx->host_pkru = read_pkru(); @@ -9999,6 +10003,8 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); else vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); if (enable_vpid) { /* @@ -10755,6 +10761,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, else vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); + if (kvm_has_tsc_control) + decache_tsc_multiplier(vmx); if (vmx->nested.change_vmcs01_virtual_x2apic_mode) { vmx->nested.change_vmcs01_virtual_x2apic_mode = false; From a93a4d62324123dcda383bdb4ab89151bbc0e499 Mon Sep 17 00:00:00 2001 From: Catalin Marinas <catalin.marinas@arm.com> Date: Fri, 5 Dec 2014 12:34:54 +0000 Subject: [PATCH 336/513] arm64: Fix shift warning in arch/arm64/mm/dump.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with 48-bit VAs and 16K page configuration, it's possible to get the following warning when building the arm64 page table dumping code: arch/arm64/mm/dump.c: In function ‘walk_pud’: arch/arm64/mm/dump.c:274:102: warning: right shift count >= width of type [-Wshift-count-overflow] This is because pud_offset(pgd, 0) performs a shift to the right by 36 while the value 0 has the type 'int' by default, therefore 32-bit. This patch modifies all the p*_offset() uses in arch/arm64/mm/dump.c to use 0UL for the address argument. Acked-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/mm/dump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index f94b80eb295dc..9c3e75df21804 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -242,7 +242,7 @@ static void note_page(struct pg_state *st, unsigned long addr, unsigned level, static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) { - pte_t *pte = pte_offset_kernel(pmd, 0); + pte_t *pte = pte_offset_kernel(pmd, 0UL); unsigned long addr; unsigned i; @@ -254,7 +254,7 @@ static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start) static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) { - pmd_t *pmd = pmd_offset(pud, 0); + pmd_t *pmd = pmd_offset(pud, 0UL); unsigned long addr; unsigned i; @@ -271,7 +271,7 @@ static void walk_pmd(struct pg_state *st, pud_t *pud, unsigned long start) static void walk_pud(struct pg_state *st, pgd_t *pgd, unsigned long start) { - pud_t *pud = pud_offset(pgd, 0); + pud_t *pud = pud_offset(pgd, 0UL); unsigned long addr; unsigned i; From 5d0bdf2867825a92c0a563957a2fb059149ab0d4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Thu, 11 Aug 2016 07:11:05 -0700 Subject: [PATCH 337/513] PCI: Call pci_intx() when using legacy interrupts in pci_alloc_irq_vectors() ahci currently insists on an explicit call to pci_intx() before falling back from MSI or MSI-X to legacy IRQs. As pci_intx() is a no-op if the command register already contains the right value it seems safe and useful to add this call to pci_alloc_irq_vectors() so that ahci can just use pci_alloc_irq_vectors(). Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> --- drivers/pci/msi.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 9233e7f62f474..593698e11f963 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -1200,8 +1200,11 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, } /* use legacy irq if allowed */ - if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1) { + pci_intx(dev, 1); return 1; + } + return vecs; } EXPORT_SYMBOL(pci_alloc_irq_vectors); From f17b3ea3d2df7c9bf3ce1dbd65b5fd7061f8e787 Mon Sep 17 00:00:00 2001 From: Johannes Berg <johannes.berg@intel.com> Date: Thu, 11 Aug 2016 11:50:21 +0200 Subject: [PATCH 338/513] Revert "drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference" This reverts commit: fa7d81bb3c269 ("drm/fb-helper: Reduce READ_ONCE(master) to lockless_dereference") As Peter explained: [...] lockless_dereference() is _stronger_ than READ_ONCE(), not weaker. [...] Also, clue is in the name: 'dereference', you don't actually dereference the pointer here, only load it. My next patch breaks the compile without this revert, because it assumes you want to deference and thus also need the struct type visible (which it isn't here), so revert it. Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1470909022-687-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar <mingo@kernel.org> --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index ce54e985d91ba..0a06f9120b5a6 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -464,7 +464,7 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper) /* Sometimes user space wants everything disabled, so don't steal the * display if there's a master. */ - if (lockless_dereference(dev->master)) + if (READ_ONCE(dev->master)) return false; drm_for_each_crtc(crtc, dev) { From 112dc0c8069e5554e0ad29c58228f1e6ca49e13d Mon Sep 17 00:00:00 2001 From: Johannes Berg <johannes.berg@intel.com> Date: Thu, 11 Aug 2016 11:50:22 +0200 Subject: [PATCH 339/513] locking/barriers: Suppress sparse warnings in lockless_dereference() After Peter's commit: 331b6d8c7afc ("locking/barriers: Validate lockless_dereference() is used on a pointer type") ... we get a lot of sparse warnings (one for every rcu_dereference, and more) since the expression here is assigning to the wrong address space. Instead of validating that 'p' is a pointer this way, instead make it fail compilation when it's not by using sizeof(*(p)). This will not cause any sparse warnings (tested, likely since the address space is irrelevant for sizeof), and will fail compilation when 'p' isn't a pointer type. Tested-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Signed-off-by: Johannes Berg <johannes.berg@intel.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Chris Wilson <chris@chris-wilson.co.uk> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Fixes: 331b6d8c7afc ("locking/barriers: Validate lockless_dereference() is used on a pointer type") Link: http://lkml.kernel.org/r/1470909022-687-2-git-send-email-johannes@sipsolutions.net Signed-off-by: Ingo Molnar <mingo@kernel.org> --- include/linux/compiler.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 1bb9548427250..436aa4e42221b 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -527,13 +527,13 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * object's lifetime is managed by something other than RCU. That * "something other" might be reference counting or simple immortality. * - * The seemingly unused void * variable is to validate @p is indeed a pointer - * type. All pointer types silently cast to void *. + * The seemingly unused size_t variable is to validate @p is indeed a pointer + * type by making sure it can be dereferenced. */ #define lockless_dereference(p) \ ({ \ typeof(p) _________p1 = READ_ONCE(p); \ - __maybe_unused const void * const _________p2 = _________p1; \ + size_t __maybe_unused __size_of_ptr = sizeof(*(p)); \ smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ (_________p1); \ }) From 479e2a86dc6aeaec6013165e1bd3525db6914f3a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi <peter.ujfalusi@ti.com> Date: Thu, 18 Aug 2016 14:00:59 +0300 Subject: [PATCH 340/513] ASoC: omap-mcpdm: Drop pdmclk clock handling This reverts commit 65aca64d05b5eaa5ce15e18b458a8d338ddbd478. The patches for twl6040 MFD and clk missed the merge window and causing the McPDM driver to never probe since it is put back to the deferred list because the missing drivers. Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- .../devicetree/bindings/sound/omap-mcpdm.txt | 10 ---------- sound/soc/omap/omap-mcpdm.c | 17 ----------------- 2 files changed, 27 deletions(-) diff --git a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt index 6f6c2f8e908db..0741dff048dd3 100644 --- a/Documentation/devicetree/bindings/sound/omap-mcpdm.txt +++ b/Documentation/devicetree/bindings/sound/omap-mcpdm.txt @@ -8,8 +8,6 @@ Required properties: - interrupts: Interrupt number for McPDM - interrupt-parent: The parent interrupt controller - ti,hwmods: Name of the hwmod associated to the McPDM -- clocks: phandle for the pdmclk provider, likely <&twl6040> -- clock-names: Must be "pdmclk" Example: @@ -21,11 +19,3 @@ mcpdm: mcpdm@40132000 { interrupt-parent = <&gic>; ti,hwmods = "mcpdm"; }; - -In board DTS file the pdmclk needs to be added: - -&mcpdm { - clocks = <&twl6040>; - clock-names = "pdmclk"; - status = "okay"; -}; diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index e7cdc51fd8065..74d6e6fdcfd0e 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -31,7 +31,6 @@ #include <linux/err.h> #include <linux/io.h> #include <linux/irq.h> -#include <linux/clk.h> #include <linux/slab.h> #include <linux/pm_runtime.h> #include <linux/of_device.h> @@ -55,7 +54,6 @@ struct omap_mcpdm { unsigned long phys_base; void __iomem *io_base; int irq; - struct clk *pdmclk; struct mutex mutex; @@ -390,7 +388,6 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai) struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); int ret; - clk_prepare_enable(mcpdm->pdmclk); pm_runtime_enable(mcpdm->dev); /* Disable lines while request is ongoing */ @@ -425,7 +422,6 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) pm_runtime_disable(mcpdm->dev); - clk_disable_unprepare(mcpdm->pdmclk); return 0; } @@ -445,8 +441,6 @@ static int omap_mcpdm_suspend(struct snd_soc_dai *dai) mcpdm->pm_active_count++; } - clk_disable_unprepare(mcpdm->pdmclk); - return 0; } @@ -454,8 +448,6 @@ static int omap_mcpdm_resume(struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); - clk_prepare_enable(mcpdm->pdmclk); - if (mcpdm->pm_active_count) { while (mcpdm->pm_active_count--) pm_runtime_get_sync(mcpdm->dev); @@ -549,15 +541,6 @@ static int asoc_mcpdm_probe(struct platform_device *pdev) mcpdm->dev = &pdev->dev; - mcpdm->pdmclk = devm_clk_get(&pdev->dev, "pdmclk"); - if (IS_ERR(mcpdm->pdmclk)) { - if (PTR_ERR(mcpdm->pdmclk) == -EPROBE_DEFER) - return -EPROBE_DEFER; - dev_warn(&pdev->dev, "Error getting pdmclk (%ld)!\n", - PTR_ERR(mcpdm->pdmclk)); - mcpdm->pdmclk = NULL; - } - ret = devm_snd_soc_register_component(&pdev->dev, &omap_mcpdm_component, &omap_mcpdm_dai, 1); From ae5b80d2b68eac945b124227dea34462118a6f01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Thu, 18 Aug 2016 11:51:14 +0200 Subject: [PATCH 341/513] drm/radeon: only apply the SS fractional workaround to RS[78]80 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Looks like some RV6xx have problems with that. bug: https://bugs.freedesktop.org/show_bug.cgi?id=97099 Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/atombios_crtc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c index a97abc8af657e..1dcf39084555b 100644 --- a/drivers/gpu/drm/radeon/atombios_crtc.c +++ b/drivers/gpu/drm/radeon/atombios_crtc.c @@ -627,7 +627,9 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc, if (radeon_crtc->ss.refdiv) { radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV; radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv; - if (rdev->family >= CHIP_RV770) + if (ASIC_IS_AVIVO(rdev) && + rdev->family != CHIP_RS780 && + rdev->family != CHIP_RS880) radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV; } } From 0215d59b154ab90c56c4fe49bc1deefe8bca18f1 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <Vineet.Gupta1@synopsys.com> Date: Thu, 18 Aug 2016 09:28:23 -0700 Subject: [PATCH 342/513] tools lib: Reinstate strlcpy() header guard with __UCLIBC__ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf tools build in recent kernels spews splat when cross compiling with uClibc: | CC util/alias.o | In file included from tools/perf/util/../ui/../util/cache.h:8:0, | from tools/perf/util/../ui/helpline.h:7, | from tools/perf/util/debug.h:8, | from arch/../util/cpumap.h:9, | from arch/../util/env.h:5, | from arch/common.h:4, | from arch/common.c:3: | tools/include/linux/string.h:12:15: warning: redundant redeclaration of ‘strlcpy’ [-Wredundant-decls] | extern size_t strlcpy(char *dest, const char *src, size_t size); ^ This is after commit 61a6445e463a31 ("tools lib: Guard the strlcpy() header with __GLIBC__"). The problem is uClibc also defines __GLIBC__ for exported headers for applications. So add that specific check to not trip for uClibc. Signed-off-by: Vineet Gupta <vgupta@synopsys.com> Cc: Adrian Hunter <adrian.hunter@intel.com> Cc: Alexey Brodkin <Alexey.Brodkin@synopsys.com> Cc: David Ahern <dsahern@gmail.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Namhyung Kim <namhyung@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Petri Gynther <pgynther@google.com> Cc: Wang Nan <wangnan0@huawei.com> Cc: linux-snps-arc@lists.infradead.org Link: http://lkml.kernel.org/r/1471537703-16439-1-git-send-email-vgupta@synopsys.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/include/linux/string.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index b968794773116..f436d2420a185 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -8,7 +8,11 @@ void *memdup(const void *src, size_t len); int strtobool(const char *s, bool *res); -#ifdef __GLIBC__ +/* + * glibc based builds needs the extern while uClibc doesn't. + * However uClibc headers also define __GLIBC__ hence the hack below + */ +#if defined(__GLIBC__) && !defined(__UCLIBC__) extern size_t strlcpy(char *dest, const char *src, size_t size); #endif From c53412ee8c7ec31373a4176ff7f3a6b79296c05c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo <acme@redhat.com> Date: Thu, 18 Aug 2016 16:30:28 -0300 Subject: [PATCH 343/513] perf evsel: Do not access outside hw cache name arrays We have to check if the values are >= *_MAX, not just >, fix it. From the bugzilla report: ''In file /tools/perf/util/evsel.c function __perf_evsel__hw_cache_name it appears that there is a bug that reads beyond the end of the buffer. The statement "if (type > PERF_COUNT_HW_CACHE_MAX)" allows type to be equal to the maximum value. Later, when statement "if (!perf_evsel__is_cache_op_valid(type, op))" is executed, the function can access array perf_evsel__hw_cache_stat[type] beyond the end of the buffer. It appears to me that the statement "if (type > PERF_COUNT_HW_CACHE_MAX)" should be "if (type >= PERF_COUNT_HW_CACHE_MAX)" Bug found with Coverity and manual code review. No attempts were made to execute the code with a maximum type value.'' Committer note: Testing it: $ perf record -e $(echo $(perf list cache | cut -d \[ -f1) | sed 's/ /,/g') usleep 1 [ perf record: Woken up 16 times to write data ] [ perf record: Captured and wrote 0.023 MB perf.data (34 samples) ] $ perf evlist L1-dcache-load-misses L1-dcache-loads L1-dcache-stores L1-icache-load-misses LLC-load-misses LLC-loads LLC-store-misses LLC-stores branch-load-misses branch-loads dTLB-load-misses dTLB-loads dTLB-store-misses dTLB-stores iTLB-load-misses iTLB-loads node-load-misses node-loads node-store-misses node-stores $ perf list cache List of pre-defined events (to be used in -e): L1-dcache-load-misses [Hardware cache event] L1-dcache-loads [Hardware cache event] L1-dcache-stores [Hardware cache event] L1-icache-load-misses [Hardware cache event] LLC-load-misses [Hardware cache event] LLC-loads [Hardware cache event] LLC-store-misses [Hardware cache event] LLC-stores [Hardware cache event] branch-load-misses [Hardware cache event] branch-loads [Hardware cache event] dTLB-load-misses [Hardware cache event] dTLB-loads [Hardware cache event] dTLB-store-misses [Hardware cache event] dTLB-stores [Hardware cache event] iTLB-load-misses [Hardware cache event] iTLB-loads [Hardware cache event] node-load-misses [Hardware cache event] node-loads [Hardware cache event] node-store-misses [Hardware cache event] node-stores [Hardware cache event] $ Reported-by: Brian Sweeney <bsweeney@lgsinnovations.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@kernel.org> Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=153351 Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> --- tools/perf/util/evsel.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d9b80ef881cde..21fd573106edd 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -507,17 +507,17 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size) u8 op, result, type = (config >> 0) & 0xff; const char *err = "unknown-ext-hardware-cache-type"; - if (type > PERF_COUNT_HW_CACHE_MAX) + if (type >= PERF_COUNT_HW_CACHE_MAX) goto out_err; op = (config >> 8) & 0xff; err = "unknown-ext-hardware-cache-op"; - if (op > PERF_COUNT_HW_CACHE_OP_MAX) + if (op >= PERF_COUNT_HW_CACHE_OP_MAX) goto out_err; result = (config >> 16) & 0xff; err = "unknown-ext-hardware-cache-result"; - if (result > PERF_COUNT_HW_CACHE_RESULT_MAX) + if (result >= PERF_COUNT_HW_CACHE_RESULT_MAX) goto out_err; err = "invalid-cache"; From b873b798af6386f3c7ca1636d4989e9b8f9d1794 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim <jaegeuk@kernel.org> Date: Thu, 4 Aug 2016 11:38:25 -0700 Subject: [PATCH 344/513] Revert "f2fs: use percpu_rw_semaphore" LKP reported -36.3% regression of fsmark.files_per_sec due to this patch. I've confirmed that fxmark [1] has also slight regression for DWAL. [1] https://github.com/sslab-gatech/fxmark This reverts commit ec795418c41850056feb956534edf059dc1155d4. --- fs/f2fs/f2fs.h | 12 ++++++------ fs/f2fs/node.c | 47 +++++++++++++++++++++++------------------------ fs/f2fs/super.c | 6 +----- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 675fa79d86f65..14f5fe2b841e2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -538,7 +538,7 @@ struct f2fs_nm_info { /* NAT cache management */ struct radix_tree_root nat_root;/* root of the nat entry cache */ struct radix_tree_root nat_set_root;/* root of the nat set cache */ - struct percpu_rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ + struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */ struct list_head nat_entries; /* cached nat entry list (clean) */ unsigned int nat_cnt; /* the # of cached nat entries */ unsigned int dirty_nat_cnt; /* total num of nat entries in set */ @@ -787,7 +787,7 @@ struct f2fs_sb_info { struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ struct inode *meta_inode; /* cache meta blocks */ struct mutex cp_mutex; /* checkpoint procedure lock */ - struct percpu_rw_semaphore cp_rwsem; /* blocking FS operations */ + struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ @@ -1074,22 +1074,22 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) { - percpu_down_read(&sbi->cp_rwsem); + down_read(&sbi->cp_rwsem); } static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi) { - percpu_up_read(&sbi->cp_rwsem); + up_read(&sbi->cp_rwsem); } static inline void f2fs_lock_all(struct f2fs_sb_info *sbi) { - percpu_down_write(&sbi->cp_rwsem); + down_write(&sbi->cp_rwsem); } static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi) { - percpu_up_write(&sbi->cp_rwsem); + up_write(&sbi->cp_rwsem); } static inline int __get_cp_reason(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b2fa4b615925b..f75d197d5beb0 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -206,14 +206,14 @@ int need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool need = false; - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { if (!get_nat_flag(e, IS_CHECKPOINTED) && !get_nat_flag(e, HAS_FSYNCED_INODE)) need = true; } - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need; } @@ -223,11 +223,11 @@ bool is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) struct nat_entry *e; bool is_cp = true; - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e && !get_nat_flag(e, IS_CHECKPOINTED)) is_cp = false; - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return is_cp; } @@ -237,13 +237,13 @@ bool need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino) struct nat_entry *e; bool need_update = true; - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ino); if (e && get_nat_flag(e, HAS_LAST_FSYNC) && (get_nat_flag(e, IS_CHECKPOINTED) || get_nat_flag(e, HAS_FSYNCED_INODE))) need_update = false; - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return need_update; } @@ -284,7 +284,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, struct f2fs_nm_info *nm_i = NM_I(sbi); struct nat_entry *e; - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, ni->nid); if (!e) { e = grab_nat_entry(nm_i, ni->nid); @@ -334,7 +334,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, set_nat_flag(e, HAS_FSYNCED_INODE, true); set_nat_flag(e, HAS_LAST_FSYNC, fsync_done); } - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) @@ -342,7 +342,8 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) struct f2fs_nm_info *nm_i = NM_I(sbi); int nr = nr_shrink; - percpu_down_write(&nm_i->nat_tree_lock); + if (!down_write_trylock(&nm_i->nat_tree_lock)) + return 0; while (nr_shrink && !list_empty(&nm_i->nat_entries)) { struct nat_entry *ne; @@ -351,7 +352,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) __del_from_nat_cache(nm_i, ne); nr_shrink--; } - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); return nr - nr_shrink; } @@ -373,13 +374,13 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) ni->nid = nid; /* Check nat cache */ - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); e = __lookup_nat_cache(nm_i, nid); if (e) { ni->ino = nat_get_ino(e); ni->blk_addr = nat_get_blkaddr(e); ni->version = nat_get_version(e); - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); return; } @@ -403,11 +404,11 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); /* cache nat entry */ - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); } /* @@ -1788,7 +1789,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT, true); - percpu_down_read(&nm_i->nat_tree_lock); + down_read(&nm_i->nat_tree_lock); while (1) { struct page *page = get_current_nat_page(sbi, nid); @@ -1820,7 +1821,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) remove_free_nid(nm_i, nid); } up_read(&curseg->journal_rwsem); - percpu_up_read(&nm_i->nat_tree_lock); + up_read(&nm_i->nat_tree_lock); ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid), nm_i->ra_nid_pages, META_NAT, false); @@ -2209,7 +2210,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) if (!nm_i->dirty_nat_cnt) return; - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); /* * if there are no enough space in journal to store dirty nat @@ -2232,7 +2233,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) list_for_each_entry_safe(set, tmp, &sets, set_list) __flush_nat_entry_set(sbi, set); - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); } @@ -2268,8 +2269,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi) mutex_init(&nm_i->build_lock); spin_lock_init(&nm_i->free_nid_list_lock); - if (percpu_init_rwsem(&nm_i->nat_tree_lock)) - return -ENOMEM; + init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); @@ -2326,7 +2326,7 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) spin_unlock(&nm_i->free_nid_list_lock); /* destroy nat cache */ - percpu_down_write(&nm_i->nat_tree_lock); + down_write(&nm_i->nat_tree_lock); while ((found = __gang_lookup_nat_cache(nm_i, nid, NATVEC_SIZE, natvec))) { unsigned idx; @@ -2351,9 +2351,8 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) kmem_cache_free(nat_entry_set_slab, setvec[idx]); } } - percpu_up_write(&nm_i->nat_tree_lock); + up_write(&nm_i->nat_tree_lock); - percpu_free_rwsem(&nm_i->nat_tree_lock); kfree(nm_i->nat_bitmap); sbi->nm_info = NULL; kfree(nm_i); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1b86d3f638efc..7f863a645ab1d 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -706,8 +706,6 @@ static void destroy_percpu_info(struct f2fs_sb_info *sbi) percpu_counter_destroy(&sbi->nr_pages[i]); percpu_counter_destroy(&sbi->alloc_valid_block_count); percpu_counter_destroy(&sbi->total_valid_inode_count); - - percpu_free_rwsem(&sbi->cp_rwsem); } static void f2fs_put_super(struct super_block *sb) @@ -1483,9 +1481,6 @@ static int init_percpu_info(struct f2fs_sb_info *sbi) { int i, err; - if (percpu_init_rwsem(&sbi->cp_rwsem)) - return -ENOMEM; - for (i = 0; i < NR_COUNT_TYPE; i++) { err = percpu_counter_init(&sbi->nr_pages[i], 0, GFP_KERNEL); if (err) @@ -1686,6 +1681,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) sbi->write_io[i].bio = NULL; } + init_rwsem(&sbi->cp_rwsem); init_waitqueue_head(&sbi->cp_wait); init_sb_info(sbi); From 3024c9a1fefb3ac0d1b0b078a2e3f2f69478daab Mon Sep 17 00:00:00 2001 From: Chao Yu <yuchao0@huawei.com> Date: Sat, 6 Aug 2016 21:09:41 +0800 Subject: [PATCH 345/513] Revert "f2fs: move i_size_write in f2fs_write_end" This reverts commit a2ee0a300344a6da76186129b078113354fe13d2. When testing with generic/032 of xfstest suit, failure message will be reported as below: generic/032 8s ... [failed, exit status 1] - output mismatch (see results/generic/032.out.bad) --- tests/generic/032.out 2015-01-11 16:52:27.643681072 +0800 +++ results/generic/032.out.bad 2016-08-06 13:44:43.861330500 +0800 @@ -1,5 +1,5 @@ QA output created by 032 -100 iterations -0000000 cdcd cdcd cdcd cdcd cdcd cdcd cdcd cdcd -* -0100000 +1: [768..775]: unwritten +Unwritten extents found! ... (Run 'diff -u tests/generic/032.out results/generic/032.out.bad' to see the entire diff) Ran: generic/032 Failures: generic/032 Failed 1 of 1 tests In write_end(), we should update i_size of inode before unlock page, otherwise, we will lose newly updated data in following race condition. Thread A Thread B - write_end - unlock page - writepages - lock_page - writepage if page is out-of-range of file size, we will skip writting the page. - update i_size Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index d64d2a515cb2c..ccb401eebc112 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1699,11 +1699,11 @@ static int f2fs_write_end(struct file *file, trace_f2fs_write_end(inode, pos, len, copied); set_page_dirty(page); - f2fs_put_page(page, 1); if (pos + copied > i_size_read(inode)) f2fs_i_size_write(inode, pos + copied); + f2fs_put_page(page, 1); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); return copied; } From fe8494bfc8c2914fca821d4ae994aef039be5cf1 Mon Sep 17 00:00:00 2001 From: Chao Yu <yuchao0@huawei.com> Date: Thu, 4 Aug 2016 20:13:02 +0800 Subject: [PATCH 346/513] f2fs: allow copying file range only in between regular files Only if two input files are regular files, we allow copying data in range of them, otherwise, deny it. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> --- fs/f2fs/file.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0e493f63ea414..685e629f768bd 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2086,8 +2086,8 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, if (unlikely(f2fs_readonly(src->i_sb))) return -EROFS; - if (S_ISDIR(src->i_mode) || S_ISDIR(dst->i_mode)) - return -EISDIR; + if (!S_ISREG(src->i_mode) || !S_ISREG(dst->i_mode)) + return -EINVAL; if (f2fs_encrypted_inode(src) || f2fs_encrypted_inode(dst)) return -EOPNOTSUPP; From 20a3d61d46e1fb45efa6eb4637c0dcd3f00a14e9 Mon Sep 17 00:00:00 2001 From: Chao Yu <yuchao0@huawei.com> Date: Thu, 4 Aug 2016 20:13:03 +0800 Subject: [PATCH 347/513] f2fs: avoid potential deadlock in f2fs_move_file_range Thread A Thread B - inode_lock fileA - inode_lock fileB - inode_lock fileA - inode_lock fileB We may encounter above potential deadlock during moving file range in concurrent scenario. This patch fixes the issue by using inode_trylock instead. Signed-off-by: Chao Yu <yuchao0@huawei.com> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> --- fs/f2fs/file.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 685e629f768bd..47abb96098e49 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2093,8 +2093,12 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, return -EOPNOTSUPP; inode_lock(src); - if (src != dst) - inode_lock(dst); + if (src != dst) { + if (!inode_trylock(dst)) { + ret = -EBUSY; + goto out; + } + } ret = -EINVAL; if (pos_in + len > src->i_size || pos_in + len < pos_in) @@ -2152,6 +2156,7 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, out_unlock: if (src != dst) inode_unlock(dst); +out: inode_unlock(src); return ret; } From d9dc1702b297ec4a6bb9c0326a70641b322ba886 Mon Sep 17 00:00:00 2001 From: Eric Wheeler <git@linux.ewheeler.net> Date: Fri, 17 Jun 2016 15:01:54 -0700 Subject: [PATCH 348/513] bcache: register_bcache(): call blkdev_put() when cache_alloc() fails register_cache() is supposed to return an error string on error so that register_bcache() will will blkdev_put and cleanup other user counters, but it does not set 'char *err' when cache_alloc() fails (eg, due to memory pressure) and thus register_bcache() performs no cleanup. register_bcache() <----------\ <- no jump to err_close, no blkdev_put() | | +->register_cache() | <- fails to set char *err | | +->cache_alloc() ---/ <- returns error This patch sets `char *err` for this failure case so that register_cache() will cause register_bcache() to correctly jump to err_close and do cleanup. This was tested under OOM conditions that triggered the bug. Signed-off-by: Eric Wheeler <bcache@linux.ewheeler.net> Cc: Kent Overstreet <kent.overstreet@gmail.com> Cc: stable@vger.kernel.org --- drivers/md/bcache/super.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 95a4ca6ce6fff..6ada14b9a1579 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1844,7 +1844,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; - const char *err = NULL; + const char *err = NULL; /* must be set for any error case */ int ret = 0; memcpy(&ca->sb, sb, sizeof(struct cache_sb)); @@ -1861,8 +1861,13 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(ca); - if (ret != 0) + if (ret != 0) { + if (ret == -ENOMEM) + err = "cache_alloc(): -ENOMEM"; + else + err = "cache_alloc(): unknown error"; goto err; + } if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { err = "error calling kobject_add"; From acc9cf8c66c66b2cbbdb4a375537edee72be64df Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@gmail.com> Date: Wed, 17 Aug 2016 18:21:24 -0700 Subject: [PATCH 349/513] bcache: RESERVE_PRIO is too small by one when prio_buckets() is a power of two. This patch fixes a cachedev registration-time allocation deadlock. This can deadlock on boot if your initrd auto-registeres bcache devices: Allocator thread: [ 720.727614] INFO: task bcache_allocato:3833 blocked for more than 120 seconds. [ 720.732361] [<ffffffff816eeac7>] schedule+0x37/0x90 [ 720.732963] [<ffffffffa05192b8>] bch_bucket_alloc+0x188/0x360 [bcache] [ 720.733538] [<ffffffff810e6950>] ? prepare_to_wait_event+0xf0/0xf0 [ 720.734137] [<ffffffffa05302bd>] bch_prio_write+0x19d/0x340 [bcache] [ 720.734715] [<ffffffffa05190bf>] bch_allocator_thread+0x3ff/0x470 [bcache] [ 720.735311] [<ffffffff816ee41c>] ? __schedule+0x2dc/0x950 [ 720.735884] [<ffffffffa0518cc0>] ? invalidate_buckets+0x980/0x980 [bcache] Registration thread: [ 720.710403] INFO: task bash:3531 blocked for more than 120 seconds. [ 720.715226] [<ffffffff816eeac7>] schedule+0x37/0x90 [ 720.715805] [<ffffffffa05235cd>] __bch_btree_map_nodes+0x12d/0x150 [bcache] [ 720.716409] [<ffffffffa0522d30>] ? bch_btree_insert_check_key+0x1c0/0x1c0 [bcache] [ 720.717008] [<ffffffffa05236e4>] bch_btree_insert+0xf4/0x170 [bcache] [ 720.717586] [<ffffffff810e6950>] ? prepare_to_wait_event+0xf0/0xf0 [ 720.718191] [<ffffffffa0527d9a>] bch_journal_replay+0x14a/0x290 [bcache] [ 720.718766] [<ffffffff810cc90d>] ? ttwu_do_activate.constprop.94+0x5d/0x70 [ 720.719369] [<ffffffff810cf684>] ? try_to_wake_up+0x1d4/0x350 [ 720.719968] [<ffffffffa05317d0>] run_cache_set+0x580/0x8e0 [bcache] [ 720.720553] [<ffffffffa053302e>] register_bcache+0xe2e/0x13b0 [bcache] [ 720.721153] [<ffffffff81354cef>] kobj_attr_store+0xf/0x20 [ 720.721730] [<ffffffff812a2dad>] sysfs_kf_write+0x3d/0x50 [ 720.722327] [<ffffffff812a225a>] kernfs_fop_write+0x12a/0x180 [ 720.722904] [<ffffffff81225177>] __vfs_write+0x37/0x110 [ 720.723503] [<ffffffff81228048>] ? __sb_start_write+0x58/0x110 [ 720.724100] [<ffffffff812cedb3>] ? security_file_permission+0x23/0xa0 [ 720.724675] [<ffffffff812258a9>] vfs_write+0xa9/0x1b0 [ 720.725275] [<ffffffff8102479c>] ? do_audit_syscall_entry+0x6c/0x70 [ 720.725849] [<ffffffff81226755>] SyS_write+0x55/0xd0 [ 720.726451] [<ffffffff8106a390>] ? do_page_fault+0x30/0x80 [ 720.727045] [<ffffffff816f2cae>] system_call_fastpath+0x12/0x71 The fifo code in upstream bcache can't use the last element in the buffer, which was the cause of the bug: if you asked for a power of two size, it'd give you a fifo that could hold one less than what you asked for rather than allocating a buffer twice as big. Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Tested-by: Eric Wheeler <bcache@linux.ewheeler.net> Cc: stable@vger.kernel.org --- drivers/md/bcache/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6ada14b9a1579..6b93e1b77767b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -1820,7 +1820,7 @@ static int cache_alloc(struct cache *ca) free = roundup_pow_of_two(ca->sb.nbuckets) >> 10; if (!init_fifo(&ca->free[RESERVE_BTREE], 8, GFP_KERNEL) || - !init_fifo(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || + !init_fifo_exact(&ca->free[RESERVE_PRIO], prio_buckets(ca), GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_MOVINGGC], free, GFP_KERNEL) || !init_fifo(&ca->free[RESERVE_NONE], free, GFP_KERNEL) || !init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) || From 90706094d5be614ae7285b3c96c3125bb198618c Mon Sep 17 00:00:00 2001 From: Eric Wheeler <git@linux.ewheeler.net> Date: Thu, 18 Aug 2016 20:15:26 -0700 Subject: [PATCH 350/513] bcache: pr_err: more meaningful error message when nr_stripes is invalid The original error was thought to be corruption, but was actually caused by: make-bcache --data-offset N where N was in bytes and should have been in sectors. While userspace tools should be updated to check --data-offset beyond end of volume, hopefully this will help others that might not have noticed the units. Signed-off-by: Eric Wheeler <bcache@linux.ewheeler.net> Cc: Kent Overstreet <kent.overstreet@gmail.com> --- drivers/md/bcache/super.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 6b93e1b77767b..849ad441cd76b 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -760,7 +760,8 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, if (!d->nr_stripes || d->nr_stripes > INT_MAX || d->nr_stripes > SIZE_MAX / sizeof(atomic_t)) { - pr_err("nr_stripes too large"); + pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", + (unsigned)d->nr_stripes); return -ENOMEM; } From e633fc7a1347528c3b4a6bbdeb41f5d63988242c Mon Sep 17 00:00:00 2001 From: Robin Murphy <robin.murphy@arm.com> Date: Thu, 11 Aug 2016 17:44:05 +0100 Subject: [PATCH 351/513] iommu/io-pgtable-arm-v7s: Fix attributes when splitting blocks Due to the attribute bits being all over the place in the different types of short-descriptor PTEs, when remapping an existing entry, e.g. splitting a section into pages, we take the approach of decomposing the PTE attributes back to the IOMMU API flags to start from scratch. On inspection, though, the existing code seems to have got the read-only bit backwards and ignored the XN bit. How embarrassing... Fortunately the primary user so far, the Mediatek IOMMU, both never splits blocks (because it only serves non-overlapping DMA API calls) and also ignores permissions anyway, but let's put things right before any future users trip up. Cc: <stable@vger.kernel.org> Fixes: e5fc9753b1a8 ("iommu/io-pgtable: Add ARMv7 short descriptor support") Signed-off-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- drivers/iommu/io-pgtable-arm-v7s.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8c6139986d7d3..def8ca1c982d5 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -286,12 +286,14 @@ static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl) int prot = IOMMU_READ; arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl); - if (attr & ARM_V7S_PTE_AP_RDONLY) + if (!(attr & ARM_V7S_PTE_AP_RDONLY)) prot |= IOMMU_WRITE; if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0) prot |= IOMMU_MMIO; else if (pte & ARM_V7S_ATTR_C) prot |= IOMMU_CACHE; + if (pte & ARM_V7S_ATTR_XN(lvl)) + prot |= IOMMU_NOEXEC; return prot; } From aea2037e0d3e23c3be1498feae29f71ca997d9e6 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Fri, 29 Jul 2016 11:15:37 +0100 Subject: [PATCH 352/513] iommu/arm-smmu: Fix CMDQ error handling In the unlikely event of a global command queue error, the ARM SMMUv3 driver attempts to convert the problematic command into a CMD_SYNC and resume the command queue. Unfortunately, this code is pretty badly broken: 1. It uses the index into the error string table as the CMDQ index, so we probably read the wrong entry out of the queue 2. The arguments to queue_write are the wrong way round, so we end up writing from the queue onto the stack. These happily cancel out, so the kernel is likely to stay alive, but the command queue will probably fault again when we resume. This patch fixes the error handling code to use the correct queue index and write back the CMD_SYNC to the faulting entry. Cc: <stable@vger.kernel.org> Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Reported-by: Diwakar Subraveti <Diwakar.Subraveti@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- drivers/iommu/arm-smmu-v3.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index ce801170d5f23..330623f8e3442 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -879,7 +879,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) * We may have concurrent producers, so we need to be careful * not to touch any of the shadow cmdq state. */ - queue_read(cmd, Q_ENT(q, idx), q->ent_dwords); + queue_read(cmd, Q_ENT(q, cons), q->ent_dwords); dev_err(smmu->dev, "skipping command in error state:\n"); for (i = 0; i < ARRAY_SIZE(cmd); ++i) dev_err(smmu->dev, "\t0x%016llx\n", (unsigned long long)cmd[i]); @@ -890,7 +890,7 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) return; } - queue_write(cmd, Q_ENT(q, idx), q->ent_dwords); + queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); } static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, From 7611da865c1060b2a7c87a15de663a59035747f8 Mon Sep 17 00:00:00 2001 From: David Daney <david.daney@cavium.com> Date: Thu, 18 Aug 2016 15:41:58 -0700 Subject: [PATCH 353/513] irqchip/gicv3-its: Disable the ITS before initializing it When starting a kexec/kdump kernel, the GIC ITS will already have been enabled. According to the ARM Generic Interrupt Controller Architecture Specification (GIC architecture Version 3.0 and version 4.0), writing to GITS_BASER<n> or GITS_CBASER is "UNPREDICTABLE" when the ITS is enabled. On Cavium Thunder systems, this prevents the ITS from being initializing in the kexec/kdump kernel, resulting in failure to register/enable interrupts for all devices. The fix is to disable the ITS if it is not already in the disabled state. This allows the ITS to be properly initialized and then re-enabled in the kexec/kdump kernel. Acked-by: Marc Zyngier <marc.zyngier@arm.com> Signed-off-by: David Daney <david.daney@cavium.com> Signed-off-by: Marc Zyngier <marc.zyngier@arm.com> --- drivers/irqchip/irq-gic-v3-its.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 7ceaba81efb44..36b9c28a5c917 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1545,7 +1545,12 @@ static int its_force_quiescent(void __iomem *base) u32 val; val = readl_relaxed(base + GITS_CTLR); - if (val & GITS_CTLR_QUIESCENT) + /* + * GIC architecture specification requires the ITS to be both + * disabled and quiescent for writes to GITS_BASER<n> or + * GITS_CBASER to not have UNPREDICTABLE results. + */ + if ((val & GITS_CTLR_QUIESCENT) && !(val & GITS_CTLR_ENABLE)) return 0; /* Disable the generation of all interrupts to this ITS */ From 3714ce1d6655098ee69ede632883e5874d67e4ab Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Fri, 5 Aug 2016 19:49:45 +0100 Subject: [PATCH 354/513] iommu/arm-smmu: Disable stalling faults for all endpoints Enabling stalling faults can result in hardware deadlock on poorly designed systems, particularly those with a PCI root complex upstream of the SMMU. Although it's not really Linux's job to save hardware integrators from their own misfortune, it *is* our job to stop userspace (e.g. VFIO clients) from hosing the system for everybody else, even if they might already be required to have elevated privileges. Given that the fault handling code currently executes entirely in IRQ context, there is nothing that can sensibly be done to recover from things like page faults anyway, so let's rip this code out for now and avoid the potential for deadlock. Cc: <stable@vger.kernel.org> Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Reported-by: Matt Evans <matt.evans@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- drivers/iommu/arm-smmu.c | 34 +++++++--------------------------- 1 file changed, 7 insertions(+), 27 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 4f49fe29f2029..2db74ebc32406 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -686,8 +686,7 @@ static struct iommu_gather_ops arm_smmu_gather_ops = { static irqreturn_t arm_smmu_context_fault(int irq, void *dev) { - int flags, ret; - u32 fsr, fsynr, resume; + u32 fsr, fsynr; unsigned long iova; struct iommu_domain *domain = dev; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -701,34 +700,15 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) if (!(fsr & FSR_FAULT)) return IRQ_NONE; - if (fsr & FSR_IGN) - dev_err_ratelimited(smmu->dev, - "Unexpected context fault (fsr 0x%x)\n", - fsr); - fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0); - flags = fsynr & FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ; - iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR); - if (!report_iommu_fault(domain, smmu->dev, iova, flags)) { - ret = IRQ_HANDLED; - resume = RESUME_RETRY; - } else { - dev_err_ratelimited(smmu->dev, - "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n", - iova, fsynr, cfg->cbndx); - ret = IRQ_NONE; - resume = RESUME_TERMINATE; - } - - /* Clear the faulting FSR */ - writel(fsr, cb_base + ARM_SMMU_CB_FSR); - /* Retry or terminate any stalled transactions */ - if (fsr & FSR_SS) - writel_relaxed(resume, cb_base + ARM_SMMU_CB_RESUME); + dev_err_ratelimited(smmu->dev, + "Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n", + fsr, iova, fsynr, cfg->cbndx); - return ret; + writel(fsr, cb_base + ARM_SMMU_CB_FSR); + return IRQ_HANDLED; } static irqreturn_t arm_smmu_global_fault(int irq, void *dev) @@ -837,7 +817,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, } /* SCTLR */ - reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; + reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP; if (stage1) reg |= SCTLR_S1_ASIDPNE; #ifdef __BIG_ENDIAN From 5bc0a11664e17e9f9551983f5b660bd48b57483c Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Tue, 16 Aug 2016 14:29:16 +0100 Subject: [PATCH 355/513] iommu/arm-smmu: Don't BUG() if we find aborting STEs with disable_bypass The disable_bypass cmdline option changes the SMMUv3 driver to put down faulting stream table entries by default, as opposed to bypassing transactions from unconfigured devices. In this mode of operation, it is entirely expected to see aborting entries in the stream table if and when we come to installing a valid translation, so don't trigger a BUG() as a result of misdiagnosing these entries as stream table corruption. Cc: <stable@vger.kernel.org> Fixes: 48ec83bcbcf5 ("iommu/arm-smmu: Add initial driver support for ARM SMMUv3 devices") Tested-by: Robin Murphy <robin.murphy@arm.com> Reported-by: Robin Murphy <robin.murphy@arm.com> Reviewed-by: Robin Murphy <robin.murphy@arm.com> Signed-off-by: Will Deacon <will.deacon@arm.com> --- drivers/iommu/arm-smmu-v3.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 330623f8e3442..641e887613193 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1034,6 +1034,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, case STRTAB_STE_0_CFG_S2_TRANS: ste_live = true; break; + case STRTAB_STE_0_CFG_ABORT: + if (disable_bypass) + break; default: BUG(); /* STE corruption */ } From d1e81428826221d7ff8e4d83db784d099cd232a7 Mon Sep 17 00:00:00 2001 From: Mark Brown <broonie@kernel.org> Date: Thu, 18 Aug 2016 19:32:59 +0100 Subject: [PATCH 356/513] ASoC: core: Clean up DAPM before the card debugfs Both the card and DAPM cleanups recursively delete their debugfs directories. Since the DAPM debugfs subdirectory for the card is located within the card debugfs this means we end up trying to double free the DAPM subdirectory. Reorder the cleanup to free the card debugfs after we've cleaned up DAPM and it has deleted its own subdirectory. Reported-by: Russell King - ARM Linux <linux@armlinux.org.uk> Tested-by: Russell King <rmk+kernel@armlinux.org.uk> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/soc-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index 16369cad48038..66a33f1e48817 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -2083,14 +2083,13 @@ static int soc_cleanup_card_resources(struct snd_soc_card *card) /* remove auxiliary devices */ soc_remove_aux_devices(card); + snd_soc_dapm_free(&card->dapm); soc_cleanup_card_debugfs(card); /* remove the card */ if (card->remove) card->remove(card); - snd_soc_dapm_free(&card->dapm); - snd_card_free(card->snd_card); return 0; From 2527ecc9195e9c66252af24c4689e8a67cd4ccb9 Mon Sep 17 00:00:00 2001 From: Linus Walleij <linus.walleij@linaro.org> Date: Tue, 16 Aug 2016 09:58:25 +0200 Subject: [PATCH 357/513] gpio: Fix OF build problem on UM The UserMode (UM) Linux build was failing in gpiolib-of as it requires ioremap()/iounmap() to exist, which is absent from UM. The non-existence of IO memory is negatively defined as CONFIG_NO_IOMEM which means we need to depend on HAS_IOMEM. Cc: stable@vger.kernel.org Cc: Geert Uytterhoeven <geert@linux-m68k.org> Reported-by: kbuild test robot <fengguang.wu@intel.com> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/gpio/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 98dd47a30fc75..24f833c3d3bab 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -50,6 +50,7 @@ config GPIO_DEVRES config OF_GPIO def_bool y depends on OF + depends on HAS_IOMEM config GPIO_ACPI def_bool y From 048c28c91e56781082bc17d181e460b81e7e8bcb Mon Sep 17 00:00:00 2001 From: Linus Walleij <linus.walleij@linaro.org> Date: Tue, 16 Aug 2016 12:07:31 +0200 Subject: [PATCH 358/513] gpio: make any OF dependent driver depend on OF_GPIO The drivers that depend on OF but not OF_GPIO are wreaking havoc with the autobuilders for archs that have all requirements for OF but not for OF_GPIO, particularly the UM (Usermode) arch does not have iomem (NO_IOMEM) which result in configuring GPIOLIB but without OF_GPIO which is wrong if the driver is using the .of_node of the gpiochip, which only appears with OF_GPIO. After a brief look at the drivers just depending on OF it seems most if not all of them actually require stuff from gpiolib-of so the dependency is wrong in the first place. This simply patches the Kconfig so that all GPIO drivers using OF depend on OF_GPIO rather than just OF. Cc: Rabin Vincent <rabin@rab.in> Cc: Pramod Gurav <pramod.gurav@smartplayin.com> Cc: Andreas Larsson <andreas@gaisler.com> Cc: Gregory CLEMENT <gregory.clement@free-electrons.com> Cc: Thierry Reding <treding@nvidia.com> Cc: Laxman Dewangan <ldewangan@nvidia.com> Cc: Alexandre Courbot <acourbot@nvidia.com> Cc: Geert Uytterhoeven <geert+renesas@glider.be> Cc: Phil Reid <preid@electromag.com.au> Signed-off-by: Linus Walleij <linus.walleij@linaro.org> --- drivers/gpio/Kconfig | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index 24f833c3d3bab..66a94103798bb 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -189,7 +189,7 @@ config GPIO_EP93XX config GPIO_ETRAXFS bool "Axis ETRAX FS General I/O" depends on CRIS || COMPILE_TEST - depends on OF + depends on OF_GPIO select GPIO_GENERIC select GPIOLIB_IRQCHIP help @@ -215,7 +215,7 @@ config GPIO_GENERIC_PLATFORM config GPIO_GRGPIO tristate "Aeroflex Gaisler GRGPIO support" - depends on OF + depends on OF_GPIO select GPIO_GENERIC select IRQ_DOMAIN help @@ -313,7 +313,7 @@ config GPIO_MPC8XXX config GPIO_MVEBU def_bool y depends on PLAT_ORION - depends on OF + depends on OF_GPIO select GENERIC_IRQ_CHIP config GPIO_MXC @@ -406,7 +406,7 @@ config GPIO_TEGRA bool "NVIDIA Tegra GPIO support" default ARCH_TEGRA depends on ARCH_TEGRA || COMPILE_TEST - depends on OF + depends on OF_GPIO help Say yes here to support GPIO pins on NVIDIA Tegra SoCs. @@ -1100,7 +1100,7 @@ menu "SPI GPIO expanders" config GPIO_74X164 tristate "74x164 serial-in/parallel-out 8-bits shift register" - depends on OF + depends on OF_GPIO help Driver for 74x164 compatible serial-in/parallel-out 8-outputs shift registers. This driver can be used to provide access From a305a4387acb01cecadeeea5151c049a022a1bfc Mon Sep 17 00:00:00 2001 From: Brendan Jackman <brendan.jackman@arm.com> Date: Wed, 17 Aug 2016 16:14:59 +0100 Subject: [PATCH 359/513] thermal: cpu_cooling: Fix NULL dereference in cpufreq_state2power Currently all CPU cooling devices share a `struct thermal_cooling_device_ops` instance. The thermal core uses the presence of functions in this struct to determine if a cooling device has a power model (see cdev_is_power_actor). cpu_cooling.c adds the power model functions to the shared struct when a device is registered with a power model. Therefore, if a CPU cooling device is registered using [of_]cpufreq_power_cooling_register, _all_ devices will be determined to have a power model, including any registered with [of_]cpufreq_cooling_register. This can result in cpufreq_state2power being called on a device where dyn_power_table is NULL. With this commit, instead of having a shared thermal_cooling_device_ops which is mutated, we have two versions: one with the power functions and one without. Signed-off-by: Brendan Jackman <brendan.jackman@arm.com> Cc: Amit Daniel Kachhap <amit.kachhap@gmail.com> Cc: Viresh Kumar <viresh.kumar@linaro.org> Cc: Javi Merino <javi.merino@arm.com> Acked-by: Viresh Kumar <viresh.kumar@linaro.org> Acked-by: Javi Merino <javi.merino@arm.com> Signed-off-by: Zhang Rui <rui.zhang@intel.com> --- drivers/thermal/cpu_cooling.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 3788ed74c9abe..a32b41783b777 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -740,12 +740,22 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, } /* Bind cpufreq callbacks to thermal cooling device ops */ + static struct thermal_cooling_device_ops cpufreq_cooling_ops = { .get_max_state = cpufreq_get_max_state, .get_cur_state = cpufreq_get_cur_state, .set_cur_state = cpufreq_set_cur_state, }; +static struct thermal_cooling_device_ops cpufreq_power_cooling_ops = { + .get_max_state = cpufreq_get_max_state, + .get_cur_state = cpufreq_get_cur_state, + .set_cur_state = cpufreq_set_cur_state, + .get_requested_power = cpufreq_get_requested_power, + .state2power = cpufreq_state2power, + .power2state = cpufreq_power2state, +}; + /* Notifier for cpufreq policy change */ static struct notifier_block thermal_cpufreq_notifier_block = { .notifier_call = cpufreq_thermal_notifier, @@ -795,6 +805,7 @@ __cpufreq_cooling_register(struct device_node *np, struct cpumask temp_mask; unsigned int freq, i, num_cpus; int ret; + struct thermal_cooling_device_ops *cooling_ops; cpumask_and(&temp_mask, clip_cpus, cpu_online_mask); policy = cpufreq_cpu_get(cpumask_first(&temp_mask)); @@ -850,10 +861,6 @@ __cpufreq_cooling_register(struct device_node *np, cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus); if (capacitance) { - cpufreq_cooling_ops.get_requested_power = - cpufreq_get_requested_power; - cpufreq_cooling_ops.state2power = cpufreq_state2power; - cpufreq_cooling_ops.power2state = cpufreq_power2state; cpufreq_dev->plat_get_static_power = plat_static_func; ret = build_dyn_power_table(cpufreq_dev, capacitance); @@ -861,6 +868,10 @@ __cpufreq_cooling_register(struct device_node *np, cool_dev = ERR_PTR(ret); goto free_table; } + + cooling_ops = &cpufreq_power_cooling_ops; + } else { + cooling_ops = &cpufreq_cooling_ops; } ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); @@ -885,7 +896,7 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_dev->id); cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev, - &cpufreq_cooling_ops); + cooling_ops); if (IS_ERR(cool_dev)) goto remove_idr; From 57027db00d1094d0abd5776899b00ca55d42d37c Mon Sep 17 00:00:00 2001 From: Markus Elfring <elfring@users.sourceforge.net> Date: Mon, 15 Aug 2016 11:22:40 +0200 Subject: [PATCH 360/513] Thermal-INT3406: Delete owner assignment The field "owner" is set by core. Thus delete an extra initialisation. Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: Markus Elfring <elfring@users.sourceforge.net> Signed-off-by: Zhang Rui <rui.zhang@intel.com> --- drivers/thermal/int340x_thermal/int3406_thermal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/thermal/int340x_thermal/int3406_thermal.c b/drivers/thermal/int340x_thermal/int3406_thermal.c index a578cd257db4b..1891f34ab7fcd 100644 --- a/drivers/thermal/int340x_thermal/int3406_thermal.c +++ b/drivers/thermal/int340x_thermal/int3406_thermal.c @@ -225,7 +225,6 @@ static struct platform_driver int3406_thermal_driver = { .remove = int3406_thermal_remove, .driver = { .name = "int3406 thermal", - .owner = THIS_MODULE, .acpi_match_table = int3406_thermal_match, }, }; From 55f2ac33adc78d429c470c9ca05e18c36dc24922 Mon Sep 17 00:00:00 2001 From: Caesar Wang <wxt@rock-chips.com> Date: Tue, 3 May 2016 20:07:41 +0800 Subject: [PATCH 361/513] thermal: trivial: fix the typo See the thermal code, the obvious typo from my editor. Signed-off-by: Caesar Wang <wxt@rock-chips.com> Signed-off-by: Eduardo Valentin <edubezval@gmail.com> Signed-off-by: Zhang Rui <rui.zhang@intel.com> --- Documentation/devicetree/bindings/thermal/thermal.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 41b817f7b6706..88b6ea1ad2903 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -62,7 +62,7 @@ For more examples of cooling devices, refer to the example sections below. Required properties: - #cooling-cells: Used to provide cooling device specific information Type: unsigned while referring to it. Must be at least 2, in order - Size: one cell to specify minimum and maximum cooling state used + Size: one cell to specify minimum and maximum cooling state used in the reference. The first cell is the minimum cooling state requested and the second cell is the maximum cooling state requested in the reference. @@ -119,7 +119,7 @@ Required properties: Optional property: - contribution: The cooling contribution to the thermal zone of the Type: unsigned referred cooling device at the referred trip point. - Size: one cell The contribution is a ratio of the sum + Size: one cell The contribution is a ratio of the sum of all cooling contributions within a thermal zone. Note: Using the THERMAL_NO_LIMIT (-1UL) constant in the cooling-device phandle @@ -145,7 +145,7 @@ Required properties: Size: one cell - thermal-sensors: A list of thermal sensor phandles and sensor specifier - Type: list of used while monitoring the thermal zone. + Type: list of used while monitoring the thermal zone. phandles + sensor specifier @@ -473,7 +473,7 @@ thermal-zones { <&adc>; /* pcb north */ /* hotspot = 100 * bandgap - 120 * adc + 484 */ - coefficients = <100 -120 484>; + coefficients = <100 -120 484>; trips { ... @@ -502,7 +502,7 @@ from the ADC sensor. The binding would be then: thermal-sensors = <&adc>; /* hotspot = 1 * adc + 6000 */ - coefficients = <1 6000>; + coefficients = <1 6000>; (d) - Board thermal From 829bc78aa7628e81a9de717316b85cbee3c5eb86 Mon Sep 17 00:00:00 2001 From: Corentin LABBE <clabbe.montjoie@gmail.com> Date: Tue, 16 Aug 2016 10:51:38 +0200 Subject: [PATCH 362/513] thermal: imx: fix a possible NULL dereference of_match_device could return NULL, and so cause a NULL pointer dereference later at line 472: data->socdata = of_id->data; For fixing this problem, we use of_device_get_match_data(), this will simplify the code a little by using a standard function for getting the match data. Reported-by: coverity (CID 1324128) Signed-off-by: LABBE Corentin <clabbe.montjoie@gmail.com> Signed-off-by: Zhang Rui <rui.zhang@intel.com> --- drivers/thermal/imx_thermal.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index c5547bd711dbe..e473548b5d289 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -471,8 +471,6 @@ MODULE_DEVICE_TABLE(of, of_imx_thermal_match); static int imx_thermal_probe(struct platform_device *pdev) { - const struct of_device_id *of_id = - of_match_device(of_imx_thermal_match, &pdev->dev); struct imx_thermal_data *data; struct regmap *map; int measure_freq; @@ -490,7 +488,7 @@ static int imx_thermal_probe(struct platform_device *pdev) } data->tempmon = map; - data->socdata = of_id->data; + data->socdata = of_device_get_match_data(&pdev->dev); /* make sure the IRQ flag is clear before enabling irq on i.MX6SX */ if (data->socdata->version == TEMPMON_IMX6SX) { From 21eb45db282317543ca46c821bbb8d5075e02cbe Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi <peter.ujfalusi@ti.com> Date: Fri, 19 Aug 2016 09:34:24 +0300 Subject: [PATCH 363/513] ASoC: omap-abe-twl6040: Correct dmic-codec device registration The dmic-codec was registered within the platform_driver's probe function, which can cause deferred probe to run in loops as reported and analyzed by Russell King. Use module_init/exit in the driver and handle the dmic-codec device registration and removal at that level instead of the platform_driver probe/remove. Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com> Reported-by: Russell King <rmk+kernel@armlinux.org.uk> Tested-by: Russell King <rmk+kernel@armlinux.org.uk> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/omap/omap-abe-twl6040.c | 61 ++++++++++++++++--------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/sound/soc/omap/omap-abe-twl6040.c b/sound/soc/omap/omap-abe-twl6040.c index 0843a68f277c2..f61b3b58083b9 100644 --- a/sound/soc/omap/omap-abe-twl6040.c +++ b/sound/soc/omap/omap-abe-twl6040.c @@ -38,10 +38,10 @@ struct abe_twl6040 { int jack_detection; /* board can detect jack events */ int mclk_freq; /* MCLK frequency speed for twl6040 */ - - struct platform_device *dmic_codec_dev; }; +struct platform_device *dmic_codec_dev; + static int omap_abe_hw_params(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params) { @@ -258,8 +258,6 @@ static int omap_abe_probe(struct platform_device *pdev) if (priv == NULL) return -ENOMEM; - priv->dmic_codec_dev = ERR_PTR(-EINVAL); - if (snd_soc_of_parse_card_name(card, "ti,model")) { dev_err(&pdev->dev, "Card name is not provided\n"); return -ENODEV; @@ -284,13 +282,6 @@ static int omap_abe_probe(struct platform_device *pdev) num_links = 2; abe_twl6040_dai_links[1].cpu_of_node = dai_node; abe_twl6040_dai_links[1].platform_of_node = dai_node; - - priv->dmic_codec_dev = platform_device_register_simple( - "dmic-codec", -1, NULL, 0); - if (IS_ERR(priv->dmic_codec_dev)) { - dev_err(&pdev->dev, "Can't instantiate dmic-codec\n"); - return PTR_ERR(priv->dmic_codec_dev); - } } else { num_links = 1; } @@ -299,16 +290,14 @@ static int omap_abe_probe(struct platform_device *pdev) of_property_read_u32(node, "ti,mclk-freq", &priv->mclk_freq); if (!priv->mclk_freq) { dev_err(&pdev->dev, "MCLK frequency not provided\n"); - ret = -EINVAL; - goto err_unregister; + return -EINVAL; } card->fully_routed = 1; if (!priv->mclk_freq) { dev_err(&pdev->dev, "MCLK frequency missing\n"); - ret = -ENODEV; - goto err_unregister; + return -ENODEV; } card->dai_link = abe_twl6040_dai_links; @@ -317,17 +306,9 @@ static int omap_abe_probe(struct platform_device *pdev) snd_soc_card_set_drvdata(card, priv); ret = snd_soc_register_card(card); - if (ret) { + if (ret) dev_err(&pdev->dev, "snd_soc_register_card() failed: %d\n", ret); - goto err_unregister; - } - - return 0; - -err_unregister: - if (!IS_ERR(priv->dmic_codec_dev)) - platform_device_unregister(priv->dmic_codec_dev); return ret; } @@ -335,13 +316,9 @@ static int omap_abe_probe(struct platform_device *pdev) static int omap_abe_remove(struct platform_device *pdev) { struct snd_soc_card *card = platform_get_drvdata(pdev); - struct abe_twl6040 *priv = snd_soc_card_get_drvdata(card); snd_soc_unregister_card(card); - if (!IS_ERR(priv->dmic_codec_dev)) - platform_device_unregister(priv->dmic_codec_dev); - return 0; } @@ -361,7 +338,33 @@ static struct platform_driver omap_abe_driver = { .remove = omap_abe_remove, }; -module_platform_driver(omap_abe_driver); +static int __init omap_abe_init(void) +{ + int ret; + + dmic_codec_dev = platform_device_register_simple("dmic-codec", -1, NULL, + 0); + if (IS_ERR(dmic_codec_dev)) { + pr_err("%s: dmic-codec device registration failed\n", __func__); + return PTR_ERR(dmic_codec_dev); + } + + ret = platform_driver_register(&omap_abe_driver); + if (ret) { + pr_err("%s: platform driver registration failed\n", __func__); + platform_device_unregister(dmic_codec_dev); + } + + return ret; +} +module_init(omap_abe_init); + +static void __exit omap_abe_exit(void) +{ + platform_driver_unregister(&omap_abe_driver); + platform_device_unregister(dmic_codec_dev); +} +module_exit(omap_abe_exit); MODULE_AUTHOR("Misael Lopez Cruz <misael.lopez@ti.com>"); MODULE_DESCRIPTION("ALSA SoC for OMAP boards with ABE and twl6040 codec"); From ba913e4f72fc9cfd03dad968dfb110eb49211d80 Mon Sep 17 00:00:00 2001 From: James Hogan <james.hogan@imgtec.com> Date: Fri, 19 Aug 2016 14:30:29 +0100 Subject: [PATCH 364/513] MIPS: KVM: Check for pfn noslot case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When mapping a page into the guest we error check using is_error_pfn(), however this doesn't detect a value of KVM_PFN_NOSLOT, indicating an error HVA for the page. This can only happen on MIPS right now due to unusual memslot management (e.g. being moved / removed / resized), or with an Enhanced Virtual Memory (EVA) configuration where the default KVM_HVA_ERR_* and kvm_is_error_hva() definitions are unsuitable (fixed in a later patch). This case will be treated as a pfn of zero, mapping the first page of physical memory into the guest. It would appear the MIPS KVM port wasn't updated prior to being merged (in v3.10) to take commit 81c52c56e2b4 ("KVM: do not treat noslot pfn as a error pfn") into account (merged v3.8), which converted a bunch of is_error_pfn() calls to is_error_noslot_pfn(). Switch to using is_error_noslot_pfn() instead to catch this case properly. Fixes: 858dd5d45733 ("KVM/MIPS32: MMU/TLB operations for the Guest.") Signed-off-by: James Hogan <james.hogan@imgtec.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: Radim Krčmář <rkrcmar@redhat.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: <stable@vger.kernel.org> # 3.10.y- Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> --- arch/mips/kvm/mmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 6cfdcf55572d6..121008c0fcc92 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -40,7 +40,7 @@ static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) srcu_idx = srcu_read_lock(&kvm->srcu); pfn = gfn_to_pfn(kvm, gfn); - if (is_error_pfn(pfn)) { + if (is_error_noslot_pfn(pfn)) { kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn); err = -EFAULT; goto out; From 13f479b9df4e2bbf2d16e7e1b02f3f55f70e2455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Wed, 17 Aug 2016 09:46:42 +0200 Subject: [PATCH 365/513] drm/radeon: fix radeon_move_blit on 32bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug seems to be present for a very long time. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/radeon/radeon_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 0c00e192c8458..c2e0a1ccdfbce 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -263,8 +263,8 @@ static int radeon_move_blit(struct ttm_buffer_object *bo, rdev = radeon_get_rdev(bo->bdev); ridx = radeon_copy_ring_index(rdev); - old_start = old_mem->start << PAGE_SHIFT; - new_start = new_mem->start << PAGE_SHIFT; + old_start = (u64)old_mem->start << PAGE_SHIFT; + new_start = (u64)new_mem->start << PAGE_SHIFT; switch (old_mem->mem_type) { case TTM_PL_VRAM: From 815d27a46f3119f74fe01fe10bf683aa5bc55597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Wed, 17 Aug 2016 09:45:25 +0200 Subject: [PATCH 366/513] drm/amdgpu: fix amdgpu_move_blit on 32bit systems MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug seems to be present for a very long time. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 9b61c8ba7aaf9..141bfcabd19c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -251,8 +251,8 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, adev = amdgpu_get_adev(bo->bdev); ring = adev->mman.buffer_funcs_ring; - old_start = old_mem->start << PAGE_SHIFT; - new_start = new_mem->start << PAGE_SHIFT; + old_start = (u64)old_mem->start << PAGE_SHIFT; + new_start = (u64)new_mem->start << PAGE_SHIFT; switch (old_mem->mem_type) { case TTM_PL_VRAM: From 847927bb3db2b18744ddec1b4fbb274ac7e05199 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Wed, 17 Aug 2016 14:10:37 +0200 Subject: [PATCH 367/513] drm/amdgpu: fix sdma_v2_4_ring_test_ib MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Typo in checking the return code. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 1351c7e834a21..a64715d90503a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -714,7 +714,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; - } else if (r) { + } else if (r < 0) { DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } From bdf001374bfe12677ae6ec5076452493fb682012 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Tue, 16 Aug 2016 19:52:35 +0200 Subject: [PATCH 368/513] drm/amdgpu: fix timeout value check in amd_sched_job_recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Could be that we don't actually have a timeout set. Signed-off-by: Christian König <christian.koenig@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> --- drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index ef312bb75fda0..963a24d46a93d 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -405,7 +405,7 @@ void amd_sched_job_recovery(struct amd_gpu_scheduler *sched) spin_lock(&sched->job_list_lock); s_job = list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node); - if (s_job) + if (s_job && sched->timeout != MAX_SCHEDULE_TIMEOUT) schedule_delayed_work(&s_job->work_tdr, sched->timeout); list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) { From 6c647b0eb01cd7326dca093590f5e123e3c68b9c Mon Sep 17 00:00:00 2001 From: Bob Liu <bob.liu@oracle.com> Date: Fri, 1 Jul 2016 15:45:57 -0400 Subject: [PATCH 369/513] xen-blkfront: fix places not updated after introducing 64KB page granularity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two places didn't get updated when 64KB page granularity was introduced, this patch fix them. Signed-off-by: Bob Liu <bob.liu@oracle.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkfront.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index be4fea6a5dd33..6a1756d72dcb2 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1315,7 +1315,7 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) rinfo->ring_ref[i] = GRANT_INVALID_REF; } } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * PAGE_SIZE)); + free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); rinfo->ring.sring = NULL; if (rinfo->irq) @@ -2008,7 +2008,7 @@ static int blkif_recover(struct blkfront_info *info) blkfront_gather_backend_features(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; - blk_queue_max_segments(info->rq, segs); + blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); for (r_index = 0; r_index < info->nr_rings; r_index++) { struct blkfront_ring_info *rinfo = &info->rinfo[r_index]; From 172335ada40ce26806e514c83a504b45c14a4139 Mon Sep 17 00:00:00 2001 From: Bob Liu <bob.liu@oracle.com> Date: Fri, 1 Jul 2016 17:43:39 -0400 Subject: [PATCH 370/513] xen-blkfront: introduce blkif_set_queue_limits() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blk_mq_update_nr_hw_queues() reset all queue limits to default which it's not as xen-blkfront expected, introducing blkif_set_queue_limits() to reset limits with initial correct values. Signed-off-by: Bob Liu <bob.liu@oracle.com> Acked-by: Roger Pau Monné <roger.pau@citrix.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkfront.c | 86 ++++++++++++++++++++---------------- 1 file changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 6a1756d72dcb2..f84e220a26e6e 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -189,6 +189,8 @@ struct blkfront_info struct mutex mutex; struct xenbus_device *xbdev; struct gendisk *gd; + u16 sector_size; + unsigned int physical_sector_size; int vdevice; blkif_vdev_t handle; enum blkif_state connected; @@ -910,9 +912,45 @@ static struct blk_mq_ops blkfront_mq_ops = { .map_queue = blk_mq_map_queue, }; +static void blkif_set_queue_limits(struct blkfront_info *info) +{ + struct request_queue *rq = info->rq; + struct gendisk *gd = info->gd; + unsigned int segments = info->max_indirect_segments ? : + BLKIF_MAX_SEGMENTS_PER_REQUEST; + + queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); + + if (info->feature_discard) { + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); + blk_queue_max_discard_sectors(rq, get_capacity(gd)); + rq->limits.discard_granularity = info->discard_granularity; + rq->limits.discard_alignment = info->discard_alignment; + if (info->feature_secdiscard) + queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); + } + + /* Hard sector size and max sectors impersonate the equiv. hardware. */ + blk_queue_logical_block_size(rq, info->sector_size); + blk_queue_physical_block_size(rq, info->physical_sector_size); + blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); + + /* Each segment in a request is up to an aligned page in size. */ + blk_queue_segment_boundary(rq, PAGE_SIZE - 1); + blk_queue_max_segment_size(rq, PAGE_SIZE); + + /* Ensure a merged request will fit in a single I/O ring slot. */ + blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); + + /* Make sure buffer addresses are sector-aligned. */ + blk_queue_dma_alignment(rq, 511); + + /* Make sure we don't use bounce buffers. */ + blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); +} + static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, - unsigned int physical_sector_size, - unsigned int segments) + unsigned int physical_sector_size) { struct request_queue *rq; struct blkfront_info *info = gd->private_data; @@ -944,36 +982,11 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size, } rq->queuedata = info; - queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq); - - if (info->feature_discard) { - queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, rq); - blk_queue_max_discard_sectors(rq, get_capacity(gd)); - rq->limits.discard_granularity = info->discard_granularity; - rq->limits.discard_alignment = info->discard_alignment; - if (info->feature_secdiscard) - queue_flag_set_unlocked(QUEUE_FLAG_SECERASE, rq); - } - - /* Hard sector size and max sectors impersonate the equiv. hardware. */ - blk_queue_logical_block_size(rq, sector_size); - blk_queue_physical_block_size(rq, physical_sector_size); - blk_queue_max_hw_sectors(rq, (segments * XEN_PAGE_SIZE) / 512); - - /* Each segment in a request is up to an aligned page in size. */ - blk_queue_segment_boundary(rq, PAGE_SIZE - 1); - blk_queue_max_segment_size(rq, PAGE_SIZE); - - /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_segments(rq, segments / GRANTS_PER_PSEG); - - /* Make sure buffer addresses are sector-aligned. */ - blk_queue_dma_alignment(rq, 511); - - /* Make sure we don't use bounce buffers. */ - blk_queue_bounce_limit(rq, BLK_BOUNCE_ANY); - - gd->queue = rq; + info->rq = gd->queue = rq; + info->gd = gd; + info->sector_size = sector_size; + info->physical_sector_size = physical_sector_size; + blkif_set_queue_limits(info); return 0; } @@ -1136,16 +1149,11 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity, gd->private_data = info; set_capacity(gd, capacity); - if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size, - info->max_indirect_segments ? : - BLKIF_MAX_SEGMENTS_PER_REQUEST)) { + if (xlvbd_init_blk_queue(gd, sector_size, physical_sector_size)) { del_gendisk(gd); goto release; } - info->rq = gd->queue; - info->gd = gd; - xlvbd_flush(info); if (vdisk_info & VDISK_READONLY) @@ -2007,6 +2015,8 @@ static int blkif_recover(struct blkfront_info *info) struct split_bio *split_bio; blkfront_gather_backend_features(info); + /* Reset limits changed by blk_mq_update_nr_hw_queues(). */ + blkif_set_queue_limits(info); segs = info->max_indirect_segments ? : BLKIF_MAX_SEGMENTS_PER_REQUEST; blk_queue_max_segments(info->rq, segs / GRANTS_PER_PSEG); From 4e876c2bd37fbb5c37a4554a79cf979d486f0e82 Mon Sep 17 00:00:00 2001 From: Bob Liu <bob.liu@oracle.com> Date: Wed, 27 Jul 2016 17:42:04 +0800 Subject: [PATCH 371/513] xen-blkfront: free resources if xlvbd_alloc_gendisk fails Current code forgets to free resources in the failure path of xlvbd_alloc_gendisk(), this patch fix it. Signed-off-by: Bob Liu <bob.liu@oracle.com> Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> --- drivers/block/xen-blkfront.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index f84e220a26e6e..88ef6d4729b46 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -2442,7 +2442,7 @@ static void blkfront_connect(struct blkfront_info *info) if (err) { xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s", info->xbdev->otherend); - return; + goto fail; } xenbus_switch_state(info->xbdev, XenbusStateConnected); @@ -2455,6 +2455,11 @@ static void blkfront_connect(struct blkfront_info *info) device_add_disk(&info->xbdev->dev, info->gd); info->is_ready = 1; + return; + +fail: + blkif_free(info, 0); + return; } /** From 47af45d684b5f3ae000ad448db02ce4f13f73273 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov <dmitry.torokhov@gmail.com> Date: Tue, 16 Aug 2016 17:38:54 -0700 Subject: [PATCH 372/513] Input: i8042 - set up shared ps2_cmd_mutex for AUX ports The commit 4097461897df ("Input: i8042 - break load dependency ...") correctly set up ps2_cmd_mutex pointer for the KBD port but forgot to do the same for AUX port(s), which results in communication on KBD and AUX ports to clash with each other. Fixes: 4097461897df ("Input: i8042 - break load dependency ...") Reported-by: Bruno Wolff III <bruno@wolff.to> Tested-by: Bruno Wolff III <bruno@wolff.to> Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> --- drivers/input/serio/i8042.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index b4d34086e73f5..405252a884dd4 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -1305,6 +1305,7 @@ static int __init i8042_create_aux_port(int idx) serio->write = i8042_aux_write; serio->start = i8042_start; serio->stop = i8042_stop; + serio->ps2_cmd_mutex = &i8042_mutex; serio->port_data = port; serio->dev.parent = &i8042_platform_device->dev; if (idx < 0) { From 86147e3cfa5e118b61e78f4f0bf29e920dcbd477 Mon Sep 17 00:00:00 2001 From: Liav Rehana <liavr@mellanox.com> Date: Tue, 16 Aug 2016 10:55:35 +0300 Subject: [PATCH 373/513] ARC: use correct offset in pt_regs for saving/restoring user mode r25 User mode callee regs are explicitly collected before signal delivery or breakpoint trap. r25 is special for kernel as it serves as task pointer, so user mode value is clobbered very early. It is saved in pt_regs where generally only scratch (aka caller saved) regs are saved. The code to access the corresponding pt_regs location had a subtle bug as it was using load/store with scaling of offset, whereas the offset was already byte wise correct. So fix this by replacing LD.AS with a standard LD Cc: <stable@vger.kernel.org> Signed-off-by: Liav Rehana <liavr@mellanox.com> Reviewed-by: Alexey Brodkin <abrodkin@synopsys.com> [vgupta: rewrote title and commit log] Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/asm/entry.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arc/include/asm/entry.h b/arch/arc/include/asm/entry.h index ad7860c5ce153..51597f344a62a 100644 --- a/arch/arc/include/asm/entry.h +++ b/arch/arc/include/asm/entry.h @@ -142,7 +142,7 @@ #ifdef CONFIG_ARC_CURR_IN_REG ; Retrieve orig r25 and save it with rest of callee_regs - ld.as r12, [r12, PT_user_r25] + ld r12, [r12, PT_user_r25] PUSH r12 #else PUSH r25 @@ -198,7 +198,7 @@ ; SP is back to start of pt_regs #ifdef CONFIG_ARC_CURR_IN_REG - st.as r12, [sp, PT_user_r25] + st r12, [sp, PT_user_r25] #endif .endm From 840c054fd0efb048df6fceb0c46385ec5b66dfe6 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Wed, 10 Aug 2016 14:10:57 -0700 Subject: [PATCH 374/513] ARC: Support syscall ABI v4 The syscall ABI includes the gcc functional calling ABI since a syscall implies userland caller and kernel callee. The current gcc ABI (v3) for ARCv2 ISA required 64-bit data be passed in even-odd register pairs, (potentially punching reg holes when passing such values as args). This was partly driven by the fact that the double-word LDD/STD instructions in ARCv2 expect the register alignment and thus gcc forcing this avoids extra MOV at the cost of a few unused register (which we have plenty anyways). This however was rejected as part of upstreaming gcc port to HS. So the new ABI v4 doesn't enforce the even-odd reg restriction. Do note that for ARCompact ISA builds v3 and v4 are practically the same in terms of gcc code generation. In terms of change management, we infer the new ABI if gcc 6.x onwards is used for building the kernel. This also needs a stable backport to enable older kernels to work with new tools/user-space Cc: <stable@vger.kernel.org> Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/uapi/asm/elf.h | 11 +++++++++-- arch/arc/kernel/process.c | 2 +- arch/arc/kernel/setup.c | 6 ++++-- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/uapi/asm/elf.h b/arch/arc/include/uapi/asm/elf.h index 0f99ac8fcbb23..0037a587320d5 100644 --- a/arch/arc/include/uapi/asm/elf.h +++ b/arch/arc/include/uapi/asm/elf.h @@ -13,8 +13,15 @@ /* Machine specific ELF Hdr flags */ #define EF_ARC_OSABI_MSK 0x00000f00 -#define EF_ARC_OSABI_ORIG 0x00000000 /* MUST be zero for back-compat */ -#define EF_ARC_OSABI_CURRENT 0x00000300 /* v3 (no legacy syscalls) */ + +#define EF_ARC_OSABI_V3 0x00000300 /* v3 (no legacy syscalls) */ +#define EF_ARC_OSABI_V4 0x00000400 /* v4 (64bit data any reg align) */ + +#if __GNUC__ < 6 +#define EF_ARC_OSABI_CURRENT EF_ARC_OSABI_V3 +#else +#define EF_ARC_OSABI_CURRENT EF_ARC_OSABI_V4 +#endif typedef unsigned long elf_greg_t; typedef unsigned long elf_fpregset_t; diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index b5db9e7fd6492..be1972bd2729e 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -199,7 +199,7 @@ int elf_check_arch(const struct elf32_hdr *x) } eflags = x->e_flags; - if ((eflags & EF_ARC_OSABI_MSK) < EF_ARC_OSABI_CURRENT) { + if ((eflags & EF_ARC_OSABI_MSK) != EF_ARC_OSABI_CURRENT) { pr_err("ABI mismatch - you need newer toolchain\n"); force_sigsegv(SIGSEGV, current); return 0; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a946400a86d0d..f52a0d0dc4622 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -291,8 +291,10 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) cpu->dccm.base_addr, TO_KB(cpu->dccm.sz), cpu->iccm.base_addr, TO_KB(cpu->iccm.sz)); - n += scnprintf(buf + n, len - n, - "OS ABI [v3]\t: no-legacy-syscalls\n"); + n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n", + EF_ARC_OSABI_CURRENT >> 8, + EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ? + "no-legacy-syscalls" : "64-bit data any register aligned"); return buf; } From d77976c414ed7f521b9c79b2a9dde0147a3cf754 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Wed, 17 Aug 2016 17:34:46 -0700 Subject: [PATCH 375/513] ARC: export kmap | MODPOST 7 modules | ERROR: "kmap" [fs/ext2/ext2.ko] undefined! | ../scripts/Makefile.modpost:91: recipe for target '__modpost' failed Cc: <stable@vger.kernel.org> Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/mm/highmem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index 04f83322c9fda..77ff64a874a1b 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -61,6 +61,7 @@ void *kmap(struct page *page) return kmap_high(page); } +EXPORT_SYMBOL(kmap); void *kmap_atomic(struct page *page) { From 1c3c909303924d30145601f47b6c058fdd2cbc2e Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Tue, 16 Aug 2016 18:27:07 -0700 Subject: [PATCH 376/513] ARC: mm: fix build breakage with STRICT_MM_TYPECHECKS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit | CC mm/memory.o | In file included from ../mm/memory.c:53:0: | ../include/linux/pfn_t.h: In function ‘pfn_t_pte’: | ../include/linux/pfn_t.h:78:2: error: conversion to non-scalar type requested | return pfn_pte(pfn_t_to_pfn(pfn), pgprot); With STRICT_MM_TYPECHECKS pte_t is a struct and the offending code forces a cast which ends up shifting a struct and hence the gcc warning. Note that in recent past some of the arches (aarch64, s390) made STRICT_MM_TYPECHECKS default, but we don't for ARC as this leads to slightly worse generated code, given ARC ABI definition of returning structs (which pte_t would become) Quoting from ARC ABI... "Results of type struct are returned in a caller-supplied temporary variable whose address is passed in r0. For such functions, the arguments are shifted so that they are passed in r1 and up." So - struct to be returned would be allocated on stack requiring extra code at call sites - callee updates stack memory to facilitate the return (vs. simple MOV into return reg r0) Hence STRICT_MM_TYPECHECKS is not enabled by default for ARC Cc: <stable@vger.kernel.org> #4.4+ Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 0f92d97432a21..89eeb37200518 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -280,7 +280,7 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) #define pte_page(pte) pfn_to_page(pte_pfn(pte)) #define mk_pte(page, prot) pfn_pte(page_to_pfn(page), prot) -#define pfn_pte(pfn, prot) (__pte(((pte_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot))) +#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ #define pte_pfn(pte) (pte_val(pte) >> PAGE_SHIFT) From 6040e57658eee6eb1315a26119101ca832d1f854 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Fri, 19 Aug 2016 12:47:01 -0700 Subject: [PATCH 377/513] Make the hardened user-copy code depend on having a hardened allocator The kernel test robot reported a usercopy failure in the new hardened sanity checks, due to a page-crossing copy of the FPU state into the task structure. This happened because the kernel test robot was testing with SLOB, which doesn't actually do the required book-keeping for slab allocations, and as a result the hardening code didn't realize that the task struct allocation was one single allocation - and the sanity checks fail. Since SLOB doesn't even claim to support hardening (and you really shouldn't use it), the straightforward solution is to just make the usercopy hardening code depend on the allocator supporting it. Reported-by: kernel test robot <xiaolong.ye@intel.com> Cc: Kees Cook <keescook@chromium.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- security/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/security/Kconfig b/security/Kconfig index df28f2b6f3e1b..da10d9b573a4a 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -136,6 +136,7 @@ config HAVE_ARCH_HARDENED_USERCOPY config HARDENED_USERCOPY bool "Harden memory copies between kernel and userspace" depends on HAVE_ARCH_HARDENED_USERCOPY + depends on HAVE_HARDENED_USERCOPY_ALLOCATOR select BUG help This option checks for obviously wrong memory regions when From c57653dc94d0db7bf63067433ceaa97bdcd0a312 Mon Sep 17 00:00:00 2001 From: Vineet Gupta <vgupta@synopsys.com> Date: Fri, 19 Aug 2016 13:59:02 -0700 Subject: [PATCH 378/513] ARC: export __udivdi3 for modules Some module using div_u64() was failing to link because the libgcc 64-bit divide assist routine was not being exported for modules Reported-by: avinashp@quantenna.com Cc: stable@vger.kernel.org Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/kernel/arcksyms.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/kernel/arcksyms.c b/arch/arc/kernel/arcksyms.c index 4d9e77724bede..000dd041ab42e 100644 --- a/arch/arc/kernel/arcksyms.c +++ b/arch/arc/kernel/arcksyms.c @@ -28,6 +28,7 @@ extern void __muldf3(void); extern void __divdf3(void); extern void __floatunsidf(void); extern void __floatunsisf(void); +extern void __udivdi3(void); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); @@ -45,6 +46,7 @@ EXPORT_SYMBOL(__muldf3); EXPORT_SYMBOL(__divdf3); EXPORT_SYMBOL(__floatunsidf); EXPORT_SYMBOL(__floatunsisf); +EXPORT_SYMBOL(__udivdi3); /* ARC optimised assembler routines */ EXPORT_SYMBOL(memset); From ae141830b118c3fb5b7eab6fa7c8ab7b7224b0a4 Mon Sep 17 00:00:00 2001 From: Helge Deller <deller@gmx.de> Date: Fri, 19 Aug 2016 22:39:02 +0200 Subject: [PATCH 379/513] parisc: Fix automatic selection of cr16 clocksource Commit 54b66800907 (parisc: Add native high-resolution sched_clock() implementation) added support to use the CPU-internal cr16 counters as reliable clocksource with the help of HAVE_UNSTABLE_SCHED_CLOCK. Sadly the commit missed to remove the hack which prevented cr16 to become the default clocksource even on SMP systems. Signed-off-by: Helge Deller <deller@gmx.de> Cc: stable@vger.kernel.org # 4.7+ --- arch/parisc/kernel/processor.c | 8 -------- arch/parisc/kernel/time.c | 12 ------------ 2 files changed, 20 deletions(-) diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 5adc339eb7c8d..0c2a94a0f7518 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -51,8 +51,6 @@ EXPORT_SYMBOL(_parisc_requires_coherency); DEFINE_PER_CPU(struct cpuinfo_parisc, cpu_data); -extern int update_cr16_clocksource(void); /* from time.c */ - /* ** PARISC CPU driver - claim "device" and initialize CPU data structures. ** @@ -228,12 +226,6 @@ static int processor_probe(struct parisc_device *dev) } #endif - /* If we've registered more than one cpu, - * we'll use the jiffies clocksource since cr16 - * is not synchronized between CPUs. - */ - update_cr16_clocksource(); - return 0; } diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 505cf1ac5af24..4b0b963d52a75 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -221,18 +221,6 @@ static struct clocksource clocksource_cr16 = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -int update_cr16_clocksource(void) -{ - /* since the cr16 cycle counters are not synchronized across CPUs, - we'll check if we should switch to a safe clocksource: */ - if (clocksource_cr16.rating != 0 && num_online_cpus() > 1) { - clocksource_change_rating(&clocksource_cr16, 0); - return 1; - } - - return 0; -} - void __init start_cpu_itimer(void) { unsigned int cpu = smp_processor_id(); From 3eb53b20d7bd1374598cfb1feaa081fcac0e76cd Mon Sep 17 00:00:00 2001 From: Helge Deller <deller@gmx.de> Date: Sat, 20 Aug 2016 11:51:38 +0200 Subject: [PATCH 380/513] parisc: Fix order of EREFUSED define in errno.h When building gccgo in userspace, errno.h gets parsed and the go include file sysinfo.go is generated. Since EREFUSED is defined to the same value as ECONNREFUSED, and ECONNREFUSED is defined later on in errno.h, this leads to go complaining that EREFUSED isn't defined yet. Fix this trivial problem by moving the define of EREFUSED down after ECONNREFUSED in errno.h (and clean up the indenting while touching this line). Signed-off-by: Helge Deller <deller@gmx.de> Cc: stable@vger.kernel.org --- arch/parisc/include/uapi/asm/errno.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index c0ae62520d157..274d5bc6ecce4 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -97,10 +97,10 @@ #define ENOTCONN 235 /* Transport endpoint is not connected */ #define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */ #define ETOOMANYREFS 237 /* Too many references: cannot splice */ -#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ +#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ From 4ec656bdf43a13a655a8259b79dd63bc1f0b1e41 Mon Sep 17 00:00:00 2001 From: Tony Luck <tony.luck@intel.com> Date: Sat, 20 Aug 2016 16:27:58 -0700 Subject: [PATCH 381/513] EDAC, skx_edac: Add EDAC driver for Skylake This is an entirely new driver instead of yet another set of patches to sb_edac.c because: 1) Mapping from PCI devices to socket/memory controller is significantly different. Skylake scatters devices on a socket across a number of PCI buses. 2) There is an extra level of interleaving via the "mcroute" register that would be a little messy to squeeze into the old driver. 3) Validation is getting too expensive. Changes to sb_edac need to be checked against Sandy Bridge, Ivy Bridge, Haswell, Broadwell and Knights Landing. Acked-by: Aristeu Rozanski <aris@redhat.com> Acked-by: Borislav Petkov <bp@suse.de> Signed-off-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- MAINTAINERS | 6 + drivers/edac/Kconfig | 8 + drivers/edac/Makefile | 1 + drivers/edac/skx_edac.c | 1121 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 1136 insertions(+) create mode 100644 drivers/edac/skx_edac.c diff --git a/MAINTAINERS b/MAINTAINERS index a306795a74506..0bbe4b105c346 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4525,6 +4525,12 @@ L: linux-edac@vger.kernel.org S: Maintained F: drivers/edac/sb_edac.c +EDAC-SKYLAKE +M: Tony Luck <tony.luck@intel.com> +L: linux-edac@vger.kernel.org +S: Maintained +F: drivers/edac/skx_edac.c + EDAC-XGENE APPLIED MICRO (APM) X-GENE SOC EDAC M: Loc Ho <lho@apm.com> diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index d0c1dab9b4354..dff1a4a6dc1b5 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -251,6 +251,14 @@ config EDAC_SBRIDGE Support for error detection and correction the Intel Sandy Bridge, Ivy Bridge and Haswell Integrated Memory Controllers. +config EDAC_SKX + tristate "Intel Skylake server Integrated MC" + depends on EDAC_MM_EDAC && PCI && X86_64 && X86_MCE_INTEL + depends on PCI_MMCONFIG + help + Support for error detection and correction the Intel + Skylake server Integrated Memory Controllers. + config EDAC_MPC85XX tristate "Freescale MPC83xx / MPC85xx" depends on EDAC_MM_EDAC && FSL_SOC diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index f9e4a3e0e6e91..986049925b085 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_EDAC_I5400) += i5400_edac.o obj-$(CONFIG_EDAC_I7300) += i7300_edac.o obj-$(CONFIG_EDAC_I7CORE) += i7core_edac.o obj-$(CONFIG_EDAC_SBRIDGE) += sb_edac.o +obj-$(CONFIG_EDAC_SKX) += skx_edac.o obj-$(CONFIG_EDAC_E7XXX) += e7xxx_edac.o obj-$(CONFIG_EDAC_E752X) += e752x_edac.o obj-$(CONFIG_EDAC_I82443BXGX) += i82443bxgx_edac.o diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c new file mode 100644 index 0000000000000..0ff4878c2aa18 --- /dev/null +++ b/drivers/edac/skx_edac.c @@ -0,0 +1,1121 @@ +/* + * EDAC driver for Intel(R) Xeon(R) Skylake processors + * Copyright (c) 2016, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/pci.h> +#include <linux/pci_ids.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/edac.h> +#include <linux/mmzone.h> +#include <linux/smp.h> +#include <linux/bitmap.h> +#include <linux/math64.h> +#include <linux/mod_devicetable.h> +#include <asm/cpu_device_id.h> +#include <asm/processor.h> +#include <asm/mce.h> + +#include "edac_core.h" + +#define SKX_REVISION " Ver: 1.0 " + +/* + * Debug macros + */ +#define skx_printk(level, fmt, arg...) \ + edac_printk(level, "skx", fmt, ##arg) + +#define skx_mc_printk(mci, level, fmt, arg...) \ + edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) + +/* + * Get a bit field at register value <v>, from bit <lo> to bit <hi> + */ +#define GET_BITFIELD(v, lo, hi) \ + (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) + +static LIST_HEAD(skx_edac_list); + +static u64 skx_tolm, skx_tohm; + +#define NUM_IMC 2 /* memory controllers per socket */ +#define NUM_CHANNELS 3 /* channels per memory controller */ +#define NUM_DIMMS 2 /* Max DIMMS per channel */ + +#define MASK26 0x3FFFFFF /* Mask for 2^26 */ +#define MASK29 0x1FFFFFFF /* Mask for 2^29 */ + +/* + * Each cpu socket contains some pci devices that provide global + * information, and also some that are local to each of the two + * memory controllers on the die. + */ +struct skx_dev { + struct list_head list; + u8 bus[4]; + struct pci_dev *sad_all; + struct pci_dev *util_all; + u32 mcroute; + struct skx_imc { + struct mem_ctl_info *mci; + u8 mc; /* system wide mc# */ + u8 lmc; /* socket relative mc# */ + u8 src_id, node_id; + struct skx_channel { + struct pci_dev *cdev; + struct skx_dimm { + u8 close_pg; + u8 bank_xor_enable; + u8 fine_grain_bank; + u8 rowbits; + u8 colbits; + } dimms[NUM_DIMMS]; + } chan[NUM_CHANNELS]; + } imc[NUM_IMC]; +}; +static int skx_num_sockets; + +struct skx_pvt { + struct skx_imc *imc; +}; + +struct decoded_addr { + struct skx_dev *dev; + u64 addr; + int socket; + int imc; + int channel; + u64 chan_addr; + int sktways; + int chanways; + int dimm; + int rank; + int channel_rank; + u64 rank_address; + int row; + int column; + int bank_address; + int bank_group; +}; + +static struct skx_dev *get_skx_dev(u8 bus, u8 idx) +{ + struct skx_dev *d; + + list_for_each_entry(d, &skx_edac_list, list) { + if (d->bus[idx] == bus) + return d; + } + + return NULL; +} + +enum munittype { + CHAN0, CHAN1, CHAN2, SAD_ALL, UTIL_ALL, SAD +}; + +struct munit { + u16 did; + u16 devfn[NUM_IMC]; + u8 busidx; + u8 per_socket; + enum munittype mtype; +}; + +/* + * List of PCI device ids that we need together with some device + * number and function numbers to tell which memory controller the + * device belongs to. + */ +static const struct munit skx_all_munits[] = { + { 0x2054, { }, 1, 1, SAD_ALL }, + { 0x2055, { }, 1, 1, UTIL_ALL }, + { 0x2040, { PCI_DEVFN(10, 0), PCI_DEVFN(12, 0) }, 2, 2, CHAN0 }, + { 0x2044, { PCI_DEVFN(10, 4), PCI_DEVFN(12, 4) }, 2, 2, CHAN1 }, + { 0x2048, { PCI_DEVFN(11, 0), PCI_DEVFN(13, 0) }, 2, 2, CHAN2 }, + { 0x208e, { }, 1, 0, SAD }, + { } +}; + +/* + * We use the per-socket device 0x2016 to count how many sockets are present, + * and to detemine which PCI buses are associated with each socket. Allocate + * and build the full list of all the skx_dev structures that we need here. + */ +static int get_all_bus_mappings(void) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2016, prev); + if (!pdev) + break; + ndev++; + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + pci_dev_put(pdev); + return -ENOMEM; + } + pci_read_config_dword(pdev, 0xCC, ®); + d->bus[0] = GET_BITFIELD(reg, 0, 7); + d->bus[1] = GET_BITFIELD(reg, 8, 15); + d->bus[2] = GET_BITFIELD(reg, 16, 23); + d->bus[3] = GET_BITFIELD(reg, 24, 31); + edac_dbg(2, "busses: %x, %x, %x, %x\n", + d->bus[0], d->bus[1], d->bus[2], d->bus[3]); + list_add_tail(&d->list, &skx_edac_list); + skx_num_sockets++; + prev = pdev; + } + + return ndev; +} + +static int get_all_munits(const struct munit *m) +{ + struct pci_dev *pdev, *prev; + struct skx_dev *d; + u32 reg; + int i = 0, ndev = 0; + + prev = NULL; + for (;;) { + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, m->did, prev); + if (!pdev) + break; + ndev++; + if (m->per_socket == NUM_IMC) { + for (i = 0; i < NUM_IMC; i++) + if (m->devfn[i] == pdev->devfn) + break; + if (i == NUM_IMC) + goto fail; + } + d = get_skx_dev(pdev->bus->number, m->busidx); + if (!d) + goto fail; + + /* Be sure that the device is enabled */ + if (unlikely(pci_enable_device(pdev) < 0)) { + skx_printk(KERN_ERR, + "Couldn't enable %04x:%04x\n", PCI_VENDOR_ID_INTEL, m->did); + goto fail; + } + + switch (m->mtype) { + case CHAN0: case CHAN1: case CHAN2: + pci_dev_get(pdev); + d->imc[i].chan[m->mtype].cdev = pdev; + break; + case SAD_ALL: + pci_dev_get(pdev); + d->sad_all = pdev; + break; + case UTIL_ALL: + pci_dev_get(pdev); + d->util_all = pdev; + break; + case SAD: + /* + * one of these devices per core, including cores + * that don't exist on this SKU. Ignore any that + * read a route table of zero, make sure all the + * non-zero values match. + */ + pci_read_config_dword(pdev, 0xB4, ®); + if (reg != 0) { + if (d->mcroute == 0) + d->mcroute = reg; + else if (d->mcroute != reg) { + skx_printk(KERN_ERR, + "mcroute mismatch\n"); + goto fail; + } + } + ndev--; + break; + } + + prev = pdev; + } + + return ndev; +fail: + pci_dev_put(pdev); + return -ENODEV; +} + +const struct x86_cpu_id skx_cpuids[] = { + { X86_VENDOR_INTEL, 6, 0x55, 0, 0 }, /* Skylake */ + { } +}; +MODULE_DEVICE_TABLE(x86cpu, skx_cpuids); + +static u8 get_src_id(struct skx_dev *d) +{ + u32 reg; + + pci_read_config_dword(d->util_all, 0xF0, ®); + + return GET_BITFIELD(reg, 12, 14); +} + +static u8 skx_get_node_id(struct skx_dev *d) +{ + u32 reg; + + pci_read_config_dword(d->util_all, 0xF4, ®); + + return GET_BITFIELD(reg, 0, 2); +} + +static int get_dimm_attr(u32 reg, int lobit, int hibit, int add, int minval, + int maxval, char *name) +{ + u32 val = GET_BITFIELD(reg, lobit, hibit); + + if (val < minval || val > maxval) { + edac_dbg(2, "bad %s = %d (raw=%x)\n", name, val, reg); + return -EINVAL; + } + return val + add; +} + +#define IS_DIMM_PRESENT(mtr) GET_BITFIELD((mtr), 15, 15) + +#define numrank(reg) get_dimm_attr((reg), 12, 13, 0, 1, 2, "ranks") +#define numrow(reg) get_dimm_attr((reg), 2, 4, 12, 1, 6, "rows") +#define numcol(reg) get_dimm_attr((reg), 0, 1, 10, 0, 2, "cols") + +static int get_width(u32 mtr) +{ + switch (GET_BITFIELD(mtr, 8, 9)) { + case 0: + return DEV_X4; + case 1: + return DEV_X8; + case 2: + return DEV_X16; + } + return DEV_UNKNOWN; +} + +static int skx_get_hi_lo(void) +{ + struct pci_dev *pdev; + u32 reg; + + pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x2034, NULL); + if (!pdev) { + edac_dbg(0, "Can't get tolm/tohm\n"); + return -ENODEV; + } + + pci_read_config_dword(pdev, 0xD0, ®); + skx_tolm = reg; + pci_read_config_dword(pdev, 0xD4, ®); + skx_tohm = reg; + pci_read_config_dword(pdev, 0xD8, ®); + skx_tohm |= (u64)reg << 32; + + pci_dev_put(pdev); + edac_dbg(2, "tolm=%llx tohm=%llx\n", skx_tolm, skx_tohm); + + return 0; +} + +static int get_dimm_info(u32 mtr, u32 amap, struct dimm_info *dimm, + struct skx_imc *imc, int chan, int dimmno) +{ + int banks = 16, ranks, rows, cols, npages; + u64 size; + + if (!IS_DIMM_PRESENT(mtr)) + return 0; + ranks = numrank(mtr); + rows = numrow(mtr); + cols = numcol(mtr); + + /* + * Compute size in 8-byte (2^3) words, then shift to MiB (2^20) + */ + size = ((1ull << (rows + cols + ranks)) * banks) >> (20 - 3); + npages = MiB_TO_PAGES(size); + + edac_dbg(0, "mc#%d: channel %d, dimm %d, %lld Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n", + imc->mc, chan, dimmno, size, npages, + banks, ranks, rows, cols); + + imc->chan[chan].dimms[dimmno].close_pg = GET_BITFIELD(mtr, 0, 0); + imc->chan[chan].dimms[dimmno].bank_xor_enable = GET_BITFIELD(mtr, 9, 9); + imc->chan[chan].dimms[dimmno].fine_grain_bank = GET_BITFIELD(amap, 0, 0); + imc->chan[chan].dimms[dimmno].rowbits = rows; + imc->chan[chan].dimms[dimmno].colbits = cols; + + dimm->nr_pages = npages; + dimm->grain = 32; + dimm->dtype = get_width(mtr); + dimm->mtype = MEM_DDR4; + dimm->edac_mode = EDAC_SECDED; /* likely better than this */ + snprintf(dimm->label, sizeof(dimm->label), "CPU_SrcID#%u_MC#%u_Chan#%u_DIMM#%u", + imc->src_id, imc->lmc, chan, dimmno); + + return 1; +} + +#define SKX_GET_MTMTR(dev, reg) \ + pci_read_config_dword((dev), 0x87c, ®) + +static bool skx_check_ecc(struct pci_dev *pdev) +{ + u32 mtmtr; + + SKX_GET_MTMTR(pdev, mtmtr); + + return !!GET_BITFIELD(mtmtr, 2, 2); +} + +static int skx_get_dimm_config(struct mem_ctl_info *mci) +{ + struct skx_pvt *pvt = mci->pvt_info; + struct skx_imc *imc = pvt->imc; + struct dimm_info *dimm; + int i, j; + u32 mtr, amap; + int ndimms; + + for (i = 0; i < NUM_CHANNELS; i++) { + ndimms = 0; + pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap); + for (j = 0; j < NUM_DIMMS; j++) { + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, + mci->n_layers, i, j, 0); + pci_read_config_dword(imc->chan[i].cdev, + 0x80 + 4*j, &mtr); + ndimms += get_dimm_info(mtr, amap, dimm, imc, i, j); + } + if (ndimms && !skx_check_ecc(imc->chan[0].cdev)) { + skx_printk(KERN_ERR, "ECC is disabled on imc %d\n", imc->mc); + return -ENODEV; + } + } + + return 0; +} + +static void skx_unregister_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci = imc->mci; + + if (!mci) + return; + + edac_dbg(0, "MC%d: mci = %p\n", imc->mc, mci); + + /* Remove MC sysfs nodes */ + edac_mc_del_mc(mci->pdev); + + edac_dbg(1, "%s: free mci struct\n", mci->ctl_name); + kfree(mci->ctl_name); + edac_mc_free(mci); +} + +static int skx_register_mci(struct skx_imc *imc) +{ + struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; + struct pci_dev *pdev = imc->chan[0].cdev; + struct skx_pvt *pvt; + int rc; + + /* allocate a new MC control structure */ + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANNELS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = NUM_DIMMS; + layers[1].is_virt_csrow = true; + mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers, + sizeof(struct skx_pvt)); + + if (unlikely(!mci)) + return -ENOMEM; + + edac_dbg(0, "MC#%d: mci = %p\n", imc->mc, mci); + + /* Associate skx_dev and mci for future usage */ + imc->mci = mci; + pvt = mci->pvt_info; + pvt->imc = imc; + + mci->ctl_name = kasprintf(GFP_KERNEL, "Skylake Socket#%d IMC#%d", + imc->node_id, imc->lmc); + mci->mtype_cap = MEM_FLAG_DDR4; + mci->edac_ctl_cap = EDAC_FLAG_NONE; + mci->edac_cap = EDAC_FLAG_NONE; + mci->mod_name = "skx_edac.c"; + mci->dev_name = pci_name(imc->chan[0].cdev); + mci->mod_ver = SKX_REVISION; + mci->ctl_page_to_phys = NULL; + + rc = skx_get_dimm_config(mci); + if (rc < 0) + goto fail; + + /* record ptr to the generic device */ + mci->pdev = &pdev->dev; + + /* add this new MC control structure to EDAC's list of MCs */ + if (unlikely(edac_mc_add_mc(mci))) { + edac_dbg(0, "MC: failed edac_mc_add_mc()\n"); + rc = -EINVAL; + goto fail; + } + + return 0; + +fail: + kfree(mci->ctl_name); + edac_mc_free(mci); + imc->mci = NULL; + return rc; +} + +#define SKX_MAX_SAD 24 + +#define SKX_GET_SAD(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x60 + 8 * (i), ®) +#define SKX_GET_ILV(d, i, reg) \ + pci_read_config_dword((d)->sad_all, 0x64 + 8 * (i), ®) + +#define SKX_SAD_MOD3MODE(sad) GET_BITFIELD((sad), 30, 31) +#define SKX_SAD_MOD3(sad) GET_BITFIELD((sad), 27, 27) +#define SKX_SAD_LIMIT(sad) (((u64)GET_BITFIELD((sad), 7, 26) << 26) | MASK26) +#define SKX_SAD_MOD3ASMOD2(sad) GET_BITFIELD((sad), 5, 6) +#define SKX_SAD_ATTR(sad) GET_BITFIELD((sad), 3, 4) +#define SKX_SAD_INTERLEAVE(sad) GET_BITFIELD((sad), 1, 2) +#define SKX_SAD_ENABLE(sad) GET_BITFIELD((sad), 0, 0) + +#define SKX_ILV_REMOTE(tgt) (((tgt) & 8) == 0) +#define SKX_ILV_TARGET(tgt) ((tgt) & 7) + +static bool skx_sad_decode(struct decoded_addr *res) +{ + struct skx_dev *d = list_first_entry(&skx_edac_list, typeof(*d), list); + u64 addr = res->addr; + int i, idx, tgt, lchan, shift; + u32 sad, ilv; + u64 limit, prev_limit; + int remote = 0; + + /* Simple sanity check for I/O space or out of range */ + if (addr >= skx_tohm || (addr >= skx_tolm && addr < BIT_ULL(32))) { + edac_dbg(0, "Address %llx out of range\n", addr); + return false; + } + +restart: + prev_limit = 0; + for (i = 0; i < SKX_MAX_SAD; i++) { + SKX_GET_SAD(d, i, sad); + limit = SKX_SAD_LIMIT(sad); + if (SKX_SAD_ENABLE(sad)) { + if (addr >= prev_limit && addr <= limit) + goto sad_found; + } + prev_limit = limit + 1; + } + edac_dbg(0, "No SAD entry for %llx\n", addr); + return false; + +sad_found: + SKX_GET_ILV(d, i, ilv); + + switch (SKX_SAD_INTERLEAVE(sad)) { + case 0: + idx = GET_BITFIELD(addr, 6, 8); + break; + case 1: + idx = GET_BITFIELD(addr, 8, 10); + break; + case 2: + idx = GET_BITFIELD(addr, 12, 14); + break; + case 3: + idx = GET_BITFIELD(addr, 30, 32); + break; + } + + tgt = GET_BITFIELD(ilv, 4 * idx, 4 * idx + 3); + + /* If point to another node, find it and start over */ + if (SKX_ILV_REMOTE(tgt)) { + if (remote) { + edac_dbg(0, "Double remote!\n"); + return false; + } + remote = 1; + list_for_each_entry(d, &skx_edac_list, list) { + if (d->imc[0].src_id == SKX_ILV_TARGET(tgt)) + goto restart; + } + edac_dbg(0, "Can't find node %d\n", SKX_ILV_TARGET(tgt)); + return false; + } + + if (SKX_SAD_MOD3(sad) == 0) + lchan = SKX_ILV_TARGET(tgt); + else { + switch (SKX_SAD_MOD3MODE(sad)) { + case 0: + shift = 6; + break; + case 1: + shift = 8; + break; + case 2: + shift = 12; + break; + default: + edac_dbg(0, "illegal mod3mode\n"); + return false; + } + switch (SKX_SAD_MOD3ASMOD2(sad)) { + case 0: + lchan = (addr >> shift) % 3; + break; + case 1: + lchan = (addr >> shift) % 2; + break; + case 2: + lchan = (addr >> shift) % 2; + lchan = (lchan << 1) | ~lchan; + break; + case 3: + lchan = ((addr >> shift) % 2) << 1; + break; + } + lchan = (lchan << 1) | (SKX_ILV_TARGET(tgt) & 1); + } + + res->dev = d; + res->socket = d->imc[0].src_id; + res->imc = GET_BITFIELD(d->mcroute, lchan * 3, lchan * 3 + 2); + res->channel = GET_BITFIELD(d->mcroute, lchan * 2 + 18, lchan * 2 + 19); + + edac_dbg(2, "%llx: socket=%d imc=%d channel=%d\n", + res->addr, res->socket, res->imc, res->channel); + return true; +} + +#define SKX_MAX_TAD 8 + +#define SKX_GET_TADBASE(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x850 + 4 * (i), ®) +#define SKX_GET_TADWAYNESS(d, mc, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[0].cdev, 0x880 + 4 * (i), ®) +#define SKX_GET_TADCHNILVOFFSET(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, 0x90 + 4 * (i), ®) + +#define SKX_TAD_BASE(b) ((u64)GET_BITFIELD((b), 12, 31) << 26) +#define SKX_TAD_SKT_GRAN(b) GET_BITFIELD((b), 4, 5) +#define SKX_TAD_CHN_GRAN(b) GET_BITFIELD((b), 6, 7) +#define SKX_TAD_LIMIT(b) (((u64)GET_BITFIELD((b), 12, 31) << 26) | MASK26) +#define SKX_TAD_OFFSET(b) ((u64)GET_BITFIELD((b), 4, 23) << 26) +#define SKX_TAD_SKTWAYS(b) (1 << GET_BITFIELD((b), 10, 11)) +#define SKX_TAD_CHNWAYS(b) (GET_BITFIELD((b), 8, 9) + 1) + +/* which bit used for both socket and channel interleave */ +static int skx_granularity[] = { 6, 8, 12, 30 }; + +static u64 skx_do_interleave(u64 addr, int shift, int ways, u64 lowbits) +{ + addr >>= shift; + addr /= ways; + addr <<= shift; + + return addr | (lowbits & ((1ull << shift) - 1)); +} + +static bool skx_tad_decode(struct decoded_addr *res) +{ + int i; + u32 base, wayness, chnilvoffset; + int skt_interleave_bit, chn_interleave_bit; + u64 channel_addr; + + for (i = 0; i < SKX_MAX_TAD; i++) { + SKX_GET_TADBASE(res->dev, res->imc, i, base); + SKX_GET_TADWAYNESS(res->dev, res->imc, i, wayness); + if (SKX_TAD_BASE(base) <= res->addr && res->addr <= SKX_TAD_LIMIT(wayness)) + goto tad_found; + } + edac_dbg(0, "No TAD entry for %llx\n", res->addr); + return false; + +tad_found: + res->sktways = SKX_TAD_SKTWAYS(wayness); + res->chanways = SKX_TAD_CHNWAYS(wayness); + skt_interleave_bit = skx_granularity[SKX_TAD_SKT_GRAN(base)]; + chn_interleave_bit = skx_granularity[SKX_TAD_CHN_GRAN(base)]; + + SKX_GET_TADCHNILVOFFSET(res->dev, res->imc, res->channel, i, chnilvoffset); + channel_addr = res->addr - SKX_TAD_OFFSET(chnilvoffset); + + if (res->chanways == 3 && skt_interleave_bit > chn_interleave_bit) { + /* Must handle channel first, then socket */ + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, channel_addr); + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, channel_addr); + } else { + /* Handle socket then channel. Preserve low bits from original address */ + channel_addr = skx_do_interleave(channel_addr, skt_interleave_bit, + res->sktways, res->addr); + channel_addr = skx_do_interleave(channel_addr, chn_interleave_bit, + res->chanways, res->addr); + } + + res->chan_addr = channel_addr; + + edac_dbg(2, "%llx: chan_addr=%llx sktways=%d chanways=%d\n", + res->addr, res->chan_addr, res->sktways, res->chanways); + return true; +} + +#define SKX_MAX_RIR 4 + +#define SKX_GET_RIRWAYNESS(d, mc, ch, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x108 + 4 * (i), ®) +#define SKX_GET_RIRILV(d, mc, ch, idx, i, reg) \ + pci_read_config_dword((d)->imc[mc].chan[ch].cdev, \ + 0x120 + 16 * idx + 4 * (i), ®) + +#define SKX_RIR_VALID(b) GET_BITFIELD((b), 31, 31) +#define SKX_RIR_LIMIT(b) (((u64)GET_BITFIELD((b), 1, 11) << 29) | MASK29) +#define SKX_RIR_WAYS(b) (1 << GET_BITFIELD((b), 28, 29)) +#define SKX_RIR_CHAN_RANK(b) GET_BITFIELD((b), 16, 19) +#define SKX_RIR_OFFSET(b) ((u64)(GET_BITFIELD((b), 2, 15) << 26)) + +static bool skx_rir_decode(struct decoded_addr *res) +{ + int i, idx, chan_rank; + int shift; + u32 rirway, rirlv; + u64 rank_addr, prev_limit = 0, limit; + + if (res->dev->imc[res->imc].chan[res->channel].dimms[0].close_pg) + shift = 6; + else + shift = 13; + + for (i = 0; i < SKX_MAX_RIR; i++) { + SKX_GET_RIRWAYNESS(res->dev, res->imc, res->channel, i, rirway); + limit = SKX_RIR_LIMIT(rirway); + if (SKX_RIR_VALID(rirway)) { + if (prev_limit <= res->chan_addr && + res->chan_addr <= limit) + goto rir_found; + } + prev_limit = limit; + } + edac_dbg(0, "No RIR entry for %llx\n", res->addr); + return false; + +rir_found: + rank_addr = res->chan_addr >> shift; + rank_addr /= SKX_RIR_WAYS(rirway); + rank_addr <<= shift; + rank_addr |= res->chan_addr & GENMASK_ULL(shift - 1, 0); + + res->rank_address = rank_addr; + idx = (res->chan_addr >> shift) % SKX_RIR_WAYS(rirway); + + SKX_GET_RIRILV(res->dev, res->imc, res->channel, idx, i, rirlv); + res->rank_address = rank_addr - SKX_RIR_OFFSET(rirlv); + chan_rank = SKX_RIR_CHAN_RANK(rirlv); + res->channel_rank = chan_rank; + res->dimm = chan_rank / 4; + res->rank = chan_rank % 4; + + edac_dbg(2, "%llx: dimm=%d rank=%d chan_rank=%d rank_addr=%llx\n", + res->addr, res->dimm, res->rank, + res->channel_rank, res->rank_address); + return true; +} + +static u8 skx_close_row[] = { + 15, 16, 17, 18, 20, 21, 22, 28, 10, 11, 12, 13, 29, 30, 31, 32, 33 +}; +static u8 skx_close_column[] = { + 3, 4, 5, 14, 19, 23, 24, 25, 26, 27 +}; +static u8 skx_open_row[] = { + 14, 15, 16, 20, 28, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33 +}; +static u8 skx_open_column[] = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 +}; +static u8 skx_open_fine_column[] = { + 3, 4, 5, 7, 8, 9, 10, 11, 12, 13 +}; + +static int skx_bits(u64 addr, int nbits, u8 *bits) +{ + int i, res = 0; + + for (i = 0; i < nbits; i++) + res |= ((addr >> bits[i]) & 1) << i; + return res; +} + +static int skx_bank_bits(u64 addr, int b0, int b1, int do_xor, int x0, int x1) +{ + int ret = GET_BITFIELD(addr, b0, b0) | (GET_BITFIELD(addr, b1, b1) << 1); + + if (do_xor) + ret ^= GET_BITFIELD(addr, x0, x0) | (GET_BITFIELD(addr, x1, x1) << 1); + + return ret; +} + +static bool skx_mad_decode(struct decoded_addr *r) +{ + struct skx_dimm *dimm = &r->dev->imc[r->imc].chan[r->channel].dimms[r->dimm]; + int bg0 = dimm->fine_grain_bank ? 6 : 13; + + if (dimm->close_pg) { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_close_row); + r->column = skx_bits(r->rank_address, dimm->colbits, skx_close_column); + r->column |= 0x400; /* C10 is autoprecharge, always set */ + r->bank_address = skx_bank_bits(r->rank_address, 8, 9, dimm->bank_xor_enable, 22, 28); + r->bank_group = skx_bank_bits(r->rank_address, 6, 7, dimm->bank_xor_enable, 20, 21); + } else { + r->row = skx_bits(r->rank_address, dimm->rowbits, skx_open_row); + if (dimm->fine_grain_bank) + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_fine_column); + else + r->column = skx_bits(r->rank_address, dimm->colbits, skx_open_column); + r->bank_address = skx_bank_bits(r->rank_address, 18, 19, dimm->bank_xor_enable, 22, 23); + r->bank_group = skx_bank_bits(r->rank_address, bg0, 17, dimm->bank_xor_enable, 20, 21); + } + r->row &= (1u << dimm->rowbits) - 1; + + edac_dbg(2, "%llx: row=%x col=%x bank_addr=%d bank_group=%d\n", + r->addr, r->row, r->column, r->bank_address, + r->bank_group); + return true; +} + +static bool skx_decode(struct decoded_addr *res) +{ + + return skx_sad_decode(res) && skx_tad_decode(res) && + skx_rir_decode(res) && skx_mad_decode(res); +} + +#ifdef CONFIG_EDAC_DEBUG +/* + * Debug feature. Make /sys/kernel/debug/skx_edac_test/addr. + * Write an address to this file to exercise the address decode + * logic in this driver. + */ +static struct dentry *skx_test; +static u64 skx_fake_addr; + +static int debugfs_u64_set(void *data, u64 val) +{ + struct decoded_addr res; + + res.addr = val; + skx_decode(&res); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n"); + +static struct dentry *mydebugfs_create(const char *name, umode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_u64_wo); +} + +static void setup_skx_debug(void) +{ + skx_test = debugfs_create_dir("skx_edac_test", NULL); + mydebugfs_create("addr", S_IWUSR, skx_test, &skx_fake_addr); +} + +static void teardown_skx_debug(void) +{ + debugfs_remove_recursive(skx_test); +} +#else +static void setup_skx_debug(void) +{ +} + +static void teardown_skx_debug(void) +{ +} +#endif /*CONFIG_EDAC_DEBUG*/ + +static void skx_mce_output_error(struct mem_ctl_info *mci, + const struct mce *m, + struct decoded_addr *res) +{ + enum hw_event_mc_err_type tp_event; + char *type, *optype, msg[256]; + bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); + bool overflow = GET_BITFIELD(m->status, 62, 62); + bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); + bool recoverable; + u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52); + u32 mscod = GET_BITFIELD(m->status, 16, 31); + u32 errcode = GET_BITFIELD(m->status, 0, 15); + u32 optypenum = GET_BITFIELD(m->status, 4, 6); + + recoverable = GET_BITFIELD(m->status, 56, 56); + + if (uncorrected_error) { + if (ripv) { + type = "FATAL"; + tp_event = HW_EVENT_ERR_FATAL; + } else { + type = "NON_FATAL"; + tp_event = HW_EVENT_ERR_UNCORRECTED; + } + } else { + type = "CORRECTED"; + tp_event = HW_EVENT_ERR_CORRECTED; + } + + /* + * According with Table 15-9 of the Intel Architecture spec vol 3A, + * memory errors should fit in this mask: + * 000f 0000 1mmm cccc (binary) + * where: + * f = Correction Report Filtering Bit. If 1, subsequent errors + * won't be shown + * mmm = error type + * cccc = channel + * If the mask doesn't match, report an error to the parsing logic + */ + if (!((errcode & 0xef80) == 0x80)) { + optype = "Can't parse: it is not a mem"; + } else { + switch (optypenum) { + case 0: + optype = "generic undef request error"; + break; + case 1: + optype = "memory read error"; + break; + case 2: + optype = "memory write error"; + break; + case 3: + optype = "addr/cmd error"; + break; + case 4: + optype = "memory scrubbing error"; + break; + default: + optype = "reserved"; + break; + } + } + + snprintf(msg, sizeof(msg), + "%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x", + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + mscod, errcode, + res->socket, res->imc, res->rank, + res->bank_group, res->bank_address, res->row, res->column); + + edac_dbg(0, "%s\n", msg); + + /* Call the helper to output message */ + edac_mc_handle_error(tp_event, mci, core_err_cnt, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + res->channel, res->dimm, -1, + optype, msg); +} + +static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *mce = (struct mce *)data; + struct decoded_addr res; + struct mem_ctl_info *mci; + char *type; + + if (get_edac_report_status() == EDAC_REPORTING_DISABLED) + return NOTIFY_DONE; + + /* ignore unless this is memory related with an address */ + if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV)) + return NOTIFY_DONE; + + res.addr = mce->addr; + if (!skx_decode(&res)) + return NOTIFY_DONE; + mci = res.dev->imc[res.imc].mci; + + if (mce->mcgstatus & MCG_STATUS_MCIP) + type = "Exception"; + else + type = "Event"; + + skx_mc_printk(mci, KERN_DEBUG, "HANDLING MCE MEMORY ERROR\n"); + + skx_mc_printk(mci, KERN_DEBUG, "CPU %d: Machine Check %s: %Lx " + "Bank %d: %016Lx\n", mce->extcpu, type, + mce->mcgstatus, mce->bank, mce->status); + skx_mc_printk(mci, KERN_DEBUG, "TSC %llx ", mce->tsc); + skx_mc_printk(mci, KERN_DEBUG, "ADDR %llx ", mce->addr); + skx_mc_printk(mci, KERN_DEBUG, "MISC %llx ", mce->misc); + + skx_mc_printk(mci, KERN_DEBUG, "PROCESSOR %u:%x TIME %llu SOCKET " + "%u APIC %x\n", mce->cpuvendor, mce->cpuid, + mce->time, mce->socketid, mce->apicid); + + skx_mce_output_error(mci, mce, &res); + + return NOTIFY_DONE; +} + +static struct notifier_block skx_mce_dec = { + .notifier_call = skx_mce_check_error, +}; + +static void skx_remove(void) +{ + int i, j; + struct skx_dev *d, *tmp; + + edac_dbg(0, "\n"); + + list_for_each_entry_safe(d, tmp, &skx_edac_list, list) { + list_del(&d->list); + for (i = 0; i < NUM_IMC; i++) { + skx_unregister_mci(&d->imc[i]); + for (j = 0; j < NUM_CHANNELS; j++) + pci_dev_put(d->imc[i].chan[j].cdev); + } + pci_dev_put(d->util_all); + pci_dev_put(d->sad_all); + + kfree(d); + } +} + +/* + * skx_init: + * make sure we are running on the correct cpu model + * search for all the devices we need + * check which DIMMs are present. + */ +int __init skx_init(void) +{ + const struct x86_cpu_id *id; + const struct munit *m; + int rc = 0, i; + u8 mc = 0, src_id, node_id; + struct skx_dev *d; + + edac_dbg(2, "\n"); + + id = x86_match_cpu(skx_cpuids); + if (!id) + return -ENODEV; + + rc = skx_get_hi_lo(); + if (rc) + return rc; + + rc = get_all_bus_mappings(); + if (rc < 0) + goto fail; + if (rc == 0) { + edac_dbg(2, "No memory controllers found\n"); + return -ENODEV; + } + + for (m = skx_all_munits; m->did; m++) { + rc = get_all_munits(m); + if (rc < 0) + goto fail; + if (rc != m->per_socket * skx_num_sockets) { + edac_dbg(2, "Expected %d, got %d of %x\n", + m->per_socket * skx_num_sockets, rc, m->did); + rc = -ENODEV; + goto fail; + } + } + + list_for_each_entry(d, &skx_edac_list, list) { + src_id = get_src_id(d); + node_id = skx_get_node_id(d); + edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id); + for (i = 0; i < NUM_IMC; i++) { + d->imc[i].mc = mc++; + d->imc[i].lmc = i; + d->imc[i].src_id = src_id; + d->imc[i].node_id = node_id; + rc = skx_register_mci(&d->imc[i]); + if (rc < 0) + goto fail; + } + } + + /* Ensure that the OPSTATE is set correctly for POLL or NMI */ + opstate_init(); + + setup_skx_debug(); + + mce_register_decode_chain(&skx_mce_dec); + + return 0; +fail: + skx_remove(); + return rc; +} + +static void __exit skx_exit(void) +{ + edac_dbg(2, "\n"); + mce_unregister_decode_chain(&skx_mce_dec); + skx_remove(); + teardown_skx_debug(); +} + +module_init(skx_init); +module_exit(skx_exit); + +module_param(edac_op_state, int, 0444); +MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI"); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tony Luck"); +MODULE_DESCRIPTION("MC Driver for Intel Skylake server processors"); From fa8410b355251fd30341662a40ac6b22d3e38468 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 21 Aug 2016 16:14:10 -0700 Subject: [PATCH 382/513] Linux 4.8-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5c18baad72182..3537aa23905fa 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 6f00975c619064a18c23fd3aced325ae165a73b9 Mon Sep 17 00:00:00 2001 From: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Sat, 20 Aug 2016 12:22:11 +0200 Subject: [PATCH 383/513] drm: Reject page_flip for !DRIVER_MODESET Somehow this one slipped through, which means drivers without modeset support can be oopsed (since those also don't call drm_mode_config_init, which means the crtc lookup will chase an uninitalized idr). Reported-by: Alexander Potapenko <glider@google.com> Cc: Alexander Potapenko <glider@google.com> Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter <daniel.vetter@intel.com> Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Dave Airlie <airlied@redhat.com> --- drivers/gpu/drm/drm_crtc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index b1dbb60af99fa..ddebe54cd5ca6 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -5404,6 +5404,9 @@ int drm_mode_page_flip_ioctl(struct drm_device *dev, struct drm_pending_vblank_event *e = NULL; int ret = -EINVAL; + if (!drm_core_check_feature(dev, DRIVER_MODESET)) + return -EINVAL; + if (page_flip->flags & ~DRM_MODE_PAGE_FLIP_FLAGS || page_flip->reserved != 0) return -EINVAL; From 4396f46c8c628329bd35ee4b84140b8b001a11eb Mon Sep 17 00:00:00 2001 From: Shawn Lin <shawn.lin@rock-chips.com> Date: Mon, 22 Aug 2016 16:21:52 +0800 Subject: [PATCH 384/513] genirq: Fix potential memleak when failing to get irq pm Obviously we should free action here if irq_chip_pm_get failed. Fixes: be45beb2df69: "genirq: Add runtime power management support for IRQ chips" Signed-off-by: Shawn Lin <shawn.lin@rock-chips.com> Cc: Jon Hunter <jonathanh@nvidia.com> Cc: Marc Zyngier <marc.zyngier@arm.com> Link: http://lkml.kernel.org/r/1471854112-13006-1-git-send-email-shawn.lin@rock-chips.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/irq/manage.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 73a2b786b5e99..9530fcd277040 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1681,8 +1681,10 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler, action->dev_id = dev_id; retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) + if (retval < 0) { + kfree(action); return retval; + } chip_bus_lock(desc); retval = __setup_irq(irq, desc, action); @@ -1985,8 +1987,10 @@ int request_percpu_irq(unsigned int irq, irq_handler_t handler, action->percpu_dev_id = dev_id; retval = irq_chip_pm_get(&desc->irq_data); - if (retval < 0) + if (retval < 0) { + kfree(action); return retval; + } chip_bus_lock(desc); retval = __setup_irq(irq, desc, action); From 3ee0ce2a54dff07d09440723594df89bc1a12e79 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig <hch@lst.de> Date: Thu, 11 Aug 2016 07:06:45 -0700 Subject: [PATCH 385/513] genirq/affinity: Use get/put_online_cpus around cpumask operations Without locking out CPU mask operations we might end up with an inconsistent view of the cpumask in the function. Fixes: 5e385a6ef31f: "genirq: Add a helper to spread an affinity mask for MSI/MSI-X vectors" Signed-off-by: Christoph Hellwig <hch@lst.de> Link: http://lkml.kernel.org/r/1470924405-25728-1-git-send-email-hch@lst.de Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/irq/affinity.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f68959341c0fa..32f6cfcff2124 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -39,6 +39,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) return NULL; } + get_online_cpus(); if (max_vecs >= num_online_cpus()) { cpumask_copy(affinity_mask, cpu_online_mask); *nr_vecs = num_online_cpus(); @@ -56,6 +57,7 @@ struct cpumask *irq_create_affinity_mask(unsigned int *nr_vecs) } *nr_vecs = vecs; } + put_online_cpus(); return affinity_mask; } From 7e4379eae0e31994ea645db1d13006ea8e5ce539 Mon Sep 17 00:00:00 2001 From: Andrej Krutak <dev@andree.sk> Date: Thu, 18 Aug 2016 23:52:10 +0200 Subject: [PATCH 386/513] ALSA: line6: Remove double line6_pcm_release() after failed acquire. If there's an error, pcm is released in line6_pcm_acquire already. Fixes: 247d95ee6dd2 ('ALSA: line6: Handle error from line6_pcm_acquire()') Cc: <stable@vger.kernel.org> # v4.0+ Reviewed-by: Stefan Hajnoczi <stefanha@gmail.com> Signed-off-by: Andrej Krutak <dev@andree.sk> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/usb/line6/pcm.c | 1 - 1 file changed, 1 deletion(-) diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 204cc074adb96..2bc8879757f97 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -55,7 +55,6 @@ static int snd_line6_impulse_volume_put(struct snd_kcontrol *kcontrol, err = line6_pcm_acquire(line6pcm, LINE6_STREAM_IMPULSE); if (err < 0) { line6pcm->impulse_volume = 0; - line6_pcm_release(line6pcm, LINE6_STREAM_IMPULSE); return err; } } else { From adc8a43a6d6688272ebffa81789fa857e603dec6 Mon Sep 17 00:00:00 2001 From: Andrej Krutak <dev@andree.sk> Date: Thu, 18 Aug 2016 23:52:11 +0200 Subject: [PATCH 387/513] ALSA: line6: Give up on the lock while URBs are released. Done, because line6_stream_stop() locks and calls line6_unlink_audio_urbs(), which in turn invokes audio_out_callback(), which tries to lock 2nd time. Fixes: ============================================= [ INFO: possible recursive locking detected ] 4.4.15+ #15 Not tainted --------------------------------------------- mplayer/3591 is trying to acquire lock: (&(&line6pcm->out.lock)->rlock){-.-...}, at: [<bfa27655>] audio_out_callback+0x70/0x110 [snd_usb_line6] but task is already holding lock: (&(&line6pcm->out.lock)->rlock){-.-...}, at: [<bfa26aad>] line6_stream_stop+0x24/0x5c [snd_usb_line6] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&line6pcm->out.lock)->rlock); lock(&(&line6pcm->out.lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 3 locks held by mplayer/3591: #0: (snd_pcm_link_rwlock){.-.-..}, at: [<bf8d49a7>] snd_pcm_stream_lock+0x1e/0x40 [snd_pcm] #1: (&(&substream->self_group.lock)->rlock){-.-...}, at: [<bf8d49af>] snd_pcm_stream_lock+0x26/0x40 [snd_pcm] #2: (&(&line6pcm->out.lock)->rlock){-.-...}, at: [<bfa26aad>] line6_stream_stop+0x24/0x5c [snd_usb_line6] stack backtrace: CPU: 0 PID: 3591 Comm: mplayer Not tainted 4.4.15+ #15 Hardware name: Generic AM33XX (Flattened Device Tree) [<c0015d85>] (unwind_backtrace) from [<c001253d>] (show_stack+0x11/0x14) [<c001253d>] (show_stack) from [<c02f1bdf>] (dump_stack+0x8b/0xac) [<c02f1bdf>] (dump_stack) from [<c0076f43>] (__lock_acquire+0xc8b/0x1780) [<c0076f43>] (__lock_acquire) from [<c007810d>] (lock_acquire+0x99/0x1c0) [<c007810d>] (lock_acquire) from [<c06171e7>] (_raw_spin_lock_irqsave+0x3f/0x4c) [<c06171e7>] (_raw_spin_lock_irqsave) from [<bfa27655>] (audio_out_callback+0x70/0x110 [snd_usb_line6]) [<bfa27655>] (audio_out_callback [snd_usb_line6]) from [<c04294db>] (__usb_hcd_giveback_urb+0x53/0xd0) [<c04294db>] (__usb_hcd_giveback_urb) from [<c046388d>] (musb_giveback+0x3d/0x98) [<c046388d>] (musb_giveback) from [<c04647f5>] (musb_urb_dequeue+0x6d/0x114) [<c04647f5>] (musb_urb_dequeue) from [<c042ac11>] (usb_hcd_unlink_urb+0x39/0x98) [<c042ac11>] (usb_hcd_unlink_urb) from [<bfa26a87>] (line6_unlink_audio_urbs+0x6a/0x6c [snd_usb_line6]) [<bfa26a87>] (line6_unlink_audio_urbs [snd_usb_line6]) from [<bfa26acb>] (line6_stream_stop+0x42/0x5c [snd_usb_line6]) [<bfa26acb>] (line6_stream_stop [snd_usb_line6]) from [<bfa26fe7>] (snd_line6_trigger+0xb6/0xf4 [snd_usb_line6]) [<bfa26fe7>] (snd_line6_trigger [snd_usb_line6]) from [<bf8d47b7>] (snd_pcm_do_stop+0x36/0x38 [snd_pcm]) [<bf8d47b7>] (snd_pcm_do_stop [snd_pcm]) from [<bf8d462f>] (snd_pcm_action_single+0x22/0x40 [snd_pcm]) [<bf8d462f>] (snd_pcm_action_single [snd_pcm]) from [<bf8d46f9>] (snd_pcm_action+0xac/0xb0 [snd_pcm]) [<bf8d46f9>] (snd_pcm_action [snd_pcm]) from [<bf8d4b61>] (snd_pcm_drop+0x38/0x64 [snd_pcm]) [<bf8d4b61>] (snd_pcm_drop [snd_pcm]) from [<bf8d6233>] (snd_pcm_common_ioctl1+0x7fe/0xbe8 [snd_pcm]) [<bf8d6233>] (snd_pcm_common_ioctl1 [snd_pcm]) from [<bf8d6779>] (snd_pcm_playback_ioctl1+0x15c/0x51c [snd_pcm]) [<bf8d6779>] (snd_pcm_playback_ioctl1 [snd_pcm]) from [<bf8d6b59>] (snd_pcm_playback_ioctl+0x20/0x28 [snd_pcm]) [<bf8d6b59>] (snd_pcm_playback_ioctl [snd_pcm]) from [<c016714b>] (do_vfs_ioctl+0x3af/0x5c8) Fixes: 63e20df1e5b2 ('ALSA: line6: Reorganize PCM stream handling') Cc: <stable@vger.kernel.org> # v4.0+ Reviewed-by: Stefan Hajnoczi <stefanha@gmail.com> Signed-off-by: Andrej Krutak <dev@andree.sk> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/usb/line6/pcm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/usb/line6/pcm.c b/sound/usb/line6/pcm.c index 2bc8879757f97..41aa3355e9204 100644 --- a/sound/usb/line6/pcm.c +++ b/sound/usb/line6/pcm.c @@ -210,7 +210,9 @@ static void line6_stream_stop(struct snd_line6_pcm *line6pcm, int direction, spin_lock_irqsave(&pstr->lock, flags); clear_bit(type, &pstr->running); if (!pstr->running) { + spin_unlock_irqrestore(&pstr->lock, flags); line6_unlink_audio_urbs(line6pcm, pstr); + spin_lock_irqsave(&pstr->lock, flags); if (direction == SNDRV_PCM_STREAM_CAPTURE) { line6pcm->prev_fbuf = NULL; line6pcm->prev_fsize = 0; From b027d11263836a0cd335520175257dcb99b43757 Mon Sep 17 00:00:00 2001 From: Andrej Krutak <dev@andree.sk> Date: Thu, 18 Aug 2016 23:52:12 +0200 Subject: [PATCH 388/513] ALSA: line6: Fix POD sysfs attributes segfault The commit 02fc76f6a changed base of the sysfs attributes from device to card. The "show" callbacks dereferenced wrong objects because of this. Fixes: 02fc76f6a7db ('ALSA: line6: Create sysfs via snd_card_add_dev_attr()') Cc: <stable@vger.kernel.org> # v4.0+ Reviewed-by: Stefan Hajnoczi <stefanha@gmail.com> Signed-off-by: Andrej Krutak <dev@andree.sk> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/usb/line6/pod.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sound/usb/line6/pod.c b/sound/usb/line6/pod.c index daf81d169a420..45dd34874f43a 100644 --- a/sound/usb/line6/pod.c +++ b/sound/usb/line6/pod.c @@ -244,8 +244,8 @@ static int pod_set_system_param_int(struct usb_line6_pod *pod, int value, static ssize_t serial_number_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_interface *interface = to_usb_interface(dev); - struct usb_line6_pod *pod = usb_get_intfdata(interface); + struct snd_card *card = dev_to_snd_card(dev); + struct usb_line6_pod *pod = card->private_data; return sprintf(buf, "%u\n", pod->serial_number); } @@ -256,8 +256,8 @@ static ssize_t serial_number_show(struct device *dev, static ssize_t firmware_version_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_interface *interface = to_usb_interface(dev); - struct usb_line6_pod *pod = usb_get_intfdata(interface); + struct snd_card *card = dev_to_snd_card(dev); + struct usb_line6_pod *pod = card->private_data; return sprintf(buf, "%d.%02d\n", pod->firmware_version / 100, pod->firmware_version % 100); @@ -269,8 +269,8 @@ static ssize_t firmware_version_show(struct device *dev, static ssize_t device_id_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct usb_interface *interface = to_usb_interface(dev); - struct usb_line6_pod *pod = usb_get_intfdata(interface); + struct snd_card *card = dev_to_snd_card(dev); + struct usb_line6_pod *pod = card->private_data; return sprintf(buf, "%d\n", pod->device_id); } From f4750a46a0dee58f7a65b438b28a092669b609aa Mon Sep 17 00:00:00 2001 From: Matt Roper <matthew.d.roper@intel.com> Date: Fri, 17 Jun 2016 13:42:18 -0700 Subject: [PATCH 389/513] drm/i915/gen9: Initialize intel_state->active_crtcs during WM sanitization (v2) intel_state->active_crtcs is usually only initialized when doing a modeset. During our first atomic commit after boot, we're effectively faking a modeset to sanitize the DDB/wm setup, so ensure that this field gets initialized before use. v2: - Don't clobber active_crtcs if our first commit really is a modeset (Maarten) - Grab connection_mutex when faking a modeset during sanitization (Maarten) Reported-by: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Tested-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1466196140-16336-2-git-send-email-matthew.d.roper@intel.com Cc: stable@vger.kernel.org #v4.7+ (cherry picked from commit 1b54a880b250acc226b13cea221b90aa1b3e37dd) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index d5deb58a2128d..e9763a89f3d7b 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3912,9 +3912,24 @@ skl_compute_ddb(struct drm_atomic_state *state) * pretend that all pipes switched active status so that we'll * ensure a full DDB recompute. */ - if (dev_priv->wm.distrust_bios_wm) + if (dev_priv->wm.distrust_bios_wm) { + ret = drm_modeset_lock(&dev->mode_config.connection_mutex, + state->acquire_ctx); + if (ret) + return ret; + intel_state->active_pipe_changes = ~0; + /* + * We usually only initialize intel_state->active_crtcs if we + * we're doing a modeset; make sure this field is always + * initialized during the sanitization process that happens + * on the first commit too. + */ + if (!intel_state->modeset) + intel_state->active_crtcs = dev_priv->active_crtcs; + } + /* * If the modeset changes which CRTC's are active, we need to * recompute the DDB allocation for *all* active pipes, even From c7aca235aa60d1432c95b752812d359d0dbece4f Mon Sep 17 00:00:00 2001 From: Matt Roper <matthew.d.roper@intel.com> Date: Fri, 17 Jun 2016 13:42:20 -0700 Subject: [PATCH 390/513] drm/i915/gen9: Drop invalid WARN() during data rate calculation It's possible to have a non-zero plane mask and still wind up with a total data rate of zero. There are two cases where this can happen: * planes are active (from the KMS point of view), but are all fully clipped (positioned offscreen) * the only active plane on a CRTC is the cursor (which is handled independently and not counted into the general data rate computations These are both valid display setups (although unusual), so we need to drop the WARN(). Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Reviewed-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Testcase: kms_universal_planes.cursor-only-pipe-* Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1466196140-16336-4-git-send-email-matthew.d.roper@intel.com Cc: stable@vger.kernel.org #v4.7+ (cherry picked from commit 43aa7e87507f519b0b2497b6fac1e894554eaef2) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index e9763a89f3d7b..421c5b72464a0 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3107,8 +3107,6 @@ skl_get_total_relative_data_rate(struct intel_crtc_state *intel_cstate) total_data_rate += intel_cstate->wm.skl.plane_y_data_rate[id]; } - WARN_ON(cstate->plane_mask && total_data_rate == 0); - return total_data_rate; } From dcd79934b0dd803fdb29216fbd6f4a899a66f466 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Thu, 18 Aug 2016 17:16:40 +0100 Subject: [PATCH 391/513] drm/i915: Unconditionally flush any chipset buffers before execbuf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If userspace is asynchronously streaming into the batch or other execobjects, we may not flush those writes along with a change in cache domain (as there is no change). Therefore those writes may end up in internal chipset buffers and not visible to the GPU upon execution. We must issue a flush command or otherwise we encounter incoherency in the batchbuffers and the GPU executing invalid commands (i.e. hanging) quite regularly. v2: Throw a paranoid wmb() into the general flush so that we remain consistent with before. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90841 Fixes: 1816f9236303 ("drm/i915: Support creation of unbound wc user...") Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Akash Goel <akash.goel@intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com> Tested-by: Matti Hämäläinen <ccr@tnsp.org> Cc: stable@vger.kernel.org Reviewed-by: Mika Kuoppala <mika.kuoppala@intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/20160818161718.27187-1-chris@chris-wilson.co.uk (cherry picked from commit 600f436801deae65e48404847b61c89b4944e355) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 13 +++---------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 20fe9d52e2562..3ca674e9c105a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3591,6 +3591,7 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle); /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { + wmb(); if (INTEL_GEN(dev_priv) < 6) intel_gtt_chipset_flush(); } diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 1978633e7549a..b35e5b6475b24 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -943,8 +943,6 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, { const unsigned other_rings = ~intel_engine_flag(req->engine); struct i915_vma *vma; - uint32_t flush_domains = 0; - bool flush_chipset = false; int ret; list_for_each_entry(vma, vmas, exec_list) { @@ -957,16 +955,11 @@ i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req, } if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) - flush_chipset |= i915_gem_clflush_object(obj, false); - - flush_domains |= obj->base.write_domain; + i915_gem_clflush_object(obj, false); } - if (flush_chipset) - i915_gem_chipset_flush(req->engine->i915); - - if (flush_domains & I915_GEM_DOMAIN_GTT) - wmb(); + /* Unconditionally flush any chipset caches (for streaming writes). */ + i915_gem_chipset_flush(req->engine->i915); /* Unconditionally invalidate gpu caches and ensure that we do flush * any residual writes from the previous batch. From 0184c2fff11705d0041c61356c57eac2e033c686 Mon Sep 17 00:00:00 2001 From: Dave Gordon <david.s.gordon@intel.com> Date: Fri, 19 Aug 2016 15:23:42 +0100 Subject: [PATCH 392/513] drm/i915: Reattach comment, complete type specification In the recent patch bc3d674 drm/i915: Allow userspace to request no-error-capture upon ... the final version moved the flags and the associated #defines around so they were adjacent; unfortunately, they ended up between a comment and the thing (hw_id) to which the comment applies :( So this patch reshuffles the comment and subject back together. Also, as we're touching 'hw_id', let's change it from just 'unsigned' to a fully-specified 'unsigned int', because some code checking tools (including checkpatch) object to plain 'unsigned'. Fixes: bc3d674462e5 ("drm/i915: Allow userspace to request no-error-capture...") Signed-off-by: Dave Gordon <david.s.gordon@intel.com> Cc: Chris Wilson <chris@chris-wilson.co.uk> Link: http://patchwork.freedesktop.org/patch/msgid/1471616622-6919-1-git-send-email-david.s.gordon@intel.com Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> (cherry picked from commit 0be81156b3fb4d4e8e2c94177e5222dc21c3ff10) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 3ca674e9c105a..46e372ece6ebe 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -882,11 +882,12 @@ struct i915_gem_context { struct i915_ctx_hang_stats hang_stats; - /* Unique identifier for this context, used by the hw for tracking */ unsigned long flags; #define CONTEXT_NO_ZEROMAP BIT(0) #define CONTEXT_NO_ERROR_CAPTURE BIT(1) - unsigned hw_id; + + /* Unique identifier for this context, used by the hw for tracking */ + unsigned int hw_id; u32 user_handle; u32 ggtt_alignment; From 5bc6abe7674d9cf41dbcdaaf98a19184da181439 Mon Sep 17 00:00:00 2001 From: Lyude <cpaul@redhat.com> Date: Wed, 17 Aug 2016 15:55:53 -0400 Subject: [PATCH 393/513] drm/i915/gen6+: Interpret mailbox error flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order to add proper support for the SAGV, we need to be able to know what the cause of a failure to change the SAGV through the pcode mailbox was. The reasoning for this is that some very early pre-release Skylake machines don't actually allow you to control the SAGV on them, and indicate an invalid mailbox command was sent. This also might come in handy in the future for debugging. Changes since v1: - Add functions for interpreting gen6 mailbox error codes along with gen7+ error codes, and actually interpret those codes properly - Renamed patch to reflect new behavior Signed-off-by: Lyude <cpaul@redhat.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: stable@vger.kernel.org Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-2-git-send-email-cpaul@redhat.com [mlankhorst: -ENOSYS -> -ENXIO for checkpatch] (cherry picked from commit 87660502f1a4d51fb043e89a45d30c9917787c22) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_reg.h | 9 +++++ drivers/gpu/drm/i915/intel_pm.c | 71 ++++++++++++++++++++++++++++++++- 2 files changed, 79 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 5c06413ae0e61..2b9508dda7594 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7145,6 +7145,15 @@ enum { #define GEN6_PCODE_MAILBOX _MMIO(0x138124) #define GEN6_PCODE_READY (1<<31) +#define GEN6_PCODE_ERROR_MASK 0xFF +#define GEN6_PCODE_SUCCESS 0x0 +#define GEN6_PCODE_ILLEGAL_CMD 0x1 +#define GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x2 +#define GEN6_PCODE_TIMEOUT 0x3 +#define GEN6_PCODE_UNIMPLEMENTED_CMD 0xFF +#define GEN7_PCODE_TIMEOUT 0x2 +#define GEN7_PCODE_ILLEGAL_DATA 0x3 +#define GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE 0x10 #define GEN6_PCODE_WRITE_RC6VIDS 0x4 #define GEN6_PCODE_READ_RC6VIDS 0x5 #define GEN6_ENCODE_RC6_VID(mv) (((mv) - 245) / 5) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 421c5b72464a0..2c88c2f50d021 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -7671,8 +7671,53 @@ void intel_init_pm(struct drm_device *dev) } } +static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) +{ + uint32_t flags = + I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; + + switch (flags) { + case GEN6_PCODE_SUCCESS: + return 0; + case GEN6_PCODE_UNIMPLEMENTED_CMD: + case GEN6_PCODE_ILLEGAL_CMD: + return -ENXIO; + case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + return -EOVERFLOW; + case GEN6_PCODE_TIMEOUT: + return -ETIMEDOUT; + default: + MISSING_CASE(flags) + return 0; + } +} + +static inline int gen7_check_mailbox_status(struct drm_i915_private *dev_priv) +{ + uint32_t flags = + I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_ERROR_MASK; + + switch (flags) { + case GEN6_PCODE_SUCCESS: + return 0; + case GEN6_PCODE_ILLEGAL_CMD: + return -ENXIO; + case GEN7_PCODE_TIMEOUT: + return -ETIMEDOUT; + case GEN7_PCODE_ILLEGAL_DATA: + return -EINVAL; + case GEN7_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: + return -EOVERFLOW; + default: + MISSING_CASE(flags); + return 0; + } +} + int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val) { + int status; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can @@ -7699,12 +7744,25 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val *val = I915_READ_FW(GEN6_PCODE_DATA); I915_WRITE_FW(GEN6_PCODE_DATA, 0); + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n", + status); + return status; + } + return 0; } int sandybridge_pcode_write(struct drm_i915_private *dev_priv, - u32 mbox, u32 val) + u32 mbox, u32 val) { + int status; + WARN_ON(!mutex_is_locked(&dev_priv->rps.hw_lock)); /* GEN6_PCODE_* are outside of the forcewake domain, we can @@ -7729,6 +7787,17 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, I915_WRITE_FW(GEN6_PCODE_DATA, 0); + if (INTEL_GEN(dev_priv) > 6) + status = gen7_check_mailbox_status(dev_priv); + else + status = gen6_check_mailbox_status(dev_priv); + + if (status) { + DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n", + status); + return status; + } + return 0; } From f403372658fc7b652a77885a4141e58e57d9c75a Mon Sep 17 00:00:00 2001 From: Lyude <cpaul@redhat.com> Date: Wed, 17 Aug 2016 15:55:54 -0400 Subject: [PATCH 394/513] drm/i915/skl: Add support for the SAGV, fix underrun hangs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the watermark calculations for Skylake are still broken, we're apt to hitting underruns very easily under multi-monitor configurations. While it would be lovely if this was fixed, it's not. Another problem that's been coming from this however, is the mysterious issue of underruns causing full system hangs. An easy way to reproduce this with a skylake system: - Get a laptop with a skylake GPU, and hook up two external monitors to it - Move the cursor from the built-in LCD to one of the external displays as quickly as you can - You'll get a few pipe underruns, and eventually the entire system will just freeze. After doing a lot of investigation and reading through the bspec, I found the existence of the SAGV, which is responsible for adjusting the system agent voltage and clock frequencies depending on how much power we need. According to the bspec: "The display engine access to system memory is blocked during the adjustment time. SAGV defaults to enabled. Software must use the GT-driver pcode mailbox to disable SAGV when the display engine is not able to tolerate the blocking time." The rest of the bspec goes on to explain that software can simply leave the SAGV enabled, and disable it when we use interlaced pipes/have more then one pipe active. Sure enough, with this patchset the system hangs resulting from pipe underruns on Skylake have completely vanished on my T460s. Additionally, the bspec mentions turning off the SAGV with more then one pipe enabled as a workaround for display underruns. While this patch doesn't entirely fix that, it looks like it does improve the situation a little bit so it's likely this is going to be required to make watermarks on Skylake fully functional. This will still need additional work in the future: we shouldn't be enabling the SAGV if any of the currently enabled planes can't enable WM levels that introduce latencies >= 30 µs. Changes since v11: - Add skl_can_enable_sagv() - Make sure we don't enable SAGV when not all planes can enable watermarks >= the SAGV engine block time. I was originally going to save this for later, but I recently managed to run into a machine that was having problems with a single pipe configuration + SAGV. - Make comparisons to I915_SKL_SAGV_NOT_CONTROLLED explicit - Change I915_SAGV_DYNAMIC_FREQ to I915_SAGV_ENABLE - Move printks outside of mutexes - Don't print error messages twice Changes since v10: - Apparently sandybridge_pcode_read actually writes values and reads them back, despite it's misleading function name. This means we've been doing this mostly wrong and have been writing garbage to the SAGV control. Because of this, we no longer attempt to read the SAGV status during initialization (since there are no helpers for this). - mlankhorst noticed that this patch was breaking on some very early pre-release Skylake machines, which apparently don't allow you to disable the SAGV. To prevent machines from failing tests due to SAGV errors, if the first time we try to control the SAGV results in the mailbox indicating an invalid command, we just disable future attempts to control the SAGV state by setting dev_priv->skl_sagv_status to I915_SKL_SAGV_NOT_CONTROLLED and make a note of it in dmesg. - Move mutex_unlock() a little higher in skl_enable_sagv(). This doesn't actually fix anything, but lets us release the lock a little sooner since we're finished with it. Changes since v9: - Only enable/disable sagv on Skylake Changes since v8: - Add intel_state->modeset guard to the conditional for skl_enable_sagv() Changes since v7: - Remove GEN9_SAGV_LOW_FREQ, replace with GEN9_SAGV_IS_ENABLED (that's all we use it for anyway) - Use GEN9_SAGV_IS_ENABLED instead of 0x1 for clarification - Fix a styling error that snuck past me Changes since v6: - Protect skl_enable_sagv() with intel_state->modeset conditional in intel_atomic_commit_tail() Changes since v5: - Don't use is_power_of_2. Makes things confusing - Don't use the old state to figure out whether or not to enable/disable the sagv, use the new one - Split the loop in skl_disable_sagv into it's own function - Move skl_sagv_enable/disable() calls into intel_atomic_commit_tail() Changes since v4: - Use is_power_of_2 against active_crtcs to check whether we have > 1 pipe enabled - Fix skl_sagv_get_hw_state(): (temp & 0x1) indicates disabled, 0x0 enabled - Call skl_sagv_enable/disable() from pre/post-plane updates Changes since v3: - Use time_before() to compare timeout to jiffies Changes since v2: - Really apply minor style nitpicks to patch this time Changes since v1: - Added comments about this probably being one of the requirements to fixing Skylake's watermark issues - Minor style nitpicks from Matt Roper - Disable these functions on Broxton, since it doesn't have an SAGV Signed-off-by: Lyude <cpaul@redhat.com> Cc: Matt Roper <matthew.d.roper@intel.com> Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Daniel Vetter <daniel.vetter@ffwll.ch> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: stable@vger.kernel.org Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-3-git-send-email-cpaul@redhat.com [mlankhorst: ENOSYS -> ENXIO, whitespace fixes] (cherry picked from commit 656d1b89e5ffb83036ab0e2a24be7558f34365c7) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/i915_drv.h | 7 ++ drivers/gpu/drm/i915/i915_reg.h | 4 + drivers/gpu/drm/i915/intel_display.c | 11 ++ drivers/gpu/drm/i915/intel_drv.h | 3 + drivers/gpu/drm/i915/intel_pm.c | 148 +++++++++++++++++++++++++++ 5 files changed, 173 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 46e372ece6ebe..f68c78918d632 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1964,6 +1964,13 @@ struct drm_i915_private { struct i915_suspend_saved_registers regfile; struct vlv_s0ix_state vlv_s0ix_state; + enum { + I915_SKL_SAGV_UNKNOWN = 0, + I915_SKL_SAGV_DISABLED, + I915_SKL_SAGV_ENABLED, + I915_SKL_SAGV_NOT_CONTROLLED + } skl_sagv_status; + struct { /* * Raw watermark latency values: diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 2b9508dda7594..bf2cad3f9e1fb 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7175,6 +7175,10 @@ enum { #define HSW_PCODE_DE_WRITE_FREQ_REQ 0x17 #define DISPLAY_IPS_CONTROL 0x19 #define HSW_PCODE_DYNAMIC_DUTY_CYCLE_CONTROL 0x1A +#define GEN9_PCODE_SAGV_CONTROL 0x21 +#define GEN9_SAGV_DISABLE 0x0 +#define GEN9_SAGV_IS_DISABLED 0x1 +#define GEN9_SAGV_ENABLE 0x3 #define GEN6_PCODE_DATA _MMIO(0x138128) #define GEN6_PCODE_FREQ_IA_RATIO_SHIFT 8 #define GEN6_PCODE_FREQ_RING_RATIO_SHIFT 16 diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 2a751b6e02535..175595fc3e45d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13759,6 +13759,13 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_state->cdclk_pll_vco != dev_priv->cdclk_pll.vco)) dev_priv->display.modeset_commit_cdclk(state); + /* + * SKL workaround: bspec recommends we disable the SAGV when we + * have more then one pipe enabled + */ + if (IS_SKYLAKE(dev_priv) && !skl_can_enable_sagv(state)) + skl_disable_sagv(dev_priv); + intel_modeset_verify_disabled(dev); } @@ -13832,6 +13839,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) intel_modeset_verify_crtc(crtc, old_crtc_state, crtc->state); } + if (IS_SKYLAKE(dev_priv) && intel_state->modeset && + skl_can_enable_sagv(state)) + skl_enable_sagv(dev_priv); + drm_atomic_helper_commit_hw_done(state); if (intel_state->modeset) diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index cc937a19b1ba0..ff399b9a5c1f9 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -1716,6 +1716,9 @@ void ilk_wm_get_hw_state(struct drm_device *dev); void skl_wm_get_hw_state(struct drm_device *dev); void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv, struct skl_ddb_allocation *ddb /* out */); +bool skl_can_enable_sagv(struct drm_atomic_state *state); +int skl_enable_sagv(struct drm_i915_private *dev_priv); +int skl_disable_sagv(struct drm_i915_private *dev_priv); uint32_t ilk_pipe_pixel_rate(const struct intel_crtc_state *pipe_config); bool ilk_disable_lp_wm(struct drm_device *dev); int sanitize_rc6_option(struct drm_i915_private *dev_priv, int enable_rc6); diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2c88c2f50d021..496a911e56682 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2852,6 +2852,7 @@ bool ilk_disable_lp_wm(struct drm_device *dev) #define SKL_DDB_SIZE 896 /* in blocks */ #define BXT_DDB_SIZE 512 +#define SKL_SAGV_BLOCK_TIME 30 /* µs */ /* * Return the index of a plane in the SKL DDB and wm result arrays. Primary @@ -2875,6 +2876,153 @@ skl_wm_plane_id(const struct intel_plane *plane) } } +/* + * SAGV dynamically adjusts the system agent voltage and clock frequencies + * depending on power and performance requirements. The display engine access + * to system memory is blocked during the adjustment time. Because of the + * blocking time, having this enabled can cause full system hangs and/or pipe + * underruns if we don't meet all of the following requirements: + * + * - <= 1 pipe enabled + * - All planes can enable watermarks for latencies >= SAGV engine block time + * - We're not using an interlaced display configuration + */ +int +skl_enable_sagv(struct drm_i915_private *dev_priv) +{ + int ret; + + if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED || + dev_priv->skl_sagv_status == I915_SKL_SAGV_ENABLED) + return 0; + + DRM_DEBUG_KMS("Enabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + ret = sandybridge_pcode_write(dev_priv, GEN9_PCODE_SAGV_CONTROL, + GEN9_SAGV_ENABLE); + + /* We don't need to wait for the SAGV when enabling */ + mutex_unlock(&dev_priv->rps.hw_lock); + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. + */ + if (ret == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED; + return 0; + } else if (ret < 0) { + DRM_ERROR("Failed to enable the SAGV\n"); + return ret; + } + + dev_priv->skl_sagv_status = I915_SKL_SAGV_ENABLED; + return 0; +} + +static int +skl_do_sagv_disable(struct drm_i915_private *dev_priv) +{ + int ret; + uint32_t temp = GEN9_SAGV_DISABLE; + + ret = sandybridge_pcode_read(dev_priv, GEN9_PCODE_SAGV_CONTROL, + &temp); + if (ret) + return ret; + else + return temp & GEN9_SAGV_IS_DISABLED; +} + +int +skl_disable_sagv(struct drm_i915_private *dev_priv) +{ + int ret, result; + + if (dev_priv->skl_sagv_status == I915_SKL_SAGV_NOT_CONTROLLED || + dev_priv->skl_sagv_status == I915_SKL_SAGV_DISABLED) + return 0; + + DRM_DEBUG_KMS("Disabling the SAGV\n"); + mutex_lock(&dev_priv->rps.hw_lock); + + /* bspec says to keep retrying for at least 1 ms */ + ret = wait_for(result = skl_do_sagv_disable(dev_priv), 1); + mutex_unlock(&dev_priv->rps.hw_lock); + + if (ret == -ETIMEDOUT) { + DRM_ERROR("Request to disable SAGV timed out\n"); + return -ETIMEDOUT; + } + + /* + * Some skl systems, pre-release machines in particular, + * don't actually have an SAGV. + */ + if (result == -ENXIO) { + DRM_DEBUG_DRIVER("No SAGV found on system, ignoring\n"); + dev_priv->skl_sagv_status = I915_SKL_SAGV_NOT_CONTROLLED; + return 0; + } else if (result < 0) { + DRM_ERROR("Failed to disable the SAGV\n"); + return result; + } + + dev_priv->skl_sagv_status = I915_SKL_SAGV_DISABLED; + return 0; +} + +bool skl_can_enable_sagv(struct drm_atomic_state *state) +{ + struct drm_device *dev = state->dev; + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_atomic_state *intel_state = to_intel_atomic_state(state); + struct drm_crtc *crtc; + enum pipe pipe; + int level, plane; + + /* + * SKL workaround: bspec recommends we disable the SAGV when we have + * more then one pipe enabled + * + * If there are no active CRTCs, no additional checks need be performed + */ + if (hweight32(intel_state->active_crtcs) == 0) + return true; + else if (hweight32(intel_state->active_crtcs) > 1) + return false; + + /* Since we're now guaranteed to only have one active CRTC... */ + pipe = ffs(intel_state->active_crtcs) - 1; + crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + + if (crtc->state->mode.flags & DRM_MODE_FLAG_INTERLACE) + return false; + + for_each_plane(dev_priv, pipe, plane) { + /* Skip this plane if it's not enabled */ + if (intel_state->wm_results.plane[pipe][plane][0] == 0) + continue; + + /* Find the highest enabled wm level for this plane */ + for (level = ilk_wm_max_level(dev); + intel_state->wm_results.plane[pipe][plane][level] == 0; --level) + { } + + /* + * If any of the planes on this pipe don't enable wm levels + * that incur memory latencies higher then 30µs we can't enable + * the SAGV + */ + if (dev_priv->wm.skl_latency[level] < SKL_SAGV_BLOCK_TIME) + return false; + } + + return true; +} + static void skl_ddb_get_pipe_allocation_limits(struct drm_device *dev, const struct intel_crtc_state *cstate, From 9909113cc48a7ce6e772573e3cc82a3f03ffa8ef Mon Sep 17 00:00:00 2001 From: Matt Roper <matthew.d.roper@intel.com> Date: Wed, 17 Aug 2016 15:55:55 -0400 Subject: [PATCH 395/513] drm/i915/gen9: Only copy WM results for changed pipes to skl_hw MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we write watermark values to the hardware, those values are stored in dev_priv->wm.skl_hw. However with recent watermark changes, the results structure we're copying from only contains valid watermark and DDB values for the pipes that are actually changing; the values for other pipes remain 0. Thus a blind copy of the entire skl_wm_values structure will clobber the values for unchanged pipes...we need to be more selective and only copy over the values for the changing pipes. This mistake was hidden until recently due to another bug that caused us to erroneously re-calculate watermarks for all active pipes rather than changing pipes. Only when that bug was fixed was the impact of this bug discovered (e.g., modesets failing with "Requested display configuration exceeds system watermark limitations" messages and leaving watermarks non-functional, even ones initiated by intel_fbdev_restore_mode). Changes since v1: - Add a function for copying a pipe's wm values (skl_copy_wm_for_pipe()) so we can reuse this later Fixes: 734fa01f3a17 ("drm/i915/gen9: Calculate watermarks during atomic 'check' (v2)") Fixes: 9b6130227495 ("drm/i915/gen9: Re-allocate DDB only for changed pipes") Signed-off-by: Matt Roper <matthew.d.roper@intel.com> Signed-off-by: Lyude <cpaul@redhat.com> Reviewed-by: Matt Roper <matthew.d.roper@intel.com> Cc: stable@vger.kernel.org Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com> Cc: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-4-git-send-email-cpaul@redhat.com (cherry picked from commit 2722efb90b3420dee54b4cb3cdc7917efacc2dce) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 496a911e56682..70dcf8d355684 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4109,6 +4109,24 @@ skl_compute_ddb(struct drm_atomic_state *state) return 0; } +static void +skl_copy_wm_for_pipe(struct skl_wm_values *dst, + struct skl_wm_values *src, + enum pipe pipe) +{ + dst->wm_linetime[pipe] = src->wm_linetime[pipe]; + memcpy(dst->plane[pipe], src->plane[pipe], + sizeof(dst->plane[pipe])); + memcpy(dst->plane_trans[pipe], src->plane_trans[pipe], + sizeof(dst->plane_trans[pipe])); + + dst->ddb.pipe[pipe] = src->ddb.pipe[pipe]; + memcpy(dst->ddb.y_plane[pipe], src->ddb.y_plane[pipe], + sizeof(dst->ddb.y_plane[pipe])); + memcpy(dst->ddb.plane[pipe], src->ddb.plane[pipe], + sizeof(dst->ddb.plane[pipe])); +} + static int skl_compute_wm(struct drm_atomic_state *state) { @@ -4181,8 +4199,10 @@ static void skl_update_wm(struct drm_crtc *crtc) struct drm_device *dev = crtc->dev; struct drm_i915_private *dev_priv = to_i915(dev); struct skl_wm_values *results = &dev_priv->wm.skl_results; + struct skl_wm_values *hw_vals = &dev_priv->wm.skl_hw; struct intel_crtc_state *cstate = to_intel_crtc_state(crtc->state); struct skl_pipe_wm *pipe_wm = &cstate->wm.skl.optimal; + int pipe; if ((results->dirty_pipes & drm_crtc_mask(crtc)) == 0) return; @@ -4194,8 +4214,12 @@ static void skl_update_wm(struct drm_crtc *crtc) skl_write_wm_values(dev_priv, results); skl_flush_wm_values(dev_priv, results); - /* store the new configuration */ - dev_priv->wm.skl_hw = *results; + /* + * Store the new configuration (but only for the pipes that have + * changed; the other values weren't recomputed). + */ + for_each_pipe_masked(dev_priv, pipe, results->dirty_pipes) + skl_copy_wm_for_pipe(hw_vals, results, pipe); mutex_unlock(&dev_priv->wm.wm_mutex); } From 762c60ab0257d25eea8db3e3fec85ed53b5330fe Mon Sep 17 00:00:00 2001 From: Lyude <cpaul@redhat.com> Date: Wed, 17 Aug 2016 15:55:57 -0400 Subject: [PATCH 396/513] drm/i915/skl: Ensure pipes with changed wms get added to the state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If we're enabling a pipe, we'll need to modify the watermarks on all active planes. Since those planes won't be added to the state on their own, we need to add them ourselves. Signed-off-by: Lyude <cpaul@redhat.com> Reviewed-by: Matt Roper <matthew.d.roper@intel.com> Cc: stable@vger.kernel.org Cc: Ville Syrjälä <ville.syrjala@linux.intel.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: Radhakrishna Sripada <radhakrishna.sripada@intel.com> Cc: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471463761-26796-6-git-send-email-cpaul@redhat.com (cherry picked from commit 05a76d3d6ad1ee9f9814f88949cc9305fc165460) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_pm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 70dcf8d355684..53e13c10e4ea8 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4104,6 +4104,10 @@ skl_compute_ddb(struct drm_atomic_state *state) ret = skl_allocate_pipe_ddb(cstate, ddb); if (ret) return ret; + + ret = drm_atomic_add_affected_planes(state, &intel_crtc->base); + if (ret) + return ret; } return 0; From 177d91aaea4bcafb29232336bafaa521b85286aa Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Date: Mon, 15 Aug 2016 15:09:27 +0200 Subject: [PATCH 397/513] drm/i915: Fix botched merge that downgrades CSR versions. Merge commit 5e580523d9128a4d8 reverts the version bumping parts of commit 4aa7fb9c3c4fa0. Bump the versions again and request the specific firmware version. The currently recommended versions are: SKL 1.26, KBL 1.01 and BXT 1.07. Cc: Patrik Jakobsson <patrik.jakobsson@linux.intel.com> Cc: Imre Deak <imre.deak@intel.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97242 Cc: drm-intel-fixes@lists.freedesktop.org Fixes: 5e580523d912 ("Backmerge tag 'v4.7' into drm-next") Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com> Link: http://patchwork.freedesktop.org/patch/msgid/1471266567-22443-1-git-send-email-maarten.lankhorst@linux.intel.com Reviewed-by: Imre Deak <imre.deak@intel.com> (cherry picked from commit 536ab3ca19ef856e84389a155c5832c68559a28a) Signed-off-by: Jani Nikula <jani.nikula@intel.com> --- drivers/gpu/drm/i915/intel_csr.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 3edb9580928e5..c3b33a10c15cd 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -41,15 +41,15 @@ * be moved to FW_FAILED. */ -#define I915_CSR_KBL "i915/kbl_dmc_ver1.bin" +#define I915_CSR_KBL "i915/kbl_dmc_ver1_01.bin" MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 1) -#define I915_CSR_SKL "i915/skl_dmc_ver1.bin" +#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" MODULE_FIRMWARE(I915_CSR_SKL); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 23) +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 26) -#define I915_CSR_BXT "i915/bxt_dmc_ver1.bin" +#define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin" MODULE_FIRMWARE(I915_CSR_BXT); #define BXT_CSR_VERSION_REQUIRED CSR_VERSION(1, 7) From e9e5e3fae8da7e237049e00e0bfc9e32fd808fe8 Mon Sep 17 00:00:00 2001 From: Vegard Nossum <vegard.nossum@oracle.com> Date: Mon, 22 Aug 2016 12:47:43 +0200 Subject: [PATCH 398/513] bdev: fix NULL pointer dereference I got this: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) CPU: 0 PID: 5505 Comm: syz-executor Not tainted 4.8.0-rc2+ #161 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 task: ffff880113415940 task.stack: ffff880118350000 RIP: 0010:[<ffffffff8172cb32>] [<ffffffff8172cb32>] bd_mount+0x52/0xa0 RSP: 0018:ffff880118357ca0 EFLAGS: 00010207 RAX: dffffc0000000000 RBX: ffffffffffffffff RCX: ffffc90000bb6000 RDX: 0000000000000018 RSI: ffffffff846d6b20 RDI: 00000000000000c7 RBP: ffff880118357cb0 R08: ffff880115967c68 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801188211e8 R13: ffffffff847baa20 R14: ffff8801139cb000 R15: 0000000000000080 FS: 00007fa3ff6c0700(0000) GS:ffff88011aa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fc1d8cc7e78 CR3: 0000000109f20000 CR4: 00000000000006f0 DR0: 000000000000001e DR1: 000000000000001e DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600 Stack: ffff880112cfd6c0 ffff8801188211e8 ffff880118357cf0 ffffffff8167f207 ffffffff816d7a1e ffff880112a413c0 ffffffff847baa20 ffff8801188211e8 0000000000000080 ffff880112cfd6c0 ffff880118357d38 ffffffff816dce0a Call Trace: [<ffffffff8167f207>] mount_fs+0x97/0x2e0 [<ffffffff816d7a1e>] ? alloc_vfsmnt+0x55e/0x760 [<ffffffff816dce0a>] vfs_kern_mount+0x7a/0x300 [<ffffffff83c3247c>] ? _raw_read_unlock+0x2c/0x50 [<ffffffff816dfc87>] do_mount+0x3d7/0x2730 [<ffffffff81235fd4>] ? trace_do_page_fault+0x1f4/0x3a0 [<ffffffff816df8b0>] ? copy_mount_string+0x40/0x40 [<ffffffff8161ea81>] ? memset+0x31/0x40 [<ffffffff816df73e>] ? copy_mount_options+0x1ee/0x320 [<ffffffff816e2a02>] SyS_mount+0xb2/0x120 [<ffffffff816e2950>] ? copy_mnt_ns+0x970/0x970 [<ffffffff81005524>] do_syscall_64+0x1c4/0x4e0 [<ffffffff83c3282a>] entry_SYSCALL64_slow_path+0x25/0x25 Code: 83 e8 63 1b fc ff 48 85 c0 48 89 c3 74 4c e8 56 35 d1 ff 48 8d bb c8 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 75 36 4c 8b a3 c8 00 00 00 48 b8 00 00 00 00 00 fc RIP [<ffffffff8172cb32>] bd_mount+0x52/0xa0 RSP <ffff880118357ca0> ---[ end trace 13690ad962168b98 ]--- mount_pseudo() returns ERR_PTR(), not NULL, on error. Fixes: 3684aa7099e0 ("block-dev: enable writeback cgroup support") Cc: Shaohua Li <shli@fb.com> Cc: Tejun Heo <tj@kernel.org> Cc: Jens Axboe <axboe@fb.com> Cc: stable@vger.kernel.org Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Signed-off-by: Jens Axboe <axboe@fb.com> --- fs/block_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index c3cdde87cc8c6..e17bdbdfe9b1c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -646,7 +646,7 @@ static struct dentry *bd_mount(struct file_system_type *fs_type, { struct dentry *dent; dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC); - if (dent) + if (!IS_ERR(dent)) dent->d_sb->s_iflags |= SB_I_CGROUPWB; return dent; } From 209c721ce27022656a9d792b89572c4fdd7d31a7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyongjun1@huawei.com> Date: Sun, 21 Aug 2016 15:37:16 +0000 Subject: [PATCH 399/513] ASoC: max98371: Add terminate entry for i2c_device_id tables Make sure i2c_device_id tables are NULL terminated. Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/codecs/max98371.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/soc/codecs/max98371.c b/sound/soc/codecs/max98371.c index cf0a39bb631aa..02352ed8961c4 100644 --- a/sound/soc/codecs/max98371.c +++ b/sound/soc/codecs/max98371.c @@ -412,6 +412,7 @@ static int max98371_i2c_remove(struct i2c_client *client) static const struct i2c_device_id max98371_i2c_id[] = { { "max98371", 0 }, + { } }; MODULE_DEVICE_TABLE(i2c, max98371_i2c_id); From 6a33fa2b87513fee44cb8f0cd17b1acd6316bc6b Mon Sep 17 00:00:00 2001 From: Paul Burton <paul.burton@imgtec.com> Date: Fri, 19 Aug 2016 18:07:14 +0100 Subject: [PATCH 400/513] irqchip/mips-gic: Cleanup chip and handler setup gic_shared_irq_domain_map() is called from gic_irq_domain_alloc() where the wrong chip has been set, and is then overwritten. Tidy this up by setting the correct chip the first time, and setting the handle_level_irq handler from gic_irq_domain_alloc() too. gic_shared_irq_domain_map() is also called from gic_irq_domain_map(), which now calls irq_set_chip_and_handler() to retain its previous behaviour. This patch prepares for a follow-on which will call gic_shared_irq_domain_map() from a callback where the lock on the struct irq_desc is held, which without this change would cause the call to irq_set_chip_and_handler() to lead to a deadlock. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: Jason Cooper <jason@lakedaemon.net> Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160819170715.27820-1-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- drivers/irqchip/irq-mips-gic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index c5f33c3bd2285..2e0f49992a982 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -713,9 +713,6 @@ static int gic_shared_irq_domain_map(struct irq_domain *d, unsigned int virq, unsigned long flags; int i; - irq_set_chip_and_handler(virq, &gic_level_irq_controller, - handle_level_irq); - spin_lock_irqsave(&gic_lock, flags); gic_map_to_pin(intr, gic_cpu_pin); gic_map_to_vpe(intr, mips_cm_vp_id(vpe)); @@ -732,6 +729,10 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, { if (GIC_HWIRQ_TO_LOCAL(hw) < GIC_NUM_LOCAL_INTRS) return gic_local_irq_domain_map(d, virq, hw); + + irq_set_chip_and_handler(virq, &gic_level_irq_controller, + handle_level_irq); + return gic_shared_irq_domain_map(d, virq, hw, 0); } @@ -771,11 +772,13 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq, hwirq = GIC_SHARED_TO_HWIRQ(base_hwirq + i); ret = irq_domain_set_hwirq_and_chip(d, virq + i, hwirq, - &gic_edge_irq_controller, + &gic_level_irq_controller, NULL); if (ret) goto error; + irq_set_handler(virq + i, handle_level_irq); + ret = gic_shared_irq_domain_map(d, virq + i, hwirq, cpu); if (ret) goto error; From 2564970a381651865364974ea414384b569cb9c0 Mon Sep 17 00:00:00 2001 From: Paul Burton <paul.burton@imgtec.com> Date: Fri, 19 Aug 2016 18:07:15 +0100 Subject: [PATCH 401/513] irqchip/mips-gic: Implement activate op for device domain If an IRQ is setup using __setup_irq(), which is used by the request_irq() family of functions, and we are using an SMP kernel then the affinity of the IRQ will be set via setup_affinity() immediately after the IRQ is enabled. This call to gic_set_affinity() will lead to the interrupt being mapped to a VPE. However there are other ways to use IRQs which don't cause affinity to be set, for example if it is used to chain to another IRQ controller with irq_set_chained_handler_and_data(). The irq_set_chained_handler_and_data() code path will enable the IRQ, but will not trigger a call to gic_set_affinity() and in this case nothing will map the interrupt to a VPE, meaning that the interrupt is never received. Fix this by implementing the activate operation for the GIC device IRQ domain, using gic_shared_irq_domain_map() to map the interrupt to the correct pin of cpu 0. Fixes: c98c1822ee13 ("irqchip/mips-gic: Add device hierarchy domain") Signed-off-by: Paul Burton <paul.burton@imgtec.com> Cc: linux-mips@linux-mips.org Cc: Jason Cooper <jason@lakedaemon.net> Cc: Marc Zyngier <marc.zyngier@arm.com> Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20160819170715.27820-2-paul.burton@imgtec.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- drivers/irqchip/irq-mips-gic.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 2e0f49992a982..83f498393a7f0 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -893,10 +893,17 @@ void gic_dev_domain_free(struct irq_domain *d, unsigned int virq, return; } +static void gic_dev_domain_activate(struct irq_domain *domain, + struct irq_data *d) +{ + gic_shared_irq_domain_map(domain, d->irq, d->hwirq, 0); +} + static struct irq_domain_ops gic_dev_domain_ops = { .xlate = gic_dev_domain_xlate, .alloc = gic_dev_domain_alloc, .free = gic_dev_domain_free, + .activate = gic_dev_domain_activate, }; static int gic_ipi_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, From e6a00f6684c9f348cd782922b4b277c68ab90b63 Mon Sep 17 00:00:00 2001 From: Yuval Shaia <yuval.shaia@oracle.com> Date: Wed, 27 Jul 2016 01:24:52 -0700 Subject: [PATCH 402/513] IB/mlx4: Make function use_tunnel_data return void No need to return int if function always returns 0 Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/mlx4/cq.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index d6fc8a6e8c332..15b628996633f 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -576,8 +576,8 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } -static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, - unsigned tail, struct mlx4_cqe *cqe, int is_eth) +static void use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe, int is_eth) { struct mlx4_ib_proxy_sqp_hdr *hdr; @@ -600,8 +600,6 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); } - - return 0; } static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, @@ -852,9 +850,11 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { if ((*cur_qp)->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | - MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) - return use_tunnel_data(*cur_qp, cq, wc, tail, - cqe, is_eth); + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + use_tunnel_data(*cur_qp, cq, wc, tail, cqe, + is_eth); + return 0; + } } wc->slid = be16_to_cpu(cqe->rlid); From 5412352fcd8fba7f278ae8c9ba36296716f17ae8 Mon Sep 17 00:00:00 2001 From: Yuval Shaia <yuval.shaia@oracle.com> Date: Wed, 27 Jul 2016 01:24:53 -0700 Subject: [PATCH 403/513] IB/mlx4: Return EAGAIN for any error in mlx4_ib_poll_one Error code EAGAIN should be used when errors are temporary and next call might succeeds. When error code other than EAGAIN is returned, the caller (mlx4_ib_poll) will assume all CQE in the same bunch are error too and will drop them all. Signed-off-by: Yuval Shaia <yuval.shaia@oracle.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/mlx4/cq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 15b628996633f..006db6436e3b2 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -690,7 +690,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == MLX4_OPCODE_NOP && is_send)) { pr_warn("Completion for NOP opcode detected!\n"); - return -EINVAL; + return -EAGAIN; } /* Resize CQ in progress */ @@ -721,7 +721,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, if (unlikely(!mqp)) { pr_warn("CQ %06x with entry for unknown QPN %06x\n", cq->mcq.cqn, be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_QPN_MASK); - return -EINVAL; + return -EAGAIN; } *cur_qp = to_mibqp(mqp); @@ -739,7 +739,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, if (unlikely(!msrq)) { pr_warn("CQ %06x with entry for unknown SRQN %06x\n", cq->mcq.cqn, srq_num); - return -EINVAL; + return -EAGAIN; } } From 8303f683b161467b6595c153c8751b80f9df3508 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk <tadeusz.struk@intel.com> Date: Wed, 3 Aug 2016 20:19:32 -0400 Subject: [PATCH 404/513] IB/hfi1: Allocate cpu mask on the heap to silence warning If CONFIG_FRAME_WARN is small (1K) and CONFIG_NR_CPUS big then a frame size warning is triggered during build. Allocate the cpu mask dynamically to silence the warning. Reviewed-by: Sebastian Sanchez <sebastian.sanchez@intel.com> Signed-off-by: Tadeusz Struk <tadeusz.struk@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/affinity.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 79575ee873f21..9bbb214591661 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -682,7 +682,7 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, size_t count) { struct hfi1_affinity_node *entry; - struct cpumask mask; + cpumask_var_t mask; int ret, i; spin_lock(&node_affinity.lock); @@ -692,19 +692,24 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (!entry) return -EINVAL; - ret = cpulist_parse(buf, &mask); + ret = zalloc_cpumask_var(&mask, GFP_KERNEL); + if (!ret) + return -ENOMEM; + + ret = cpulist_parse(buf, mask); if (ret) - return ret; + goto out; - if (!cpumask_subset(&mask, cpu_online_mask) || cpumask_empty(&mask)) { + if (!cpumask_subset(mask, cpu_online_mask) || cpumask_empty(mask)) { dd_dev_warn(dd, "Invalid CPU mask\n"); - return -EINVAL; + ret = -EINVAL; + goto out; } mutex_lock(&sdma_affinity_mutex); /* reset the SDMA interrupt affinity details */ init_cpu_mask_set(&entry->def_intr); - cpumask_copy(&entry->def_intr.mask, &mask); + cpumask_copy(&entry->def_intr.mask, mask); /* * Reassign the affinity for each SDMA interrupt. */ @@ -720,8 +725,9 @@ int hfi1_set_sdma_affinity(struct hfi1_devdata *dd, const char *buf, if (ret) break; } - mutex_unlock(&sdma_affinity_mutex); +out: + free_cpumask_var(mask); return ret ? ret : strnlen(buf, PAGE_SIZE); } From 23d70503ee187819a3775c7ac73f17c5bfe3fad0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Fri, 5 Aug 2016 13:46:49 +0000 Subject: [PATCH 405/513] IB/core: Fix possible memory leak in cma_resolve_iboe_route() 'work' and 'route->path_rec' are malloced in cma_resolve_iboe_route() and should be freed before leaving from the error handling cases, otherwise it will cause memory leak. Fixes: 200298326b27 ('IB/core: Validate route when we init ah') Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Reviewed-by: Haggai Eran <haggaie@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/core/cma.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e6dfa1bd3defa..5f65a78b27c9c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2462,18 +2462,24 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) if (addr->dev_addr.bound_dev_if) { ndev = dev_get_by_index(&init_net, addr->dev_addr.bound_dev_if); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } if (ndev->flags & IFF_LOOPBACK) { dev_put(ndev); - if (!id_priv->id.device->get_netdev) - return -EOPNOTSUPP; + if (!id_priv->id.device->get_netdev) { + ret = -EOPNOTSUPP; + goto err2; + } ndev = id_priv->id.device->get_netdev(id_priv->id.device, id_priv->id.port_num); - if (!ndev) - return -ENODEV; + if (!ndev) { + ret = -ENODEV; + goto err2; + } } route->path_rec->net = &init_net; From 1d5840c971455ad4ecece3f72012961cac8d0f00 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Sun, 7 Aug 2016 12:20:38 +0000 Subject: [PATCH 406/513] IB/isert: fix error return code in isert_alloc_login_buf() Fix to return error code -ENOMEM from the alloc error handling case instead of 0, as done elsewhere in this function. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Acked-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ba6be060a476b..7914c14478cd1 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -448,7 +448,7 @@ isert_alloc_login_buf(struct isert_conn *isert_conn, isert_conn->login_rsp_buf = kzalloc(ISER_RX_PAYLOAD_SIZE, GFP_KERNEL); if (!isert_conn->login_rsp_buf) { - isert_err("Unable to allocate isert_conn->login_rspbuf\n"); + ret = -ENOMEM; goto out_unmap_login_req_buf; } From abb658ef0529488cfa75e79a2a9f894cb95eaec3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Mon, 8 Aug 2016 09:49:47 +0000 Subject: [PATCH 407/513] IB/hfi1: Remove duplicated include from affinity.c Remove duplicated include. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/affinity.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 9bbb214591661..0566393e5aba6 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -47,7 +47,6 @@ #include <linux/topology.h> #include <linux/cpumask.h> #include <linux/module.h> -#include <linux/cpumask.h> #include "hfi.h" #include "affinity.h" From c62fb260a86dde3df5b2905432caa0e9f6898434 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn <mike.marciniszyn@intel.com> Date: Fri, 12 Aug 2016 11:17:37 -0400 Subject: [PATCH 408/513] IB/hfi1,IB/qib: Fix qp_stats sleep with rcu read lock held The qp init function does a kzalloc() while holding the RCU lock that encounters the following warning with a debug kernel when a cat of the qp_stats is done: [ 231.723948] rcu_scheduler_active = 1, debug_locks = 0 [ 231.731939] 3 locks held by cat/11355: [ 231.736492] #0: (debugfs_srcu){......}, at: [<ffffffff813001a5>] debugfs_use_file_start+0x5/0x90 [ 231.746955] #1: (&p->lock){+.+.+.}, at: [<ffffffff81289a6c>] seq_read+0x4c/0x3c0 [ 231.755873] #2: (rcu_read_lock){......}, at: [<ffffffffa0a0c535>] _qp_stats_seq_start+0x5/0xd0 [hfi1] [ 231.766862] The init functions do an implicit next which requires the rcu read lock before the kzalloc(). Fix for both drivers is to change the scope of the init function to only do the allocation and the initialization of the just allocated iter. The implict next is moved back into the respective start functions to fix the issue. Signed-off-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> CC: <stable@vger.kernel.org> # 4.6.x- Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/debugfs.c | 14 +++++++++----- drivers/infiniband/hw/hfi1/qp.c | 4 ---- drivers/infiniband/hw/qib/qib_debugfs.c | 12 +++++++++--- drivers/infiniband/hw/qib/qib_qp.c | 4 ---- 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/debugfs.c b/drivers/infiniband/hw/hfi1/debugfs.c index dbab9d9cc288c..a49cc88f08a2a 100644 --- a/drivers/infiniband/hw/hfi1/debugfs.c +++ b/drivers/infiniband/hw/hfi1/debugfs.c @@ -223,28 +223,32 @@ DEBUGFS_SEQ_FILE_OPEN(ctx_stats) DEBUGFS_FILE_OPS(ctx_stats); static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) -__acquires(RCU) + __acquires(RCU) { struct qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qp_iter *iter = iter_ptr; @@ -259,7 +263,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) -__releases(RCU) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/hfi1/qp.c b/drivers/infiniband/hw/hfi1/qp.c index a5aa3517e7d5c..4e4d8317c281c 100644 --- a/drivers/infiniband/hw/hfi1/qp.c +++ b/drivers/infiniband/hw/hfi1/qp.c @@ -656,10 +656,6 @@ struct qp_iter *qp_iter_init(struct hfi1_ibdev *dev) iter->dev = dev; iter->specials = dev->rdi.ibdev.phys_port_cnt * 2; - if (qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } diff --git a/drivers/infiniband/hw/qib/qib_debugfs.c b/drivers/infiniband/hw/qib/qib_debugfs.c index 5e75b43c596b6..5bad8e3b40bb3 100644 --- a/drivers/infiniband/hw/qib/qib_debugfs.c +++ b/drivers/infiniband/hw/qib/qib_debugfs.c @@ -189,27 +189,32 @@ static int _ctx_stats_seq_show(struct seq_file *s, void *v) DEBUGFS_FILE(ctx_stats) static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos) + __acquires(RCU) { struct qib_qp_iter *iter; loff_t n = *pos; - rcu_read_lock(); iter = qib_qp_iter_init(s->private); + + /* stop calls rcu_read_unlock */ + rcu_read_lock(); + if (!iter) return NULL; - while (n--) { + do { if (qib_qp_iter_next(iter)) { kfree(iter); return NULL; } - } + } while (n--); return iter; } static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, loff_t *pos) + __must_hold(RCU) { struct qib_qp_iter *iter = iter_ptr; @@ -224,6 +229,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr, } static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr) + __releases(RCU) { rcu_read_unlock(); } diff --git a/drivers/infiniband/hw/qib/qib_qp.c b/drivers/infiniband/hw/qib/qib_qp.c index 9cc0aae1d7819..f9b8cd2354d1b 100644 --- a/drivers/infiniband/hw/qib/qib_qp.c +++ b/drivers/infiniband/hw/qib/qib_qp.c @@ -573,10 +573,6 @@ struct qib_qp_iter *qib_qp_iter_init(struct qib_ibdev *dev) return NULL; iter->dev = dev; - if (qib_qp_iter_next(iter)) { - kfree(iter); - return NULL; - } return iter; } From 140690eae7626a820ea135f33ff9fe11c1bcee6d Mon Sep 17 00:00:00 2001 From: Easwar Hariharan <easwar.hariharan@intel.com> Date: Tue, 9 Aug 2016 11:17:18 -0400 Subject: [PATCH 409/513] IB/hfi1: Fetch monitor values on-demand for CableInfo query The monitor values from bytes 22 through 81 of the QSFP memory space (SFF 8636) are dynamic and serving them out of the QSFP memory cache maintained by the driver provides stale data to the CableInfo SMA query. This patch refreshes the dynamic values from the QSFP memory on request and overwrites the stale data from the cache for the overlap between the requested range and the monitor range. Reviewed-by: Jubin John <jubin.john@intel.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/qsfp.c | 32 +++++++++++++++++++++++++++++-- drivers/infiniband/hw/hfi1/qsfp.h | 3 +++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c index a207717ade2aa..4e95ad810847a 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.c +++ b/drivers/infiniband/hw/hfi1/qsfp.c @@ -706,8 +706,8 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, u8 *data) { struct hfi1_pportdata *ppd; - u32 excess_len = 0; - int ret = 0; + u32 excess_len = len; + int ret = 0, offset = 0; if (port_num > dd->num_pports || port_num < 1) { dd_dev_info(dd, "%s: Invalid port number %d\n", @@ -740,6 +740,34 @@ int get_cable_info(struct hfi1_devdata *dd, u32 port_num, u32 addr, u32 len, } memcpy(data, &ppd->qsfp_info.cache[addr], len); + + if (addr <= QSFP_MONITOR_VAL_END && + (addr + len) >= QSFP_MONITOR_VAL_START) { + /* Overlap with the dynamic channel monitor range */ + if (addr < QSFP_MONITOR_VAL_START) { + if (addr + len <= QSFP_MONITOR_VAL_END) + len = addr + len - QSFP_MONITOR_VAL_START; + else + len = QSFP_MONITOR_RANGE; + offset = QSFP_MONITOR_VAL_START - addr; + addr = QSFP_MONITOR_VAL_START; + } else if (addr == QSFP_MONITOR_VAL_START) { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_RANGE; + } else { + offset = 0; + if (addr + len > QSFP_MONITOR_VAL_END) + len = QSFP_MONITOR_VAL_END - addr + 1; + } + /* Refresh the values of the dynamic monitors from the cable */ + ret = one_qsfp_read(ppd, dd->hfi1_id, addr, data + offset, len); + if (ret != len) { + ret = -EAGAIN; + goto set_zeroes; + } + } + return 0; set_zeroes: diff --git a/drivers/infiniband/hw/hfi1/qsfp.h b/drivers/infiniband/hw/hfi1/qsfp.h index 69275ebd95973..36cf52359848a 100644 --- a/drivers/infiniband/hw/hfi1/qsfp.h +++ b/drivers/infiniband/hw/hfi1/qsfp.h @@ -74,6 +74,9 @@ /* Defined fields that Intel requires of qualified cables */ /* Byte 0 is Identifier, not checked */ /* Byte 1 is reserved "status MSB" */ +#define QSFP_MONITOR_VAL_START 22 +#define QSFP_MONITOR_VAL_END 81 +#define QSFP_MONITOR_RANGE (QSFP_MONITOR_VAL_END - QSFP_MONITOR_VAL_START + 1) #define QSFP_TX_CTRL_BYTE_OFFS 86 #define QSFP_PWR_CTRL_BYTE_OFFS 93 #define QSFP_CDR_CTRL_BYTE_OFFS 98 From c867caaf8ecd5498d399558d0f11825f175c6cdd Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn <mike.marciniszyn@intel.com> Date: Tue, 9 Aug 2016 11:19:55 -0400 Subject: [PATCH 410/513] IB/hfi1: Pass packet ptr to set_armed_active The "packet" parameter was being passed on the stack, change it to a pointer. Reviewed-by: Don Hiatt <don.hiatt@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/driver.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8246dc7d0573a..c4156870a8bb6 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -888,14 +888,14 @@ void set_all_slowpath(struct hfi1_devdata *dd) } static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, - struct hfi1_packet packet, + struct hfi1_packet *packet, struct hfi1_devdata *dd) { struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; - struct hfi1_message_header *hdr = hfi1_get_msgheader(packet.rcd->dd, - packet.rhf_addr); + struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, + packet->rhf_addr); - if (hdr2sc(hdr, packet.rhf) != 0xf) { + if (hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { @@ -979,7 +979,7 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread) /* Auto activate link on non-SC15 packet receive */ if (unlikely(rcd->ppd->host_link_state == HLS_UP_ARMED) && - set_armed_to_active(rcd, packet, dd)) + set_armed_to_active(rcd, &packet, dd)) goto bail; last = process_rcv_packet(&packet, thread); } From 69b9f4a4233b6f9ea168c25123b9eb629739d8e5 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn <mike.marciniszyn@intel.com> Date: Tue, 9 Aug 2016 11:19:56 -0400 Subject: [PATCH 411/513] IB/hfi1: Validate header in set_armed_active Validate the etype to insure that the header is correct. Reviewed-by: Don Hiatt <don.hiatt@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index c4156870a8bb6..303f105557297 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -894,8 +894,9 @@ static inline int set_armed_to_active(struct hfi1_ctxtdata *rcd, struct work_struct *lsaw = &rcd->ppd->linkstate_active_work; struct hfi1_message_header *hdr = hfi1_get_msgheader(packet->rcd->dd, packet->rhf_addr); + u8 etype = rhf_rcv_type(packet->rhf); - if (hdr2sc(hdr, packet->rhf) != 0xf) { + if (etype == RHF_RCV_TYPE_IB && hdr2sc(hdr, packet->rhf) != 0xf) { int hwstate = read_logical_state(dd); if (hwstate != LSTATE_ACTIVE) { From 476d95bd02240894806ebe64f1c4dcf6dbba87f4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun <weiyj.lk@gmail.com> Date: Wed, 10 Aug 2016 03:14:04 +0000 Subject: [PATCH 412/513] IB/hfi1: Using kfree_rcu() to simplify the code The callback function of call_rcu() just calls a kfree(), so we can use kfree_rcu() instead of call_rcu() + callback function. Signed-off-by: Wei Yongjun <weiyj.lk@gmail.com> Tested-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Acked-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Tested-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Acked-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/hfi.h | 1 - drivers/infiniband/hw/hfi1/init.c | 2 +- drivers/infiniband/hw/hfi1/mad.c | 9 +-------- 3 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index 1000e0fd96d9b..f41414ef0cbc1 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1656,7 +1656,6 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) struct hfi1_devdata *hfi1_init_dd(struct pci_dev *, const struct pci_device_id *); void hfi1_free_devdata(struct hfi1_devdata *); -void cc_state_reclaim(struct rcu_head *rcu); struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev, size_t extra); /* LED beaconing functions */ diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c index a358d23ecd54d..b7935451093c6 100644 --- a/drivers/infiniband/hw/hfi1/init.c +++ b/drivers/infiniband/hw/hfi1/init.c @@ -1333,7 +1333,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd) spin_unlock(&ppd->cc_state_lock); if (cc_state) - call_rcu(&cc_state->rcu, cc_state_reclaim); + kfree_rcu(cc_state, rcu); } free_credit_return(dd); diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 1263abe01999e..95c43e1e0ce5f 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -3398,7 +3398,7 @@ static void apply_cc_state(struct hfi1_pportdata *ppd) spin_unlock(&ppd->cc_state_lock); - call_rcu(&old_cc_state->rcu, cc_state_reclaim); + kfree_rcu(old_cc_state, rcu); } static int __subn_set_opa_cong_setting(struct opa_smp *smp, u32 am, u8 *data, @@ -3553,13 +3553,6 @@ static int __subn_get_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, return reply((struct ib_mad_hdr *)smp); } -void cc_state_reclaim(struct rcu_head *rcu) -{ - struct cc_state *cc_state = container_of(rcu, struct cc_state, rcu); - - kfree(cc_state); -} - static int __subn_set_opa_cc_table(struct opa_smp *smp, u32 am, u8 *data, struct ib_device *ibdev, u8 port, u32 *resp_len) From 57bb562ad4651453c9f20d506a65e46e4d11042f Mon Sep 17 00:00:00 2001 From: Christophe Jaillet <christophe.jaillet@wanadoo.fr> Date: Wed, 10 Aug 2016 07:34:27 +0200 Subject: [PATCH 413/513] IB/hfi1: Add missing error code assignment before test It is likely that checking the result of 'setup_ctxt' is expected here. Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Acked-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 1ecbec1923589..ed76be39b20b8 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -222,7 +222,7 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd, ret = assign_ctxt(fp, &uinfo); if (ret < 0) return ret; - setup_ctxt(fp); + ret = setup_ctxt(fp); if (ret) return ret; ret = user_init(fp); From 86cd747c6dd6cdbc825e36ad5f0029f3c5a37776 Mon Sep 17 00:00:00 2001 From: Christophe Jaillet <christophe.jaillet@wanadoo.fr> Date: Wed, 10 Aug 2016 17:45:01 +0200 Subject: [PATCH 414/513] IB/usnic: Fix error return code If 'pci_register_driver' fails, we return 'err' which is known to be 0. Return the error instead. Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index c229b9f4a52da..0a89a955550b2 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -664,7 +664,8 @@ static int __init usnic_ib_init(void) return err; } - if (pci_register_driver(&usnic_ib_pci_driver)) { + err = pci_register_driver(&usnic_ib_pci_driver); + if (err) { usnic_err("Unable to register with PCI\n"); goto out_umem_fini; } From 701b4bf6e3eeff1a856d6889e1ebb35edd6c019a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky <leon@kernel.org> Date: Tue, 16 Aug 2016 12:29:46 +0300 Subject: [PATCH 415/513] MAINTAINERS: Fix Soft RoCE location The Soft RoCE (rxe) is located in drivers/inifiniband/sw and not in drivers/infiniband/hw/. This patch fixes it. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Leon Romanovsky <leon@kernel.org> Signed-off-by: Doug Ledford <dledford@redhat.com> --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b105c346..fc2613d45504d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7661,7 +7661,7 @@ L: linux-rdma@vger.kernel.org S: Supported W: https://github.com/SoftRoCE/rxe-dev/wiki/rxe-dev:-Home Q: http://patchwork.kernel.org/project/linux-rdma/list/ -F: drivers/infiniband/hw/rxe/ +F: drivers/infiniband/sw/rxe/ F: include/uapi/rdma/rdma_user_rxe.h MEMBARRIER SUPPORT From f29a08dc145e05de6a57f0aeaba6020464f80e15 Mon Sep 17 00:00:00 2001 From: Easwar Hariharan <easwar.hariharan@intel.com> Date: Tue, 16 Aug 2016 13:25:34 -0700 Subject: [PATCH 416/513] IB/hfi1: Return invalid field for non-QSFP CableInfo queries The driver does not check if the CableInfo query is supported for the port type. Return early if CableInfo is not supported for the port type, making compliance with the specification explicit and preventing lower level code from potentially doing the wrong thing if the query is not supported for the hardware implementation. Reviewed-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Easwar Hariharan <easwar.hariharan@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/mad.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 95c43e1e0ce5f..39e42c373a01c 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -1819,6 +1819,11 @@ static int __subn_get_opa_cable_info(struct opa_smp *smp, u32 am, u8 *data, u32 len = OPA_AM_CI_LEN(am) + 1; int ret; + if (dd->pport->port_type != PORT_TYPE_QSFP) { + smp->status |= IB_SMP_INVALID_FIELD; + return reply((struct ib_mad_hdr *)smp); + } + #define __CI_PAGE_SIZE BIT(7) /* 128 bytes */ #define __CI_PAGE_MASK ~(__CI_PAGE_SIZE - 1) #define __CI_PAGE_NUM(a) ((a) & __CI_PAGE_MASK) From 08fe16f6192bccd5798e9b60461f7aa151b34cd4 Mon Sep 17 00:00:00 2001 From: Mitko Haralanov <mitko.haralanov@intel.com> Date: Tue, 16 Aug 2016 13:26:12 -0700 Subject: [PATCH 417/513] IB/hfi1: Improve J_KEY generation Previously, J_KEY generation was based on the lower 16 bits of the user's UID. While this works, it was not good enough as a non-root user could collide with a root user given a sufficiently large UID. This patch attempt to improve the J_KEY generation by using the following algorithm: The 16 bit J_KEY space is partitioned into 3 separate spaces reserved for different user classes: * all users with administtor privileges (including 'root') will use J_KEYs in the range of 0 to 31, * all kernel protocols, which use KDETH packets will use J_KEYs in the range of 32 to 63, and * all other users will use J_KEYs in the range of 64 to 65535. The above separation is aimed at preventing different user levels from sending packets to each other and, additionally, separate kernel protocols from all other types of users. The later is meant to prevent the potential corruption of kernel memory by any other type of user. Reviewed-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Mitko Haralanov <mitko.haralanov@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/hfi.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hfi1/hfi.h b/drivers/infiniband/hw/hfi1/hfi.h index f41414ef0cbc1..a021e660d482d 100644 --- a/drivers/infiniband/hw/hfi1/hfi.h +++ b/drivers/infiniband/hw/hfi1/hfi.h @@ -1272,9 +1272,26 @@ static inline int hdr2sc(struct hfi1_message_header *hdr, u64 rhf) ((!!(rhf_dc_info(rhf))) << 4); } +#define HFI1_JKEY_WIDTH 16 +#define HFI1_JKEY_MASK (BIT(16) - 1) +#define HFI1_ADMIN_JKEY_RANGE 32 + +/* + * J_KEYs are split and allocated in the following groups: + * 0 - 31 - users with administrator privileges + * 32 - 63 - kernel protocols using KDETH packets + * 64 - 65535 - all other users using KDETH packets + */ static inline u16 generate_jkey(kuid_t uid) { - return from_kuid(current_user_ns(), uid) & 0xffff; + u16 jkey = from_kuid(current_user_ns(), uid) & HFI1_JKEY_MASK; + + if (capable(CAP_SYS_ADMIN)) + jkey &= HFI1_ADMIN_JKEY_RANGE - 1; + else if (jkey < 64) + jkey |= BIT(HFI1_JKEY_WIDTH - 1); + + return jkey; } /* From 56c8ca510de2884b2f741e5fd8d3df6161378de6 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn <mike.marciniszyn@intel.com> Date: Tue, 16 Aug 2016 13:26:29 -0700 Subject: [PATCH 418/513] IB/rdmvat: Fix double vfree() in rvt_create_qp() error path The unwind logic for creating a user QP has a double vfree of the non-shared receive queue when handling a "too many qps" failure. The code unwinds the mmmap info by decrementing a reference count which will call rvt_release_mmap_info() which in turn does the vfree() of the r_rq.wq. The unwind code then does the same free. Fix by guarding the vfree() with the same test that is done in close and only do the vfree() if qp->ip is NULL. Reviewed-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Mike Marciniszyn <mike.marciniszyn@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/sw/rdmavt/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c index bdb540f25a888..870b4f212fbcf 100644 --- a/drivers/infiniband/sw/rdmavt/qp.c +++ b/drivers/infiniband/sw/rdmavt/qp.c @@ -873,7 +873,8 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd, free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num); bail_rq_wq: - vfree(qp->r_rq.wq); + if (!qp->ip) + vfree(qp->r_rq.wq); bail_driver_priv: rdi->driver_f.qp_priv_free(rdi, qp); From e0cf75deab8155334c8228eb7f097b15127d0a49 Mon Sep 17 00:00:00 2001 From: Ira Weiny <ira.weiny@intel.com> Date: Tue, 16 Aug 2016 13:27:03 -0700 Subject: [PATCH 419/513] IB/hfi1: Fix mm_struct use after free Testing with CONFIG_SLUB_DEBUG_ON=y resulted in the kernel panic below. This is the result of the mm_struct sometimes being free'd prior to hfi1_file_close being called. This was due to the combination of 2 reasons: 1) hfi1_file_close is deferred in process exit and it therefore may not be called synchronously with process exit. 2) exit_mm is called prior to exit_files in do_exit. Normally this is ok however, our kernel bypass code requires us to have access to the mm_struct for house keeping both at "normal" close time as well as at process exit. Therefore, the fix is to simply keep a reference to the mm_struct until we are done with it. [ 3006.340150] general protection fault: 0000 [#1] SMP [ 3006.346469] Modules linked in: hfi1 rdmavt rpcrdma ib_isert iscsi_target_mod ib_iser libiscsi scsi_transport_iscsi ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib rdma_ucm ib_ucm ib_uverbs ib_umad rdma_cm ib_cm iw_cm dm_mirror dm_region_hash dm_log dm_mod snd_hda_code c_realtek iTCO_wdt snd_hda_codec_generic iTCO_vendor_support sb_edac edac_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm irqbypass c rct10dif_pclmul crc32_pclmul ghash_clmulni_intel aesni_intel lrw snd_hda_intel gf128mul snd_hda_codec glue_helper snd_hda_core ablk_helper sn d_hwdep cryptd snd_seq snd_seq_device snd_pcm snd_timer snd soundcore pcspkr shpchp mei_me sg lpc_ich mei i2c_i801 mfd_core ioatdma ipmi_devi ntf wmi ipmi_si ipmi_msghandler acpi_cpufreq nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables ext4 jbd2 mbcache mlx4_en ib_core sr_mod s d_mod cdrom crc32c_intel mgag200 drm_kms_helper syscopyarea sysfillrect igb sysimgblt fb_sys_fops ptp mlx4_core ttm isci pps_core ahci drm li bsas libahci dca firewire_ohci i2c_algo_bit scsi_transport_sas firewire_core crc_itu_t i2c_core libata [last unloaded: mlx4_ib] [ 3006.461759] CPU: 16 PID: 11624 Comm: mpi_stress Not tainted 4.7.0-rc5+ #1 [ 3006.469915] Hardware name: Intel Corporation W2600CR ........../W2600CR, BIOS SE5C600.86B.01.08.0003.022620131521 02/26/2013 [ 3006.483027] task: ffff8804102f0040 ti: ffff8804102f8000 task.ti: ffff8804102f8000 [ 3006.491971] RIP: 0010:[<ffffffff810f0383>] [<ffffffff810f0383>] __lock_acquire+0xb3/0x19e0 [ 3006.501905] RSP: 0018:ffff8804102fb908 EFLAGS: 00010002 [ 3006.508447] RAX: 6b6b6b6b6b6b6b6b RBX: 0000000000000001 RCX: 0000000000000000 [ 3006.517012] RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff880410b56a40 [ 3006.525569] RBP: ffff8804102fb9b0 R08: 0000000000000001 R09: 0000000000000000 [ 3006.534119] R10: ffff8804102f0040 R11: 0000000000000000 R12: 0000000000000000 [ 3006.542664] R13: ffff880410b56a40 R14: 0000000000000000 R15: 0000000000000000 [ 3006.551203] FS: 00007ff478c08700(0000) GS:ffff88042e200000(0000) knlGS:0000000000000000 [ 3006.560814] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3006.567806] CR2: 00007f667f5109e0 CR3: 0000000001c06000 CR4: 00000000000406e0 [ 3006.576352] Stack: [ 3006.579157] ffffffff8124b819 ffffffffffffffff 0000000000000000 ffff8804102fb940 [ 3006.588072] 0000000000000002 0000000000000000 ffff8804102f0040 0000000000000007 [ 3006.596971] 0000000000000006 ffff8803cad6f000 0000000000000000 ffff8804102f0040 [ 3006.605878] Call Trace: [ 3006.609220] [<ffffffff8124b819>] ? uncharge_batch+0x109/0x250 [ 3006.616382] [<ffffffff810f2313>] lock_acquire+0xd3/0x220 [ 3006.623056] [<ffffffffa0a30bfc>] ? hfi1_release_user_pages+0x7c/0xa0 [hfi1] [ 3006.631593] [<ffffffff81775579>] down_write+0x49/0x80 [ 3006.638022] [<ffffffffa0a30bfc>] ? hfi1_release_user_pages+0x7c/0xa0 [hfi1] [ 3006.646569] [<ffffffffa0a30bfc>] hfi1_release_user_pages+0x7c/0xa0 [hfi1] [ 3006.654898] [<ffffffffa0a2efb6>] cacheless_tid_rb_remove+0x106/0x330 [hfi1] [ 3006.663417] [<ffffffff810efd36>] ? mark_held_locks+0x66/0x90 [ 3006.670498] [<ffffffff817771f6>] ? _raw_spin_unlock_irqrestore+0x36/0x60 [ 3006.678741] [<ffffffffa0a2f1ee>] tid_rb_remove+0xe/0x10 [hfi1] [ 3006.686010] [<ffffffffa0a0c5d5>] hfi1_mmu_rb_unregister+0xc5/0x100 [hfi1] [ 3006.694387] [<ffffffffa0a2fcb9>] hfi1_user_exp_rcv_free+0x39/0x120 [hfi1] [ 3006.702732] [<ffffffffa09fc6ea>] hfi1_file_close+0x17a/0x330 [hfi1] [ 3006.710489] [<ffffffff81263e9a>] __fput+0xfa/0x230 [ 3006.716595] [<ffffffff8126400e>] ____fput+0xe/0x10 [ 3006.722696] [<ffffffff810b95c6>] task_work_run+0x86/0xc0 [ 3006.729379] [<ffffffff81099933>] do_exit+0x323/0xc40 [ 3006.735672] [<ffffffff8109a2dc>] do_group_exit+0x4c/0xc0 [ 3006.742371] [<ffffffff810a7f55>] get_signal+0x345/0x940 [ 3006.748958] [<ffffffff810340c7>] do_signal+0x37/0x700 [ 3006.755328] [<ffffffff8127872a>] ? poll_select_set_timeout+0x5a/0x90 [ 3006.763146] [<ffffffff811609cb>] ? __audit_syscall_exit+0x1db/0x260 [ 3006.770853] [<ffffffff8110f3e3>] ? rcu_read_lock_sched_held+0x93/0xa0 [ 3006.778765] [<ffffffff812347a4>] ? kfree+0x1e4/0x2a0 [ 3006.784986] [<ffffffff8108e75a>] ? exit_to_usermode_loop+0x33/0xac [ 3006.792551] [<ffffffff8108e785>] exit_to_usermode_loop+0x5e/0xac [ 3006.799907] [<ffffffff81003dca>] do_syscall_64+0x12a/0x190 [ 3006.806664] [<ffffffff81777a7f>] entry_SYSCALL64_slow_path+0x25/0x25 [ 3006.814396] Code: 24 08 44 89 44 24 10 89 4c 24 18 e8 a8 d8 ff ff 48 85 c0 8b 4c 24 18 44 8b 44 24 10 44 8b 4c 24 08 4c 8b 14 24 0f 84 30 08 00 00 <f0> ff 80 98 01 00 00 8b 3d 48 ad be 01 45 8b a2 90 0b 00 00 85 [ 3006.837158] RIP [<ffffffff810f0383>] __lock_acquire+0xb3/0x19e0 [ 3006.844401] RSP <ffff8804102fb908> [ 3006.851170] ---[ end trace b7b9f21cf06c27df ]--- [ 3006.927420] Kernel panic - not syncing: Fatal exception [ 3006.933954] Kernel Offset: disabled [ 3006.940961] ---[ end Kernel panic - not syncing: Fatal exception [ 3006.948249] ------------[ cut here ]------------ Fixes: 3faa3d9a308e ("IB/hfi1: Make use of mm consistent") Reviewed-by: Dean Luick <dean.luick@intel.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Signed-off-by: Dennis Dalessandro <dennis.dalessandro@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/hfi1/file_ops.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index ed76be39b20b8..7e03ccd2554db 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -183,6 +183,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (fd) { fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; + atomic_inc(&fd->mm->mm_count); } fp->private_data = fd; @@ -779,6 +780,7 @@ static int hfi1_file_close(struct inode *inode, struct file *fp) mutex_unlock(&hfi1_mutex); hfi1_free_ctxtdata(dd, uctxt); done: + mmdrop(fdata->mm); kobject_put(&dd->kobj); kfree(fdata); return 0; From 7f446abf12d741f4e29f00c633cb5fa67c2eee71 Mon Sep 17 00:00:00 2001 From: Steve Wise <swise@opengridcomputing.com> Date: Fri, 19 Aug 2016 07:29:07 -0700 Subject: [PATCH 420/513] iw_cxgb4: limit IRD/ORD advertised to ULP by device max. The i40iw initiator sends an MPA-request with ird = 63, ord = 63. The cxgb4 responder sends a RST. Since the inbound ord=63 and it exceeds the max_ird/c4iw_max_read_depth (=32 default), chelsio decides to abort. Instead, cxgb4 should adjust the ord/ird down before presenting it to the ULP. Reported-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/cxgb4/cm.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 3aca7f6171b42..a698efc69257c 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -1827,8 +1827,12 @@ static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb) (ep->mpa_pkt + sizeof(*mpa)); ep->ird = ntohs(mpa_v2_params->ird) & MPA_V2_IRD_ORD_MASK; + ep->ird = min_t(u32, ep->ird, + cur_max_read_depth(ep->com.dev)); ep->ord = ntohs(mpa_v2_params->ord) & MPA_V2_IRD_ORD_MASK; + ep->ord = min_t(u32, ep->ord, + cur_max_read_depth(ep->com.dev)); PDBG("%s initiator ird %u ord %u\n", __func__, ep->ird, ep->ord); if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL) From 30b03b1528b60623986ee0b50ec0e5dab9094be8 Mon Sep 17 00:00:00 2001 From: Steve Wise <swise@opengridcomputing.com> Date: Fri, 19 Aug 2016 07:29:08 -0700 Subject: [PATCH 421/513] iw_cxgb4: use the MPA initiator's IRD if < our ORD The i40iw initiator sends an MPA-request with ird=16 and ord=16. The cxgb4 responder sends an MPA-reply with ord = 32 causing i40iw to terminate due to insufficient resources. The logic to reduce the ORD to <= peer's IRD was wrong. Reported-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/cxgb4/cm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index a698efc69257c..b6a953aed7e83 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3140,7 +3140,7 @@ int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) { if (conn_param->ord > ep->ird) { if (RELAXED_IRD_NEGOTIATION) { - ep->ord = ep->ird; + conn_param->ord = ep->ird; } else { ep->ird = conn_param->ird; ep->ord = conn_param->ord; From 5cab4d84780573afbf5077ae9c3f919b4f305f20 Mon Sep 17 00:00:00 2001 From: Hans de Goede <hdegoede@redhat.com> Date: Mon, 22 Aug 2016 13:49:59 -0700 Subject: [PATCH 422/513] Input: silead - use devm_gpiod_get The silead code is using devm_foo for everything (and does not free any resources). Except that it is using gpiod_get instead of devm_gpiod_get (but is not freeing the gpio_desc), change this to use devm_gpiod_get so that the gpio will be properly released. Signed-off-by: Hans de Goede <hdegoede@redhat.com> Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> --- drivers/input/touchscreen/silead.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/touchscreen/silead.c b/drivers/input/touchscreen/silead.c index 7379fe153cf99..b2744a64e9331 100644 --- a/drivers/input/touchscreen/silead.c +++ b/drivers/input/touchscreen/silead.c @@ -464,7 +464,7 @@ static int silead_ts_probe(struct i2c_client *client, return -ENODEV; /* Power GPIO pin */ - data->gpio_power = gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); + data->gpio_power = devm_gpiod_get_optional(dev, "power", GPIOD_OUT_LOW); if (IS_ERR(data->gpio_power)) { if (PTR_ERR(data->gpio_power) != -EPROBE_DEFER) dev_err(dev, "Shutdown GPIO request failed\n"); From fae16989be77b09bab86c79233e4b511ea769cea Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <yamada.masahiro@socionext.com> Date: Mon, 22 Aug 2016 13:25:56 -0700 Subject: [PATCH 423/513] Input: tegra-kbc - fix inverted reset logic Commit fe6b0dfaba68 ("Input: tegra-kbc - use reset framework") accidentally converted _deassert to _assert, so there is no code to wake up this hardware. Fixes: fe6b0dfaba68 ("Input: tegra-kbc - use reset framework") Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> Acked-by: Thierry Reding <treding@nvidia.com> Acked-by: Laxman Dewangan <ldewangan@nvidia.com> Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> --- drivers/input/keyboard/tegra-kbc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index 7d61439be5f27..0c07e1023a469 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -376,7 +376,7 @@ static int tegra_kbc_start(struct tegra_kbc *kbc) /* Reset the KBC controller to clear all previous status.*/ reset_control_assert(kbc->rst); udelay(100); - reset_control_assert(kbc->rst); + reset_control_deassert(kbc->rst); udelay(100); tegra_kbc_config_pins(kbc); From 3e29d6bb6433ebfa4e187b1164b80baf720d58c3 Mon Sep 17 00:00:00 2001 From: Andrew Duggan <aduggan@synaptics.com> Date: Mon, 22 Aug 2016 11:28:11 -0700 Subject: [PATCH 424/513] Input: synaptics-rmi4 - fix register descriptor subpacket map construction The map_offset variable is specific to the register and needs to be reset in the loop. Otherwise, subsequent register's subpacket maps will have their bits set at the wrong index. Signed-off-by: Andrew Duggan <aduggan@synaptics.com> Tested-by: Nitin Chaudhary <nitinchaudhary1289@gmail.com> Reviewed-by: Benjamin Tissoires <benjamin.tissoires@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> --- drivers/input/rmi4/rmi_driver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/input/rmi4/rmi_driver.c b/drivers/input/rmi4/rmi_driver.c index faa295ec4f313..c83bce89028b2 100644 --- a/drivers/input/rmi4/rmi_driver.c +++ b/drivers/input/rmi4/rmi_driver.c @@ -553,7 +553,6 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr, goto free_struct_buff; reg = find_first_bit(rdesc->presense_map, RMI_REG_DESC_PRESENSE_BITS); - map_offset = 0; for (i = 0; i < rdesc->num_registers; i++) { struct rmi_register_desc_item *item = &rdesc->registers[i]; int reg_size = struct_buf[offset]; @@ -576,6 +575,8 @@ int rmi_read_register_desc(struct rmi_device *d, u16 addr, item->reg = reg; item->reg_size = reg_size; + map_offset = 0; + do { for (b = 0; b < 7; b++) { if (struct_buf[offset] & (0x1 << b)) From e3a888a4bff0bef0b256d55c58bc32c99fb44ece Mon Sep 17 00:00:00 2001 From: Petr Cvek <petr.cvek@tul.cz> Date: Fri, 19 Aug 2016 10:14:29 -0700 Subject: [PATCH 425/513] Input: ads7846 - remove redundant regulator_disable call ADS7846 regulator is disabled twice in a row in ads7846_remove(). Valid one is in ads7846_disable(). Removing the ads7846 module causes warning about unbalanced disables. ... WARNING: CPU: 0 PID: 29269 at drivers/regulator/core.c:2251 _regulator_disable+0xf8/0x130 unbalanced disables for vads7846 CPU: 0 PID: 29269 Comm: rmmod Tainted: G D W 4.7.0+ #3 Hardware name: HTC Magician ... show_stack+0x10/0x14 __warn+0xd8/0x100 warn_slowpath_fmt+0x38/0x48 _regulator_disable+0xf8/0x130 regulator_disable+0x34/0x60 ads7846_remove+0x58/0xd4 [ads7846] spi_drv_remove+0x1c/0x34 __device_release_driver+0x84/0x114 driver_detach+0x8c/0x90 bus_remove_driver+0x5c/0xc8 SyS_delete_module+0x1a0/0x238 ret_fast_syscall+0x0/0x38 Signed-off-by: Petr Cvek <petr.cvek@tul.cz> Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> --- drivers/input/touchscreen/ads7846.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index a61b2153ab8c2..1ce3ecbe37f89 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -1473,7 +1473,6 @@ static int ads7846_remove(struct spi_device *spi) ads784x_hwmon_unregister(spi, ts); - regulator_disable(ts->reg); regulator_put(ts->reg); if (!ts->get_pendown_state) { From 7329a655875a2f4bd6984fe8a7e00a6981e802f3 Mon Sep 17 00:00:00 2001 From: Eric Biggers <ebiggers@google.com> Date: Fri, 19 Aug 2016 12:15:22 -0700 Subject: [PATCH 426/513] usercopy: avoid potentially undefined behavior in pointer math check_bogus_address() checked for pointer overflow using this expression, where 'ptr' has type 'const void *': ptr + n < ptr Since pointer wraparound is undefined behavior, gcc at -O2 by default treats it like the following, which would not behave as intended: (long)n < 0 Fortunately, this doesn't currently happen for kernel code because kernel code is compiled with -fno-strict-overflow. But the expression should be fixed anyway to use well-defined integer arithmetic, since it could be treated differently by different compilers in the future or could be reported by tools checking for undefined behavior. Signed-off-by: Eric Biggers <ebiggers@google.com> Signed-off-by: Kees Cook <keescook@chromium.org> --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 8ebae91a6b551..82f81df2edcf9 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -124,7 +124,7 @@ static inline const char *check_kernel_text_object(const void *ptr, static inline const char *check_bogus_address(const void *ptr, unsigned long n) { /* Reject if object wraps past end of memory. */ - if (ptr + n < ptr) + if ((unsigned long)ptr + n < (unsigned long)ptr) return "<wrapped address>"; /* Reject if NULL or ZERO-allocation. */ From 94cd97af690dd9537818dc9841d0ec68bb1dd877 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf <jpoimboe@redhat.com> Date: Mon, 22 Aug 2016 11:53:59 -0500 Subject: [PATCH 427/513] usercopy: fix overlap check for kernel text When running with a local patch which moves the '_stext' symbol to the very beginning of the kernel text area, I got the following panic with CONFIG_HARDENED_USERCOPY: usercopy: kernel memory exposure attempt detected from ffff88103dfff000 (<linear kernel text>) (4096 bytes) ------------[ cut here ]------------ kernel BUG at mm/usercopy.c:79! invalid opcode: 0000 [#1] SMP ... CPU: 0 PID: 4800 Comm: cp Not tainted 4.8.0-rc3.after+ #1 Hardware name: Dell Inc. PowerEdge R720/0X3D66, BIOS 2.5.4 01/22/2016 task: ffff880817444140 task.stack: ffff880816274000 RIP: 0010:[<ffffffff8121c796>] __check_object_size+0x76/0x413 RSP: 0018:ffff880816277c40 EFLAGS: 00010246 RAX: 000000000000006b RBX: ffff88103dfff000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff88081f80dfa8 RDI: ffff88081f80dfa8 RBP: ffff880816277c90 R08: 000000000000054c R09: 0000000000000000 R10: 0000000000000005 R11: 0000000000000006 R12: 0000000000001000 R13: ffff88103e000000 R14: ffff88103dffffff R15: 0000000000000001 FS: 00007fb9d1750800(0000) GS:ffff88081f800000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000021d2000 CR3: 000000081a08f000 CR4: 00000000001406f0 Stack: ffff880816277cc8 0000000000010000 000000043de07000 0000000000000000 0000000000001000 ffff880816277e60 0000000000001000 ffff880816277e28 000000000000c000 0000000000001000 ffff880816277ce8 ffffffff8136c3a6 Call Trace: [<ffffffff8136c3a6>] copy_page_to_iter_iovec+0xa6/0x1c0 [<ffffffff8136e766>] copy_page_to_iter+0x16/0x90 [<ffffffff811970e3>] generic_file_read_iter+0x3e3/0x7c0 [<ffffffffa06a738d>] ? xfs_file_buffered_aio_write+0xad/0x260 [xfs] [<ffffffff816e6262>] ? down_read+0x12/0x40 [<ffffffffa06a61b1>] xfs_file_buffered_aio_read+0x51/0xc0 [xfs] [<ffffffffa06a6692>] xfs_file_read_iter+0x62/0xb0 [xfs] [<ffffffff812224cf>] __vfs_read+0xdf/0x130 [<ffffffff81222c9e>] vfs_read+0x8e/0x140 [<ffffffff81224195>] SyS_read+0x55/0xc0 [<ffffffff81003a47>] do_syscall_64+0x67/0x160 [<ffffffff816e8421>] entry_SYSCALL64_slow_path+0x25/0x25 RIP: 0033:[<00007fb9d0c33c00>] 0x7fb9d0c33c00 RSP: 002b:00007ffc9c262f28 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 RAX: ffffffffffffffda RBX: fffffffffff8ffff RCX: 00007fb9d0c33c00 RDX: 0000000000010000 RSI: 00000000021c3000 RDI: 0000000000000004 RBP: 00000000021c3000 R08: 0000000000000000 R09: 00007ffc9c264d6c R10: 00007ffc9c262c50 R11: 0000000000000246 R12: 0000000000010000 R13: 00007ffc9c2630b0 R14: 0000000000000004 R15: 0000000000010000 Code: 81 48 0f 44 d0 48 c7 c6 90 4d a3 81 48 c7 c0 bb b3 a2 81 48 0f 44 f0 4d 89 e1 48 89 d9 48 c7 c7 68 16 a3 81 31 c0 e8 f4 57 f7 ff <0f> 0b 48 8d 90 00 40 00 00 48 39 d3 0f 83 22 01 00 00 48 39 c3 RIP [<ffffffff8121c796>] __check_object_size+0x76/0x413 RSP <ffff880816277c40> The checked object's range [ffff88103dfff000, ffff88103e000000) is valid, so there shouldn't have been a BUG. The hardened usercopy code got confused because the range's ending address is the same as the kernel's text starting address at 0xffff88103e000000. The overlap check is slightly off. Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Signed-off-by: Kees Cook <keescook@chromium.org> --- mm/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/usercopy.c b/mm/usercopy.c index 82f81df2edcf9..a3cc3052f830a 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -83,7 +83,7 @@ static bool overlaps(const void *ptr, unsigned long n, unsigned long low, unsigned long check_high = check_low + n; /* Does not overlap if entirely above or entirely below. */ - if (check_low >= high || check_high < low) + if (check_low >= high || check_high <= low) return false; return true; From a8719670687c46ed2e904c0d05fa4cd7e4950cd1 Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi <peter.ujfalusi@ti.com> Date: Tue, 23 Aug 2016 10:27:19 +0300 Subject: [PATCH 428/513] ASoC: omap-mcpdm: Fix irq resource handling Fixes: ddd17531ad908 ("ASoC: omap-mcpdm: Clean up with devm_* function") Managed irq request will not doing any good in ASoC probe level as it is not going to free up the irq when the driver is unbound from the sound card. Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com> Reported-by: Russell King <linux@armlinux.org.uk> Signed-off-by: Mark Brown <broonie@kernel.org> --- sound/soc/omap/omap-mcpdm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sound/soc/omap/omap-mcpdm.c b/sound/soc/omap/omap-mcpdm.c index 74d6e6fdcfd0e..64609c77a79d1 100644 --- a/sound/soc/omap/omap-mcpdm.c +++ b/sound/soc/omap/omap-mcpdm.c @@ -394,8 +394,8 @@ static int omap_mcpdm_probe(struct snd_soc_dai *dai) pm_runtime_get_sync(mcpdm->dev); omap_mcpdm_write(mcpdm, MCPDM_REG_CTRL, 0x00); - ret = devm_request_irq(mcpdm->dev, mcpdm->irq, omap_mcpdm_irq_handler, - 0, "McPDM", (void *)mcpdm); + ret = request_irq(mcpdm->irq, omap_mcpdm_irq_handler, 0, "McPDM", + (void *)mcpdm); pm_runtime_put_sync(mcpdm->dev); @@ -420,6 +420,7 @@ static int omap_mcpdm_remove(struct snd_soc_dai *dai) { struct omap_mcpdm *mcpdm = snd_soc_dai_get_drvdata(dai); + free_irq(mcpdm->irq, (void *)mcpdm); pm_runtime_disable(mcpdm->dev); return 0; From a77ec83a57890240c546df00ca5df1cdeedb1cc3 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington <bcodding@redhat.com> Date: Mon, 6 Jun 2016 18:07:59 -0400 Subject: [PATCH 429/513] vhost/scsi: fix reuse of &vq->iov[out] in response The address of the iovec &vq->iov[out] is not guaranteed to contain the scsi command's response iovec throughout the lifetime of the command. Rather, it is more likely to contain an iovec from an immediately following command after looping back around to vhost_get_vq_desc(). Pass along the iovec entirely instead. Fixes: 79c14141a487 ("vhost/scsi: Convert completion path to use copy_to_iter") Cc: stable@vger.kernel.org Signed-off-by: Benjamin Coddington <bcodding@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> --- drivers/vhost/scsi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 9d6320e8ff3e0..6e29d053843d0 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -88,7 +88,7 @@ struct vhost_scsi_cmd { struct scatterlist *tvc_prot_sgl; struct page **tvc_upages; /* Pointer to response header iovec */ - struct iovec *tvc_resp_iov; + struct iovec tvc_resp_iov; /* Pointer to vhost_scsi for our device */ struct vhost_scsi *tvc_vhost; /* Pointer to vhost_virtqueue for the cmd */ @@ -547,7 +547,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) memcpy(v_rsp.sense, cmd->tvc_sense_buf, se_cmd->scsi_sense_length); - iov_iter_init(&iov_iter, READ, cmd->tvc_resp_iov, + iov_iter_init(&iov_iter, READ, &cmd->tvc_resp_iov, cmd->tvc_in_iovs, sizeof(v_rsp)); ret = copy_to_iter(&v_rsp, sizeof(v_rsp), &iov_iter); if (likely(ret == sizeof(v_rsp))) { @@ -1044,7 +1044,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) } cmd->tvc_vhost = vs; cmd->tvc_vq = vq; - cmd->tvc_resp_iov = &vq->iov[out]; + cmd->tvc_resp_iov = vq->iov[out]; cmd->tvc_in_iovs = in; pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n", From 48ef5865d08fa0a36d786f2f8e12c6194d27538b Mon Sep 17 00:00:00 2001 From: Markus Elfring <elfring@users.sourceforge.net> Date: Fri, 19 Aug 2016 08:50:23 +0200 Subject: [PATCH 430/513] IB/qib: Use memdup_user() rather than duplicating its implementation Reuse existing functionality from memdup_user() instead of keeping duplicate source code. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring <elfring@users.sourceforge.net> Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/qib/qib_fs.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index fcdf37913a264..c3edc033f7c4c 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -328,26 +328,12 @@ static ssize_t flash_write(struct file *file, const char __user *buf, pos = *ppos; - if (pos != 0) { - ret = -EINVAL; - goto bail; - } - - if (count != sizeof(struct qib_flash)) { - ret = -EINVAL; - goto bail; - } - - tmp = kmalloc(count, GFP_KERNEL); - if (!tmp) { - ret = -ENOMEM; - goto bail; - } + if (pos != 0 || count != sizeof(struct qib_flash)) + return -EINVAL; - if (copy_from_user(tmp, buf, count)) { - ret = -EFAULT; - goto bail_tmp; - } + tmp = memdup_user(buf, count); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); dd = private2dd(file); if (qib_eeprom_write(dd, pos, tmp, count)) { @@ -361,8 +347,6 @@ static ssize_t flash_write(struct file *file, const char __user *buf, bail_tmp: kfree(tmp); - -bail: return ret; } From 92d27ae6b3bb3491c1685fb3ca7ae1b26d81bdf4 Mon Sep 17 00:00:00 2001 From: Markus Elfring <elfring@users.sourceforge.net> Date: Mon, 22 Aug 2016 18:23:24 +0200 Subject: [PATCH 431/513] IB/core: Use memdup_user() rather than duplicating its implementation * Reuse existing functionality from memdup_user() instead of keeping duplicate source code. This issue was detected by using the Coccinelle software. * The local variable "ret" will be set to an appropriate value a bit later. Thus omit the explicit initialisation at the beginning. Signed-off-by: Markus Elfring <elfring@users.sourceforge.net> Signed-off-by: Doug Ledford <dledford@redhat.com> --- include/rdma/ib_verbs.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 8e90dd28bb753..e1f96737c2a17 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2115,22 +2115,17 @@ static inline bool ib_is_udata_cleared(struct ib_udata *udata, size_t len) { const void __user *p = udata->inbuf + offset; - bool ret = false; + bool ret; u8 *buf; if (len > USHRT_MAX) return false; - buf = kmalloc(len, GFP_KERNEL); - if (!buf) + buf = memdup_user(p, len); + if (IS_ERR(buf)) return false; - if (copy_from_user(buf, p, len)) - goto free; - ret = !memchr_inv(buf, 0, len); - -free: kfree(buf); return ret; } From 6c7d46fdb8165ece4b0a17fb8f0b9320dbfeffc2 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem <shiraz.saleem@intel.com> Date: Mon, 22 Aug 2016 18:09:14 -0500 Subject: [PATCH 432/513] i40iw: Change mem_resources pointer to a u8 iwdev->mem_resources is incorrectly defined as an unsigned long instead of u8. As a result, the offset into the dynamic allocated structures in i40iw_initialize_hw_resources() is incorrectly calculated and would lead to writing of memory regions outside of the allocated buffer. Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Reported-by: Stefan Assmann <sassmann@redhat.com> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index b738acdb9b027..882f3ef2e4c7a 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -232,7 +232,7 @@ struct i40iw_device { struct i40e_client *client; struct i40iw_hw hw; struct i40iw_cm_core cm_core; - unsigned long *mem_resources; + u8 *mem_resources; unsigned long *allocated_qps; unsigned long *allocated_cqs; unsigned long *allocated_mrs; From 44856be3e95c87f03e850ef4fdf8c0503c2dde18 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Mon, 22 Aug 2016 18:15:58 -0500 Subject: [PATCH 433/513] i40iw: Protect req_resource_num update In i40iw_alloc_resource(), ensure that the update to req_resource_num is protected by the lock. Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Reported-by: Stefan Assmann <sassmann@redhat.com> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h index 882f3ef2e4c7a..8ec09e470f848 100644 --- a/drivers/infiniband/hw/i40iw/i40iw.h +++ b/drivers/infiniband/hw/i40iw/i40iw.h @@ -435,8 +435,8 @@ static inline int i40iw_alloc_resource(struct i40iw_device *iwdev, *next = resource_num + 1; if (*next == max_resources) *next = 0; - spin_unlock_irqrestore(&iwdev->resource_lock, flags); *req_resource_num = resource_num; + spin_unlock_irqrestore(&iwdev->resource_lock, flags); return 0; } From faa739fb5df56aadab96bcd2f6eb3486cc3a3aec Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Mon, 22 Aug 2016 18:17:12 -0500 Subject: [PATCH 434/513] i40iw: Add missing check for interface already open In i40iw_open(), check if interface is already open and return success if it is. Fixes: 8e06af711bf2 ("i40iw: add main, hdr, status") Reported-by: Stefan Assmann <sassmann@redhat.com> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw_main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c index 6e9081380a276..0cbbe40382982 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_main.c +++ b/drivers/infiniband/hw/i40iw/i40iw_main.c @@ -1558,6 +1558,10 @@ static int i40iw_open(struct i40e_info *ldev, struct i40e_client *client) enum i40iw_status_code status; struct i40iw_handler *hdl; + hdl = i40iw_find_netdev(ldev->netdev); + if (hdl) + return 0; + hdl = kzalloc(sizeof(*hdl), GFP_KERNEL); if (!hdl) return -ENOMEM; From cff069b78c21559f427c3fefe9ef3294e3dec094 Mon Sep 17 00:00:00 2001 From: Bharat Potnuri <bharat@chelsio.com> Date: Tue, 23 Aug 2016 20:27:33 +0530 Subject: [PATCH 435/513] iw_cxgb4: Fix cxgb4 arm CQ logic w/IB_CQ_REPORT_MISSED_EVENTS Current cxgb4 arm CQ logic ignores IB_CQ_REPORT_MISSED_EVENTS for request completion notification on a CQ. Due to this ib_poll_handler() assumes all events polled and avoids further iopoll scheduling. This patch adds logic to cxgb4 ib_req_notify_cq() handler to check if CQ is not empty and return accordingly. Based on the return value of ib_req_notify_cq() handler, ib_poll_handler() will schedule a run of iopoll handler. Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com> Reviewed-by: Steve Wise <swise@opengridcomputing.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/cxgb4/cq.c | 10 +++++----- drivers/infiniband/hw/cxgb4/t4.h | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c index 812ab7278b8ee..ac926c942fee7 100644 --- a/drivers/infiniband/hw/cxgb4/cq.c +++ b/drivers/infiniband/hw/cxgb4/cq.c @@ -1016,15 +1016,15 @@ int c4iw_resize_cq(struct ib_cq *cq, int cqe, struct ib_udata *udata) int c4iw_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct c4iw_cq *chp; - int ret; + int ret = 0; unsigned long flag; chp = to_c4iw_cq(ibcq); spin_lock_irqsave(&chp->lock, flag); - ret = t4_arm_cq(&chp->cq, - (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + t4_arm_cq(&chp->cq, + (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED); + if (flags & IB_CQ_REPORT_MISSED_EVENTS) + ret = t4_cq_notempty(&chp->cq); spin_unlock_irqrestore(&chp->lock, flag); - if (ret && !(flags & IB_CQ_REPORT_MISSED_EVENTS)) - ret = 0; return ret; } diff --git a/drivers/infiniband/hw/cxgb4/t4.h b/drivers/infiniband/hw/cxgb4/t4.h index 6126bbe36095c..02173f4315fa6 100644 --- a/drivers/infiniband/hw/cxgb4/t4.h +++ b/drivers/infiniband/hw/cxgb4/t4.h @@ -634,6 +634,11 @@ static inline int t4_valid_cqe(struct t4_cq *cq, struct t4_cqe *cqe) return (CQE_GENBIT(cqe) == cq->gen); } +static inline int t4_cq_notempty(struct t4_cq *cq) +{ + return cq->sw_in_use || t4_valid_cqe(cq, &cq->queue[cq->cidx]); +} + static inline int t4_next_hw_cqe(struct t4_cq *cq, struct t4_cqe **cqe) { int ret; From c0082e985fdf77b02fc9e0dac3b58504dcf11b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= <vincent.stehle@intel.com> Date: Fri, 12 Aug 2016 15:26:30 +0200 Subject: [PATCH 436/513] ubifs: Fix assertion in layout_in_gaps() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An assertion in layout_in_gaps() verifies that the gap_lebs pointer is below the maximum bound. When computing this maximum bound the idx_lebs count is multiplied by sizeof(int), while C pointers arithmetic does take into account the size of the pointed elements implicitly already. Remove the multiplication to fix the assertion. Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system") Cc: <stable@vger.kernel.org> Signed-off-by: Vincent Stehlé <vincent.stehle@intel.com> Cc: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com> Signed-off-by: Richard Weinberger <richard@nod.at> --- fs/ubifs/tnc_commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index b45345d701e77..51157da3f76ed 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c @@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) p = c->gap_lebs; do { - ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs); + ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs); written = layout_leb_in_gaps(c, p); if (written < 0) { err = written; From 17ce1eb0b64eb27d4f9180daae7495fa022c7b0d Mon Sep 17 00:00:00 2001 From: Richard Weinberger <richard@nod.at> Date: Sun, 31 Jul 2016 21:42:23 +0200 Subject: [PATCH 437/513] ubifs: Fix xattr generic handler usage UBIFS uses full names to work with xattrs, therefore we have to use xattr_full_name() to obtain the xattr prefix as string. Cc: <stable@vger.kernel.org> Cc: Andreas Gruenbacher <agruenba@redhat.com> Fixes: 2b88fc21ca ("ubifs: Switch to generic xattr handlers") Signed-off-by: Richard Weinberger <richard@nod.at> Reviewed-by: Andreas Gruenbacher <agruenba@redhat.com> Tested-by: Dongsheng Yang <dongsheng081251@gmail.com> --- fs/ubifs/xattr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index e237811f09ce5..11a004114eba5 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c @@ -575,7 +575,8 @@ static int ubifs_xattr_get(const struct xattr_handler *handler, dbg_gen("xattr '%s', ino %lu ('%pd'), buf size %zd", name, inode->i_ino, dentry, size); - return __ubifs_getxattr(inode, name, buffer, size); + name = xattr_full_name(handler, name); + return __ubifs_getxattr(inode, name, buffer, size); } static int ubifs_xattr_set(const struct xattr_handler *handler, @@ -586,6 +587,8 @@ static int ubifs_xattr_set(const struct xattr_handler *handler, dbg_gen("xattr '%s', host ino %lu ('%pd'), size %zd", name, inode->i_ino, dentry, size); + name = xattr_full_name(handler, name); + if (value) return __ubifs_setxattr(inode, name, value, size, flags); else From dad2232844073295c64e9cc2d734a0ade043e0f6 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <aryabinin@virtuozzo.com> Date: Wed, 17 Aug 2016 18:10:11 +0300 Subject: [PATCH 438/513] um: Don't discard .text.exit section Commit e41f501d3912 ("vmlinux.lds: account for destructor sections") added '.text.exit' to EXIT_TEXT which is discarded at link time by default. This breaks compilation of UML: `.text.exit' referenced in section `.fini_array' of /usr/lib/gcc/x86_64-linux-gnu/6/../../../x86_64-linux-gnu/libc.a(sdlerror.o): defined in discarded section `.text.exit' of /usr/lib/gcc/x86_64-linux-gnu/6/../../../x86_64-linux-gnu/libc.a(sdlerror.o) Apparently UML doesn't want to discard exit text, so let's place all EXIT_TEXT sections in .exit.text. Fixes: e41f501d3912 ("vmlinux.lds: account for destructor sections") Reported-by: Stefan Traby <stefan@hello-penguin.com> Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Cc: <stable@vger.kernel.org> Acked-by: Dmitry Vyukov <dvyukov@google.com> Signed-off-by: Richard Weinberger <richard@nod.at> --- arch/um/include/asm/common.lds.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S index 1dd5bd8a8c599..133055311dce1 100644 --- a/arch/um/include/asm/common.lds.S +++ b/arch/um/include/asm/common.lds.S @@ -81,7 +81,7 @@ .altinstr_replacement : { *(.altinstr_replacement) } /* .exit.text is discard at runtime, not link time, to deal with references from .altinstructions and .eh_frame */ - .exit.text : { *(.exit.text) } + .exit.text : { EXIT_TEXT } .exit.data : { *(.exit.data) } .preinit_array : { From 21c80c9fefc3db10b530a96eb0478c29eb28bf77 Mon Sep 17 00:00:00 2001 From: Keith Busch <keith.busch@intel.com> Date: Tue, 23 Aug 2016 16:36:42 -0500 Subject: [PATCH 439/513] x86/PCI: VMD: Fix infinite loop executing irq's We can't initialize the list head on deletion as this causes the node to point to itself, which causes an infinite loop if vmd_irq() happens to be servicing that node. The list initialization was trying to fix a bug from multiple calls to disable the same IRQ. Fix this instead by having the VMD driver track if the interrupt is enabled. [bhelgaas: changelog, add "Fixes"] Fixes: 97e923063575 ("x86/PCI: VMD: Initialize list item in IRQ disable") Reported-by: Grzegorz Koczot <grzegorz.koczot@intel.com> Tested-by: Miroslaw Drost <miroslaw.drost@intel.com> Signed-off-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Acked-by Jon Derrick: <jonathan.derrick@intel.com> --- arch/x86/pci/vmd.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/pci/vmd.c b/arch/x86/pci/vmd.c index b814ca675131e..7948be342ee99 100644 --- a/arch/x86/pci/vmd.c +++ b/arch/x86/pci/vmd.c @@ -41,6 +41,7 @@ static DEFINE_RAW_SPINLOCK(list_lock); * @node: list item for parent traversal. * @rcu: RCU callback item for freeing. * @irq: back pointer to parent. + * @enabled: true if driver enabled IRQ * @virq: the virtual IRQ value provided to the requesting driver. * * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to @@ -50,6 +51,7 @@ struct vmd_irq { struct list_head node; struct rcu_head rcu; struct vmd_irq_list *irq; + bool enabled; unsigned int virq; }; @@ -122,7 +124,9 @@ static void vmd_irq_enable(struct irq_data *data) unsigned long flags; raw_spin_lock_irqsave(&list_lock, flags); + WARN_ON(vmdirq->enabled); list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list); + vmdirq->enabled = true; raw_spin_unlock_irqrestore(&list_lock, flags); data->chip->irq_unmask(data); @@ -136,8 +140,10 @@ static void vmd_irq_disable(struct irq_data *data) data->chip->irq_mask(data); raw_spin_lock_irqsave(&list_lock, flags); - list_del_rcu(&vmdirq->node); - INIT_LIST_HEAD_RCU(&vmdirq->node); + if (vmdirq->enabled) { + list_del_rcu(&vmdirq->node); + vmdirq->enabled = false; + } raw_spin_unlock_irqrestore(&list_lock, flags); } From 27727df240c7cc84f2ba6047c6f18d5addfd25ef Mon Sep 17 00:00:00 2001 From: John Stultz <john.stultz@linaro.org> Date: Tue, 23 Aug 2016 16:08:21 -0700 Subject: [PATCH 440/513] timekeeping: Avoid taking lock in NMI path with CONFIG_DEBUG_TIMEKEEPING When I added some extra sanity checking in timekeeping_get_ns() under CONFIG_DEBUG_TIMEKEEPING, I missed that the NMI safe __ktime_get_fast_ns() method was using timekeeping_get_ns(). Thus the locking added to the debug checks broke the NMI-safety of __ktime_get_fast_ns(). This patch open-codes the timekeeping_get_ns() logic for __ktime_get_fast_ns(), so can avoid any deadlocks in NMI. Fixes: 4ca22c2648f9 "timekeeping: Add warnings when overflows or underflows are observed" Reported-by: Steven Rostedt <rostedt@goodmis.org> Reported-by: Peter Zijlstra <peterz@infradead.org> Signed-off-by: John Stultz <john.stultz@linaro.org> Cc: stable <stable@vger.kernel.org> Link: http://lkml.kernel.org/r/1471993702-29148-2-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/time/timekeeping.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 3b65746c7f156..e07fb093f8195 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -401,7 +401,10 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) do { seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); - now = ktime_to_ns(tkr->base) + timekeeping_get_ns(tkr); + now = ktime_to_ns(tkr->base); + + now += clocksource_delta(tkr->read(tkr->clock), + tkr->cycle_last, tkr->mask); } while (read_seqcount_retry(&tkf->seq, seq)); return now; From a4f8f6667f099036c88f231dcad4cf233652c824 Mon Sep 17 00:00:00 2001 From: John Stultz <john.stultz@linaro.org> Date: Tue, 23 Aug 2016 16:08:22 -0700 Subject: [PATCH 441/513] timekeeping: Cap array access in timekeeping_debug It was reported that hibernation could fail on the 2nd attempt, where the system hangs at hibernate() -> syscore_resume() -> i8237A_resume() -> claim_dma_lock(), because the lock has already been taken. However there is actually no other process would like to grab this lock on that problematic platform. Further investigation showed that the problem is triggered by setting /sys/power/pm_trace to 1 before the 1st hibernation. Since once pm_trace is enabled, the rtc becomes unmeaningful after suspend, and meanwhile some BIOSes would like to adjust the 'invalid' RTC (e.g, smaller than 1970) to the release date of that motherboard during POST stage, thus after resumed, it may seem that the system had a significant long sleep time which is a completely meaningless value. Then in timekeeping_resume -> tk_debug_account_sleep_time, if the bit31 of the sleep time happened to be set to 1, fls() returns 32 and we add 1 to sleep_time_bin[32], which causes an out of bounds array access and therefor memory being overwritten. As depicted by System.map: 0xffffffff81c9d080 b sleep_time_bin 0xffffffff81c9d100 B dma_spin_lock the dma_spin_lock.val is set to 1, which caused this problem. This patch adds a sanity check in tk_debug_account_sleep_time() to ensure we don't index past the sleep_time_bin array. [jstultz: Problem diagnosed and original patch by Chen Yu, I've solved the issue slightly differently, but borrowed his excelent explanation of the issue here.] Fixes: 5c83545f24ab "power: Add option to log time spent in suspend" Reported-by: Janek Kozicki <cosurgi@gmail.com> Reported-by: Chen Yu <yu.c.chen@intel.com> Signed-off-by: John Stultz <john.stultz@linaro.org> Cc: linux-pm@vger.kernel.org Cc: Peter Zijlstra <peterz@infradead.org> Cc: Xunlei Pang <xpang@redhat.com> Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net> Cc: stable <stable@vger.kernel.org> Cc: Zhang Rui <rui.zhang@intel.com> Link: http://lkml.kernel.org/r/1471993702-29148-3-git-send-email-john.stultz@linaro.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- kernel/time/timekeeping_debug.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c index f6bd65236712b..107310a6f36f4 100644 --- a/kernel/time/timekeeping_debug.c +++ b/kernel/time/timekeeping_debug.c @@ -23,7 +23,9 @@ #include "timekeeping_internal.h" -static unsigned int sleep_time_bin[32] = {0}; +#define NUM_BINS 32 + +static unsigned int sleep_time_bin[NUM_BINS] = {0}; static int tk_debug_show_sleep_time(struct seq_file *s, void *data) { @@ -69,6 +71,9 @@ late_initcall(tk_debug_sleep_time_init); void tk_debug_account_sleep_time(struct timespec64 *t) { - sleep_time_bin[fls(t->tv_sec)]++; + /* Cap bin index so we don't overflow the array */ + int bin = min(fls(t->tv_sec), NUM_BINS-1); + + sleep_time_bin[bin]++; } From 2e63ad4bd5dd583871e6602f9d398b9322d358d9 Mon Sep 17 00:00:00 2001 From: Wanpeng Li <wanpeng.li@hotmail.com> Date: Tue, 23 Aug 2016 20:07:19 +0800 Subject: [PATCH 442/513] x86/apic: Do not init irq remapping if ioapic is disabled native_smp_prepare_cpus -> default_setup_apic_routing -> enable_IR_x2apic -> irq_remapping_prepare -> intel_prepare_irq_remapping -> intel_setup_irq_remapping So IR table is setup even if "noapic" boot parameter is added. As a result we crash later when the interrupt affinity is set due to a half initialized remapping infrastructure. Prevent remap initialization when IOAPIC is disabled. Signed-off-by: Wanpeng Li <wanpeng.li@hotmail.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Joerg Roedel <joro@8bytes.org> Link: http://lkml.kernel.org/r/1471954039-3942-1-git-send-email-wanpeng.li@hotmail.com Cc: stable@vger.kernel.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- arch/x86/kernel/apic/apic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index cea4fc19e8447..50c95af0f017d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1623,6 +1623,9 @@ void __init enable_IR_x2apic(void) unsigned long flags; int ret, ir_stat; + if (skip_ioapic_setup) + return; + ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) return; From abaa2274811d607679e8687b4118c4922a3517ac Mon Sep 17 00:00:00 2001 From: Anisse Astier <anisse@astier.eu> Date: Wed, 24 Aug 2016 09:14:13 +0200 Subject: [PATCH 443/513] ALSA: hda/realtek - fix headset mic detection for MSI MS-B120 MSI Cubi MS-B120 needs the same fixup as the Gigabyte BXBT-2807 for its mic to work. They both use a single 3-way jack for both mic and headset with an ALC283 codec, with the same pins used. Cc: Daniel Drake <drake@endlessm.com> Signed-off-by: Anisse Astier <anisse@astier.eu> Signed-off-by: Takashi Iwai <tiwai@suse.de> --- sound/pci/hda/patch_realtek.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 574b1b48996f1..7100f05e651a0 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -4828,7 +4828,7 @@ enum { ALC293_FIXUP_DELL1_MIC_NO_PRESENCE, ALC292_FIXUP_TPT440_DOCK, ALC292_FIXUP_TPT440, - ALC283_FIXUP_BXBT2807_MIC, + ALC283_FIXUP_HEADSET_MIC, ALC255_FIXUP_DELL_WMI_MIC_MUTE_LED, ALC282_FIXUP_ASPIRE_V5_PINS, ALC280_FIXUP_HP_GPIO4, @@ -5321,7 +5321,7 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC292_FIXUP_TPT440_DOCK, }, - [ALC283_FIXUP_BXBT2807_MIC] = { + [ALC283_FIXUP_HEADSET_MIC] = { .type = HDA_FIXUP_PINS, .v.pins = (const struct hda_pintbl[]) { { 0x19, 0x04a110f0 }, @@ -5651,7 +5651,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x10cf, 0x1757, "Lifebook E752", ALC269_FIXUP_LIFEBOOK_HP_PIN), SND_PCI_QUIRK(0x10cf, 0x1845, "Lifebook U904", ALC269_FIXUP_LIFEBOOK_EXTMIC), SND_PCI_QUIRK(0x144d, 0xc109, "Samsung Ativ book 9 (NP900X3G)", ALC269_FIXUP_INV_DMIC), - SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_BXBT2807_MIC), + SND_PCI_QUIRK(0x1458, 0xfa53, "Gigabyte BXBT-2807", ALC283_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1462, 0xb120, "MSI Cubi MS-B120", ALC283_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x17aa, 0x20f2, "Thinkpad SL410/510", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x215e, "Thinkpad L512", ALC269_FIXUP_SKU_IGNORE), SND_PCI_QUIRK(0x17aa, 0x21b8, "Thinkpad Edge 14", ALC269_FIXUP_SKU_IGNORE), From 588deb614a0d3caa596dd8eba8c4d31eaaeb89b9 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Tue, 9 Aug 2016 11:03:56 +0100 Subject: [PATCH 444/513] MAINTAINERS: Add ARM ARCHITECTED TIMER entry The ARM architected timer driver falls under the drivers/clocksource/ catch-all in MAINTAINERS, and get_maintainers.pl doesn't suggest a number of people who should be Cc'd. The ARM architected timer is a core component of ARMv7+VE and ARMv8, and is critical to the correct operation of both architecture ports (and their respective KVM code), and patches to it should have review by knowledgeable interested parties. This patch adds a MAINTAINERS entry for the driver and its low-level arch components, such that get_maintainer.pl will always include relevant interested parties for modifications to the driver. For the timebeing, this means myself and Marc Zyngier. Signed-off-by: Mark Rutland <mark.rutland@arm.com> Acked-by: Marc Zyngier <marc.zyngier@arm.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Daniel Lezcano <daniel.lezcano@linaro.org> Cc: Will Deacon <will.deacon@arm.com> Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1470737036-2082-1-git-send-email-mark.rutland@arm.com Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- MAINTAINERS | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0bbe4b105c346..4705c94f6a371 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -881,6 +881,15 @@ S: Supported F: drivers/gpu/drm/arc/ F: Documentation/devicetree/bindings/display/snps,arcpgu.txt +ARM ARCHITECTED TIMER DRIVER +M: Mark Rutland <mark.rutland@arm.com> +M: Marc Zyngier <marc.zyngier@arm.com> +L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) +S: Maintained +F: arch/arm/include/asm/arch_timer.h +F: arch/arm64/include/asm/arch_timer.h +F: drivers/clocksource/arm_arch_timer.c + ARM HDLCD DRM DRIVER M: Liviu Dudau <liviu.dudau@arm.com> S: Supported From aa8c0f1ad7e862147f4efb32bbb71ff66eb38caa Mon Sep 17 00:00:00 2001 From: Baoyou Xie <baoyou.xie@linaro.org> Date: Tue, 23 Aug 2016 23:19:29 +0800 Subject: [PATCH 445/513] clocksource/drivers/pxa: Fix include files for compilation We get 1 warning about global functions without a declaration in the clocksource/drivers/pxa driver when building with W=1: drivers/clocksource/pxa_timer.c:221:13: warning: no previous prototype for 'pxa_timer_nodt_init' [-Wmissing-prototypes] void __init pxa_timer_nodt_init(int irq, void __iomem *base, In fact, this function is declared in pxa.h, so this patch add missing header dependencies. Signed-off-by: Baoyou Xie <baoyou.xie@linaro.org> Reviewed-by: Arnd Bergmann <arnd@arndb.de> Cc: daniel.lezcano@linaro.org Cc: xie.baoyou@zte.com.cn Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1471965569-4104-1-git-send-email-baoyou.xie@linaro.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de> --- drivers/clocksource/pxa_timer.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 937e10b84d585..3e1cb512f3ce9 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -21,6 +21,8 @@ #include <linux/of_irq.h> #include <linux/sched_clock.h> +#include <clocksource/pxa.h> + #include <asm/div64.h> #define OSMR0 0x00 /* OS Timer 0 Match Register */ From 8b6a3fe8fab97716990a3abde1a01fb5a34552a3 Mon Sep 17 00:00:00 2001 From: Will Deacon <will.deacon@arm.com> Date: Wed, 24 Aug 2016 10:07:14 +0100 Subject: [PATCH 446/513] perf/core: Use this_cpu_ptr() when stopping AUX events When tearing down an AUX buf for an event via perf_mmap_close(), __perf_event_output_stop() is called on the event's CPU to ensure that trace generation is halted before the process of unmapping and freeing the buffer pages begins. The callback is performed via cpu_function_call(), which ensures that it runs with interrupts disabled and is therefore not preemptible. Unfortunately, the current code grabs the per-cpu context pointer using get_cpu_ptr(), which unnecessarily disables preemption and doesn't pair the call with put_cpu_ptr(), leading to a preempt_count() imbalance and a BUG when freeing the AUX buffer later on: WARNING: CPU: 1 PID: 2249 at kernel/events/ring_buffer.c:539 __rb_free_aux+0x10c/0x120 Modules linked in: [...] Call Trace: [<ffffffff813379dd>] dump_stack+0x4f/0x72 [<ffffffff81059ff6>] __warn+0xc6/0xe0 [<ffffffff8105a0c8>] warn_slowpath_null+0x18/0x20 [<ffffffff8112761c>] __rb_free_aux+0x10c/0x120 [<ffffffff81128163>] rb_free_aux+0x13/0x20 [<ffffffff8112515e>] perf_mmap_close+0x29e/0x2f0 [<ffffffff8111da30>] ? perf_iterate_ctx+0xe0/0xe0 [<ffffffff8115f685>] remove_vma+0x25/0x60 [<ffffffff81161796>] exit_mmap+0x106/0x140 [<ffffffff8105725c>] mmput+0x1c/0xd0 [<ffffffff8105cac3>] do_exit+0x253/0xbf0 [<ffffffff8105e32e>] do_group_exit+0x3e/0xb0 [<ffffffff81068d49>] get_signal+0x249/0x640 [<ffffffff8101c273>] do_signal+0x23/0x640 [<ffffffff81905f42>] ? _raw_write_unlock_irq+0x12/0x30 [<ffffffff81905f69>] ? _raw_spin_unlock_irq+0x9/0x10 [<ffffffff81901896>] ? __schedule+0x2c6/0x710 [<ffffffff810022a4>] exit_to_usermode_loop+0x74/0x90 [<ffffffff81002a56>] prepare_exit_to_usermode+0x26/0x30 [<ffffffff81906d1b>] retint_user+0x8/0x10 This patch uses this_cpu_ptr() instead of get_cpu_ptr(), since preemption is already disabled by the caller. Signed-off-by: Will Deacon <will.deacon@arm.com> Reviewed-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@kernel.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Fixes: 95ff4ca26c49 ("perf/core: Free AUX pages in unmap path") Link: http://lkml.kernel.org/r/20160824091905.GA16944@arm.com Signed-off-by: Ingo Molnar <mingo@kernel.org> --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5650f5317e0c3..3cfabdf7b9429 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6166,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info) { struct perf_event *event = info; struct pmu *pmu = event->pmu; - struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct remote_output ro = { .rb = event->rb, }; From 87904c3e82319cf2bad8d656d79c5030dab9490e Mon Sep 17 00:00:00 2001 From: Thierry Reding <treding@nvidia.com> Date: Fri, 12 Aug 2016 16:00:53 +0200 Subject: [PATCH 447/513] drm/tegra: dsi: Enhance runtime power management The MIPI DSI output on Tegra SoCs requires some external logic to calibrate the MIPI pads before a video signal can be transmitted. This MIPI calibration logic requires to be powered on while the MIPI pads are being used, which is currently done as part of the DSI driver's probe implementation. This is suboptimal because it will leave the MIPI calibration logic powered up even if the DSI output is never used. On Tegra114 and earlier this behaviour also causes the driver to hang while trying to power up the MIPI calibration logic because the power partition that contains the MIPI calibration logic will be powered on by the display controller at output pipeline configuration time. Thus the power up sequence for the MIPI calibration logic happens before it's power partition is guaranteed to be enabled. Fix this by splitting up the API into a request/free pair of functions that manage the runtime dependency between the DSI and the calibration modules (no registers are accessed) and a set of enable, calibrate and disable functions that program the MIPI calibration logic at points in time where the power partition is really enabled. While at it, make sure that the runtime power management also works in ganged mode, which is currently also broken. Reported-by: Jonathan Hunter <jonathanh@nvidia.com> Tested-by: Jonathan Hunter <jonathanh@nvidia.com> Signed-off-by: Thierry Reding <treding@nvidia.com> --- drivers/gpu/drm/tegra/dsi.c | 43 ++++++++++++++++++++----- drivers/gpu/host1x/mipi.c | 63 ++++++++++++++++++------------------- include/linux/host1x.h | 2 ++ 3 files changed, 69 insertions(+), 39 deletions(-) diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 3d228ad90e0f1..3dea1216bafdc 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -840,6 +840,21 @@ static const struct drm_encoder_funcs tegra_dsi_encoder_funcs = { .destroy = tegra_output_encoder_destroy, }; +static void tegra_dsi_unprepare(struct tegra_dsi *dsi) +{ + int err; + + if (dsi->slave) + tegra_dsi_unprepare(dsi->slave); + + err = tegra_mipi_disable(dsi->mipi); + if (err < 0) + dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n", + err); + + pm_runtime_put(dsi->dev); +} + static void tegra_dsi_encoder_disable(struct drm_encoder *encoder) { struct tegra_output *output = encoder_to_output(encoder); @@ -876,7 +891,26 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder) tegra_dsi_disable(dsi); - pm_runtime_put(dsi->dev); + tegra_dsi_unprepare(dsi); +} + +static void tegra_dsi_prepare(struct tegra_dsi *dsi) +{ + int err; + + pm_runtime_get_sync(dsi->dev); + + err = tegra_mipi_enable(dsi->mipi); + if (err < 0) + dev_err(dsi->dev, "failed to enable MIPI calibration: %d\n", + err); + + err = tegra_dsi_pad_calibrate(dsi); + if (err < 0) + dev_err(dsi->dev, "MIPI calibration failed: %d\n", err); + + if (dsi->slave) + tegra_dsi_prepare(dsi->slave); } static void tegra_dsi_encoder_enable(struct drm_encoder *encoder) @@ -887,13 +921,8 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder) struct tegra_dsi *dsi = to_dsi(output); struct tegra_dsi_state *state; u32 value; - int err; - - pm_runtime_get_sync(dsi->dev); - err = tegra_dsi_pad_calibrate(dsi); - if (err < 0) - dev_err(dsi->dev, "MIPI calibration failed: %d\n", err); + tegra_dsi_prepare(dsi); state = tegra_dsi_get_state(dsi); diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c index 52a6fd224127e..e00809d996a29 100644 --- a/drivers/gpu/host1x/mipi.c +++ b/drivers/gpu/host1x/mipi.c @@ -242,20 +242,6 @@ struct tegra_mipi_device *tegra_mipi_request(struct device *device) dev->pads = args.args[0]; dev->device = device; - mutex_lock(&dev->mipi->lock); - - if (dev->mipi->usage_count++ == 0) { - err = tegra_mipi_power_up(dev->mipi); - if (err < 0) { - dev_err(dev->mipi->dev, - "failed to power up MIPI bricks: %d\n", - err); - return ERR_PTR(err); - } - } - - mutex_unlock(&dev->mipi->lock); - return dev; put: @@ -270,29 +256,42 @@ EXPORT_SYMBOL(tegra_mipi_request); void tegra_mipi_free(struct tegra_mipi_device *device) { - int err; + platform_device_put(device->pdev); + kfree(device); +} +EXPORT_SYMBOL(tegra_mipi_free); - mutex_lock(&device->mipi->lock); +int tegra_mipi_enable(struct tegra_mipi_device *dev) +{ + int err = 0; - if (--device->mipi->usage_count == 0) { - err = tegra_mipi_power_down(device->mipi); - if (err < 0) { - /* - * Not much that can be done here, so an error message - * will have to do. - */ - dev_err(device->mipi->dev, - "failed to power down MIPI bricks: %d\n", - err); - } - } + mutex_lock(&dev->mipi->lock); - mutex_unlock(&device->mipi->lock); + if (dev->mipi->usage_count++ == 0) + err = tegra_mipi_power_up(dev->mipi); + + mutex_unlock(&dev->mipi->lock); + + return err; - platform_device_put(device->pdev); - kfree(device); } -EXPORT_SYMBOL(tegra_mipi_free); +EXPORT_SYMBOL(tegra_mipi_enable); + +int tegra_mipi_disable(struct tegra_mipi_device *dev) +{ + int err = 0; + + mutex_lock(&dev->mipi->lock); + + if (--dev->mipi->usage_count == 0) + err = tegra_mipi_power_down(dev->mipi); + + mutex_unlock(&dev->mipi->lock); + + return err; + +} +EXPORT_SYMBOL(tegra_mipi_disable); static int tegra_mipi_wait(struct tegra_mipi *mipi) { diff --git a/include/linux/host1x.h b/include/linux/host1x.h index d2ba7d334039e..1ffbf2a8cb997 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -304,6 +304,8 @@ struct tegra_mipi_device; struct tegra_mipi_device *tegra_mipi_request(struct device *device); void tegra_mipi_free(struct tegra_mipi_device *device); +int tegra_mipi_enable(struct tegra_mipi_device *device); +int tegra_mipi_disable(struct tegra_mipi_device *device); int tegra_mipi_calibrate(struct tegra_mipi_device *device); #endif From 9b47f77a680447e0132b2cf7fb82374e014bec1c Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@kernel.org> Date: Wed, 24 Aug 2016 03:52:12 -0700 Subject: [PATCH 448/513] nvme: Fix nvme_get/set_features() with a NULL result pointer nvme_set_features() callers seem to expect that passing NULL as the result pointer is acceptable. Teach nvme_set_features() not to try to write to the NULL address. For symmetry, make the same change to nvme_get_features(), despite the fact that all current callers pass a valid result pointer. I assume that this bug hasn't been reported in practice because the callers that pass NULL are all in the SCSI translation layer and no one uses the relevant operations. Cc: stable@vger.kernel.org Signed-off-by: Andy Lutomirski <luto@kernel.org> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Jens Axboe <axboe@fb.com> --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 7f75d661237f6..2feacc70bf61f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -611,7 +611,7 @@ int nvme_get_features(struct nvme_ctrl *dev, unsigned fid, unsigned nsid, ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, NVME_QID_ANY, 0, 0); - if (ret >= 0) + if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); return ret; } @@ -631,7 +631,7 @@ int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, ret = __nvme_submit_sync_cmd(dev->admin_q, &c, &cqe, NULL, 0, 0, NVME_QID_ANY, 0, 0); - if (ret >= 0) + if (ret >= 0 && result) *result = le32_to_cpu(cqe.result); return ret; } From 4d70dca4eadf2f95abe389116ac02b8439c2d16c Mon Sep 17 00:00:00 2001 From: Ming Lei <ming.lei@canonical.com> Date: Tue, 23 Aug 2016 21:49:45 +0800 Subject: [PATCH 449/513] block: make sure a big bio is split into at most 256 bvecs After arbitrary bio size was introduced, the incoming bio may be very big. We have to split the bio into small bios so that each holds at most BIO_MAX_PAGES bvecs for safety reason, such as bio_clone(). This patch fixes the following kernel crash: > [ 172.660142] BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 > [ 172.660229] IP: [<ffffffff811e53b4>] bio_trim+0xf/0x2a > [ 172.660289] PGD 7faf3e067 PUD 7f9279067 PMD 0 > [ 172.660399] Oops: 0000 [#1] SMP > [...] > [ 172.664780] Call Trace: > [ 172.664813] [<ffffffffa007f3be>] ? raid1_make_request+0x2e8/0xad7 [raid1] > [ 172.664846] [<ffffffff811f07da>] ? blk_queue_split+0x377/0x3d4 > [ 172.664880] [<ffffffffa005fb5f>] ? md_make_request+0xf6/0x1e9 [md_mod] > [ 172.664912] [<ffffffff811eb860>] ? generic_make_request+0xb5/0x155 > [ 172.664947] [<ffffffffa0445c89>] ? prio_io+0x85/0x95 [bcache] > [ 172.664981] [<ffffffffa0448252>] ? register_cache_set+0x355/0x8d0 [bcache] > [ 172.665016] [<ffffffffa04497d3>] ? register_bcache+0x1006/0x1174 [bcache] The issue can be reproduced by the following steps: - create one raid1 over two virtio-blk - build bcache device over the above raid1 and another cache device and bucket size is set as 2Mbytes - set cache mode as writeback - run random write over ext4 on the bcache device Fixes: 54efd50(block: make generic_make_request handle arbitrarily sized bios) Reported-by: Sebastian Roesner <sroesner-kernelorg@roesner-online.de> Reported-by: Eric Wheeler <bcache@lists.ewheeler.net> Cc: stable@vger.kernel.org (4.3+) Cc: Shaohua Li <shli@fb.com> Acked-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Ming Lei <ming.lei@canonical.com> Signed-off-by: Jens Axboe <axboe@fb.com> --- block/blk-merge.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/block/blk-merge.c b/block/blk-merge.c index 72627e3cf91e1..2642e5fc8b69a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -94,8 +94,30 @@ static struct bio *blk_bio_segment_split(struct request_queue *q, bool do_split = true; struct bio *new = NULL; const unsigned max_sectors = get_max_io_size(q, bio); + unsigned bvecs = 0; bio_for_each_segment(bv, bio, iter) { + /* + * With arbitrary bio size, the incoming bio may be very + * big. We have to split the bio into small bios so that + * each holds at most BIO_MAX_PAGES bvecs because + * bio_clone() can fail to allocate big bvecs. + * + * It should have been better to apply the limit per + * request queue in which bio_clone() is involved, + * instead of globally. The biggest blocker is the + * bio_clone() in bio bounce. + * + * If bio is splitted by this reason, we should have + * allowed to continue bios merging, but don't do + * that now for making the change simple. + * + * TODO: deal with bio bounce's bio_clone() gracefully + * and convert the global limit into per-queue limit. + */ + if (bvecs++ >= BIO_MAX_PAGES) + goto split; + /* * If the queue doesn't support SG gaps and adding this * offset would create a gap, disallow it. From 5661538749511d4c2f7d33e1e179f10c545b24d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com> Date: Wed, 17 Aug 2016 13:44:20 +0200 Subject: [PATCH 450/513] drm/amdgpu: fix lru size grouping v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding a BO can make it the insertion point for larger sizes as well. v2: add a comment about the guard structure. Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++++++++ 2 files changed, 10 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8c704c86597b3..700c56baf2de7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -426,6 +426,8 @@ struct amdgpu_mman { /* custom LRU management */ struct amdgpu_mman_lru log2_size[AMDGPU_TTM_LRU_SIZE]; + /* guard for log2_size array, don't add anything in between */ + struct amdgpu_mman_lru guard; }; int amdgpu_copy_buffer(struct amdgpu_ring *ring, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 141bfcabd19c7..716f2afeb6a9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -950,6 +950,8 @@ static struct list_head *amdgpu_ttm_lru_tail(struct ttm_buffer_object *tbo) struct list_head *res = lru->lru[tbo->mem.mem_type]; lru->lru[tbo->mem.mem_type] = &tbo->lru; + while ((++lru)->lru[tbo->mem.mem_type] == res) + lru->lru[tbo->mem.mem_type] = &tbo->lru; return res; } @@ -960,6 +962,8 @@ static struct list_head *amdgpu_ttm_swap_lru_tail(struct ttm_buffer_object *tbo) struct list_head *res = lru->swap_lru; lru->swap_lru = &tbo->swap; + while ((++lru)->swap_lru == res) + lru->swap_lru = &tbo->swap; return res; } @@ -1011,6 +1015,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) lru->swap_lru = &adev->mman.bdev.glob->swap_lru; } + for (j = 0; j < TTM_NUM_MEM_TYPES; ++j) + adev->mman.guard.lru[j] = NULL; + adev->mman.guard.swap_lru = NULL; + adev->mman.initialized = true; r = ttm_bo_init_mm(&adev->mman.bdev, TTM_PL_VRAM, adev->mc.real_vram_size >> PAGE_SHIFT); From 5dfd5e5e3bc68ab3912acc712c8180942094fc69 Mon Sep 17 00:00:00 2001 From: Shiraz Saleem <shiraz.saleem@intel.com> Date: Mon, 22 Aug 2016 18:16:37 -0500 Subject: [PATCH 451/513] i40iw: Add missing NULL check for MPA private data Add NULL check for pdata and pdata->addr before the memcpy in i40iw_form_cm_frame(). This fixes a NULL pointer de-reference which occurs when the MPA private data pointer is NULL. Also only copy pdata->size bytes in the memcpy to prevent reading past the length of the private data buffer provided by upper layer. Fixes: f27b4746f378 ("i40iw: add connection management code") Reported-by: Stefan Assmann <sassmann@redhat.com> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 5026dc79978a7..64343985e0b7d 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -535,8 +535,8 @@ static struct i40iw_puda_buf *i40iw_form_cm_frame(struct i40iw_cm_node *cm_node, buf += hdr_len; } - if (pd_len) - memcpy(buf, pdata->addr, pd_len); + if (pdata && pdata->addr) + memcpy(buf, pdata->addr, pdata->size); atomic_set(&sqbuf->refcount, 1); From 7eaf8313b1cfe93417a22bdc3f7380cac2a3dc6d Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Mon, 22 Aug 2016 19:01:47 -0500 Subject: [PATCH 452/513] i40iw: Do not set self-referencing pointer to NULL after kfree In i40iw_free_virt_mem(), do not set mem->va to NULL after freeing it as mem->va is a self-referencing pointer to mem. Fixes: 4e9042e647ff ("i40iw: add hw and utils files") Reported-by: Stefan Assmann <sassmann@redhat.com> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index 0e8db0a351415..6fd043b1d7141 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -673,8 +673,11 @@ enum i40iw_status_code i40iw_free_virt_mem(struct i40iw_hw *hw, { if (!mem) return I40IW_ERR_PARAM; + /* + * mem->va points to the parent of mem, so both mem and mem->va + * can not be touched once mem->va is freed + */ kfree(mem->va); - mem->va = NULL; return 0; } From 82d200cc6fc817dbf049b67e5e3215eab427e846 Mon Sep 17 00:00:00 2001 From: Chris Wilson <chris@chris-wilson.co.uk> Date: Tue, 23 Aug 2016 21:16:26 +0100 Subject: [PATCH 453/513] IB/mlx5: Remove superfluous include of io-mapping.h This file does not use any structs or functions defined by io-mapping.h (nor does it directly use iomap, ioremap, iounamp or friends). Remove it to simplify verification of changes to io-mapping.h The include existed since its inception in commit e126ba97dba9edeb6fafa3665b5f8497fc9cdf8c Author: Eli Cohen <eli@mellanox.com> Date: Sun Jul 7 17:25:49 2013 +0300 mlx5: Add driver for Mellanox Connect-IB adapters which looks like a copy across from the Mellanox ethernet driver. Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk> Cc: Eli Cohen <eli@mellanox.com> Cc: Jack Morgenstein <jackm@dev.mellanox.co.il> Cc: Or Gerlitz <ogerlitz@mellanox.com> Cc: Matan Barak <matanb@mellanox.com> Cc: Leon Romanovsky <leonro@mellanox.com> Cc: Doug Ledford <dledford@redhat.com> Cc: Sean Hefty <sean.hefty@intel.com> Cc: Hal Rosenstock <hal.rosenstock@gmail.com> Cc: linux-rdma@vger.kernel.org Reviewed-by: Leon Romanovsky <leonro@mellanox.com> Reviewed-by: Laurence Oberman <loberman@redhat.com> Tested-by: Laurence Oberman <loberman@redhat.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/mlx5/main.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a84bb766fc628..1b4094baa2dee 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -37,7 +37,6 @@ #include <linux/pci.h> #include <linux/dma-mapping.h> #include <linux/slab.h> -#include <linux/io-mapping.h> #if defined(CONFIG_X86) #include <asm/pat.h> #endif From d41d0910d97f05be987d2d60de7e8685c108963b Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Tue, 23 Aug 2016 16:50:13 -0500 Subject: [PATCH 454/513] i40iw: Fix double free of allocated_buffer Memory allocated for iwqp; iwqp->allocated_buffer is freed twice in the create_qp error path. Correct this by having it freed only once in i40iw_free_qp_resources(). Fixes: d37498417947 ("i40iw: add files for iwarp interface") Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 2360338877bf6..722e5af1e3d85 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -794,7 +794,6 @@ static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd, return &iwqp->ibqp; error: i40iw_free_qp_resources(iwdev, iwqp, qp_num); - kfree(mem); return ERR_PTR(err_code); } From 433c58139f6a7d59824aadd23d6c9cac1d4e6100 Mon Sep 17 00:00:00 2001 From: Mustafa Ismail <mustafa.ismail@intel.com> Date: Tue, 23 Aug 2016 17:24:56 -0500 Subject: [PATCH 455/513] i40iw: Avoid writing to freed memory iwpbl->iwmr points to the structure that contains iwpbl, which is iwmr. Setting this to NULL would result in writing to freed memory. So just free iwmr, and return. Fixes: d37498417947 ("i40iw: add files for iwarp interface") Reported-by: Stefan Assmann <sassmann@redhat.com> Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 722e5af1e3d85..6329c971c22fc 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -1925,8 +1925,7 @@ static int i40iw_dereg_mr(struct ib_mr *ib_mr) } if (iwpbl->pbl_allocated) i40iw_free_pble(iwdev->pble_rsrc, palloc); - kfree(iwpbl->iwmr); - iwpbl->iwmr = NULL; + kfree(iwmr); return 0; } From 3c199b4523c92dc5df027eeeffa657e2ccf453ab Mon Sep 17 00:00:00 2001 From: Selvin Xavier <selvin.xavier@broadcom.com> Date: Wed, 24 Aug 2016 01:17:41 -0400 Subject: [PATCH 456/513] RDMA/ocrdma: Fix the max_sge reported from FW Current driver is reporting wrong values for max_sge and max_sge_rd in query_device. This breaks the nfs rdma and iser in some device profiles. Fixing the driver to report correct values from FW. Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com> Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 14 +++++++------- drivers/infiniband/hw/ocrdma/ocrdma_sli.h | 12 ++++++++---- drivers/infiniband/hw/ocrdma/ocrdma_verbs.c | 4 ++-- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 16740dcb876bd..67fc0b6857e18 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1156,18 +1156,18 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev, attr->max_srq = (rsp->max_srq_rpir_qps & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_OFFSET; - attr->max_send_sge = ((rsp->max_write_send_sge & + attr->max_send_sge = ((rsp->max_recv_send_sge & OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT); - attr->max_recv_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT; + attr->max_recv_sge = (rsp->max_recv_send_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT; attr->max_srq_sge = (rsp->max_srq_rqe_sge & OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET; - attr->max_rdma_sge = (rsp->max_write_send_sge & - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK) >> - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT; + attr->max_rdma_sge = (rsp->max_wr_rd_sge & + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK) >> + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT; attr->max_ord_per_qp = (rsp->max_ird_ord_per_qp & OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK) >> OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h index 0efc9662c6d87..37df4481bb8fe 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_sli.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_sli.h @@ -554,9 +554,9 @@ enum { OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK = 0x18, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK = 0xFFFF, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT = 16, - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_MASK = 0xFFFF << - OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT = 16, + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_MASK = 0xFFFF << + OCRDMA_MBX_QUERY_CFG_MAX_RECV_SGE_SHIFT, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_SHIFT = 0, OCRDMA_MBX_QUERY_CFG_MAX_ORD_PER_QP_MASK = 0xFFFF, @@ -612,6 +612,8 @@ enum { OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET = 0, OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_MASK = 0xFFFF << OCRDMA_MBX_QUERY_CFG_MAX_SRQ_SGE_OFFSET, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_SHIFT = 0, + OCRDMA_MBX_QUERY_CFG_MAX_RD_SGE_MASK = 0xFFFF, }; struct ocrdma_mbx_query_config { @@ -619,7 +621,7 @@ struct ocrdma_mbx_query_config { struct ocrdma_mbx_rsp rsp; u32 qp_srq_cq_ird_ord; u32 max_pd_ca_ack_delay; - u32 max_write_send_sge; + u32 max_recv_send_sge; u32 max_ird_ord_per_qp; u32 max_shared_ird_ord; u32 max_mr; @@ -639,6 +641,8 @@ struct ocrdma_mbx_query_config { u32 max_wqes_rqes_per_q; u32 max_cq_cqes_per_cq; u32 max_srq_rqe_sge; + u32 max_wr_rd_sge; + u32 ird_pgsz_num_pages; }; struct ocrdma_fw_ver_rsp { diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c index b1a3d91fe8b94..0aa854737e74e 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c @@ -125,8 +125,8 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_LOCAL_DMA_LKEY | IB_DEVICE_MEM_MGT_EXTENSIONS; - attr->max_sge = dev->attr.max_send_sge; - attr->max_sge_rd = attr->max_sge; + attr->max_sge = min(dev->attr.max_send_sge, dev->attr.max_recv_sge); + attr->max_sge_rd = dev->attr.max_rdma_sge; attr->max_cq = dev->attr.max_cq; attr->max_cqe = dev->attr.max_cqe; attr->max_mr = dev->attr.max_mr; From 9a035a40f7f3f6708b79224b86c5777a3334f7ea Mon Sep 17 00:00:00 2001 From: Jan Beulich <JBeulich@suse.com> Date: Mon, 15 Aug 2016 09:02:38 -0600 Subject: [PATCH 457/513] xenbus: don't look up transaction IDs for ordinary writes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should really only be done for XS_TRANSACTION_END messages, or else at least some of the xenstore-* tools don't work anymore. Fixes: 0beef634b8 ("xenbus: don't BUG() on user mode induced condition") Reported-by: Richard Schütz <rschuetz@uni-koblenz.de> Cc: <stable@vger.kernel.org> Signed-off-by: Jan Beulich <jbeulich@suse.com> Tested-by: Richard Schütz <rschuetz@uni-koblenz.de> Signed-off-by: David Vrabel <david.vrabel@citrix.com> --- drivers/xen/xenbus/xenbus_dev_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c index 7487971f9f788..c1010f018bd85 100644 --- a/drivers/xen/xenbus/xenbus_dev_frontend.c +++ b/drivers/xen/xenbus/xenbus_dev_frontend.c @@ -316,7 +316,7 @@ static int xenbus_write_transaction(unsigned msg_type, rc = -ENOMEM; goto out; } - } else { + } else if (msg_type == XS_TRANSACTION_END) { list_for_each_entry(trans, &u->transactions, list) if (trans->handle.id == u->u.msg.tx_id) break; From 55467dea2967259f21f4f854fc99d39cc5fea60e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov <vkuznets@redhat.com> Date: Fri, 29 Jul 2016 11:06:48 +0200 Subject: [PATCH 458/513] xen: change the type of xen_vcpu_id to uint32_t We pass xen_vcpu_id mapping information to hypercalls which require uint32_t type so it would be cleaner to have it as uint32_t. The initializer to -1 can be dropped as we always do the mapping before using it and we never check the 'not set' value anyway. Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com> Signed-off-by: David Vrabel <david.vrabel@citrix.com> --- arch/arm/xen/enlighten.c | 2 +- arch/x86/xen/enlighten.c | 2 +- include/xen/xen-ops.h | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index b0b82f5ea3382..3d2cef6488ea2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -50,7 +50,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); static struct vcpu_info __percpu *xen_vcpu_info; /* Linux <-> Xen vCPU id mapping */ -DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); /* These are unused until we support booting "pre-ballooned" */ diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 8ffb089b19a56..b86ebb1a9a7f4 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -118,7 +118,7 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ -DEFINE_PER_CPU(int, xen_vcpu_id) = -1; +DEFINE_PER_CPU(uint32_t, xen_vcpu_id); EXPORT_PER_CPU_SYMBOL(xen_vcpu_id); enum xen_domain_type xen_domain_type = XEN_NATIVE; diff --git a/include/xen/xen-ops.h b/include/xen/xen-ops.h index 9a37c541822f6..b5486e6486075 100644 --- a/include/xen/xen-ops.h +++ b/include/xen/xen-ops.h @@ -9,8 +9,8 @@ DECLARE_PER_CPU(struct vcpu_info *, xen_vcpu); -DECLARE_PER_CPU(int, xen_vcpu_id); -static inline int xen_vcpu_nr(int cpu) +DECLARE_PER_CPU(uint32_t, xen_vcpu_id); +static inline uint32_t xen_vcpu_nr(int cpu) { return per_cpu(xen_vcpu_id, cpu); } From e1718d97aa88ea44a6a8f50ff464253dd0dacf01 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Wed, 24 Aug 2016 12:31:36 -0400 Subject: [PATCH 459/513] drm/amdgpu: avoid a possible array overflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When looking up the connector type make sure the index is valid. Avoids a later crash if we read past the end of the array. Workaround for bug: https://bugs.freedesktop.org/show_bug.cgi?id=97460 Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 983175363b068..151422307d317 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -321,6 +321,12 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * (le16_to_cpu(path->usConnObjectId) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; + if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) { + DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n", + con_obj_id, le16_to_cpu(path->usDeviceTag)); + continue; + } + connector_type = object_connector_convert[con_obj_id]; connector_object_id = con_obj_id; From 611a1507fe8569ce1adab3abc982ea58ab559fb9 Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@amd.com> Date: Wed, 24 Aug 2016 13:04:15 -0400 Subject: [PATCH 460/513] drm/amdgpu: skip TV/CV in display parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No asics supported by amdgpu support analog TV. Workaround for bug: https://bugs.freedesktop.org/show_bug.cgi?id=97460 Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 151422307d317..fe872b82e6191 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -321,6 +321,13 @@ bool amdgpu_atombios_get_connector_info_from_object_table(struct amdgpu_device * (le16_to_cpu(path->usConnObjectId) & OBJECT_TYPE_MASK) >> OBJECT_TYPE_SHIFT; + /* Skip TV/CV support */ + if ((le16_to_cpu(path->usDeviceTag) == + ATOM_DEVICE_TV1_SUPPORT) || + (le16_to_cpu(path->usDeviceTag) == + ATOM_DEVICE_CV_SUPPORT)) + continue; + if (con_obj_id >= ARRAY_SIZE(object_connector_convert)) { DRM_ERROR("invalid con_obj_id %d for device tag 0x%04x\n", con_obj_id, le16_to_cpu(path->usDeviceTag)); From 716b076ba4b273f5f85c97448c5110c6d21e73e6 Mon Sep 17 00:00:00 2001 From: Doug Ledford <dledford@redhat.com> Date: Wed, 24 Aug 2016 12:14:19 -0400 Subject: [PATCH 461/513] IB/srpt: Update sport->port_guid with each port refresh If port_guid is set with the default subnet_prefix, then we get a change event and run a port refresh, we don't update the port_guid. As a result, attempts to create a target device that uses the new subnet_prefix in the wwn will fail to find a match and be rejected by the ib_srpt driver. This makes it impossible to configure a port if it was initialized with a default subnet_prefix and later changed to any non-default subnet-prefix. Updating the port refresh task to always update the wwn based upon the current subnext_prefix solves this problem. Cc: Bart Van Assche <bart.vanassche@sandisk.com> Cc: nab@linux-iscsi.org Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/ulp/srpt/ib_srpt.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dfa23b075a884..883bbfe08e0ef 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -522,6 +522,11 @@ static int srpt_refresh_port(struct srpt_port *sport) if (ret) goto err_query_port; + snprintf(sport->port_guid, sizeof(sport->port_guid), + "0x%016llx%016llx", + be64_to_cpu(sport->gid.global.subnet_prefix), + be64_to_cpu(sport->gid.global.interface_id)); + if (!sport->mad_agent) { memset(®_req, 0, sizeof(reg_req)); reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT; @@ -2548,10 +2553,6 @@ static void srpt_add_one(struct ib_device *device) sdev->device->name, i); goto err_ring; } - snprintf(sport->port_guid, sizeof(sport->port_guid), - "0x%016llx%016llx", - be64_to_cpu(sport->gid.global.subnet_prefix), - be64_to_cpu(sport->gid.global.interface_id)); } spin_lock(&srpt_dev_lock); From e57690fe009b2ab0cee8a57f53be634540e49c9d Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@fb.com> Date: Wed, 24 Aug 2016 15:34:35 -0600 Subject: [PATCH 462/513] blk-mq: don't overwrite rq->mq_ctx We do this in a few places, if the CPU is offline. This isn't allowed, though, since on multi queue hardware, we can't just move a request from one software queue to another, if they map to different hardware queues. The request and tag isn't valid on another hardware queue. This can happen if plugging races with CPU offlining. But it does no harm, since it can only happen in the window where we are currently busy freezing the queue and flushing IO, in preparation for redoing the software <-> hardware queue mappings. Signed-off-by: Jens Axboe <axboe@fb.com> --- block/blk-mq.c | 55 +++++++++++++++----------------------------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e931a0e8e73df..729169d022fc5 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1036,10 +1036,11 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) EXPORT_SYMBOL(blk_mq_delay_queue); static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx, - struct blk_mq_ctx *ctx, struct request *rq, bool at_head) { + struct blk_mq_ctx *ctx = rq->mq_ctx; + trace_block_rq_insert(hctx->queue, rq); if (at_head) @@ -1053,20 +1054,16 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, { struct blk_mq_ctx *ctx = rq->mq_ctx; - __blk_mq_insert_req_list(hctx, ctx, rq, at_head); + __blk_mq_insert_req_list(hctx, rq, at_head); blk_mq_hctx_mark_pending(hctx, ctx); } void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, - bool async) + bool async) { + struct blk_mq_ctx *ctx = rq->mq_ctx; struct request_queue *q = rq->q; struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx; - - current_ctx = blk_mq_get_ctx(q); - if (!cpu_online(ctx->cpu)) - rq->mq_ctx = ctx = current_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); @@ -1076,8 +1073,6 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, if (run_queue) blk_mq_run_hw_queue(hctx, async); - - blk_mq_put_ctx(current_ctx); } static void blk_mq_insert_requests(struct request_queue *q, @@ -1088,14 +1083,9 @@ static void blk_mq_insert_requests(struct request_queue *q, { struct blk_mq_hw_ctx *hctx; - struct blk_mq_ctx *current_ctx; trace_block_unplug(q, depth, !from_schedule); - current_ctx = blk_mq_get_ctx(q); - - if (!cpu_online(ctx->cpu)) - ctx = current_ctx; hctx = q->mq_ops->map_queue(q, ctx->cpu); /* @@ -1107,15 +1097,14 @@ static void blk_mq_insert_requests(struct request_queue *q, struct request *rq; rq = list_first_entry(list, struct request, queuelist); + BUG_ON(rq->mq_ctx != ctx); list_del_init(&rq->queuelist); - rq->mq_ctx = ctx; - __blk_mq_insert_req_list(hctx, ctx, rq, false); + __blk_mq_insert_req_list(hctx, rq, false); } blk_mq_hctx_mark_pending(hctx, ctx); spin_unlock(&ctx->lock); blk_mq_run_hw_queue(hctx, from_schedule); - blk_mq_put_ctx(current_ctx); } static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) @@ -1630,16 +1619,17 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) return 0; } +/* + * 'cpu' is going away. splice any existing rq_list entries from this + * software queue to the hw queue dispatch list, and ensure that it + * gets run. + */ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) { - struct request_queue *q = hctx->queue; struct blk_mq_ctx *ctx; LIST_HEAD(tmp); - /* - * Move ctx entries to new CPU, if this one is going away. - */ - ctx = __blk_mq_get_ctx(q, cpu); + ctx = __blk_mq_get_ctx(hctx->queue, cpu); spin_lock(&ctx->lock); if (!list_empty(&ctx->rq_list)) { @@ -1651,24 +1641,11 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) if (list_empty(&tmp)) return NOTIFY_OK; - ctx = blk_mq_get_ctx(q); - spin_lock(&ctx->lock); - - while (!list_empty(&tmp)) { - struct request *rq; - - rq = list_first_entry(&tmp, struct request, queuelist); - rq->mq_ctx = ctx; - list_move_tail(&rq->queuelist, &ctx->rq_list); - } - - hctx = q->mq_ops->map_queue(q, ctx->cpu); - blk_mq_hctx_mark_pending(hctx, ctx); - - spin_unlock(&ctx->lock); + spin_lock(&hctx->lock); + list_splice_tail_init(&tmp, &hctx->dispatch); + spin_unlock(&hctx->lock); blk_mq_run_hw_queue(hctx, true); - blk_mq_put_ctx(ctx); return NOTIFY_OK; } From 0e87e58bf60edb6bb28e493c7a143f41b091a5e5 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@fb.com> Date: Wed, 24 Aug 2016 15:38:01 -0600 Subject: [PATCH 463/513] blk-mq: improve warning for running a queue on the wrong CPU __blk_mq_run_hw_queue() currently warns if we are running the queue on a CPU that isn't set in its mask. However, this can happen if a CPU is being offlined, and the workqueue handling will place the work on CPU0 instead. Improve the warning so that it only triggers if the batch cpu in the hardware queue is currently online. If it triggers for that case, then it's indicative of a flow problem in blk-mq, so we want to retain it for that case. Signed-off-by: Jens Axboe <axboe@fb.com> --- block/blk-mq.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 729169d022fc5..13f5a6c1de768 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -793,11 +793,12 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) struct list_head *dptr; int queued; - WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)); - if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) return; + WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) && + cpu_online(hctx->next_cpu)); + hctx->run++; /* From 299f6230bc6d0ccd5f95bb0fb865d80a9c7d5ccc Mon Sep 17 00:00:00 2001 From: Mike Snitzer <snitzer@redhat.com> Date: Wed, 24 Aug 2016 21:12:58 -0400 Subject: [PATCH 464/513] dm flakey: fix reads to be issued if drop_writes configured v4.8-rc3 commit 99f3c90d0d ("dm flakey: error READ bios during the down_interval") overlooked the 'drop_writes' feature, which is meant to allow reads to be issued rather than errored, during the down_interval. Fixes: 99f3c90d0d ("dm flakey: error READ bios during the down_interval") Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> Cc: stable@vger.kernel.org --- drivers/md/dm-flakey.c | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 97e446d54a151..6a2e8dd44a1b8 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -289,15 +289,13 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) pb->bio_submitted = true; /* - * Map reads as normal only if corrupt_bio_byte set. + * Error reads if neither corrupt_bio_byte or drop_writes are set. + * Otherwise, flakey_end_io() will decide if the reads should be modified. */ if (bio_data_dir(bio) == READ) { - /* If flags were specified, only corrupt those that match. */ - if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && - all_corrupt_bio_flags_match(bio, fc)) - goto map_bio; - else + if (!fc->corrupt_bio_byte && !test_bit(DROP_WRITES, &fc->flags)) return -EIO; + goto map_bio; } /* @@ -334,14 +332,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, int error) struct flakey_c *fc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); - /* - * Corrupt successful READs while in down state. - */ if (!error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { - if (fc->corrupt_bio_byte) + if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && + all_corrupt_bio_flags_match(bio, fc)) { + /* + * Corrupt successful matching READs while in down state. + */ corrupt_bio_data(bio, fc); - else + + } else if (!test_bit(DROP_WRITES, &fc->flags)) { + /* + * Error read during the down_interval if drop_writes + * wasn't configured. + */ return -EIO; + } } return error; From 9c5a559d9495bba6e5b6c5ee4e8e2f2b71088684 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen <heinzm@redhat.com> Date: Tue, 23 Aug 2016 21:17:48 +0200 Subject: [PATCH 465/513] dm log: fix unitialized bio operation flags Commit e6047149db ("dm: use bio op accessors") switched DM over to using bio_set_op_attrs() but didn't take care to initialize lc->io_req.bi_op_flags in dm-log.c:rw_header(). This caused rw_header()'s call to dm_io() to make bio->bi_op_flags be uninitialized in dm-io.c:do_region(), which ultimately resulted in a SCSI BUG() in sd_init_command(). Also, adjust rw_header() and its callers to use REQ_OP_{READ|WRITE}. Fixes: e6047149db ("dm: use bio op accessors") Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com> Reviewed-by: Shaun Tancheff <shaun.tancheff@seagate.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com> --- drivers/md/dm-log.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c index 4ca2d1df5b44c..07fc1ad42ec57 100644 --- a/drivers/md/dm-log.c +++ b/drivers/md/dm-log.c @@ -291,9 +291,10 @@ static void header_from_disk(struct log_header_core *core, struct log_header_dis core->nr_regions = le64_to_cpu(disk->nr_regions); } -static int rw_header(struct log_c *lc, int rw) +static int rw_header(struct log_c *lc, int op) { - lc->io_req.bi_op = rw; + lc->io_req.bi_op = op; + lc->io_req.bi_op_flags = 0; return dm_io(&lc->io_req, 1, &lc->header_location, NULL); } @@ -316,7 +317,7 @@ static int read_header(struct log_c *log) { int r; - r = rw_header(log, READ); + r = rw_header(log, REQ_OP_READ); if (r) return r; @@ -630,7 +631,7 @@ static int disk_resume(struct dm_dirty_log *log) header_to_disk(&lc->header, lc->disk_header); /* write the new header */ - r = rw_header(lc, WRITE); + r = rw_header(lc, REQ_OP_WRITE); if (!r) { r = flush_header(lc); if (r) @@ -698,7 +699,7 @@ static int disk_flush(struct dm_dirty_log *log) log_clear_bit(lc, lc->clean_bits, i); } - r = rw_header(lc, WRITE); + r = rw_header(lc, REQ_OP_WRITE); if (r) fail_log_device(lc); else { From fd363bd417ddb6103564c69cfcbd92d9a7877431 Mon Sep 17 00:00:00 2001 From: Mark Rutland <mark.rutland@arm.com> Date: Wed, 24 Aug 2016 18:02:08 +0100 Subject: [PATCH 466/513] arm64: avoid TLB conflict with CONFIG_RANDOMIZE_BASE When CONFIG_RANDOMIZE_BASE is selected, we modify the page tables to remap the kernel at a newly-chosen VA range. We do this with the MMU disabled, but do not invalidate TLBs prior to re-enabling the MMU with the new tables. Thus the old mappings entries may still live in TLBs, and we risk violating Break-Before-Make requirements, leading to TLB conflicts and/or other issues. We invalidate TLBs when we uninsall the idmap in early setup code, but prior to this we are subject to issues relating to the Break-Before-Make violation. Avoid these issues by invalidating the TLBs before the new mappings can be used by the hardware. Fixes: f80fb3a3d508 ("arm64: add support for kernel ASLR") Cc: <stable@vger.kernel.org> # 4.6+ Acked-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> Acked-by: Will Deacon <will.deacon@arm.com> Signed-off-by: Mark Rutland <mark.rutland@arm.com> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com> --- arch/arm64/kernel/head.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index b77f58355da11..3e7b050e99dce 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -757,6 +757,9 @@ ENTRY(__enable_mmu) isb bl __create_page_tables // recreate kernel mapping + tlbi vmalle1 // Remove any stale TLB entries + dsb nsh + msr sctlr_el1, x19 // re-enable the MMU isb ic iallu // flush instructions fetched From 1c1ea4f781db9f754842b9c31d1eff400d17cddc Mon Sep 17 00:00:00 2001 From: Liu Bo <bo.li.liu@oracle.com> Date: Tue, 19 Jul 2016 15:36:05 -0700 Subject: [PATCH 467/513] Btrfs: fix memory leak of reloc_root When some critical errors occur and FS would be flipped into RO, if we have an on-going balance, we can end up with a memory leak of root->reloc_root since btrfs_drop_snapshots() bails out without freeing reloc_root at the very early start. However, we're not able to free reloc_root in btrfs_drop_snapshots() because its caller, merge_reloc_roots(), still needs to access it to cleanup reloc_root's rbtree. This makes us free reloc_root when we're going to free fs/file roots. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/disk-io.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index ff2362dca91aa..4a34c9fcc1a7d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3744,8 +3744,15 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, if (btrfs_root_refs(&root->root_item) == 0) synchronize_srcu(&fs_info->subvol_srcu); - if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { btrfs_free_log(NULL, root); + if (root->reloc_root) { + free_extent_buffer(root->reloc_root->node); + free_extent_buffer(root->reloc_root->commit_root); + btrfs_put_fs_root(root->reloc_root); + root->reloc_root = NULL; + } + } if (root->free_ino_pinned) __btrfs_remove_free_space_cache(root->free_ino_pinned); From d8422ba334f9df16e071bc77707e55fd7f8446ae Mon Sep 17 00:00:00 2001 From: Qu Wenruo <quwenruo@cn.fujitsu.com> Date: Wed, 20 Jul 2016 15:04:18 +0800 Subject: [PATCH 468/513] btrfs: backref: Fix soft lockup in __merge_refs function When over 1000 file extents refers to one extent, find_parent_nodes() will be obviously slow, due to the O(n^2)~O(n^3) loops inside __merge_refs(). The following ftrace shows the cubic growth of execution time: 256 refs 5) + 91.768 us | __add_keyed_refs.isra.12 [btrfs](); 5) 1.447 us | __add_missing_keys.isra.13 [btrfs](); 5) ! 114.544 us | __merge_refs [btrfs](); 5) ! 136.399 us | __merge_refs [btrfs](); 512 refs 6) ! 279.859 us | __add_keyed_refs.isra.12 [btrfs](); 6) 3.164 us | __add_missing_keys.isra.13 [btrfs](); 6) ! 442.498 us | __merge_refs [btrfs](); 6) # 2091.073 us | __merge_refs [btrfs](); and 1024 refs 7) ! 368.683 us | __add_keyed_refs.isra.12 [btrfs](); 7) 4.810 us | __add_missing_keys.isra.13 [btrfs](); 7) # 2043.428 us | __merge_refs [btrfs](); 7) * 18964.23 us | __merge_refs [btrfs](); And sort them into the following char: (Unit: us) ------------------------------------------------------------------------ Trace function | 256 ref | 512 refs | 1024 refs | ------------------------------------------------------------------------ __add_keyed_refs | 91 | 249 | 368 | __add_missing_keys | 1 | 3 | 4 | __merge_refs 1st call | 114 | 442 | 2043 | __merge_refs 2nd call | 136 | 2091 | 18964 | ------------------------------------------------------------------------ We can see the that __add_keyed_refs() grows almost in linear behavior. And __add_missing_keys() in this case doesn't change much or takes much time. While for the 1st __merge_refs() it's square growth for the 2nd __merge_refs() call it's cubic growth. It's no doubt that merge_refs() will take a long long time to execute if the number of refs continues its grows. So add a cond_resced() into the loop of __merge_refs(). Although this will solve the problem of soft lockup, we need to use the new rb_tree based structure introduced by Lu Fengqi to really solve the long execution time. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/backref.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 2b88439c2ee86..455a6b2fd5395 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -589,6 +589,7 @@ static void __merge_refs(struct list_head *head, int mode) list_del(&ref2->list); kmem_cache_free(btrfs_prelim_ref_cache, ref2); + cond_resched(); } } From f3bca8028bd934e96257b8bd1143e6474fe98465 Mon Sep 17 00:00:00 2001 From: Liu Bo <bo.li.liu@oracle.com> Date: Wed, 20 Jul 2016 17:33:44 -0700 Subject: [PATCH 469/513] Btrfs: add ASSERT for block group's memory leak This adds several ASSERT()' s to report memory leak of block group cache. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/extent-tree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c2b81b0d3fe08..e962b0e253248 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9942,6 +9942,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info) block_group->iref = 0; block_group->inode = NULL; spin_unlock(&block_group->lock); + ASSERT(block_group->io_ctl.inode == NULL); iput(inode); last = block_group->key.objectid + block_group->key.offset; btrfs_put_block_group(block_group); @@ -9999,6 +10000,10 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) free_excluded_extents(info->extent_root, block_group); btrfs_remove_free_space_cache(block_group); + ASSERT(list_empty(&block_group->dirty_list)); + ASSERT(list_empty(&block_group->io_list)); + ASSERT(list_empty(&block_group->bg_list)); + ASSERT(atomic_read(&block_group->count) == 1); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); From eecba891d38051ebf7f4af6394d188a5fd151a6a Mon Sep 17 00:00:00 2001 From: Alex Lyakas <alex@zadarastorage.com> Date: Sun, 6 Dec 2015 12:32:31 +0200 Subject: [PATCH 470/513] btrfs: flush_space: treat return value of do_chunk_alloc properly do_chunk_alloc returns 1 when it succeeds to allocate a new chunk. But flush_space will not convert this to 0, and will also return 1. As a result, reserve_metadata_bytes will think that flush_space failed, and may potentially return this value "1" to the caller (depends how reserve_metadata_bytes was called). The caller will also treat this as an error. For example, btrfs_block_rsv_refill does: int ret = -ENOSPC; ... ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); if (!ret) { block_rsv_add_bytes(block_rsv, num_bytes, 0); return 0; } return ret; So it will return -ENOSPC. Signed-off-by: Alex Lyakas <alex@zadarastorage.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Reviewed-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e962b0e253248..d2cc16ff3e21d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4882,7 +4882,7 @@ static int flush_space(struct btrfs_root *root, btrfs_get_alloc_profile(root, 0), CHUNK_ALLOC_NO_FORCE); btrfs_end_transaction(trans, root); - if (ret == -ENOSPC) + if (ret > 0 || ret == -ENOSPC) ret = 0; break; case COMMIT_TRANS: From d2c609b834d62f1e91f1635a27dca29f7806d3d6 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney <jeffm@suse.com> Date: Mon, 15 Aug 2016 12:10:33 -0400 Subject: [PATCH 471/513] btrfs: properly track when rescan worker is running The qgroup_flags field is overloaded such that it reflects the on-disk status of qgroups and the runtime state. The BTRFS_QGROUP_STATUS_FLAG_RESCAN flag is used to indicate that a rescan operation is in progress, but if the file system is unmounted while a rescan is running, the rescan operation is paused. If the file system is then mounted read-only, the flag will still be present but the rescan operation will not have been resumed. When we go to umount, btrfs_qgroup_wait_for_completion will see the flag and interpret it to mean that the rescan worker is still running and will wait for a completion that will never come. This patch uses a separate flag to indicate when the worker is running. The locking and state surrounding the qgroup rescan worker needs a lot of attention beyond this patch but this is enough to avoid a hung umount. Cc: <stable@vger.kernel.org> # v4.4+ Signed-off-by; Jeff Mahoney <jeffm@suse.com> Reviewed-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/qgroup.c | 9 ++++++++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f66a0ba9a2a97..acc6850a118f0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1028,6 +1028,7 @@ struct btrfs_fs_info { struct btrfs_workqueue *qgroup_rescan_workers; struct completion qgroup_rescan_completion; struct btrfs_work qgroup_rescan_work; + bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */ /* filesystem state */ unsigned long fs_state; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 4a34c9fcc1a7d..f5b2a7fb45750 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2304,6 +2304,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info) fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; fs_info->qgroup_ulist = NULL; + fs_info->qgroup_rescan_running = false; mutex_init(&fs_info->qgroup_rescan_lock); } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93ee1c18ef9d4..3fe295e2c84a1 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2303,6 +2303,10 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) int err = -ENOMEM; int ret = 0; + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = true; + mutex_unlock(&fs_info->qgroup_rescan_lock); + path = btrfs_alloc_path(); if (!path) goto out; @@ -2369,6 +2373,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work) } done: + mutex_lock(&fs_info->qgroup_rescan_lock); + fs_info->qgroup_rescan_running = false; + mutex_unlock(&fs_info->qgroup_rescan_lock); complete_all(&fs_info->qgroup_rescan_completion); } @@ -2494,7 +2501,7 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) mutex_lock(&fs_info->qgroup_rescan_lock); spin_lock(&fs_info->qgroup_lock); - running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN; + running = fs_info->qgroup_rescan_running; spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); From d06f23d6a947c9abae41dc46be69a56baf36f436 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney <jeffm@suse.com> Date: Mon, 8 Aug 2016 22:08:06 -0400 Subject: [PATCH 472/513] btrfs: waiting on qgroup rescan should not always be interruptible We wait on qgroup rescan completion in three places: file system shutdown, the quota disable ioctl, and the rescan wait ioctl. If the user sends a signal while we're waiting, we continue happily along. This is expected behavior for the rescan wait ioctl. It's racy in the shutdown path but mostly works due to other unrelated synchronization points. In the quota disable path, it Oopses the kernel pretty much immediately. Cc: <stable@vger.kernel.org> # v4.4+ Signed-off-by: Jeff Mahoney <jeffm@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/disk-io.c | 2 +- fs/btrfs/ioctl.c | 2 +- fs/btrfs/qgroup.c | 12 +++++++++--- fs/btrfs/qgroup.h | 3 ++- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index f5b2a7fb45750..7857f64e1cae2 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3864,7 +3864,7 @@ void close_ctree(struct btrfs_root *root) smp_mb(); /* wait for the qgroup rescan worker to stop */ - btrfs_qgroup_wait_for_completion(fs_info); + btrfs_qgroup_wait_for_completion(fs_info, false); /* wait for the uuid_scan task to finish */ down(&fs_info->uuid_tree_rescan_sem); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 14ed1e9e6bc83..b2a2da5893af5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -5084,7 +5084,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - return btrfs_qgroup_wait_for_completion(root->fs_info); + return btrfs_qgroup_wait_for_completion(root->fs_info, true); } static long _btrfs_ioctl_set_received_subvol(struct file *file, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 3fe295e2c84a1..13eb6a7a4db1d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, goto out; fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; - btrfs_qgroup_wait_for_completion(fs_info); + btrfs_qgroup_wait_for_completion(fs_info, false); spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; @@ -2494,7 +2494,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) return 0; } -int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible) { int running; int ret = 0; @@ -2505,9 +2506,14 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->qgroup_lock); mutex_unlock(&fs_info->qgroup_rescan_lock); - if (running) + if (!running) + return 0; + + if (interruptible) ret = wait_for_completion_interruptible( &fs_info->qgroup_rescan_completion); + else + wait_for_completion(&fs_info->qgroup_rescan_completion); return ret; } diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 710887c06aaf4..af3e5578cad75 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info); void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info); -int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info); +int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info, + bool interruptible); int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 src, u64 dst); int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, From cb93b52cc005ba0e470845b519c662e661d5113c Mon Sep 17 00:00:00 2001 From: Qu Wenruo <quwenruo@cn.fujitsu.com> Date: Mon, 15 Aug 2016 10:36:50 +0800 Subject: [PATCH 473/513] btrfs: qgroup: Refactor btrfs_qgroup_insert_dirty_extent() Refactor btrfs_qgroup_insert_dirty_extent() function, to two functions: 1. btrfs_qgroup_insert_dirty_extent_nolock() Almost the same with original code. For delayed_ref usage, which has delayed refs locked. Change the return value type to int, since caller never needs the pointer, but only needs to know if they need to free the allocated memory. 2. btrfs_qgroup_insert_dirty_extent() The more encapsulated version. Will do the delayed_refs lock, memory allocation, quota enabled check and other things. The original design is to keep exported functions to minimal, but since more btrfs hacks exposed, like replacing path in balance, we need to record dirty extents manually, so we have to add such functions. Also, add comment for both functions, to info developers how to keep qgroup correct when doing hacks. Cc: Mark Fasheh <mfasheh@suse.de> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Reviewed-and-Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/delayed-ref.c | 7 ++----- fs/btrfs/extent-tree.c | 37 +++++-------------------------------- fs/btrfs/qgroup.c | 41 +++++++++++++++++++++++++++++++++++------ fs/btrfs/qgroup.h | 33 +++++++++++++++++++++++++++++---- 4 files changed, 71 insertions(+), 47 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index d9ddcfc18c91f..ac02e041464bc 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -541,7 +541,6 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_head *head_ref = NULL; struct btrfs_delayed_ref_root *delayed_refs; - struct btrfs_qgroup_extent_record *qexisting; int count_mod = 1; int must_insert_reserved = 0; @@ -606,10 +605,8 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, qrecord->num_bytes = num_bytes; qrecord->old_roots = NULL; - qexisting = btrfs_qgroup_insert_dirty_extent(fs_info, - delayed_refs, - qrecord); - if (qexisting) + if(btrfs_qgroup_insert_dirty_extent_nolock(fs_info, + delayed_refs, qrecord)) kfree(qrecord); } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d2cc16ff3e21d..d10872748c43e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8521,35 +8521,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans, wc->reada_slot = slot; } -/* - * These may not be seen by the usual inc/dec ref code so we have to - * add them here. - */ -static int record_one_subtree_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 bytenr, - u64 num_bytes) -{ - struct btrfs_qgroup_extent_record *qrecord; - struct btrfs_delayed_ref_root *delayed_refs; - - qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); - if (!qrecord) - return -ENOMEM; - - qrecord->bytenr = bytenr; - qrecord->num_bytes = num_bytes; - qrecord->old_roots = NULL; - - delayed_refs = &trans->transaction->delayed_refs; - spin_lock(&delayed_refs->lock); - if (btrfs_qgroup_insert_dirty_extent(trans->fs_info, - delayed_refs, qrecord)) - kfree(qrecord); - spin_unlock(&delayed_refs->lock); - - return 0; -} - static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) @@ -8583,7 +8554,8 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); - ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + bytenr, num_bytes, GFP_NOFS); if (ret) return ret; } @@ -8732,8 +8704,9 @@ static int account_shared_subtree(struct btrfs_trans_handle *trans, btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; - ret = record_one_subtree_extent(trans, root, child_bytenr, - root->nodesize); + ret = btrfs_qgroup_insert_dirty_extent(trans, + root->fs_info, child_bytenr, + root->nodesize, GFP_NOFS); if (ret) goto out; } diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 13eb6a7a4db1d..8db2e29fdcf41 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1453,10 +1453,9 @@ int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, return ret; } -struct btrfs_qgroup_extent_record * -btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_qgroup_extent_record *record) +int btrfs_qgroup_insert_dirty_extent_nolock(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record) { struct rb_node **p = &delayed_refs->dirty_extent_root.rb_node; struct rb_node *parent_node = NULL; @@ -1475,12 +1474,42 @@ btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, else if (bytenr > entry->bytenr) p = &(*p)->rb_right; else - return entry; + return 1; } rb_link_node(&record->node, parent_node, p); rb_insert_color(&record->node, &delayed_refs->dirty_extent_root); - return NULL; + return 0; +} + +int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, + gfp_t gfp_flag) +{ + struct btrfs_qgroup_extent_record *record; + struct btrfs_delayed_ref_root *delayed_refs; + int ret; + + if (!fs_info->quota_enabled || bytenr == 0 || num_bytes == 0) + return 0; + if (WARN_ON(trans == NULL)) + return -EINVAL; + record = kmalloc(sizeof(*record), gfp_flag); + if (!record) + return -ENOMEM; + + delayed_refs = &trans->transaction->delayed_refs; + record->bytenr = bytenr; + record->num_bytes = num_bytes; + record->old_roots = NULL; + + spin_lock(&delayed_refs->lock); + ret = btrfs_qgroup_insert_dirty_extent_nolock(fs_info, delayed_refs, + record); + spin_unlock(&delayed_refs->lock); + if (ret > 0) + kfree(record); + return 0; } #define UPDATE_NEW 0 diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index af3e5578cad75..1bc64c864b626 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -64,10 +64,35 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info); struct btrfs_delayed_extent_op; int btrfs_qgroup_prepare_account_extents(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info); -struct btrfs_qgroup_extent_record * -btrfs_qgroup_insert_dirty_extent(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_qgroup_extent_record *record); +/* + * Insert one dirty extent record into @delayed_refs, informing qgroup to + * account that extent at commit trans time. + * + * No lock version, caller must acquire delayed ref lock and allocate memory. + * + * Return 0 for success insert + * Return >0 for existing record, caller can free @record safely. + * Error is not possible + */ +int btrfs_qgroup_insert_dirty_extent_nolock( + struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_qgroup_extent_record *record); + +/* + * Insert one dirty extent record into @delayed_refs, informing qgroup to + * account that extent at commit trans time. + * + * Better encapsulated version. + * + * Return 0 if the operation is done. + * Return <0 for error, like memory allocation failure or invalid parameter + * (NULL trans) + */ +int btrfs_qgroup_insert_dirty_extent(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, u64 num_bytes, + gfp_t gfp_flag); + int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, From 62b99540a1d91e46422f0e04de50fc723812c421 Mon Sep 17 00:00:00 2001 From: Qu Wenruo <quwenruo@cn.fujitsu.com> Date: Mon, 15 Aug 2016 10:36:51 +0800 Subject: [PATCH 474/513] btrfs: relocation: Fix leaking qgroups numbers on data extents This patch fixes a REGRESSION introduced in 4.2, caused by the big quota rework. When balancing data extents, qgroup will leak all its numbers for relocated data extents. The relocation is done in the following steps for data extents: 1) Create data reloc tree and inode 2) Copy all data extents to data reloc tree And commit transaction 3) Create tree reloc tree(special snapshot) for any related subvolumes 4) Replace file extent in tree reloc tree with new extents in data reloc tree And commit transaction 5) Merge tree reloc tree with original fs, by swapping tree blocks For 1)~4), since tree reloc tree and data reloc tree doesn't count to qgroup, everything is OK. But for 5), the swapping of tree blocks will only info qgroup to track metadata extents. If metadata extents contain file extents, qgroup number for file extents will get lost, leading to corrupted qgroup accounting. The fix is, before commit transaction of step 5), manually info qgroup to track all file extents in data reloc tree. Since at commit transaction time, the tree swapping is done, and qgroup will account these data extents correctly. Cc: Mark Fasheh <mfasheh@suse.de> Reported-by: Mark Fasheh <mfasheh@suse.de> Reported-by: Filipe Manana <fdmanana@gmail.com> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/relocation.c | 109 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 103 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index b26a5aea41b4a..27480ef9813cd 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -31,6 +31,7 @@ #include "async-thread.h" #include "free-space-cache.h" #include "inode-map.h" +#include "qgroup.h" /* * backref_node, mapping_node and tree_block start with this @@ -3916,6 +3917,90 @@ int prepare_to_relocate(struct reloc_control *rc) return 0; } +/* + * Qgroup fixer for data chunk relocation. + * The data relocation is done in the following steps + * 1) Copy data extents into data reloc tree + * 2) Create tree reloc tree(special snapshot) for related subvolumes + * 3) Modify file extents in tree reloc tree + * 4) Merge tree reloc tree with original fs tree, by swapping tree blocks + * + * The problem is, data and tree reloc tree are not accounted to qgroup, + * and 4) will only info qgroup to track tree blocks change, not file extents + * in the tree blocks. + * + * The good news is, related data extents are all in data reloc tree, so we + * only need to info qgroup to track all file extents in data reloc tree + * before commit trans. + */ +static int qgroup_fix_relocated_data_extents(struct btrfs_trans_handle *trans, + struct reloc_control *rc) +{ + struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; + struct inode *inode = rc->data_inode; + struct btrfs_root *data_reloc_root = BTRFS_I(inode)->root; + struct btrfs_path *path; + struct btrfs_key key; + int ret = 0; + + if (!fs_info->quota_enabled) + return 0; + + /* + * Only for stage where we update data pointers the qgroup fix is + * valid. + * For MOVING_DATA stage, we will miss the timing of swapping tree + * blocks, and won't fix it. + */ + if (!(rc->stage == UPDATE_DATA_PTRS && rc->extents_found)) + return 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + key.objectid = btrfs_ino(inode); + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, data_reloc_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + lock_extent(&BTRFS_I(inode)->io_tree, 0, (u64)-1); + while (1) { + struct btrfs_file_extent_item *fi; + + btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); + if (key.objectid > btrfs_ino(inode)) + break; + if (key.type != BTRFS_EXTENT_DATA_KEY) + goto next; + fi = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(path->nodes[0], fi) != + BTRFS_FILE_EXTENT_REG) + goto next; + ret = btrfs_qgroup_insert_dirty_extent(trans, fs_info, + btrfs_file_extent_disk_bytenr(path->nodes[0], fi), + btrfs_file_extent_disk_num_bytes(path->nodes[0], fi), + GFP_NOFS); + if (ret < 0) + break; +next: + ret = btrfs_next_item(data_reloc_root, path); + if (ret < 0) + break; + if (ret > 0) { + ret = 0; + break; + } + } + unlock_extent(&BTRFS_I(inode)->io_tree, 0 , (u64)-1); +out: + btrfs_free_path(path); + return ret; +} + static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; @@ -4102,10 +4187,16 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) /* get rid of pinned extents */ trans = btrfs_join_transaction(rc->extent_root); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { err = PTR_ERR(trans); - else - btrfs_commit_transaction(trans, rc->extent_root); + goto out_free; + } + err = qgroup_fix_relocated_data_extents(trans, rc); + if (err < 0) { + btrfs_abort_transaction(trans, err); + goto out_free; + } + btrfs_commit_transaction(trans, rc->extent_root); out_free: btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); btrfs_free_path(path); @@ -4468,10 +4559,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) unset_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); - if (IS_ERR(trans)) + if (IS_ERR(trans)) { err = PTR_ERR(trans); - else - err = btrfs_commit_transaction(trans, rc->extent_root); + goto out_free; + } + err = qgroup_fix_relocated_data_extents(trans, rc); + if (err < 0) { + btrfs_abort_transaction(trans, err); + goto out_free; + } + err = btrfs_commit_transaction(trans, rc->extent_root); out_free: kfree(rc); out: From df2c95f33e0a28b22509e4ee85365eedf32a1056 Mon Sep 17 00:00:00 2001 From: Qu Wenruo <quwenruo@cn.fujitsu.com> Date: Mon, 15 Aug 2016 10:36:52 +0800 Subject: [PATCH 475/513] btrfs: qgroup: Fix qgroup incorrectness caused by log replay When doing log replay at mount time(after power loss), qgroup will leak numbers of replayed data extents. The cause is almost the same of balance. So fix it by manually informing qgroup for owner changed extents. The bug can be detected by btrfs/119 test case. Cc: Mark Fasheh <mfasheh@suse.de> Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Reviewed-and-Tested-by: Goldwyn Rodrigues <rgoldwyn@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/tree-log.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index fff3f3efa4360..ffe92da81b8ac 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -27,6 +27,7 @@ #include "backref.h" #include "hash.h" #include "compression.h" +#include "qgroup.h" /* magic values for the inode_only field in btrfs_log_inode: * @@ -680,6 +681,21 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ins.type = BTRFS_EXTENT_ITEM_KEY; offset = key->offset - btrfs_file_extent_offset(eb, item); + /* + * Manually record dirty extent, as here we did a shallow + * file extent item copy and skip normal backref update, + * but modifying extent tree all by ourselves. + * So need to manually record dirty extent for qgroup, + * as the owner of the file extent changed from log tree + * (doesn't affect qgroup) to fs/file tree(affects qgroup) + */ + ret = btrfs_qgroup_insert_dirty_extent(trans, root->fs_info, + btrfs_file_extent_disk_bytenr(eb, item), + btrfs_file_extent_disk_num_bytes(eb, item), + GFP_NOFS); + if (ret < 0) + goto out; + if (ins.objectid > 0) { u64 csum_start; u64 csum_end; From dcb40c196fc85c6dfb28456480e5a882e26f567d Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Date: Mon, 25 Jul 2016 15:51:38 +0800 Subject: [PATCH 476/513] btrfs: use correct offset for reloc_inode in prealloc_file_extent_cluster() In prealloc_file_extent_cluster(), btrfs_check_data_free_space() uses wrong file offset for reloc_inode, it uses cluster->start and cluster->end, which indeed are extent's bytenr. The correct value should be cluster->[start|end] minus block group's start bytenr. start bytenr cluster->start | | extent | extent | ...| extent | |----------------------------------------------------------------| | block group reloc_inode | Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/relocation.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 27480ef9813cd..71b4b70f56b95 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3038,12 +3038,14 @@ int prealloc_file_extent_cluster(struct inode *inode, u64 num_bytes; int nr = 0; int ret = 0; + u64 prealloc_start = cluster->start - offset; + u64 prealloc_end = cluster->end - offset; BUG_ON(cluster->start != cluster->boundary[0]); inode_lock(inode); - ret = btrfs_check_data_free_space(inode, cluster->start, - cluster->end + 1 - cluster->start); + ret = btrfs_check_data_free_space(inode, prealloc_start, + prealloc_end + 1 - prealloc_start); if (ret) goto out; @@ -3064,8 +3066,8 @@ int prealloc_file_extent_cluster(struct inode *inode, break; nr++; } - btrfs_free_reserved_data_space(inode, cluster->start, - cluster->end + 1 - cluster->start); + btrfs_free_reserved_data_space(inode, prealloc_start, + prealloc_end + 1 - prealloc_start); out: inode_unlock(inode); return ret; From 4824f1f412f75e9f84b9cecbde828e8f4699f82d Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Date: Mon, 25 Jul 2016 15:51:39 +0800 Subject: [PATCH 477/513] btrfs: divide btrfs_update_reserved_bytes() into two functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch divides btrfs_update_reserved_bytes() into btrfs_add_reserved_bytes() and btrfs_free_reserved_bytes(), and next patch will extend btrfs_add_reserved_bytes()to fix some false ENOSPC error, please see later patch for detailed info. Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/extent-tree.c | 97 +++++++++++++++++++++++++----------------- 1 file changed, 57 insertions(+), 40 deletions(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d10872748c43e..f1121db658785 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -104,9 +104,10 @@ static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key); static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); -static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, - int delalloc); +static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int delalloc); +static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, u64 num_bytes); int btrfs_pin_extent(struct btrfs_root *root, @@ -6497,19 +6498,14 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) } /** - * btrfs_update_reserved_bytes - update the block_group and space info counters + * btrfs_add_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating * @num_bytes: The number of bytes in question * @reserve: One of the reservation enums * @delalloc: The blocks are allocated for the delalloc write * - * This is called by the allocator when it reserves space, or by somebody who is - * freeing space that was never actually used on disk. For example if you - * reserve some space for a new leaf in transaction A and before transaction A - * commits you free that leaf, you call this with reserve set to 0 in order to - * clear the reservation. - * - * Metadata reservations should be called with RESERVE_ALLOC so we do the proper + * This is called by the allocator when it reserves space. Metadata + * reservations should be called with RESERVE_ALLOC so we do the proper * ENOSPC accounting. For data we handle the reservation through clearing the * delalloc bits in the io_tree. We have to do this since we could end up * allocating less disk space for the amount of data we have reserved in the @@ -6519,44 +6515,65 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) * make the reservation and return -EAGAIN, otherwise this function always * succeeds. */ -static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc) +static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int reserve, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; spin_lock(&space_info->lock); spin_lock(&cache->lock); - if (reserve != RESERVE_FREE) { - if (cache->ro) { - ret = -EAGAIN; - } else { - cache->reserved += num_bytes; - space_info->bytes_reserved += num_bytes; - if (reserve == RESERVE_ALLOC) { - trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - num_bytes, 0); - space_info->bytes_may_use -= num_bytes; - } - - if (delalloc) - cache->delalloc_bytes += num_bytes; - } + if (cache->ro) { + ret = -EAGAIN; } else { - if (cache->ro) - space_info->bytes_readonly += num_bytes; - cache->reserved -= num_bytes; - space_info->bytes_reserved -= num_bytes; + cache->reserved += num_bytes; + space_info->bytes_reserved += num_bytes; + if (reserve == RESERVE_ALLOC) { + trace_btrfs_space_reservation(cache->fs_info, + "space_info", space_info->flags, + num_bytes, 0); + space_info->bytes_may_use -= num_bytes; + } if (delalloc) - cache->delalloc_bytes -= num_bytes; + cache->delalloc_bytes += num_bytes; } spin_unlock(&cache->lock); spin_unlock(&space_info->lock); return ret; } +/** + * btrfs_free_reserved_bytes - update the block_group and space info counters + * @cache: The cache we are manipulating + * @num_bytes: The number of bytes in question + * @delalloc: The blocks are allocated for the delalloc write + * + * This is called by somebody who is freeing space that was never actually used + * on disk. For example if you reserve some space for a new leaf in transaction + * A and before transaction A commits you free that leaf, you call this with + * reserve set to 0 in order to clear the reservation. + */ + +static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, + u64 num_bytes, int delalloc) +{ + struct btrfs_space_info *space_info = cache->space_info; + int ret = 0; + + spin_lock(&space_info->lock); + spin_lock(&cache->lock); + if (cache->ro) + space_info->bytes_readonly += num_bytes; + cache->reserved -= num_bytes; + space_info->bytes_reserved -= num_bytes; + + if (delalloc) + cache->delalloc_bytes -= num_bytes; + spin_unlock(&cache->lock); + spin_unlock(&space_info->lock); + return ret; +} void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -7191,7 +7208,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); btrfs_add_free_space(cache, buf->start, buf->len); - btrfs_update_reserved_bytes(cache, buf->len, RESERVE_FREE, 0); + btrfs_free_reserved_bytes(cache, buf->len, 0); btrfs_put_block_group(cache); trace_btrfs_reserved_extent_free(root, buf->start, buf->len); pin = 0; @@ -7763,8 +7780,8 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_update_reserved_bytes(block_group, num_bytes, - alloc_type, delalloc); + ret = btrfs_add_reserved_bytes(block_group, num_bytes, + alloc_type, delalloc); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -7995,7 +8012,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root, if (btrfs_test_opt(root->fs_info, DISCARD)) ret = btrfs_discard_extent(root, start, len, NULL); btrfs_add_free_space(cache, start, len); - btrfs_update_reserved_bytes(cache, len, RESERVE_FREE, delalloc); + btrfs_free_reserved_bytes(cache, len, delalloc); trace_btrfs_reserved_extent_free(root, start, len); } @@ -8223,8 +8240,8 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, if (!block_group) return -EINVAL; - ret = btrfs_update_reserved_bytes(block_group, ins->offset, - RESERVE_ALLOC_NO_ACCOUNT, 0); + ret = btrfs_add_reserved_bytes(block_group, ins->offset, + RESERVE_ALLOC_NO_ACCOUNT, 0); BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); From 18513091af9483ba84328d42092bd4d42a3c958f Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Date: Mon, 25 Jul 2016 15:51:40 +0800 Subject: [PATCH 478/513] btrfs: update btrfs_space_info's bytes_may_use timely This patch can fix some false ENOSPC errors, below test script can reproduce one false ENOSPC error: #!/bin/bash dd if=/dev/zero of=fs.img bs=$((1024*1024)) count=128 dev=$(losetup --show -f fs.img) mkfs.btrfs -f -M $dev mkdir /tmp/mntpoint mount $dev /tmp/mntpoint cd /tmp/mntpoint xfs_io -f -c "falloc 0 $((64*1024*1024))" testfile Above script will fail for ENOSPC reason, but indeed fs still has free space to satisfy this request. Please see call graph: btrfs_fallocate() |-> btrfs_alloc_data_chunk_ondemand() | bytes_may_use += 64M |-> btrfs_prealloc_file_range() |-> btrfs_reserve_extent() |-> btrfs_add_reserved_bytes() | alloc_type is RESERVE_ALLOC_NO_ACCOUNT, so it does not | change bytes_may_use, and bytes_reserved += 64M. Now | bytes_may_use + bytes_reserved == 128M, which is greater | than btrfs_space_info's total_bytes, false enospc occurs. | Note, the bytes_may_use decrease operation will be done in | end of btrfs_fallocate(), which is too late. Here is another simple case for buffered write: CPU 1 | CPU 2 | |-> cow_file_range() |-> __btrfs_buffered_write() |-> btrfs_reserve_extent() | | | | | | | | | ..... | |-> btrfs_check_data_free_space() | | | | |-> extent_clear_unlock_delalloc() | In CPU 1, btrfs_reserve_extent()->find_free_extent()-> btrfs_add_reserved_bytes() do not decrease bytes_may_use, the decrease operation will be delayed to be done in extent_clear_unlock_delalloc(). Assume in this case, btrfs_reserve_extent() reserved 128MB data, CPU2's btrfs_check_data_free_space() tries to reserve 100MB data space. If 100MB > data_sinfo->total_bytes - data_sinfo->bytes_used - data_sinfo->bytes_reserved - data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - data_sinfo->bytes_may_use btrfs_check_data_free_space() will try to allcate new data chunk or call btrfs_start_delalloc_roots(), or commit current transaction in order to reserve some free space, obviously a lot of work. But indeed it's not necessary as long as decreasing bytes_may_use timely, we still have free space, decreasing 128M from bytes_may_use. To fix this issue, this patch chooses to update bytes_may_use for both data and metadata in btrfs_add_reserved_bytes(). For compress path, real extent length may not be equal to file content length, so introduce a ram_bytes argument for btrfs_reserve_extent(), find_free_extent() and btrfs_add_reserved_bytes(), it's becasue bytes_may_use is increased by file content length. Then compress path can update bytes_may_use correctly. Also now we can discard RESERVE_ALLOC_NO_ACCOUNT, RESERVE_ALLOC and RESERVE_FREE. As we know, usually EXTENT_DO_ACCOUNTING is used for error path. In run_delalloc_nocow(), for inode marked as NODATACOW or extent marked as PREALLOC, we also need to update bytes_may_use, but can not pass EXTENT_DO_ACCOUNTING, because it also clears metadata reservation, so here we introduce EXTENT_CLEAR_DATA_RESV flag to indicate btrfs_clear_bit_hook() to update btrfs_space_info's bytes_may_use. Meanwhile __btrfs_prealloc_file_range() will call btrfs_free_reserved_data_space() internally for both sucessful and failed path, btrfs_prealloc_file_range()'s callers does not need to call btrfs_free_reserved_data_space() any more. Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Reviewed-by: Josef Bacik <jbacik@fb.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/ctree.h | 2 +- fs/btrfs/extent-tree.c | 56 ++++++++++++++---------------------------- fs/btrfs/extent_io.h | 1 + fs/btrfs/file.c | 26 +++++++++++--------- fs/btrfs/inode-map.c | 3 +-- fs/btrfs/inode.c | 37 ++++++++++++++++++++-------- fs/btrfs/relocation.c | 11 +++++++-- 7 files changed, 73 insertions(+), 63 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index acc6850a118f0..09cdff0d58e87 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2579,7 +2579,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 root_objectid, u64 owner, u64 offset, struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes, +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc); int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index f1121db658785..133c93b0a0f1d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -60,21 +60,6 @@ enum { CHUNK_ALLOC_FORCE = 2, }; -/* - * Control how reservations are dealt with. - * - * RESERVE_FREE - freeing a reservation. - * RESERVE_ALLOC - allocating space and we need to update bytes_may_use for - * ENOSPC accounting - * RESERVE_ALLOC_NO_ACCOUNT - allocating space and we should not update - * bytes_may_use as the ENOSPC accounting is done elsewhere - */ -enum { - RESERVE_FREE = 0, - RESERVE_ALLOC = 1, - RESERVE_ALLOC_NO_ACCOUNT = 2, -}; - static int update_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 num_bytes, int alloc); @@ -105,7 +90,7 @@ static int find_next_key(struct btrfs_path *path, int level, static void dump_space_info(struct btrfs_space_info *info, u64 bytes, int dump_block_groups); static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc); + u64 ram_bytes, u64 num_bytes, int delalloc); static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, u64 num_bytes, int delalloc); static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, @@ -3502,7 +3487,6 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, dcs = BTRFS_DC_SETUP; else if (ret == -ENOSPC) set_bit(BTRFS_TRANS_CACHE_ENOSPC, &trans->transaction->flags); - btrfs_free_reserved_data_space(inode, 0, num_pages); out_put: iput(inode); @@ -6500,8 +6484,9 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) /** * btrfs_add_reserved_bytes - update the block_group and space info counters * @cache: The cache we are manipulating + * @ram_bytes: The number of bytes of file content, and will be same to + * @num_bytes except for the compress path. * @num_bytes: The number of bytes in question - * @reserve: One of the reservation enums * @delalloc: The blocks are allocated for the delalloc write * * This is called by the allocator when it reserves space. Metadata @@ -6516,7 +6501,7 @@ void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg) * succeeds. */ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, - u64 num_bytes, int reserve, int delalloc) + u64 ram_bytes, u64 num_bytes, int delalloc) { struct btrfs_space_info *space_info = cache->space_info; int ret = 0; @@ -6528,13 +6513,11 @@ static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, } else { cache->reserved += num_bytes; space_info->bytes_reserved += num_bytes; - if (reserve == RESERVE_ALLOC) { - trace_btrfs_space_reservation(cache->fs_info, - "space_info", space_info->flags, - num_bytes, 0); - space_info->bytes_may_use -= num_bytes; - } + trace_btrfs_space_reservation(cache->fs_info, + "space_info", space_info->flags, + ram_bytes, 0); + space_info->bytes_may_use -= ram_bytes; if (delalloc) cache->delalloc_bytes += num_bytes; } @@ -7433,9 +7416,9 @@ btrfs_release_block_group(struct btrfs_block_group_cache *cache, * the free space extent currently. */ static noinline int find_free_extent(struct btrfs_root *orig_root, - u64 num_bytes, u64 empty_size, - u64 hint_byte, struct btrfs_key *ins, - u64 flags, int delalloc) + u64 ram_bytes, u64 num_bytes, u64 empty_size, + u64 hint_byte, struct btrfs_key *ins, + u64 flags, int delalloc) { int ret = 0; struct btrfs_root *root = orig_root->fs_info->extent_root; @@ -7447,8 +7430,6 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, struct btrfs_space_info *space_info; int loop = 0; int index = __get_raid_index(flags); - int alloc_type = (flags & BTRFS_BLOCK_GROUP_DATA) ? - RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; bool failed_cluster_refill = false; bool failed_alloc = false; bool use_cluster = true; @@ -7780,8 +7761,8 @@ static noinline int find_free_extent(struct btrfs_root *orig_root, search_start - offset); BUG_ON(offset > search_start); - ret = btrfs_add_reserved_bytes(block_group, num_bytes, - alloc_type, delalloc); + ret = btrfs_add_reserved_bytes(block_group, ram_bytes, + num_bytes, delalloc); if (ret == -EAGAIN) { btrfs_add_free_space(block_group, offset, num_bytes); goto loop; @@ -7953,7 +7934,7 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, up_read(&info->groups_sem); } -int btrfs_reserve_extent(struct btrfs_root *root, +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, u64 min_alloc_size, u64 empty_size, u64 hint_byte, struct btrfs_key *ins, int is_data, int delalloc) @@ -7965,8 +7946,8 @@ int btrfs_reserve_extent(struct btrfs_root *root, flags = btrfs_get_alloc_profile(root, is_data); again: WARN_ON(num_bytes < root->sectorsize); - ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins, - flags, delalloc); + ret = find_free_extent(root, ram_bytes, num_bytes, empty_size, + hint_byte, ins, flags, delalloc); if (!ret && !is_data) { btrfs_dec_block_group_reservations(root->fs_info, ins->objectid); @@ -7975,6 +7956,7 @@ int btrfs_reserve_extent(struct btrfs_root *root, num_bytes = min(num_bytes >> 1, ins->offset); num_bytes = round_down(num_bytes, root->sectorsize); num_bytes = max(num_bytes, min_alloc_size); + ram_bytes = num_bytes; if (num_bytes == min_alloc_size) final_tried = true; goto again; @@ -8241,7 +8223,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, return -EINVAL; ret = btrfs_add_reserved_bytes(block_group, ins->offset, - RESERVE_ALLOC_NO_ACCOUNT, 0); + ins->offset, 0); BUG_ON(ret); /* logic error */ ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 0, owner, offset, ins, 1); @@ -8385,7 +8367,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, if (IS_ERR(block_rsv)) return ERR_CAST(block_rsv); - ret = btrfs_reserve_extent(root, blocksize, blocksize, + ret = btrfs_reserve_extent(root, blocksize, blocksize, blocksize, empty_size, hint, &ins, 0, 0); if (ret) goto out_unuse; diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index c0c1c4fef6cea..b52ca5db01cb9 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -20,6 +20,7 @@ #define EXTENT_DAMAGED (1U << 14) #define EXTENT_NORESERVE (1U << 15) #define EXTENT_QGROUP_RESERVED (1U << 16) +#define EXTENT_CLEAR_DATA_RESV (1U << 17) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 5842423f8f47b..3391f2adf0c86 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2675,6 +2675,7 @@ static long btrfs_fallocate(struct file *file, int mode, alloc_start = round_down(offset, blocksize); alloc_end = round_up(offset + len, blocksize); + cur_offset = alloc_start; /* Make sure we aren't being give some crap mode */ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) @@ -2767,7 +2768,6 @@ static long btrfs_fallocate(struct file *file, int mode, /* First, check if we exceed the qgroup limit */ INIT_LIST_HEAD(&reserve_list); - cur_offset = alloc_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, alloc_end - cur_offset, 0); @@ -2794,6 +2794,14 @@ static long btrfs_fallocate(struct file *file, int mode, last_byte - cur_offset); if (ret < 0) break; + } else { + /* + * Do not need to reserve unwritten extent for this + * range, free reserved data space first, otherwise + * it'll result in false ENOSPC error. + */ + btrfs_free_reserved_data_space(inode, cur_offset, + last_byte - cur_offset); } free_extent_map(em); cur_offset = last_byte; @@ -2811,6 +2819,9 @@ static long btrfs_fallocate(struct file *file, int mode, range->start, range->len, 1 << inode->i_blkbits, offset + len, &alloc_hint); + else + btrfs_free_reserved_data_space(inode, range->start, + range->len); list_del(&range->list); kfree(range); } @@ -2845,18 +2856,11 @@ static long btrfs_fallocate(struct file *file, int mode, unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, &cached_state, GFP_KERNEL); out: - /* - * As we waited the extent range, the data_rsv_map must be empty - * in the range, as written data range will be released from it. - * And for prealloacted extent, it will also be released when - * its metadata is written. - * So this is completely used as cleanup. - */ - btrfs_qgroup_free_data(inode, alloc_start, alloc_end - alloc_start); inode_unlock(inode); /* Let go of our reservation. */ - btrfs_free_reserved_data_space(inode, alloc_start, - alloc_end - alloc_start); + if (ret != 0) + btrfs_free_reserved_data_space(inode, alloc_start, + alloc_end - cur_offset); return ret; } diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index aa6fabaee72ed..359ee861b5a4b 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -495,10 +495,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root, ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, prealloc, prealloc, &alloc_hint); if (ret) { - btrfs_delalloc_release_space(inode, 0, prealloc); + btrfs_delalloc_release_metadata(inode, prealloc); goto out_put; } - btrfs_free_reserved_data_space(inode, 0, prealloc); ret = btrfs_write_out_ino_cache(root, trans, path, inode); out_put: diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 29636624a4279..bcea20015d9ac 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -566,6 +566,8 @@ static noinline void compress_file_range(struct inode *inode, PAGE_SET_WRITEBACK | page_error_op | PAGE_END_WRITEBACK); + btrfs_free_reserved_data_space_noquota(inode, start, + end - start + 1); goto free_pages_out; } } @@ -742,7 +744,7 @@ static noinline void submit_compressed_extents(struct inode *inode, lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1); - ret = btrfs_reserve_extent(root, + ret = btrfs_reserve_extent(root, async_extent->ram_size, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, &ins, 1, 1); @@ -969,7 +971,8 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_DEFRAG, PAGE_UNLOCK | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK); - + btrfs_free_reserved_data_space_noquota(inode, start, + end - start + 1); *nr_written = *nr_written + (end - start + PAGE_SIZE) / PAGE_SIZE; *page_started = 1; @@ -989,7 +992,7 @@ static noinline int cow_file_range(struct inode *inode, unsigned long op; cur_alloc_size = disk_num_bytes; - ret = btrfs_reserve_extent(root, cur_alloc_size, + ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size, root->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret < 0) @@ -1489,8 +1492,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, extent_clear_unlock_delalloc(inode, cur_offset, cur_offset + num_bytes - 1, locked_page, EXTENT_LOCKED | - EXTENT_DELALLOC, PAGE_UNLOCK | - PAGE_SET_PRIVATE2); + EXTENT_DELALLOC | + EXTENT_CLEAR_DATA_RESV, + PAGE_UNLOCK | PAGE_SET_PRIVATE2); + if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); cur_offset = extent_end; @@ -1807,7 +1812,9 @@ static void btrfs_clear_bit_hook(struct inode *inode, return; if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID - && do_list && !(state->state & EXTENT_NORESERVE)) + && do_list && !(state->state & EXTENT_NORESERVE) + && (*bits & (EXTENT_DO_ACCOUNTING | + EXTENT_CLEAR_DATA_RESV))) btrfs_free_reserved_data_space_noquota(inode, state->start, len); @@ -7252,7 +7259,7 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, int ret; alloc_hint = get_extent_allocation_hint(inode, start, len); - ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, + ret = btrfs_reserve_extent(root, len, len, root->sectorsize, 0, alloc_hint, &ins, 1, 1); if (ret) return ERR_PTR(ret); @@ -7752,6 +7759,13 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, ret = PTR_ERR(em2); goto unlock_err; } + /* + * For inode marked NODATACOW or extent marked PREALLOC, + * use the existing or preallocated extent, so does not + * need to adjust btrfs_space_info's bytes_may_use. + */ + btrfs_free_reserved_data_space_noquota(inode, + start, len); goto unlock; } } @@ -7786,7 +7800,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, i_size_write(inode, start + len); adjust_dio_outstanding_extents(inode, dio_data, len); - btrfs_free_reserved_data_space(inode, start, len); WARN_ON(dio_data->reserve < len); dio_data->reserve -= len; dio_data->unsubmitted_oe_range_end = start + len; @@ -10306,6 +10319,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, u64 last_alloc = (u64)-1; int ret = 0; bool own_trans = true; + u64 end = start + num_bytes - 1; if (trans) own_trans = false; @@ -10327,8 +10341,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, * sized chunks. */ cur_bytes = min(cur_bytes, last_alloc); - ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, - *alloc_hint, &ins, 1, 0); + ret = btrfs_reserve_extent(root, cur_bytes, cur_bytes, + min_size, 0, *alloc_hint, &ins, 1, 0); if (ret) { if (own_trans) btrfs_end_transaction(trans, root); @@ -10414,6 +10428,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, if (own_trans) btrfs_end_transaction(trans, root); } + if (cur_offset < end) + btrfs_free_reserved_data_space(inode, cur_offset, + end - cur_offset + 1); return ret; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 71b4b70f56b95..8a2c2a07987b2 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3040,6 +3040,7 @@ int prealloc_file_extent_cluster(struct inode *inode, int ret = 0; u64 prealloc_start = cluster->start - offset; u64 prealloc_end = cluster->end - offset; + u64 cur_offset; BUG_ON(cluster->start != cluster->boundary[0]); inode_lock(inode); @@ -3049,6 +3050,7 @@ int prealloc_file_extent_cluster(struct inode *inode, if (ret) goto out; + cur_offset = prealloc_start; while (nr < cluster->nr) { start = cluster->boundary[nr] - offset; if (nr + 1 < cluster->nr) @@ -3058,16 +3060,21 @@ int prealloc_file_extent_cluster(struct inode *inode, lock_extent(&BTRFS_I(inode)->io_tree, start, end); num_bytes = end + 1 - start; + if (cur_offset < start) + btrfs_free_reserved_data_space(inode, cur_offset, + start - cur_offset); ret = btrfs_prealloc_file_range(inode, 0, start, num_bytes, num_bytes, end + 1, &alloc_hint); + cur_offset = end + 1; unlock_extent(&BTRFS_I(inode)->io_tree, start, end); if (ret) break; nr++; } - btrfs_free_reserved_data_space(inode, prealloc_start, - prealloc_end + 1 - prealloc_start); + if (cur_offset < prealloc_end) + btrfs_free_reserved_data_space(inode, cur_offset, + prealloc_end + 1 - cur_offset); out: inode_unlock(inode); return ret; From 9e7cc91a6d18a4973c6d2cc104871439c9e94f3d Mon Sep 17 00:00:00 2001 From: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Date: Mon, 1 Aug 2016 13:28:08 +0800 Subject: [PATCH 479/513] btrfs: fix fsfreeze hang caused by delayed iputs deal When running fstests generic/068, sometimes we got below deadlock: xfs_io D ffff8800331dbb20 0 6697 6693 0x00000080 ffff8800331dbb20 ffff88007acfc140 ffff880034d895c0 ffff8800331dc000 ffff880032d243e8 fffffffeffffffff ffff880032d24400 0000000000000001 ffff8800331dbb38 ffffffff816a9045 ffff880034d895c0 ffff8800331dbba8 Call Trace: [<ffffffff816a9045>] schedule+0x35/0x80 [<ffffffff816abab2>] rwsem_down_read_failed+0xf2/0x140 [<ffffffff8118f5e1>] ? __filemap_fdatawrite_range+0xd1/0x100 [<ffffffff8134f978>] call_rwsem_down_read_failed+0x18/0x30 [<ffffffffa06631fc>] ? btrfs_alloc_block_rsv+0x2c/0xb0 [btrfs] [<ffffffff810d32b5>] percpu_down_read+0x35/0x50 [<ffffffff81217dfc>] __sb_start_write+0x2c/0x40 [<ffffffffa067f5d5>] start_transaction+0x2a5/0x4d0 [btrfs] [<ffffffffa067f857>] btrfs_join_transaction+0x17/0x20 [btrfs] [<ffffffffa068ba34>] btrfs_evict_inode+0x3c4/0x5d0 [btrfs] [<ffffffff81230a1a>] evict+0xba/0x1a0 [<ffffffff812316b6>] iput+0x196/0x200 [<ffffffffa06851d0>] btrfs_run_delayed_iputs+0x70/0xc0 [btrfs] [<ffffffffa067f1d8>] btrfs_commit_transaction+0x928/0xa80 [btrfs] [<ffffffffa0646df0>] btrfs_freeze+0x30/0x40 [btrfs] [<ffffffff81218040>] freeze_super+0xf0/0x190 [<ffffffff81229275>] do_vfs_ioctl+0x4a5/0x5c0 [<ffffffff81003176>] ? do_audit_syscall_entry+0x66/0x70 [<ffffffff810038cf>] ? syscall_trace_enter_phase1+0x11f/0x140 [<ffffffff81229409>] SyS_ioctl+0x79/0x90 [<ffffffff81003c12>] do_syscall_64+0x62/0x110 [<ffffffff816acbe1>] entry_SYSCALL64_slow_path+0x25/0x25 >From this warning, freeze_super() already holds SB_FREEZE_FS, but btrfs_freeze() will call btrfs_commit_transaction() again, if btrfs_commit_transaction() finds that it has delayed iputs to handle, it'll start_transaction(), which will try to get SB_FREEZE_FS lock again, then deadlock occurs. The root cause is that in btrfs, sync_filesystem(sb) does not make sure all metadata is updated. There still maybe some codes adding delayed iputs, see below sample race window: CPU1 | CPU2 |-> freeze_super() | |-> sync_filesystem(sb); | | |-> cleaner_kthread() | | |-> btrfs_delete_unused_bgs() | | |-> btrfs_remove_chunk() | | |-> btrfs_remove_block_group() | | |-> btrfs_add_delayed_iput() | | |-> sb->s_writers.frozen = SB_FREEZE_FS; | |-> sb_wait_write(sb, SB_FREEZE_FS); | | acquire SB_FREEZE_FS lock. | | | |-> btrfs_freeze() | |-> btrfs_commit_transaction() | |-> btrfs_run_delayed_iputs() | | will handle delayed iputs, | | that means start_transaction() | | will be called, which will try | | to get SB_FREEZE_FS lock. | To fix this issue, introduce a "int fs_frozen" to record internally whether fs has been frozen. If fs has been frozen, we can not handle delayed iputs. Signed-off-by: Wang Xiaoguang <wangxg.fnst@cn.fujitsu.com> Reviewed-by: David Sterba <dsterba@suse.com> [ add comment to btrfs_freeze ] Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/ctree.h | 2 ++ fs/btrfs/disk-io.c | 1 + fs/btrfs/super.c | 16 ++++++++++++++++ fs/btrfs/transaction.c | 7 ++++++- 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 09cdff0d58e87..ec4154faab612 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1080,6 +1080,8 @@ struct btrfs_fs_info { struct list_head pinned_chunks; int creating_free_space_tree; + /* Used to record internally whether fs has been frozen */ + int fs_frozen; }; struct btrfs_subvolume_writers { diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7857f64e1cae2..17062223fac31 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2631,6 +2631,7 @@ int open_ctree(struct super_block *sb, atomic_set(&fs_info->qgroup_op_seq, 0); atomic_set(&fs_info->reada_works_cnt, 0); atomic64_set(&fs_info->tree_mod_seq, 0); + fs_info->fs_frozen = 0; fs_info->sb = sb; fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info->metadata_ratio = 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 864ce334f696c..4071fe2bd0981 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -2241,6 +2241,13 @@ static int btrfs_freeze(struct super_block *sb) struct btrfs_trans_handle *trans; struct btrfs_root *root = btrfs_sb(sb)->tree_root; + root->fs_info->fs_frozen = 1; + /* + * We don't need a barrier here, we'll wait for any transaction that + * could be in progress on other threads (and do delayed iputs that + * we want to avoid on a frozen filesystem), or do the commit + * ourselves. + */ trans = btrfs_attach_transaction_barrier(root); if (IS_ERR(trans)) { /* no transaction, don't bother */ @@ -2251,6 +2258,14 @@ static int btrfs_freeze(struct super_block *sb) return btrfs_commit_transaction(trans, root); } +static int btrfs_unfreeze(struct super_block *sb) +{ + struct btrfs_root *root = btrfs_sb(sb)->tree_root; + + root->fs_info->fs_frozen = 0; + return 0; +} + static int btrfs_show_devname(struct seq_file *m, struct dentry *root) { struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); @@ -2299,6 +2314,7 @@ static const struct super_operations btrfs_super_ops = { .statfs = btrfs_statfs, .remount_fs = btrfs_remount, .freeze_fs = btrfs_freeze, + .unfreeze_fs = btrfs_unfreeze, }; static const struct file_operations btrfs_ctl_fops = { diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9cca0a7219618..95d41919d034e 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -2278,8 +2278,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_trans_handle_cachep, trans); + /* + * If fs has been frozen, we can not handle delayed iputs, otherwise + * it'll result in deadlock about SB_FREEZE_FS. + */ if (current != root->fs_info->transaction_kthread && - current != root->fs_info->cleaner_kthread) + current != root->fs_info->cleaner_kthread && + !root->fs_info->fs_frozen) btrfs_run_delayed_iputs(root); return ret; From 28b737f6ede3661fe610937706c4a6f50e9ab769 Mon Sep 17 00:00:00 2001 From: Liu Bo <bo.li.liu@oracle.com> Date: Fri, 29 Jul 2016 11:09:50 -0700 Subject: [PATCH 480/513] Btrfs: clarify do_chunk_alloc()'s return value Function start_transaction() can return ERR_PTR(1) when flush is BTRFS_RESERVE_FLUSH_LIMIT, so the call graph is start_transaction (return ERR_PTR(1)) -> btrfs_block_rsv_add (return 1) -> reserve_metadata_bytes (return 1) -> flush_space (return 1) -> do_chunk_alloc (return 1) With BTRFS_RESERVE_FLUSH_LIMIT, if flush_space is already on the flush_state of ALLOC_CHUNK and it successfully allocates a new chunk, then instead of trying to reserve space again, reserve_metadata_bytes returns 1 immediately. Eventually the callers who call start_transaction() usually just do the IS_ERR() check which ERR_PTR(1) can pass, then it'll get a panic when dereferencing a pointer which is ERR_PTR(1). The following patch fixes the above problem. "btrfs: flush_space: treat return value of do_chunk_alloc properly" https://patchwork.kernel.org/patch/7778651/ This add comments to clarify do_chunk_alloc()'s return value. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/extent-tree.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 133c93b0a0f1d..d5d3cfb1f66fd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -4457,6 +4457,15 @@ void check_system_chunk(struct btrfs_trans_handle *trans, } } +/* + * If force is CHUNK_ALLOC_FORCE: + * - return 1 if it successfully allocates a chunk, + * - return errors including -ENOSPC otherwise. + * If force is NOT CHUNK_ALLOC_FORCE: + * - return 0 if it doesn't need to allocate a new chunk, + * - return 1 if it successfully allocates a chunk, + * - return errors including -ENOSPC otherwise. + */ static int do_chunk_alloc(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 flags, int force) { From 142388194191a3edc9ba01cfcfd8b691e0971fb2 Mon Sep 17 00:00:00 2001 From: Anand Jain <Anand.Jain@oracle.com> Date: Fri, 22 Jul 2016 06:04:53 +0800 Subject: [PATCH 481/513] btrfs: do not background blkdev_put() At the end of unmount/dev-delete, if the device exclusive open is not actually closed, then there might be a race with another program in the userland who is trying to open the device in exclusive mode and it may fail for eg: unmount /btrfs; fsck /dev/x btrfs dev del /dev/x /btrfs; fsck /dev/x so here background blkdev_put() is not a choice Signed-off-by: Anand Jain <Anand.Jain@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/volumes.c | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e217035d63df4..9a6f0dcb3e9c7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -834,10 +834,6 @@ static void __free_device(struct work_struct *work) struct btrfs_device *device; device = container_of(work, struct btrfs_device, rcu_work); - - if (device->bdev) - blkdev_put(device->bdev, device->mode); - rcu_string_free(device->name); kfree(device); } @@ -852,6 +848,17 @@ static void free_device(struct rcu_head *head) schedule_work(&device->rcu_work); } +static void btrfs_close_bdev(struct btrfs_device *device) +{ + if (device->bdev && device->writeable) { + sync_blockdev(device->bdev); + invalidate_bdev(device->bdev); + } + + if (device->bdev) + blkdev_put(device->bdev, device->mode); +} + static void btrfs_close_one_device(struct btrfs_device *device) { struct btrfs_fs_devices *fs_devices = device->fs_devices; @@ -870,10 +877,7 @@ static void btrfs_close_one_device(struct btrfs_device *device) if (device->missing) fs_devices->missing_devices--; - if (device->bdev && device->writeable) { - sync_blockdev(device->bdev); - invalidate_bdev(device->bdev); - } + btrfs_close_bdev(device); new_device = btrfs_alloc_device(NULL, &device->devid, device->uuid); @@ -1932,6 +1936,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) btrfs_sysfs_rm_device_link(root->fs_info->fs_devices, device); } + btrfs_close_bdev(device); + call_rcu(&device->rcu, free_device); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; @@ -2025,6 +2031,9 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, /* zero out the old super if it is writable */ btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } + + btrfs_close_bdev(srcdev); + call_rcu(&srcdev->rcu, free_device); /* @@ -2080,6 +2089,8 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, * the device_list_mutex lock. */ btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); + + btrfs_close_bdev(tgtdev); call_rcu(&tgtdev->rcu, free_device); } From 187ee58c62c1d0d238d3dc4835869d33e1869906 Mon Sep 17 00:00:00 2001 From: Josef Bacik <jbacik@fb.com> Date: Thu, 18 Aug 2016 15:30:06 -0400 Subject: [PATCH 482/513] Btrfs: fix em leak in find_first_block_group We need to call free_extent_map() on the em we look up. Signed-off-by: Josef Bacik <jbacik@fb.com> Reviewed-by: Omar Sandoval <osandov@fb.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/extent-tree.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d5d3cfb1f66fd..60d4ae7ce9746 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9887,6 +9887,7 @@ static int find_first_block_group(struct btrfs_root *root, } else { ret = 0; } + free_extent_map(em); goto out; } path->slots[0]++; From 35bbb97fc898aeb874cb7c8b746f091caa359994 Mon Sep 17 00:00:00 2001 From: Jeff Mahoney <jeffm@suse.com> Date: Wed, 17 Aug 2016 21:58:33 -0400 Subject: [PATCH 483/513] btrfs: don't create or leak aliased root while cleaning up orphans commit 909c3a22da3 (Btrfs: fix loading of orphan roots leading to BUG_ON) avoids the BUG_ON but can add an aliased root to the dead_roots list or leak the root. Since we've already been loading roots into the radix tree, we should use it before looking the root up on disk. Cc: <stable@vger.kernel.org> # 4.5 Signed-off-by: Jeff Mahoney <jeffm@suse.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/disk-io.c | 4 ++-- fs/btrfs/disk-io.h | 2 ++ fs/btrfs/root-tree.c | 27 ++++++++++++++++++--------- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 17062223fac31..edda47162752b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1624,8 +1624,8 @@ int btrfs_init_fs_root(struct btrfs_root *root) return ret; } -static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, - u64 root_id) +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) { struct btrfs_root *root; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c9d42c92da2b2..e0efe45e52823 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info, struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, struct btrfs_key *location); int btrfs_init_fs_root(struct btrfs_root *root); +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id); int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 7fd7e1830cfe6..091296062456b 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) root_key.objectid = key.offset; key.offset++; + /* + * The root might have been inserted already, as before we look + * for orphan roots, log replay might have happened, which + * triggers a transaction commit and qgroup accounting, which + * in turn reads and inserts fs roots while doing backref + * walking. + */ + root = btrfs_lookup_fs_root(tree_root->fs_info, + root_key.objectid); + if (root) { + WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, + &root->state)); + if (btrfs_root_refs(&root->root_item) == 0) + btrfs_add_dead_root(root); + continue; + } + root = btrfs_read_fs_root(tree_root, &root_key); err = PTR_ERR_OR_ZERO(root); if (err && err != -ENOENT) { @@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state); err = btrfs_insert_fs_root(root->fs_info, root); - /* - * The root might have been inserted already, as before we look - * for orphan roots, log replay might have happened, which - * triggers a transaction commit and qgroup accounting, which - * in turn reads and inserts fs roots while doing backref - * walking. - */ - if (err == -EEXIST) - err = 0; if (err) { + BUG_ON(err == -EEXIST); btrfs_free_fs_root(root); break; } From 053ab70f0604224c7893b43f9d9d5efa283580d6 Mon Sep 17 00:00:00 2001 From: Liu Bo <bo.li.liu@oracle.com> Date: Tue, 23 Aug 2016 17:37:45 -0700 Subject: [PATCH 484/513] Btrfs: check btree node's nritems When btree node (level = 1) has nritems which equals to zero, we can end up with panic due to insert_ptr()'s BUG_ON(slot > nritems); where slot is 1 and nritems is 0, as copy_for_split() calls insert_ptr(.., path->slots[1] + 1, ...); A invalid value results in the whole mess, this adds the check for btree's node nritems so that we stop reading block when when something is wrong. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/disk-io.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index edda47162752b..474209f50844a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -613,6 +613,19 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } +static int check_node(struct btrfs_root *root, struct extent_buffer *node) +{ + unsigned long nr = btrfs_header_nritems(node); + + if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) { + btrfs_crit(root->fs_info, + "corrupt node: block %llu root %llu nritems %lu", + node->start, root->objectid, nr); + return -EIO; + } + return 0; +} + static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror) @@ -683,6 +696,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, ret = -EIO; } + if (found_level > 0 && check_node(root, eb)) + ret = -EIO; + if (!ret) set_extent_buffer_uptodate(eb); err: From 1ba98d086fe3a14d6a31f2f66dbab70c45d00f63 Mon Sep 17 00:00:00 2001 From: Liu Bo <bo.li.liu@oracle.com> Date: Tue, 23 Aug 2016 15:22:58 -0700 Subject: [PATCH 485/513] Btrfs: detect corruption when non-root leaf has zero item Right now we treat leaf which has zero item as a valid one because we could have an empty tree, that is, a root that is also a leaf without any item, however, in the same case but when the leaf is not a root, we can end up with hitting the BUG_ON(1) in btrfs_extend_item() called by setup_inline_extent_backref(). This makes us check the situation as a corruption if leaf is not its own root. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/disk-io.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 474209f50844a..70e76ad11fbf5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -560,8 +560,29 @@ static noinline int check_leaf(struct btrfs_root *root, u32 nritems = btrfs_header_nritems(leaf); int slot; - if (nritems == 0) + if (nritems == 0) { + struct btrfs_root *check_root; + + key.objectid = btrfs_header_owner(leaf); + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + + check_root = btrfs_get_fs_root(root->fs_info, &key, false); + /* + * The only reason we also check NULL here is that during + * open_ctree() some roots has not yet been set up. + */ + if (!IS_ERR_OR_NULL(check_root)) { + /* if leaf is the root, then it's fine */ + if (leaf->start != + btrfs_root_bytenr(&check_root->root_item)) { + CORRUPT("non-root leaf's nritems is 0", + leaf, root, 0); + return -EIO; + } + } return 0; + } /* Check the 0 item */ if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != From 28a235931b56d4e7bdd51f6733daf95f2b269da8 Mon Sep 17 00:00:00 2001 From: Filipe Manana <fdmanana@suse.com> Date: Tue, 23 Aug 2016 21:13:51 +0100 Subject: [PATCH 486/513] Btrfs: fix lockdep warning on deadlock against an inode's log mutex Commit 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink"), which landed in 4.8-rc2, introduced a possibility for a deadlock due to double locking of an inode's log mutex by the same task, which lockdep reports with: [23045.433975] ============================================= [23045.434748] [ INFO: possible recursive locking detected ] [23045.435426] 4.7.0-rc6-btrfs-next-34+ #1 Not tainted [23045.436044] --------------------------------------------- [23045.436044] xfs_io/3688 is trying to acquire lock: [23045.436044] (&ei->log_mutex){+.+...}, at: [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] but task is already holding lock: [23045.436044] (&ei->log_mutex){+.+...}, at: [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] other info that might help us debug this: [23045.436044] Possible unsafe locking scenario: [23045.436044] CPU0 [23045.436044] ---- [23045.436044] lock(&ei->log_mutex); [23045.436044] lock(&ei->log_mutex); [23045.436044] *** DEADLOCK *** [23045.436044] May be due to missing lock nesting notation [23045.436044] 3 locks held by xfs_io/3688: [23045.436044] #0: (&sb->s_type->i_mutex_key#15){+.+...}, at: [<ffffffffa035f2ae>] btrfs_sync_file+0x14e/0x425 [btrfs] [23045.436044] #1: (sb_internal#2){.+.+.+}, at: [<ffffffff8118446b>] __sb_start_write+0x5f/0xb0 [23045.436044] #2: (&ei->log_mutex){+.+...}, at: [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] stack backtrace: [23045.436044] CPU: 4 PID: 3688 Comm: xfs_io Not tainted 4.7.0-rc6-btrfs-next-34+ #1 [23045.436044] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.1-0-gb3ef39f-prebuilt.qemu-project.org 04/01/2014 [23045.436044] 0000000000000000 ffff88022f5f7860 ffffffff8127074d ffffffff82a54b70 [23045.436044] ffffffff82a54b70 ffff88022f5f7920 ffffffff81092897 ffff880228015d68 [23045.436044] 0000000000000000 ffffffff82a54b70 ffffffff829c3f00 ffff880228015d68 [23045.436044] Call Trace: [23045.436044] [<ffffffff8127074d>] dump_stack+0x67/0x90 [23045.436044] [<ffffffff81092897>] __lock_acquire+0xcbb/0xe4e [23045.436044] [<ffffffff8109155f>] ? mark_lock+0x24/0x201 [23045.436044] [<ffffffff8109179a>] ? mark_held_locks+0x5e/0x74 [23045.436044] [<ffffffff81092de0>] lock_acquire+0x12f/0x1c3 [23045.436044] [<ffffffff81092de0>] ? lock_acquire+0x12f/0x1c3 [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffff814a51a4>] mutex_lock_nested+0x77/0x3a7 [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffffa039705e>] ? btrfs_release_delayed_node+0xb/0xd [btrfs] [23045.436044] [<ffffffffa038552d>] btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffffa038552d>] ? btrfs_log_inode+0x13a/0xc95 [btrfs] [23045.436044] [<ffffffff810a0ed1>] ? vprintk_emit+0x453/0x465 [23045.436044] [<ffffffffa0385a61>] btrfs_log_inode+0x66e/0xc95 [btrfs] [23045.436044] [<ffffffffa03c084d>] log_new_dir_dentries+0x26c/0x359 [btrfs] [23045.436044] [<ffffffffa03865aa>] btrfs_log_inode_parent+0x4a6/0x628 [btrfs] [23045.436044] [<ffffffffa0387552>] btrfs_log_dentry_safe+0x5a/0x75 [btrfs] [23045.436044] [<ffffffffa035f464>] btrfs_sync_file+0x304/0x425 [btrfs] [23045.436044] [<ffffffff811acaf4>] vfs_fsync_range+0x8c/0x9e [23045.436044] [<ffffffff811acb22>] vfs_fsync+0x1c/0x1e [23045.436044] [<ffffffff811acc79>] do_fsync+0x31/0x4a [23045.436044] [<ffffffff811ace99>] SyS_fsync+0x10/0x14 [23045.436044] [<ffffffff814a88e5>] entry_SYSCALL_64_fastpath+0x18/0xa8 [23045.436044] [<ffffffff8108f039>] ? trace_hardirqs_off_caller+0x3f/0xaa An example reproducer for this is: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ mkdir /mnt/dir $ touch /mnt/dir/foo $ sync $ mv /mnt/dir/foo /mnt/dir/bar $ touch /mnt/dir/foo $ xfs_io -c "fsync" /mnt/dir/bar This is because while logging the inode of file bar we end up logging its parent directory (since its inode has an unlink_trans field matching the current transaction id due to the rename operation), which in turn logs the inodes for all its new dentries, so that the new inode for the new file named foo gets logged which in turn triggered another logging attempt for the inode we are fsync'ing, since that inode had an old name that corresponds to the name of the new inode. So fix this by ensuring that when logging the inode for a new dentry that has a name matching an old name of some other inode, we don't log again the original inode that we are fsync'ing. Fixes: 44f714dae50a ("Btrfs: improve performance on fsync against new inode after rename/unlink") Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com> Signed-off-by: Chris Mason <clm@fb.com> --- fs/btrfs/file.c | 2 +- fs/btrfs/tree-log.c | 5 +++-- fs/btrfs/tree-log.h | 5 ++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3391f2adf0c86..fea31a4a6e368 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2070,7 +2070,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } trans->sync = true; - btrfs_init_log_ctx(&ctx); + btrfs_init_log_ctx(&ctx, inode); ret = btrfs_log_dentry_safe(trans, root, dentry, start, end, &ctx); if (ret < 0) { diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index ffe92da81b8ac..e935035ac0343 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2823,7 +2823,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, */ mutex_unlock(&root->log_mutex); - btrfs_init_log_ctx(&root_log_ctx); + btrfs_init_log_ctx(&root_log_ctx, NULL); mutex_lock(&log_root_tree->log_mutex); atomic_inc(&log_root_tree->log_batch); @@ -4757,7 +4757,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (ret < 0) { err = ret; goto out_unlock; - } else if (ret > 0) { + } else if (ret > 0 && ctx && + other_ino != btrfs_ino(ctx->inode)) { struct btrfs_key inode_key; struct inode *other_inode; diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index a9f1b75d080d3..ab858e31ccbc2 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h @@ -30,15 +30,18 @@ struct btrfs_log_ctx { int log_transid; int io_err; bool log_new_dentries; + struct inode *inode; struct list_head list; }; -static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) +static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, + struct inode *inode) { ctx->log_ret = 0; ctx->log_transid = 0; ctx->io_err = 0; ctx->log_new_dentries = false; + ctx->inode = inode; INIT_LIST_HEAD(&ctx->list); } From 5bb53c0fb8e0fc2e34287d5d0fcadc784de913e1 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin <aryabinin@virtuozzo.com> Date: Tue, 23 Aug 2016 18:55:31 +0300 Subject: [PATCH 487/513] fs/block_dev: fix potential NULL ptr deref in freeze_bdev() Calling freeze_bdev() twice on the same block device without mounted filesystem get_super() will return NULL, which will lead to NULL-ptr dereference later in drop_super(). Check get_super() result to fix that. Note, that this is a purely theoretical issue. We have only 3 freeze_bdev() callers. 2 of them are in filesystem code and used on a device with mounted fs. The third one in lock_fs() has protection in upper-layer code against freezing block device the second time without thawing it first. Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@fb.com> --- fs/block_dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/block_dev.c b/fs/block_dev.c index e17bdbdfe9b1c..08ae99343d92f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -249,7 +249,8 @@ struct super_block *freeze_bdev(struct block_device *bdev) * thaw_bdev drops it. */ sb = get_super(bdev); - drop_super(sb); + if (sb) + drop_super(sb); mutex_unlock(&bdev->bd_fsfreeze_mutex); return sb; } From 468c298ad3ed3f0d94a65f8ca00f6bfc6c2b4e33 Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@fb.com> Date: Thu, 25 Aug 2016 08:56:44 -0600 Subject: [PATCH 488/513] Revert "floppy: fix open(O_ACCMODE) for ioctl-only open" This reverts commit ff06db1efb2ad6db06eb5b99b88a0c15a9cc9b0e. --- drivers/block/floppy.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index b71a9c7670095..c557057fe8ae0 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,6 +3663,11 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; + if (!(mode & (FMODE_READ|FMODE_WRITE))) { + res = -EINVAL; + goto out; + } + res = -ENXIO; if (!floppy_track_buffer) { @@ -3706,15 +3711,13 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - if (mode & (FMODE_READ|FMODE_WRITE)) { - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; - } + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; res = -EROFS; From f2791e7eadf437633f30faa51b30878cf15650be Mon Sep 17 00:00:00 2001 From: Jens Axboe <axboe@fb.com> Date: Thu, 25 Aug 2016 08:56:51 -0600 Subject: [PATCH 489/513] Revert "floppy: refactor open() flags handling" This reverts commit 09954bad448791ef01202351d437abdd9497a804. --- drivers/block/floppy.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c557057fe8ae0..e3d8e4ced4a23 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -3663,11 +3663,6 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) opened_bdev[drive] = bdev; - if (!(mode & (FMODE_READ|FMODE_WRITE))) { - res = -EINVAL; - goto out; - } - res = -ENXIO; if (!floppy_track_buffer) { @@ -3711,20 +3706,21 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (UFDCS->rawcmd == 1) UFDCS->rawcmd = 2; - UDRS->last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); - check_disk_change(bdev); - if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) - goto out; - - res = -EROFS; - - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) - goto out; - + if (!(mode & FMODE_NDELAY)) { + if (mode & (FMODE_READ|FMODE_WRITE)) { + UDRS->last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags); + check_disk_change(bdev); + if (test_bit(FD_DISK_CHANGED_BIT, &UDRS->flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &UDRS->flags)) + goto out; + } + res = -EROFS; + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &UDRS->flags)) + goto out; + } mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; From 07c72d7d54d138eb2ca37709a5a3d55fbcc01536 Mon Sep 17 00:00:00 2001 From: Tatyana Nikolova <tatyana.e.nikolova@intel.com> Date: Wed, 24 Aug 2016 13:59:17 -0500 Subject: [PATCH 490/513] i40iw: Send last streaming mode message for loopback connections Send a zero length last streaming mode message for loopback connections to synchronize between accepting QP and connecting QP. This avoids data transfer to start on the accepting QP before the connecting QP is in RTS. Also remove function i40iw_loopback_nop() as it is no longer used. Fixes: f27b4746f378 ("i40iw: add connection management code") Signed-off-by: Tatyana Nikolova <tatyana.e.nikolova@intel.com> Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com> Signed-off-by: Doug Ledford <dledford@redhat.com> --- drivers/infiniband/hw/i40iw/i40iw_cm.c | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_cm.c b/drivers/infiniband/hw/i40iw/i40iw_cm.c index 64343985e0b7d..7ca0638579c0b 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_cm.c +++ b/drivers/infiniband/hw/i40iw/i40iw_cm.c @@ -3346,26 +3346,6 @@ int i40iw_cm_disconn(struct i40iw_qp *iwqp) return 0; } -/** - * i40iw_loopback_nop - Send a nop - * @qp: associated hw qp - */ -static void i40iw_loopback_nop(struct i40iw_sc_qp *qp) -{ - u64 *wqe; - u64 header; - - wqe = qp->qp_uk.sq_base->elem; - set_64bit_val(wqe, 0, 0); - set_64bit_val(wqe, 8, 0); - set_64bit_val(wqe, 16, 0); - - header = LS_64(I40IWQP_OP_NOP, I40IWQPSQ_OPCODE) | - LS_64(0, I40IWQPSQ_SIGCOMPL) | - LS_64(qp->qp_uk.swqe_polarity, I40IWQPSQ_VALID); - set_64bit_val(wqe, 24, header); -} - /** * i40iw_qp_disconnect - free qp and close cm * @iwqp: associate qp for the connection @@ -3638,7 +3618,7 @@ int i40iw_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) } else { if (iwqp->page) iwqp->sc_qp.qp_uk.sq_base = kmap(iwqp->page); - i40iw_loopback_nop(&iwqp->sc_qp); + dev->iw_priv_qp_ops->qp_send_lsmm(&iwqp->sc_qp, NULL, 0, 0); } if (iwqp->page) From 2b721f20770ccbca4d3dad58e1bd44aa570efb3f Mon Sep 17 00:00:00 2001 From: Daniel Vetter <daniel.vetter@ffwll.ch> Date: Wed, 10 Aug 2016 18:52:38 +0200 Subject: [PATCH 491/513] drm: Protect fb_defio in drivers with CONFIG_KMS_FBDEV_EMULATION For reasons that entirely elude me fb.h exposes all the structures, even when it is not enabled. Except for special stuff like fb_defio. Which means all the drivers which haven't yet switched over to the defio support in the helpers and still roll their own, will fail to compile when fbdev emulation is disabled. Protect just those bits, as a gnarly reminder that conversion to the core defio helpers would be good. Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch> Link: http://patchwork.freedesktop.org/patch/msgid/1470847958-28465-6-git-send-email-daniel.vetter@ffwll.ch Signed-off-by: Dave Airlie <airlied@redhat.com> --- drivers/gpu/drm/qxl/qxl_fb.c | 4 ++++ drivers/gpu/drm/udl/udl_fb.c | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c index df2657051afd7..28c1423049c56 100644 --- a/drivers/gpu/drm/qxl/qxl_fb.c +++ b/drivers/gpu/drm/qxl/qxl_fb.c @@ -73,10 +73,12 @@ static void qxl_fb_image_init(struct qxl_fb_image *qxl_fb_image, } } +#ifdef CONFIG_DRM_FBDEV_EMULATION static struct fb_deferred_io qxl_defio = { .delay = QXL_DIRTY_DELAY, .deferred_io = drm_fb_helper_deferred_io, }; +#endif static struct fb_ops qxlfb_ops = { .owner = THIS_MODULE, @@ -313,8 +315,10 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev, goto out_destroy_fbi; } +#ifdef CONFIG_DRM_FBDEV_EMULATION info->fbdefio = &qxl_defio; fb_deferred_io_init(info); +#endif qdev->fbdev_info = info; qdev->fbdev_qfb = &qfbdev->qfb; diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c index d5df555aeba02..9688bfa92ccd3 100644 --- a/drivers/gpu/drm/udl/udl_fb.c +++ b/drivers/gpu/drm/udl/udl_fb.c @@ -203,6 +203,7 @@ static int udl_fb_open(struct fb_info *info, int user) ufbdev->fb_count++; +#ifdef CONFIG_DRM_FBDEV_EMULATION if (fb_defio && (info->fbdefio == NULL)) { /* enable defio at last moment if not disabled by client */ @@ -218,6 +219,7 @@ static int udl_fb_open(struct fb_info *info, int user) info->fbdefio = fbdefio; fb_deferred_io_init(info); } +#endif pr_notice("open /dev/fb%d user=%d fb_info=%p count=%d\n", info->node, user, info, ufbdev->fb_count); @@ -235,12 +237,14 @@ static int udl_fb_release(struct fb_info *info, int user) ufbdev->fb_count--; +#ifdef CONFIG_DRM_FBDEV_EMULATION if ((ufbdev->fb_count == 0) && (info->fbdefio)) { fb_deferred_io_cleanup(info); kfree(info->fbdefio); info->fbdefio = NULL; info->fbops->fb_mmap = udl_fb_mmap; } +#endif pr_warn("released /dev/fb%d user=%d count=%d\n", info->node, user, ufbdev->fb_count); From 869c554808ccf7ddd25be5317073b88ceddb8507 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@intel.com> Date: Thu, 25 Aug 2016 14:11:43 -0600 Subject: [PATCH 492/513] mmc: fix use-after-free of struct request We call mmc_req_is_special() after having processed a request, but it could be freed after that. Check that ahead of time, and use the cached value. Reported-by: Hans de Goede <hdegoede@redhat.com> Tested-by: Hans de Goede <hdegoede@redhat.com> Fixes: c2df40dfb8c0 ("drivers: use req op accessor") Signed-off-by: Jens Axboe <axboe@fb.com> --- drivers/mmc/card/block.c | 4 ++-- drivers/mmc/card/queue.c | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index 82503e6f04b31..2206d4477dbbd 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -2151,6 +2151,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) struct mmc_card *card = md->queue.card; struct mmc_host *host = card->host; unsigned long flags; + bool req_is_special = mmc_req_is_special(req); if (req && !mq->mqrq_prev->req) /* claim host only for the first request */ @@ -2191,8 +2192,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req) } out: - if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || - mmc_req_is_special(req)) + if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || req_is_special) /* * Release host when there are no more requests * and after special request(discard, flush) is done. diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c index 29578e98603d6..708057261b389 100644 --- a/drivers/mmc/card/queue.c +++ b/drivers/mmc/card/queue.c @@ -65,6 +65,8 @@ static int mmc_queue_thread(void *d) spin_unlock_irq(q->queue_lock); if (req || mq->mqrq_prev->req) { + bool req_is_special = mmc_req_is_special(req); + set_current_state(TASK_RUNNING); mq->issue_fn(mq, req); cond_resched(); @@ -80,7 +82,7 @@ static int mmc_queue_thread(void *d) * has been finished. Do not assign it to previous * request. */ - if (mmc_req_is_special(req)) + if (req_is_special) mq->mqrq_cur->req = NULL; mq->mqrq_prev->brq.mrq.data = NULL; From 699e36e5b8e9f77b2be4c23f0b309e53be4b2880 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni <alexandre.belloni@free-electrons.com> Date: Tue, 23 Aug 2016 10:44:02 +0200 Subject: [PATCH 493/513] clocksource/drivers/timer-atmel-pit: Enable mck clock mck is needed to get the PIT working. Explicitly prepare_enable it instead of assuming it is enabled. This solves an issue where the system is freezing when the ETM/ETB drivers are enabled. Reported-by: Olivier Schonken <olivier.schonken@gmail.com> Reviewed-by: Boris Brezillon <boris.brezillon@free-electrons.com> Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com> Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> --- drivers/clocksource/timer-atmel-pit.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/clocksource/timer-atmel-pit.c b/drivers/clocksource/timer-atmel-pit.c index 1ffac0cb0cb78..3494bc5a21d55 100644 --- a/drivers/clocksource/timer-atmel-pit.c +++ b/drivers/clocksource/timer-atmel-pit.c @@ -261,6 +261,12 @@ static int __init at91sam926x_pit_dt_init(struct device_node *node) return PTR_ERR(data->mck); } + ret = clk_prepare_enable(data->mck); + if (ret) { + pr_err("Unable to enable mck\n"); + return ret; + } + /* Get the interrupts property */ data->irq = irq_of_parse_and_map(node, 0); if (!data->irq) { From 4d0e701659aa869a445823149e172e23faa6edac Mon Sep 17 00:00:00 2001 From: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Date: Wed, 17 Aug 2016 12:22:33 +0200 Subject: [PATCH 494/513] drivers/clocksource/pistachio: Fix memory corruption in init Driver init code incorrectly uses the block base address and as a result clears clocksource structure's fields instead of the hardware registers. Commit 09a998201649 ("timekeeping: Lift clocksource cacheline restriction") has changed the offsets within pistachio_clocksource structure and what has previously gone unnoticed now leads to a kernel panic during boot. Signed-off-by: Marcin Nowakowski <marcin.nowakowski@imgtec.com> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> --- drivers/clocksource/time-pistachio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/clocksource/time-pistachio.c b/drivers/clocksource/time-pistachio.c index a7d9a08e4b0e3..a8e6c7df853d2 100644 --- a/drivers/clocksource/time-pistachio.c +++ b/drivers/clocksource/time-pistachio.c @@ -202,10 +202,10 @@ static int __init pistachio_clksrc_of_init(struct device_node *node) rate = clk_get_rate(fast_clk); /* Disable irq's for clocksource usage */ - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 0); - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 1); - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 2); - gpt_writel(&pcs_gpt.base, 0, TIMER_IRQ_MASK, 3); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 0); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 1); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 2); + gpt_writel(pcs_gpt.base, 0, TIMER_IRQ_MASK, 3); /* Enable timer block */ writel(TIMER_ME_GLOBAL, pcs_gpt.base); From b53e7d000d9e6e9fd2c6eb6b82d2783c67fd599e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai <wens@csie.org> Date: Thu, 25 Aug 2016 14:26:59 +0800 Subject: [PATCH 495/513] clocksource/drivers/sun4i: Clear interrupts after stopping timer in probe function The bootloader (U-boot) sometimes uses this timer for various delays. It uses it as a ongoing counter, and does comparisons on the current counter value. The timer counter is never stopped. In some cases when the user interacts with the bootloader, or lets it idle for some time before loading Linux, the timer may expire, and an interrupt will be pending. This results in an unexpected interrupt when the timer interrupt is enabled by the kernel, at which point the event_handler isn't set yet. This results in a NULL pointer dereference exception, panic, and no way to reboot. Clear any pending interrupts after we stop the timer in the probe function to avoid this. Cc: stable@vger.kernel.org Signed-off-by: Chen-Yu Tsai <wens@csie.org> Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> Acked-by: Maxime Ripard <maxime.ripard@free-electrons.com> --- drivers/clocksource/sun4i_timer.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/clocksource/sun4i_timer.c b/drivers/clocksource/sun4i_timer.c index 97669ee4df2a6..c83452cacb412 100644 --- a/drivers/clocksource/sun4i_timer.c +++ b/drivers/clocksource/sun4i_timer.c @@ -123,12 +123,16 @@ static struct clock_event_device sun4i_clockevent = { .set_next_event = sun4i_clkevt_next_event, }; +static void sun4i_timer_clear_interrupt(void) +{ + writel(TIMER_IRQ_EN(0), timer_base + TIMER_IRQ_ST_REG); +} static irqreturn_t sun4i_timer_interrupt(int irq, void *dev_id) { struct clock_event_device *evt = (struct clock_event_device *)dev_id; - writel(0x1, timer_base + TIMER_IRQ_ST_REG); + sun4i_timer_clear_interrupt(); evt->event_handler(evt); return IRQ_HANDLED; @@ -208,6 +212,9 @@ static int __init sun4i_timer_init(struct device_node *node) /* Make sure timer is stopped before playing with interrupts */ sun4i_clkevt_time_stop(0); + /* clear timer0 interrupt */ + sun4i_timer_clear_interrupt(); + sun4i_clockevent.cpumask = cpu_possible_mask; sun4i_clockevent.irq = irq; From 079d37df3397d48aab0f014986c1b0a1ca6256aa Mon Sep 17 00:00:00 2001 From: Eric Ren <zren@suse.com> Date: Thu, 25 Aug 2016 17:20:59 +0800 Subject: [PATCH 496/513] dlm: fix malfunction of dlm_tool caused by debugfs changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the current kernel, `dlm_tool lockdebug` fails as below: "dlm_tool lockdebug ED0BD86DCE724393918A1AE8FDBF1EE3 can't open /sys/kernel/debug/dlm/ED0BD86DCE724393918A1AE8FDBF1EE3: Operation not permitted" This is because table_open() depends on file->f_op to tell which seq_file ops should be passed down. But, the original file ops in file->f_op is replaced by "debugfs_full_proxy_file_operations" with commit 49d200deaa68 ("debugfs: prevent access to removed files' private data"). Currently, I can think up 2 solutions: 1st, replace debugfs_create_file() with debugfs_create_file_unsafe(); 2nd, make different table_open#() accordingly. The 1st one is neat, but I don't thoroughly understand its risk. Maybe someone has a better one. Signed-off-by: Eric Ren <zren@suse.com> Signed-off-by: David Teigland <teigland@redhat.com> --- fs/dlm/debug_fs.c | 62 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index eea64912c9c0a..466f7d60edc27 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c @@ -607,20 +607,54 @@ static const struct file_operations format2_fops; static const struct file_operations format3_fops; static const struct file_operations format4_fops; -static int table_open(struct inode *inode, struct file *file) +static int table_open1(struct inode *inode, struct file *file) { struct seq_file *seq; - int ret = -1; + int ret; - if (file->f_op == &format1_fops) - ret = seq_open(file, &format1_seq_ops); - else if (file->f_op == &format2_fops) - ret = seq_open(file, &format2_seq_ops); - else if (file->f_op == &format3_fops) - ret = seq_open(file, &format3_seq_ops); - else if (file->f_op == &format4_fops) - ret = seq_open(file, &format4_seq_ops); + ret = seq_open(file, &format1_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open2(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &format2_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open3(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &format3_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; /* the dlm_ls */ + return 0; +} + +static int table_open4(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + ret = seq_open(file, &format4_seq_ops); if (ret) return ret; @@ -631,7 +665,7 @@ static int table_open(struct inode *inode, struct file *file) static const struct file_operations format1_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open1, .read = seq_read, .llseek = seq_lseek, .release = seq_release @@ -639,7 +673,7 @@ static const struct file_operations format1_fops = { static const struct file_operations format2_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open2, .read = seq_read, .llseek = seq_lseek, .release = seq_release @@ -647,7 +681,7 @@ static const struct file_operations format2_fops = { static const struct file_operations format3_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open3, .read = seq_read, .llseek = seq_lseek, .release = seq_release @@ -655,7 +689,7 @@ static const struct file_operations format3_fops = { static const struct file_operations format4_fops = { .owner = THIS_MODULE, - .open = table_open, + .open = table_open4, .read = seq_read, .llseek = seq_lseek, .release = seq_release From 101b29a204f87c99377faa53bd378f101ebb1824 Mon Sep 17 00:00:00 2001 From: Johannes Berg <johannes.berg@intel.com> Date: Thu, 25 Aug 2016 15:16:45 -0700 Subject: [PATCH 497/513] byteswap: don't use __builtin_bswap*() with sparse Although sparse declares __builtin_bswap*(), it can't actually do constant folding inside them (yet). As such, things like switch (protocol) { case htons(ETH_P_IP): break; } which we do all over the place cause sparse to warn that it expects a constant instead of a function call. Disable __HAVE_BUILTIN_BSWAP*__ if __CHECKER__ is defined to avoid this. Fixes: 7322dd755e7d ("byteswap: try to avoid __builtin_constant_p gcc bug") Link: http://lkml.kernel.org/r/1470914102-26389-1-git-send-email-johannes@sipsolutions.net Signed-off-by: Johannes Berg <johannes.berg@intel.com> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/compiler-gcc.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index e2949397c19b0..8dbc8929a6a00 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -242,7 +242,11 @@ */ #define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) -#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +/* + * sparse (__CHECKER__) pretends to be gcc, but can't do constant + * folding in __builtin_bswap*() (yet), so don't set these for it. + */ +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__) #if GCC_VERSION >= 40400 #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ @@ -250,7 +254,7 @@ #if GCC_VERSION >= 40800 #define __HAVE_BUILTIN_BSWAP16__ #endif -#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ #if GCC_VERSION >= 50000 #define KASAN_ABI_VERSION 4 From 8582fb59f7ee012f2b8118c2bb95168c1622a4c5 Mon Sep 17 00:00:00 2001 From: Joe Perches <joe@perches.com> Date: Thu, 25 Aug 2016 15:16:48 -0700 Subject: [PATCH 498/513] get_maintainer: quiet noisy implicit -f vcs_file_exists checking Checking command line filenames that are outside the git tree can emit a noisy and confusing message. Quiet that message by redirecting stderr. Verify that the command was executed successfully. Fixes: 4cad35a7ca69 ("get_maintainer.pl: reduce need for command-line option -f") Link: http://lkml.kernel.org/r/1970a1d2fecb258e384e2e4fdaacdc9ccf3e30a4.1470955439.git.joe@perches.com Signed-off-by: Joe Perches <joe@perches.com> Reported-by: Wolfram Sang <wsa@the-dreams.de> Tested-by: Wolfram Sang <wsa@the-dreams.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- scripts/get_maintainer.pl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 49a00d54b835f..aed4511f0304e 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -2136,9 +2136,11 @@ sub vcs_file_exists { my $cmd = $VCS_cmds{"file_exists_cmd"}; $cmd =~ s/(\$\w+)/$1/eeg; # interpolate $cmd - + $cmd .= " 2>&1"; $exists = &{$VCS_cmds{"execute_cmd"}}($cmd); + return 0 if ($? != 0); + return $exists; } From e7d316a02f683864a12389f8808570e37fb90aa3 Mon Sep 17 00:00:00 2001 From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> Date: Thu, 25 Aug 2016 15:16:51 -0700 Subject: [PATCH 499/513] sysctl: handle error writing UINT_MAX to u32 fields We have scripts which write to certain fields on 3.18 kernels but this seems to be failing on 4.4 kernels. An entry which we write to here is xfrm_aevent_rseqth which is u32. echo 4294967295 > /proc/sys/net/core/xfrm_aevent_rseqth Commit 230633d109e3 ("kernel/sysctl.c: detect overflows when converting to int") prevented writing to sysctl entries when integer overflow occurs. However, this does not apply to unsigned integers. Heinrich suggested that we introduce a new option to handle 64 bit limits and set min as 0 and max as UINT_MAX. This might not work as it leads to issues similar to __do_proc_doulongvec_minmax. Alternatively, we would need to change the datatype of the entry to 64 bit. static int __do_proc_doulongvec_minmax(void *data, struct ctl_table { i = (unsigned long *) data; //This cast is causing to read beyond the size of data (u32) vleft = table->maxlen / sizeof(unsigned long); //vleft is 0 because maxlen is sizeof(u32) which is lesser than sizeof(unsigned long) on x86_64. Introduce a new proc handler proc_douintvec. Individual proc entries will need to be updated to use the new handler. [akpm@linux-foundation.org: coding-style fixes] Fixes: 230633d109e3 ("kernel/sysctl.c:detect overflows when converting to int") Link: http://lkml.kernel.org/r/1471479806-5252-1-git-send-email-subashab@codeaurora.org Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org> Cc: Heinrich Schuchardt <xypron.glpk@gmx.de> Cc: Kees Cook <keescook@chromium.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Ingo Molnar <mingo@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/sysctl.h | 2 ++ kernel/sysctl.c | 45 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 697e160c78d0d..a4f7203a90170 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -42,6 +42,8 @@ extern int proc_dostring(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int proc_douintvec(struct ctl_table *, int, + void __user *, size_t *, loff_t *); extern int proc_dointvec_minmax(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_jiffies(struct ctl_table *, int, diff --git a/kernel/sysctl.c b/kernel/sysctl.c index b43d0b27c1feb..a13bbdaab47dc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2140,6 +2140,21 @@ static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, return 0; } +static int do_proc_douintvec_conv(bool *negp, unsigned long *lvalp, + int *valp, + int write, void *data) +{ + if (write) { + if (*negp) + return -EINVAL; + *valp = *lvalp; + } else { + unsigned int val = *valp; + *lvalp = (unsigned long)val; + } + return 0; +} + static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, @@ -2259,8 +2274,27 @@ static int do_proc_dointvec(struct ctl_table *table, int write, int proc_dointvec(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { - return do_proc_dointvec(table,write,buffer,lenp,ppos, - NULL,NULL); + return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); +} + +/** + * proc_douintvec - read a vector of unsigned integers + * @table: the sysctl table + * @write: %TRUE if this is a write to the sysctl file + * @buffer: the user buffer + * @lenp: the size of the user buffer + * @ppos: file position + * + * Reads/writes up to table->maxlen/sizeof(unsigned int) unsigned integer + * values from/to the user buffer, treated as an ASCII string. + * + * Returns 0 on success. + */ +int proc_douintvec(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return do_proc_dointvec(table, write, buffer, lenp, ppos, + do_proc_douintvec_conv, NULL); } /* @@ -2858,6 +2892,12 @@ int proc_dointvec(struct ctl_table *table, int write, return -ENOSYS; } +int proc_douintvec(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + return -ENOSYS; +} + int proc_dointvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2903,6 +2943,7 @@ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, * exception granted :-) */ EXPORT_SYMBOL(proc_dointvec); +EXPORT_SYMBOL(proc_douintvec); EXPORT_SYMBOL(proc_dointvec_jiffies); EXPORT_SYMBOL(proc_dointvec_minmax); EXPORT_SYMBOL(proc_dointvec_userhz_jiffies); From 804dd150468cfd920d92d4b3cf00536fedef3902 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli <aarcange@redhat.com> Date: Thu, 25 Aug 2016 15:16:57 -0700 Subject: [PATCH 500/513] soft_dirty: fix soft_dirty during THP split While adding proper userfaultfd_wp support with bits in pagetable and swap entry to avoid false positives WP userfaults through swap/fork/ KSM/etc, I've been adding a framework that mostly mirrors soft dirty. So I noticed in one place I had to add uffd_wp support to the pagetables that wasn't covered by soft_dirty and I think it should have. Example: in the THP migration code migrate_misplaced_transhuge_page() pmd_mkdirty is called unconditionally after mk_huge_pmd. entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); That sets soft dirty too (it's a false positive for soft dirty, the soft dirty bit could be more finegrained and transfer the bit like uffd_wp will do.. pmd/pte_uffd_wp() enforces the invariant that when it's set pmd/pte_write is not set). However in the THP split there's no unconditional pmd_mkdirty after mk_huge_pmd and pte_swp_mksoft_dirty isn't called after the migration entry is created. The code sets the dirty bit in the struct page instead of setting it in the pagetable (which is fully equivalent as far as the real dirty bit is concerned, as the whole point of pagetable bits is to be eventually flushed out of to the page, but that is not equivalent for the soft-dirty bit that gets lost in translation). This was found by code review only and totally untested as I'm working to actually replace soft dirty and I don't have time to test potential soft dirty bugfixes as well :). Transfer the soft_dirty from pmd to pte during THP splits. This fix avoids losing the soft_dirty bit and avoids userland memory corruption in the checkpoint. Fixes: eef1b3ba053aa6 ("thp: implement split_huge_pmd()") Link: http://lkml.kernel.org/r/1471610515-30229-2-git-send-email-aarcange@redhat.com Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Acked-by: Pavel Emelyanov <xemul@virtuozzo.com> Cc: "Kirill A. Shutemov" <kirill@shutemov.name> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/huge_memory.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2373f0a7d3405..2db2112aa31ed 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1512,7 +1512,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, struct page *page; pgtable_t pgtable; pmd_t _pmd; - bool young, write, dirty; + bool young, write, dirty, soft_dirty; unsigned long addr; int i; @@ -1546,6 +1546,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, write = pmd_write(*pmd); young = pmd_young(*pmd); dirty = pmd_dirty(*pmd); + soft_dirty = pmd_soft_dirty(*pmd); pmdp_huge_split_prepare(vma, haddr, pmd); pgtable = pgtable_trans_huge_withdraw(mm, pmd); @@ -1562,6 +1563,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, swp_entry_t swp_entry; swp_entry = make_migration_entry(page + i, write); entry = swp_entry_to_pte(swp_entry); + if (soft_dirty) + entry = pte_swp_mksoft_dirty(entry); } else { entry = mk_pte(page + i, vma->vm_page_prot); entry = maybe_mkwrite(entry, vma); @@ -1569,6 +1572,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, entry = pte_wrprotect(entry); if (!young) entry = pte_mkold(entry); + if (soft_dirty) + entry = pte_mksoft_dirty(entry); } if (dirty) SetPageDirty(page + i); From ae6c33ba6e37eea3012fe2640b22400ef3f2d0f3 Mon Sep 17 00:00:00 2001 From: Nicolas Iooss <nicolas.iooss_linux@m4x.org> Date: Thu, 25 Aug 2016 15:17:00 -0700 Subject: [PATCH 501/513] printk: fix parsing of "brl=" option Commit bbeddf52adc1 ("printk: move braille console support into separate braille.[ch] files") moved the parsing of braille-related options into _braille_console_setup(), changing the type of variable str from char* to char**. In this commit, memcmp(str, "brl,", 4) was correctly updated to memcmp(*str, "brl,", 4) but not memcmp(str, "brl=", 4). Update the code to make "brl=" option work again and replace memcmp() with strncmp() to make the compiler able to detect such an issue. Fixes: bbeddf52adc1 ("printk: move braille console support into separate braille.[ch] files") Link: http://lkml.kernel.org/r/20160823165700.28952-1-nicolas.iooss_linux@m4x.org Signed-off-by: Nicolas Iooss <nicolas.iooss_linux@m4x.org> Cc: Joe Perches <joe@perches.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- kernel/printk/braille.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/printk/braille.c b/kernel/printk/braille.c index 276762f3a4607..d5760c42f042f 100644 --- a/kernel/printk/braille.c +++ b/kernel/printk/braille.c @@ -9,10 +9,10 @@ char *_braille_console_setup(char **str, char **brl_options) { - if (!memcmp(*str, "brl,", 4)) { + if (!strncmp(*str, "brl,", 4)) { *brl_options = ""; *str += 4; - } else if (!memcmp(str, "brl=", 4)) { + } else if (!strncmp(*str, "brl=", 4)) { *brl_options = *str + 4; *str = strchr(*brl_options, ','); if (!*str) From a5ff1b34e16c203397542d98c49c5c7783193946 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada <yamada.masahiro@socionext.com> Date: Thu, 25 Aug 2016 15:17:02 -0700 Subject: [PATCH 502/513] treewide: replace config_enabled() with IS_ENABLED() (2nd round) Commit 97f2645f358b ("tree-wide: replace config_enabled() with IS_ENABLED()") mostly killed config_enabled(), but some new users have appeared for v4.8-rc1. They are all used for a boolean option, so can be replaced with IS_ENABLED() safely. Link: http://lkml.kernel.org/r/1471970749-24867-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com> Acked-by: Kees Cook <keescook@chromium.org> Acked-by: Peter Oberparleiter <oberpar@linux.vnet.ibm.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- arch/mips/include/asm/page.h | 4 ++-- arch/s390/kernel/setup.c | 6 ++---- arch/x86/mm/kaslr.c | 2 +- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index ea0cd9773914c..5f987598054f9 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -164,7 +164,7 @@ typedef struct { unsigned long pgprot; } pgprot_t; */ static inline unsigned long ___pa(unsigned long x) { - if (config_enabled(CONFIG_64BIT)) { + if (IS_ENABLED(CONFIG_64BIT)) { /* * For MIPS64 the virtual address may either be in one of * the compatibility segements ckseg0 or ckseg1, or it may @@ -173,7 +173,7 @@ static inline unsigned long ___pa(unsigned long x) return x < CKSEG0 ? XPHYSADDR(x) : CPHYSADDR(x); } - if (!config_enabled(CONFIG_EVA)) { + if (!IS_ENABLED(CONFIG_EVA)) { /* * We're using the standard MIPS32 legacy memory map, ie. * the address x is going to be in kseg0 or kseg1. We can diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index ba5f456edaa90..7f7ba5f23f130 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -204,11 +204,9 @@ static void __init conmode_default(void) #endif } } else if (MACHINE_IS_KVM) { - if (sclp.has_vt220 && - config_enabled(CONFIG_SCLP_VT220_CONSOLE)) + if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE)) SET_CONSOLE_VT220; - else if (sclp.has_linemode && - config_enabled(CONFIG_SCLP_CONSOLE)) + else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE)) SET_CONSOLE_SCLP; else SET_CONSOLE_HVC; diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index ec8654f117d8e..bda8d5eef04d5 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -77,7 +77,7 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region) */ static inline bool kaslr_memory_enabled(void) { - return kaslr_enabled() && !config_enabled(CONFIG_KASAN); + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); } /* Initialize base and padding for each memory region randomized with KASLR */ From b32eaf71db6085f2ba54cf3ddf688bfc858219d0 Mon Sep 17 00:00:00 2001 From: Michal Hocko <mhocko@suse.com> Date: Thu, 25 Aug 2016 15:17:05 -0700 Subject: [PATCH 503/513] mm: clarify COMPACTION Kconfig text The current wording of the COMPACTION Kconfig help text doesn't emphasise that disabling COMPACTION might cripple the page allocator which relies on the compaction quite heavily for high order requests and an unexpected OOM can happen with the lack of compaction. Make sure we are vocal about that. Link: http://lkml.kernel.org/r/20160823091726.GK23577@dhcp22.suse.cz Signed-off-by: Michal Hocko <mhocko@suse.com> Cc: Markus Trippelsdorf <markus@trippelsdorf.de> Cc: Mel Gorman <mgorman@suse.de> Cc: Joonsoo Kim <js1304@gmail.com> Cc: Vlastimil Babka <vbabka@suse.cz> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/Kconfig | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mm/Kconfig b/mm/Kconfig index 78a23c5c302d9..be0ee11fa0d9e 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -262,7 +262,14 @@ config COMPACTION select MIGRATION depends on MMU help - Allows the compaction of memory for the allocation of huge pages. + Compaction is the only memory management component to form + high order (larger physically contiguous) memory blocks + reliably. The page allocator relies on compaction heavily and + the lack of the feature can lead to unexpected OOM killer + invocations for high order memory requests. You shouldn't + disable this option unless there really is a strong reason for + it and then we would be really interested to hear about that at + linux-mm@kvack.org. # # support for page migration From 358c07fcc3b60ab08d77f1684de8bd81bcf49a1a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann <arnd@arndb.de> Date: Thu, 25 Aug 2016 15:17:08 -0700 Subject: [PATCH 504/513] mm: memcontrol: avoid unused function warning A bugfix in v4.8-rc2 introduced a harmless warning when CONFIG_MEMCG_SWAP is disabled but CONFIG_MEMCG is enabled: mm/memcontrol.c:4085:27: error: 'mem_cgroup_id_get_online' defined but not used [-Werror=unused-function] static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) This moves the function inside of the #ifdef block that hides the calling function, to avoid the warning. Fixes: 1f47b61fb407 ("mm: memcontrol: fix swap counter leak on swapout from offline cgroup") Link: http://lkml.kernel.org/r/20160824113733.2776701-1-arnd@arndb.de Signed-off-by: Arnd Bergmann <arnd@arndb.de> Acked-by: Michal Hocko <mhocko@suse.com> Acked-by: Vladimir Davydov <vdavydov@virtuozzo.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/memcontrol.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2ff0289ad0613..9a6a51a7c4160 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4082,24 +4082,6 @@ static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n) atomic_add(n, &memcg->id.ref); } -static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) -{ - while (!atomic_inc_not_zero(&memcg->id.ref)) { - /* - * The root cgroup cannot be destroyed, so it's refcount must - * always be >= 1. - */ - if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { - VM_BUG_ON(1); - break; - } - memcg = parent_mem_cgroup(memcg); - if (!memcg) - memcg = root_mem_cgroup; - } - return memcg; -} - static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n) { if (atomic_sub_and_test(n, &memcg->id.ref)) { @@ -5821,6 +5803,24 @@ static int __init mem_cgroup_init(void) subsys_initcall(mem_cgroup_init); #ifdef CONFIG_MEMCG_SWAP +static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg) +{ + while (!atomic_inc_not_zero(&memcg->id.ref)) { + /* + * The root cgroup cannot be destroyed, so it's refcount must + * always be >= 1. + */ + if (WARN_ON_ONCE(memcg == root_mem_cgroup)) { + VM_BUG_ON(1); + break; + } + memcg = parent_mem_cgroup(memcg); + if (!memcg) + memcg = root_mem_cgroup; + } + return memcg; +} + /** * mem_cgroup_swapout - transfer a memsw charge to swap * @page: page whose memsw charge to transfer From 088bf2ff5d12e2e32ee52a4024fec26e582f44d3 Mon Sep 17 00:00:00 2001 From: Vegard Nossum <vegard.nossum@oracle.com> Date: Thu, 25 Aug 2016 15:17:11 -0700 Subject: [PATCH 505/513] fs/seq_file: fix out-of-bounds read seq_read() is a nasty piece of work, not to mention buggy. It has (I think) an old bug which allows unprivileged userspace to read beyond the end of m->buf. I was getting these: BUG: KASAN: slab-out-of-bounds in seq_read+0xcd2/0x1480 at addr ffff880116889880 Read of size 2713 by task trinity-c2/1329 CPU: 2 PID: 1329 Comm: trinity-c2 Not tainted 4.8.0-rc1+ #96 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014 Call Trace: kasan_object_err+0x1c/0x80 kasan_report_error+0x2cb/0x7e0 kasan_report+0x4e/0x80 check_memory_region+0x13e/0x1a0 kasan_check_read+0x11/0x20 seq_read+0xcd2/0x1480 proc_reg_read+0x10b/0x260 do_loop_readv_writev.part.5+0x140/0x2c0 do_readv_writev+0x589/0x860 vfs_readv+0x7b/0xd0 do_readv+0xd8/0x2c0 SyS_readv+0xb/0x10 do_syscall_64+0x1b3/0x4b0 entry_SYSCALL64_slow_path+0x25/0x25 Object at ffff880116889100, in cache kmalloc-4096 size: 4096 Allocated: PID = 1329 save_stack_trace+0x26/0x80 save_stack+0x46/0xd0 kasan_kmalloc+0xad/0xe0 __kmalloc+0x1aa/0x4a0 seq_buf_alloc+0x35/0x40 seq_read+0x7d8/0x1480 proc_reg_read+0x10b/0x260 do_loop_readv_writev.part.5+0x140/0x2c0 do_readv_writev+0x589/0x860 vfs_readv+0x7b/0xd0 do_readv+0xd8/0x2c0 SyS_readv+0xb/0x10 do_syscall_64+0x1b3/0x4b0 return_from_SYSCALL_64+0x0/0x6a Freed: PID = 0 (stack is not available) Memory state around the buggy address: ffff88011688a000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88011688a080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff88011688a100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88011688a180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88011688a200: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Disabling lock debugging due to kernel taint This seems to be the same thing that Dave Jones was seeing here: https://lkml.org/lkml/2016/8/12/334 There are multiple issues here: 1) If we enter the function with a non-empty buffer, there is an attempt to flush it. But it was not clearing m->from after doing so, which means that if we try to do this flush twice in a row without any call to traverse() in between, we are going to be reading from the wrong place -- the splat above, fixed by this patch. 2) If there's a short write to userspace because of page faults, the buffer may already contain multiple lines (i.e. pos has advanced by more than 1), but we don't save the progress that was made so the next call will output what we've already returned previously. Since that is a much less serious issue (and I have a headache after staring at seq_read() for the past 8 hours), I'll leave that for now. Link: http://lkml.kernel.org/r/1471447270-32093-1-git-send-email-vegard.nossum@oracle.com Signed-off-by: Vegard Nossum <vegard.nossum@oracle.com> Reported-by: Dave Jones <davej@codemonkey.org.uk> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- fs/seq_file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/seq_file.c b/fs/seq_file.c index 19f532e7d35e9..6dc4296eed62c 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -223,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) size -= n; buf += n; copied += n; - if (!m->count) + if (!m->count) { + m->from = 0; m->index++; + } if (!size) goto Done; } From d0e5845561c238619de9f5b77e0d763f4c331ca5 Mon Sep 17 00:00:00 2001 From: Dan Williams <dan.j.williams@intel.com> Date: Thu, 25 Aug 2016 15:17:14 -0700 Subject: [PATCH 506/513] dax: fix device-dax region base The data offset for a dax region needs to account for a reservation in the resource range. Otherwise, device-dax is allowing mappings directly into the memmap or device-info-block area with crash signatures like the following: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 IP: get_zone_device_page+0x11/0x30 Call Trace: follow_devmap_pmd+0x298/0x2c0 follow_page_mask+0x275/0x530 __get_user_pages+0xe3/0x750 __gfn_to_pfn_memslot+0x1b2/0x450 [kvm] tdp_page_fault+0x130/0x280 [kvm] kvm_mmu_page_fault+0x5f/0xf0 [kvm] handle_ept_violation+0x94/0x180 [kvm_intel] vmx_handle_exit+0x1d3/0x1440 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x81d/0x16a0 [kvm] kvm_vcpu_ioctl+0x33c/0x620 [kvm] do_vfs_ioctl+0xa2/0x5d0 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x1a/0xa4 Fixes: ab68f2622136 ("/dev/dax, pmem: direct access to persistent memory") Link: http://lkml.kernel.org/r/147205536732.1606.8994275381938837346.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reported-by: Abhilash Kumar Mulumudi <m.abhilash-kumar@hpe.com> Reported-by: Toshi Kani <toshi.kani@hpe.com> Tested-by: Toshi Kani <toshi.kani@hpe.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- drivers/dax/pmem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c index dfb168568af1a..1f01e98c83c7d 100644 --- a/drivers/dax/pmem.c +++ b/drivers/dax/pmem.c @@ -116,6 +116,9 @@ static int dax_pmem_probe(struct device *dev) if (rc) return rc; + /* adjust the dax_region resource to the start of data */ + res.start += le64_to_cpu(pfn_sb->dataoff); + nd_region = to_nd_region(dev->parent); dax_region = alloc_dax_region(dev, nd_region->id, &res, le32_to_cpu(pfn_sb->align), addr, PFN_DEV|PFN_MAP); From 11bd969fdefea3ac0cb9791224f1e09784e21e58 Mon Sep 17 00:00:00 2001 From: Ross Zwisler <ross.zwisler@linux.intel.com> Date: Thu, 25 Aug 2016 15:17:17 -0700 Subject: [PATCH 507/513] mm: silently skip readahead for DAX inodes For DAX inodes we need to be careful to never have page cache pages in the mapping->page_tree. This radix tree should be composed only of DAX exceptional entries and zero pages. ltp's readahead02 test was triggering a warning because we were trying to insert a DAX exceptional entry but found that a page cache page had already been inserted into the tree. This page was being inserted into the radix tree in response to a readahead(2) call. Readahead doesn't make sense for DAX inodes, but we don't want it to report a failure either. Instead, we just return success and don't do any work. Link: http://lkml.kernel.org/r/20160824221429.21158-1-ross.zwisler@linux.intel.com Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com> Reported-by: Jeff Moyer <jmoyer@redhat.com> Cc: Dan Williams <dan.j.williams@intel.com> Cc: Dave Chinner <david@fromorbit.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Jan Kara <jack@suse.com> Cc: <stable@vger.kernel.org> [4.5+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- mm/readahead.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mm/readahead.c b/mm/readahead.c index 65ec288dc057e..c8a955b1297e0 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -8,6 +8,7 @@ */ #include <linux/kernel.h> +#include <linux/dax.h> #include <linux/gfp.h> #include <linux/export.h> #include <linux/blkdev.h> @@ -544,6 +545,14 @@ do_readahead(struct address_space *mapping, struct file *filp, if (!mapping || !mapping->a_ops) return -EINVAL; + /* + * Readahead doesn't make sense for DAX inodes, but we don't want it + * to report a failure either. Instead, we just return success and + * don't do any work. + */ + if (dax_mapping(mapping)) + return 0; + return force_page_cache_readahead(mapping, filp, index, nr); } From add1fa75101263ab4d74240f93000998d4325624 Mon Sep 17 00:00:00 2001 From: Mario Kleiner <mario.kleiner.de@gmail.com> Date: Sat, 27 Aug 2016 01:02:28 +0200 Subject: [PATCH 508/513] drm/atomic: Don't potentially reset color_mgmt_changed on successive property updates. Due to assigning the 'replaced' value instead of or'ing it, if drm_atomic_crtc_set_property() gets called multiple times, the last call will define the color_mgmt_changed flag, so a non-updating call to a property can reset the flag and prevent actual hw state updates required by preceding property updates. Signed-off-by: Mario Kleiner <mario.kleiner.de@gmail.com> Cc: Daniel Vetter <daniel.vetter@intel.com> Cc: <stable@vger.kernel.org> # v4.6+ Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch> Signed-off-by: Dave Airlie <airlied@redhat.com> --- drivers/gpu/drm/drm_atomic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c index fa39307579721..2a3ded44cf2a6 100644 --- a/drivers/gpu/drm/drm_atomic.c +++ b/drivers/gpu/drm/drm_atomic.c @@ -475,7 +475,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, val, -1, &replaced); - state->color_mgmt_changed = replaced; + state->color_mgmt_changed |= replaced; return ret; } else if (property == config->ctm_property) { ret = drm_atomic_replace_property_blob_from_id(crtc, @@ -483,7 +483,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, val, sizeof(struct drm_color_ctm), &replaced); - state->color_mgmt_changed = replaced; + state->color_mgmt_changed |= replaced; return ret; } else if (property == config->gamma_lut_property) { ret = drm_atomic_replace_property_blob_from_id(crtc, @@ -491,7 +491,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc, val, -1, &replaced); - state->color_mgmt_changed = replaced; + state->color_mgmt_changed |= replaced; return ret; } else if (crtc->funcs->atomic_set_property) return crtc->funcs->atomic_set_property(crtc, state, property, val); From 3eab887a55424fc2c27553b7bfe32330df83f7b8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@linux-foundation.org> Date: Sun, 28 Aug 2016 15:04:33 -0700 Subject: [PATCH 509/513] Linux 4.8-rc4 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3537aa23905fa..67f42d57e6e7a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 8 SUBLEVEL = 0 -EXTRAVERSION = -rc3 +EXTRAVERSION = -rc4 NAME = Psychotic Stoned Sheep # *DOCUMENTATION* From 478573c93abd369c4850de55c387be43aa01e2e8 Mon Sep 17 00:00:00 2001 From: Lukas Wunner <lukas@wunner.de> Date: Thu, 28 Jul 2016 02:25:41 +0200 Subject: [PATCH 510/513] driver core: Don't leak secondary fwnode on device removal If device_add_property_set() is called for a device, a secondary fwnode is allocated and assigned to the device but currently not freed once the device is removed. This can be triggered on Apple Macs if a Thunderbolt device is plugged in on boot since Apple's NHI EFI driver sets a number of properties for that device which are leaked on unplug. Signed-off-by: Lukas Wunner <lukas@wunner.de> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/base/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/base/core.c b/drivers/base/core.c index 0a8bdade53f2f..70c5be5b03a77 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1266,6 +1266,7 @@ void device_del(struct device *dev) bus_remove_device(dev); device_pm_remove(dev); driver_deferred_probe_del(dev); + device_remove_properties(dev); /* Notify the platform of the removal, in case they * need to do anything... From 7ff55d174cbfdd06245c20a7488da4e9499cfd11 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus <heikki.krogerus@linux.intel.com> Date: Tue, 23 Aug 2016 11:33:26 +0300 Subject: [PATCH 511/513] ACPI / APD: Provide build-in properties of the UART The UART driver, dw8250.c, needs some details regarding the Designware UART. For ACPI enumerated devices the values are hard-coded, but since the driver also reads the values from device properties, providing them with build-in properties. This allows us to later remove the hard-coded values from the driver. Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/acpi/acpi_apd.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/acpi/acpi_apd.c b/drivers/acpi/acpi_apd.c index 1daf9c46df8e4..5f112d811e427 100644 --- a/drivers/acpi/acpi_apd.c +++ b/drivers/acpi/acpi_apd.c @@ -42,6 +42,7 @@ struct apd_private_data; struct apd_device_desc { unsigned int flags; unsigned int fixed_clk_rate; + struct property_entry *properties; int (*setup)(struct apd_private_data *pdata); }; @@ -76,9 +77,17 @@ static struct apd_device_desc cz_i2c_desc = { .fixed_clk_rate = 133000000, }; +static struct property_entry uart_properties[] = { + PROPERTY_ENTRY_U32("reg-io-width", 4), + PROPERTY_ENTRY_U32("reg-shift", 2), + PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"), + { }, +}; + static struct apd_device_desc cz_uart_desc = { .setup = acpi_apd_setup, .fixed_clk_rate = 48000000, + .properties = uart_properties, }; #endif @@ -125,6 +134,12 @@ static int acpi_apd_create_device(struct acpi_device *adev, goto err_out; } + if (dev_desc->properties) { + ret = device_add_properties(&adev->dev, dev_desc->properties); + if (ret) + goto err_out; + } + adev->driver_data = pdata; pdev = acpi_create_platform_device(adev); if (!IS_ERR_OR_NULL(pdev)) From a5565cf238daa6b1d5a260d844f857176d280886 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus <heikki.krogerus@linux.intel.com> Date: Tue, 23 Aug 2016 11:33:27 +0300 Subject: [PATCH 512/513] ACPI / LPSS: Provide build-in properties of the UART The UART driver, dw8250.c, needs some details regarding the Designware UART. For ACPI enumerated devices the values are hard-coded, but since the driver also reads the values from device properties, providing them with build-in properties. This allows us to later remove the hard-coded values from the driver. Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com> Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/acpi/acpi_lpss.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 357a0b8f860b3..5520102881357 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -75,6 +75,7 @@ struct lpss_device_desc { const char *clk_con_id; unsigned int prv_offset; size_t prv_size_override; + struct property_entry *properties; void (*setup)(struct lpss_private_data *pdata); }; @@ -163,11 +164,19 @@ static const struct lpss_device_desc lpt_i2c_dev_desc = { .prv_offset = 0x800, }; +static struct property_entry uart_properties[] = { + PROPERTY_ENTRY_U32("reg-io-width", 4), + PROPERTY_ENTRY_U32("reg-shift", 2), + PROPERTY_ENTRY_BOOL("snps,uart-16550-compatible"), + { }, +}; + static const struct lpss_device_desc lpt_uart_dev_desc = { .flags = LPSS_CLK | LPSS_CLK_GATE | LPSS_CLK_DIVIDER | LPSS_LTR, .clk_con_id = "baudclk", .prv_offset = 0x800, .setup = lpss_uart_setup, + .properties = uart_properties, }; static const struct lpss_device_desc lpt_sdio_dev_desc = { @@ -189,6 +198,7 @@ static const struct lpss_device_desc byt_uart_dev_desc = { .clk_con_id = "baudclk", .prv_offset = 0x800, .setup = lpss_uart_setup, + .properties = uart_properties, }; static const struct lpss_device_desc bsw_uart_dev_desc = { @@ -197,6 +207,7 @@ static const struct lpss_device_desc bsw_uart_dev_desc = { .clk_con_id = "baudclk", .prv_offset = 0x800, .setup = lpss_uart_setup, + .properties = uart_properties, }; static const struct lpss_device_desc byt_spi_dev_desc = { @@ -440,6 +451,12 @@ static int acpi_lpss_create_device(struct acpi_device *adev, goto err_out; } + if (dev_desc->properties) { + ret = device_add_properties(&adev->dev, dev_desc->properties); + if (ret) + goto err_out; + } + adev->driver_data = pdata; pdev = acpi_create_platform_device(adev); if (!IS_ERR_OR_NULL(pdev)) { From 20a875e2e86e73d13ec256781a7d55a7885868ec Mon Sep 17 00:00:00 2001 From: Heikki Krogerus <heikki.krogerus@linux.intel.com> Date: Tue, 23 Aug 2016 11:33:28 +0300 Subject: [PATCH 513/513] serial: 8250_dw: Add quirk for APM X-Gene SoC The APM X-Gene SoC UART is the only board that still needs the hard-coded values, so handle it separately in dw8250_quirks(). The other ACPI platforms are able to provide the values with device properties. Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> --- drivers/tty/serial/8250/8250_dw.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c index e199696142035..5c0c123565ad8 100644 --- a/drivers/tty/serial/8250/8250_dw.c +++ b/drivers/tty/serial/8250/8250_dw.c @@ -298,12 +298,17 @@ static void dw8250_quirks(struct uart_port *p, struct dw8250_data *data) p->serial_out = dw8250_serial_out32be; } } else if (has_acpi_companion(p->dev)) { - p->iotype = UPIO_MEM32; - p->regshift = 2; - p->serial_in = dw8250_serial_in32; + const struct acpi_device_id *id; + + id = acpi_match_device(p->dev->driver->acpi_match_table, + p->dev); + if (id && !strcmp(id->id, "APMC0D08")) { + p->iotype = UPIO_MEM32; + p->regshift = 2; + p->serial_in = dw8250_serial_in32; + data->uart_16550_compatible = true; + } p->set_termios = dw8250_set_termios; - /* So far none of there implement the Busy Functionality */ - data->uart_16550_compatible = true; } /* Platforms with iDMA */