From f623f75ae443d0c771635d51cc986b9d389bf631 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 14 Aug 2018 00:35:24 +0200
Subject: [PATCH 001/257] rfkill-gpio: include linux/mod_devicetable.h

One more driver is apparently broken by the recent change
to linux/platform_device.h:

net/rfkill/rfkill-gpio.c: In function 'rfkill_gpio_acpi_probe':
net/rfkill/rfkill-gpio.c:82:29: error: dereferencing pointer to incomplete type 'const struct acpi_device_id'

Include linux/mod_devicetable.h to get the definition of the
acpi_device_id structure.

Fixes: ac3167257b9f ("headers: separate linux/mod_devicetable.h from linux/platform_device.h")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/rfkill/rfkill-gpio.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index 00192a996be0e..0f84658522543 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -20,6 +20,7 @@
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/rfkill.h>
 #include <linux/platform_device.h>
 #include <linux/clk.h>

From 77cfaf52eca5cac30ed029507e0cab065f888995 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@toke.dk>
Date: Mon, 13 Aug 2018 14:16:25 +0200
Subject: [PATCH 002/257] mac80211: Run TXQ teardown code before de-registering
 interfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The TXQ teardown code can reference the vif data structures that are
stored in the netdev private memory area if there are still packets on
the queue when it is being freed. Since the TXQ teardown code is run
after the netdevs are freed, this can lead to a use-after-free. Fix this
by moving the TXQ teardown code to earlier in ieee80211_unregister_hw().

Reported-by: Ben Greear <greearb@candelatech.com>
Tested-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index fb73451ed85ec..0358f20b675f2 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -1182,6 +1182,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 #if IS_ENABLED(CONFIG_IPV6)
 	unregister_inet6addr_notifier(&local->ifa6_notifier);
 #endif
+	ieee80211_txq_teardown_flows(local);
 
 	rtnl_lock();
 
@@ -1210,7 +1211,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw)
 	skb_queue_purge(&local->skb_queue);
 	skb_queue_purge(&local->skb_queue_unreliable);
 	skb_queue_purge(&local->skb_queue_tdls_chsw);
-	ieee80211_txq_teardown_flows(local);
 
 	destroy_workqueue(local->workqueue);
 	wiphy_unregister(local->hw.wiphy);

From c066fafc595eef5ae3c83ae3a8305956b8c3ef15 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Tue, 14 Aug 2018 20:37:45 +1000
Subject: [PATCH 003/257] KVM: PPC: Book3S HV: Use correct pagesize in
 kvm_unmap_radix()

Since commit e641a317830b ("KVM: PPC: Book3S HV: Unify dirty page map
between HPT and radix", 2017-10-26), kvm_unmap_radix() computes the
number of PAGE_SIZEd pages being unmapped and passes it to
kvmppc_update_dirty_map(), which expects to be passed the page size
instead.  Consequently it will only mark one system page dirty even
when a large page (for example a THP page) is being unmapped.  The
consequence of this is that part of the THP page might not get copied
during live migration, resulting in memory corruption for the guest.

This fixes it by computing and passing the page size in kvm_unmap_radix().

Cc: stable@vger.kernel.org # v4.15+
Fixes: e641a317830b (KVM: PPC: Book3S HV: Unify dirty page map between HPT and radix)
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 176f911ee983a..7efc42538ccf6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -738,10 +738,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
 					      gpa, shift);
 		kvmppc_radix_tlbie_page(kvm, gpa, shift);
 		if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
-			unsigned long npages = 1;
+			unsigned long psize = PAGE_SIZE;
 			if (shift)
-				npages = 1ul << (shift - PAGE_SHIFT);
-			kvmppc_update_dirty_map(memslot, gfn, npages);
+				psize = 1ul << shift;
+			kvmppc_update_dirty_map(memslot, gfn, psize);
 		}
 	}
 	return 0;				

From 484004339d4514fde425f6e8a9f6a6cc979bb0c3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 15 Aug 2018 18:17:03 +0200
Subject: [PATCH 004/257] mac80211_hwsim: require at least one channel

Syzbot continues to try to create mac80211_hwsim radios, and
manages to pass parameters that are later checked with WARN_ON
in cfg80211 - catch another one in hwsim directly.

Reported-by: syzbot+2a12f11c306afe871c1f@syzkaller.appspotmail.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 18e819d964f1c..fe1b0108f06de 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3194,6 +3194,11 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
 	if (info->attrs[HWSIM_ATTR_CHANNELS])
 		param.channels = nla_get_u32(info->attrs[HWSIM_ATTR_CHANNELS]);
 
+	if (param.channels < 1) {
+		GENL_SET_ERR_MSG(info, "must have at least one channel");
+		return -EINVAL;
+	}
+
 	if (param.channels > CFG80211_MAX_NUM_DIFFERENT_CHANNELS) {
 		GENL_SET_ERR_MSG(info, "too many channels specified");
 		return -EINVAL;

From 8ecebf4d767e2307a946c8905278d6358eda35c3 Mon Sep 17 00:00:00 2001
From: Robbie Ko <robbieko@synology.com>
Date: Mon, 6 Aug 2018 10:30:30 +0800
Subject: [PATCH 005/257] Btrfs: fix unexpected failure of nocow buffered
 writes after snapshotting when low on space

Commit e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting") forced
nocow writes to fallback to COW, during writeback, when a snapshot is
created. This resulted in writes made before creating the snapshot to
unexpectedly fail with ENOSPC during writeback when success (0) was
returned to user space through the write system call.

The steps leading to this problem are:

1. When it's not possible to allocate data space for a write, the
   buffered write path checks if a NOCOW write is possible.  If it is,
   it will not reserve space and success (0) is returned to user space.

2. Then when a snapshot is created, the root's will_be_snapshotted
   atomic is incremented and writeback is triggered for all inode's that
   belong to the root being snapshotted. Incrementing that atomic forces
   all previous writes to fallback to COW during writeback (running
   delalloc).

3. This results in the writeback for the inodes to fail and therefore
   setting the ENOSPC error in their mappings, so that a subsequent
   fsync on them will report the error to user space. So it's not a
   completely silent data loss (since fsync will report ENOSPC) but it's
   a very unexpected and undesirable behaviour, because if a clean
   shutdown/unmount of the filesystem happens without previous calls to
   fsync, it is expected to have the data present in the files after
   mounting the filesystem again.

So fix this by adding a new atomic named snapshot_force_cow to the
root structure which prevents this behaviour and works the following way:

1. It is incremented when we start to create a snapshot after triggering
   writeback and before waiting for writeback to finish.

2. This new atomic is now what is used by writeback (running delalloc)
   to decide whether we need to fallback to COW or not. Because we
   incremented this new atomic after triggering writeback in the
   snapshot creation ioctl, we ensure that all buffered writes that
   happened before snapshot creation will succeed and not fallback to
   COW (which would make them fail with ENOSPC).

3. The existing atomic, will_be_snapshotted, is kept because it is used
   to force new buffered writes, that start after we started
   snapshotting, to reserve data space even when NOCOW is possible.
   This makes these writes fail early with ENOSPC when there's no
   available space to allocate, preventing the unexpected behaviour of
   writeback later failing with ENOSPC due to a fallback to COW mode.

Fixes: e9894fd3e3b3 ("Btrfs: fix snapshot vs nocow writting")
Signed-off-by: Robbie Ko <robbieko@synology.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h   |  1 +
 fs/btrfs/disk-io.c |  1 +
 fs/btrfs/inode.c   | 25 ++++---------------------
 fs/btrfs/ioctl.c   | 16 ++++++++++++++++
 4 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 318be78640722..a67cc190a84b4 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1280,6 +1280,7 @@ struct btrfs_root {
 	int send_in_progress;
 	struct btrfs_subvolume_writers *subv_writers;
 	atomic_t will_be_snapshotted;
+	atomic_t snapshot_force_cow;
 
 	/* For qgroup metadata reserved space */
 	spinlock_t qgroup_meta_rsv_lock;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5124c15705ce7..05dc3c17cb62a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1187,6 +1187,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
 	atomic_set(&root->log_batch, 0);
 	refcount_set(&root->refs, 1);
 	atomic_set(&root->will_be_snapshotted, 0);
+	atomic_set(&root->snapshot_force_cow, 0);
 	root->log_transid = 0;
 	root->log_transid_committed = -1;
 	root->last_log_commit = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3f51ddc18f988..c6d8c5d19ff07 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1271,7 +1271,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	u64 disk_num_bytes;
 	u64 ram_bytes;
 	int extent_type;
-	int ret, err;
+	int ret;
 	int type;
 	int nocow;
 	int check_prev = 1;
@@ -1403,11 +1403,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 			 * if there are pending snapshots for this root,
 			 * we fall into common COW way.
 			 */
-			if (!nolock) {
-				err = btrfs_start_write_no_snapshotting(root);
-				if (!err)
-					goto out_check;
-			}
+			if (!nolock && atomic_read(&root->snapshot_force_cow))
+				goto out_check;
 			/*
 			 * force cow if csum exists in the range.
 			 * this ensure that csum for a given extent are
@@ -1416,9 +1413,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 			ret = csum_exist_in_range(fs_info, disk_bytenr,
 						  num_bytes);
 			if (ret) {
-				if (!nolock)
-					btrfs_end_write_no_snapshotting(root);
-
 				/*
 				 * ret could be -EIO if the above fails to read
 				 * metadata.
@@ -1431,11 +1425,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 				WARN_ON_ONCE(nolock);
 				goto out_check;
 			}
-			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr)) {
-				if (!nolock)
-					btrfs_end_write_no_snapshotting(root);
+			if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
 				goto out_check;
-			}
 			nocow = 1;
 		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
 			extent_end = found_key.offset +
@@ -1448,8 +1439,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 out_check:
 		if (extent_end <= start) {
 			path->slots[0]++;
-			if (!nolock && nocow)
-				btrfs_end_write_no_snapshotting(root);
 			if (nocow)
 				btrfs_dec_nocow_writers(fs_info, disk_bytenr);
 			goto next_slot;
@@ -1471,8 +1460,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 					     end, page_started, nr_written, 1,
 					     NULL);
 			if (ret) {
-				if (!nolock && nocow)
-					btrfs_end_write_no_snapshotting(root);
 				if (nocow)
 					btrfs_dec_nocow_writers(fs_info,
 								disk_bytenr);
@@ -1492,8 +1479,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 					  ram_bytes, BTRFS_COMPRESS_NONE,
 					  BTRFS_ORDERED_PREALLOC);
 			if (IS_ERR(em)) {
-				if (!nolock && nocow)
-					btrfs_end_write_no_snapshotting(root);
 				if (nocow)
 					btrfs_dec_nocow_writers(fs_info,
 								disk_bytenr);
@@ -1532,8 +1517,6 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 					     EXTENT_CLEAR_DATA_RESV,
 					     PAGE_UNLOCK | PAGE_SET_PRIVATE2);
 
-		if (!nolock && nocow)
-			btrfs_end_write_no_snapshotting(root);
 		cur_offset = extent_end;
 
 		/*
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d3a5d2a41e5f1..85c4284bb2cfa 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -747,6 +747,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	struct btrfs_pending_snapshot *pending_snapshot;
 	struct btrfs_trans_handle *trans;
 	int ret;
+	bool snapshot_force_cow = false;
 
 	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
 		return -EINVAL;
@@ -763,6 +764,11 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 		goto free_pending;
 	}
 
+	/*
+	 * Force new buffered writes to reserve space even when NOCOW is
+	 * possible. This is to avoid later writeback (running dealloc) to
+	 * fallback to COW mode and unexpectedly fail with ENOSPC.
+	 */
 	atomic_inc(&root->will_be_snapshotted);
 	smp_mb__after_atomic();
 	/* wait for no snapshot writes */
@@ -773,6 +779,14 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 	if (ret)
 		goto dec_and_free;
 
+	/*
+	 * All previous writes have started writeback in NOCOW mode, so now
+	 * we force future writes to fallback to COW mode during snapshot
+	 * creation.
+	 */
+	atomic_inc(&root->snapshot_force_cow);
+	snapshot_force_cow = true;
+
 	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
 
 	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
@@ -837,6 +851,8 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
 fail:
 	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
 dec_and_free:
+	if (snapshot_force_cow)
+		atomic_dec(&root->snapshot_force_cow);
 	if (atomic_dec_and_test(&root->will_be_snapshotted))
 		wake_up_var(&root->will_be_snapshotted);
 free_pending:

From 46dec40fb741f00f1864580130779aeeaf24fb3d Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@ozlabs.org>
Date: Mon, 20 Aug 2018 16:05:45 +1000
Subject: [PATCH 006/257] KVM: PPC: Book3S HV: Don't truncate HPTE index in
 xlate function

This fixes a bug which causes guest virtual addresses to get translated
to guest real addresses incorrectly when the guest is using the HPT MMU
and has more than 256GB of RAM, or more specifically has a HPT larger
than 2GB.  This has showed up in testing as a failure of the host to
emulate doorbell instructions correctly on POWER9 for HPT guests with
more than 256GB of RAM.

The bug is that the HPTE index in kvmppc_mmu_book3s_64_hv_xlate()
is stored as an int, and in forming the HPTE address, the index gets
shifted left 4 bits as an int before being signed-extended to 64 bits.
The simple fix is to make the variable a long int, matching the
return type of kvmppc_hv_find_lock_hpte(), which is what calculates
the index.

Fixes: 697d3899dcb4 ("KVM: PPC: Implement MMIO emulation support for Book3S HV guests")
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 7f3a8cf5d66f3..4c08f42f6406b 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -359,7 +359,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
 	unsigned long pp, key;
 	unsigned long v, orig_v, gr;
 	__be64 *hptep;
-	int index;
+	long int index;
 	int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
 
 	if (kvm_is_radix(vcpu->kvm))

From 8a54d8fc160e67ad485d95a0322ce1221f80770a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 18 Jun 2018 09:29:57 +0200
Subject: [PATCH 007/257] cfg80211: remove division by size of sizeof(struct
 ieee80211_wmm_rule)

Pointer arithmetic already adjusts by the size of the struct,
so the sizeof() calculation is wrong. This is basically the
same as Colin King's patch for similar code in the iwlwifi
driver.

Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/reg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4fc66a117b7d7..283902974fbfa 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -452,8 +452,7 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 			continue;
 
 		regd->reg_rules[i].wmm_rule = d_wmm +
-			(src_regd->reg_rules[i].wmm_rule - s_wmm) /
-			sizeof(struct ieee80211_wmm_rule);
+			(src_regd->reg_rules[i].wmm_rule - s_wmm);
 	}
 	return regd;
 }

From d4682ba03ef618b6ef4be7cedc7aacaf505d3a58 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 11 Jun 2018 19:24:28 +0100
Subject: [PATCH 008/257] Btrfs: sync log after logging new name

When we add a new name for an inode which was logged in the current
transaction, we update the inode in the log so that its new name and
ancestors are added to the log. However when we do this we do not persist
the log, so the changes remain in memory only, and as a consequence, any
ancestors that were created in the current transaction are updated such
that future calls to btrfs_inode_in_log() return true. This leads to a
subsequent fsync against such new ancestor directories returning
immediately, without persisting the log, therefore after a power failure
the new ancestor directories do not exist, despite fsync being called
against them explicitly.

Example:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ mkdir /mnt/A
  $ mkdir /mnt/B
  $ mkdir /mnt/A/C
  $ touch /mnt/B/foo
  $ xfs_io -c "fsync" /mnt/B/foo
  $ ln /mnt/B/foo /mnt/A/C/foo
  $ xfs_io -c "fsync" /mnt/A
  <power failure>

After the power failure, directory "A" does not exist, despite the explicit
fsync on it.

Instead of fixing this by changing the behaviour of the explicit fsync on
directory "A" to persist the log instead of doing nothing, make the logging
of the new file name (which happens when creating a hard link or renaming)
persist the log. This approach not only is simpler, not requiring addition
of new fields to the inode in memory structure, but also gives us the same
behaviour as ext4, xfs and f2fs (possibly other filesystems too).

A test case for fstests follows soon.

Fixes: 12fcfd22fe5b ("Btrfs: tree logging unlink/rename fixes")
Reported-by: Vijay Chidambaram <vvijay03@gmail.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/inode.c    | 92 +++++++++++++++++++++++++++++++++++++++------
 fs/btrfs/tree-log.c | 48 ++++++++++++++++++++---
 fs/btrfs/tree-log.h | 10 ++++-
 3 files changed, 131 insertions(+), 19 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c6d8c5d19ff07..cb09873ad270c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -6634,6 +6634,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		drop_inode = 1;
 	} else {
 		struct dentry *parent = dentry->d_parent;
+		int ret;
+
 		err = btrfs_update_inode(trans, root, inode);
 		if (err)
 			goto fail;
@@ -6647,7 +6649,12 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 				goto fail;
 		}
 		d_instantiate(dentry, inode);
-		btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent);
+		ret = btrfs_log_new_name(trans, BTRFS_I(inode), NULL, parent,
+					 true, NULL);
+		if (ret == BTRFS_NEED_TRANS_COMMIT) {
+			err = btrfs_commit_transaction(trans);
+			trans = NULL;
+		}
 	}
 
 fail:
@@ -9386,14 +9393,21 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 	u64 new_idx = 0;
 	u64 root_objectid;
 	int ret;
-	int ret2;
 	bool root_log_pinned = false;
 	bool dest_log_pinned = false;
+	struct btrfs_log_ctx ctx_root;
+	struct btrfs_log_ctx ctx_dest;
+	bool sync_log_root = false;
+	bool sync_log_dest = false;
+	bool commit_transaction = false;
 
 	/* we only allow rename subvolume link between subvolumes */
 	if (old_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)
 		return -EXDEV;
 
+	btrfs_init_log_ctx(&ctx_root, old_inode);
+	btrfs_init_log_ctx(&ctx_dest, new_inode);
+
 	/* close the race window with snapshot create/destroy ioctl */
 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
 		down_read(&fs_info->subvol_sem);
@@ -9540,15 +9554,29 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 
 	if (root_log_pinned) {
 		parent = new_dentry->d_parent;
-		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
-				parent);
+		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
+					 BTRFS_I(old_dir), parent,
+					 false, &ctx_root);
+		if (ret == BTRFS_NEED_LOG_SYNC)
+			sync_log_root = true;
+		else if (ret == BTRFS_NEED_TRANS_COMMIT)
+			commit_transaction = true;
+		ret = 0;
 		btrfs_end_log_trans(root);
 		root_log_pinned = false;
 	}
 	if (dest_log_pinned) {
-		parent = old_dentry->d_parent;
-		btrfs_log_new_name(trans, BTRFS_I(new_inode), BTRFS_I(new_dir),
-				parent);
+		if (!commit_transaction) {
+			parent = old_dentry->d_parent;
+			ret = btrfs_log_new_name(trans, BTRFS_I(new_inode),
+						 BTRFS_I(new_dir), parent,
+						 false, &ctx_dest);
+			if (ret == BTRFS_NEED_LOG_SYNC)
+				sync_log_dest = true;
+			else if (ret == BTRFS_NEED_TRANS_COMMIT)
+				commit_transaction = true;
+			ret = 0;
+		}
 		btrfs_end_log_trans(dest);
 		dest_log_pinned = false;
 	}
@@ -9581,8 +9609,26 @@ static int btrfs_rename_exchange(struct inode *old_dir,
 			dest_log_pinned = false;
 		}
 	}
-	ret2 = btrfs_end_transaction(trans);
-	ret = ret ? ret : ret2;
+	if (!ret && sync_log_root && !commit_transaction) {
+		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root,
+				     &ctx_root);
+		if (ret)
+			commit_transaction = true;
+	}
+	if (!ret && sync_log_dest && !commit_transaction) {
+		ret = btrfs_sync_log(trans, BTRFS_I(new_inode)->root,
+				     &ctx_dest);
+		if (ret)
+			commit_transaction = true;
+	}
+	if (commit_transaction) {
+		ret = btrfs_commit_transaction(trans);
+	} else {
+		int ret2;
+
+		ret2 = btrfs_end_transaction(trans);
+		ret = ret ? ret : ret2;
+	}
 out_notrans:
 	if (new_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&fs_info->subvol_sem);
@@ -9659,6 +9705,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	int ret;
 	u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
 	bool log_pinned = false;
+	struct btrfs_log_ctx ctx;
+	bool sync_log = false;
+	bool commit_transaction = false;
 
 	if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
 		return -EPERM;
@@ -9816,8 +9865,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (log_pinned) {
 		struct dentry *parent = new_dentry->d_parent;
 
-		btrfs_log_new_name(trans, BTRFS_I(old_inode), BTRFS_I(old_dir),
-				parent);
+		btrfs_init_log_ctx(&ctx, old_inode);
+		ret = btrfs_log_new_name(trans, BTRFS_I(old_inode),
+					 BTRFS_I(old_dir), parent,
+					 false, &ctx);
+		if (ret == BTRFS_NEED_LOG_SYNC)
+			sync_log = true;
+		else if (ret == BTRFS_NEED_TRANS_COMMIT)
+			commit_transaction = true;
+		ret = 0;
 		btrfs_end_log_trans(root);
 		log_pinned = false;
 	}
@@ -9854,7 +9910,19 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 		btrfs_end_log_trans(root);
 		log_pinned = false;
 	}
-	btrfs_end_transaction(trans);
+	if (!ret && sync_log) {
+		ret = btrfs_sync_log(trans, BTRFS_I(old_inode)->root, &ctx);
+		if (ret)
+			commit_transaction = true;
+	}
+	if (commit_transaction) {
+		ret = btrfs_commit_transaction(trans);
+	} else {
+		int ret2;
+
+		ret2 = btrfs_end_transaction(trans);
+		ret = ret ? ret : ret2;
+	}
 out_notrans:
 	if (old_ino == BTRFS_FIRST_FREE_OBJECTID)
 		up_read(&fs_info->subvol_sem);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1650dc44a5e37..3c2ae0e4f25a8 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6025,14 +6025,25 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
  * Call this after adding a new name for a file and it will properly
  * update the log to reflect the new name.
  *
- * It will return zero if all goes well, and it will return 1 if a
- * full transaction commit is required.
+ * @ctx can not be NULL when @sync_log is false, and should be NULL when it's
+ * true (because it's not used).
+ *
+ * Return value depends on whether @sync_log is true or false.
+ * When true: returns BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
+ *            committed by the caller, and BTRFS_DONT_NEED_TRANS_COMMIT
+ *            otherwise.
+ * When false: returns BTRFS_DONT_NEED_LOG_SYNC if the caller does not need to
+ *             to sync the log, BTRFS_NEED_LOG_SYNC if it needs to sync the log,
+ *             or BTRFS_NEED_TRANS_COMMIT if the transaction needs to be
+ *             committed (without attempting to sync the log).
  */
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
-			struct dentry *parent)
+			struct dentry *parent,
+			bool sync_log, struct btrfs_log_ctx *ctx)
 {
 	struct btrfs_fs_info *fs_info = trans->fs_info;
+	int ret;
 
 	/*
 	 * this will force the logging code to walk the dentry chain
@@ -6047,9 +6058,34 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 	 */
 	if (inode->logged_trans <= fs_info->last_trans_committed &&
 	    (!old_dir || old_dir->logged_trans <= fs_info->last_trans_committed))
-		return 0;
+		return sync_log ? BTRFS_DONT_NEED_TRANS_COMMIT :
+			BTRFS_DONT_NEED_LOG_SYNC;
+
+	if (sync_log) {
+		struct btrfs_log_ctx ctx2;
+
+		btrfs_init_log_ctx(&ctx2, &inode->vfs_inode);
+		ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
+					     LOG_INODE_EXISTS, &ctx2);
+		if (ret == BTRFS_NO_LOG_SYNC)
+			return BTRFS_DONT_NEED_TRANS_COMMIT;
+		else if (ret)
+			return BTRFS_NEED_TRANS_COMMIT;
+
+		ret = btrfs_sync_log(trans, inode->root, &ctx2);
+		if (ret)
+			return BTRFS_NEED_TRANS_COMMIT;
+		return BTRFS_DONT_NEED_TRANS_COMMIT;
+	}
+
+	ASSERT(ctx);
+	ret = btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
+				     LOG_INODE_EXISTS, ctx);
+	if (ret == BTRFS_NO_LOG_SYNC)
+		return BTRFS_DONT_NEED_LOG_SYNC;
+	else if (ret)
+		return BTRFS_NEED_TRANS_COMMIT;
 
-	return btrfs_log_inode_parent(trans, inode, parent, 0, LLONG_MAX,
-				      LOG_INODE_EXISTS, NULL);
+	return BTRFS_NEED_LOG_SYNC;
 }
 
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 122e68b89a5ad..7ab9bb88a6393 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -71,8 +71,16 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 			     int for_rename);
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
 				   struct btrfs_inode *dir);
+/* Return values for btrfs_log_new_name() */
+enum {
+	BTRFS_DONT_NEED_TRANS_COMMIT,
+	BTRFS_NEED_TRANS_COMMIT,
+	BTRFS_DONT_NEED_LOG_SYNC,
+	BTRFS_NEED_LOG_SYNC,
+};
 int btrfs_log_new_name(struct btrfs_trans_handle *trans,
 			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
-			struct dentry *parent);
+			struct dentry *parent,
+			bool sync_log, struct btrfs_log_ctx *ctx);
 
 #endif

From de02b9f6bb65a6a1848f346f7a3617b7a9b930c0 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Fri, 17 Aug 2018 09:38:59 +0100
Subject: [PATCH 009/257] Btrfs: fix data corruption when deduplicating between
 different files

If we deduplicate extents between two different files we can end up
corrupting data if the source range ends at the size of the source file,
the source file's size is not aligned to the filesystem's block size
and the destination range does not go past the size of the destination
file size.

Example:

  $ mkfs.btrfs -f /dev/sdb
  $ mount /dev/sdb /mnt

  $ xfs_io -f -c "pwrite -S 0x6b 0 2518890" /mnt/foo
  # The first byte with a value of 0xae starts at an offset (2518890)
  # which is not a multiple of the sector size.
  $ xfs_io -c "pwrite -S 0xae 2518890 102398" /mnt/foo

  # Confirm the file content is full of bytes with values 0x6b and 0xae.
  $ od -t x1 /mnt/foo
  0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
  *
  11467540 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ae ae ae ae ae ae
  11467560 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae
  *
  11777540 ae ae ae ae ae ae ae ae
  11777550

  # Create a second file with a length not aligned to the sector size,
  # whose bytes all have the value 0x6b, so that its extent(s) can be
  # deduplicated with the first file.
  $ xfs_io -f -c "pwrite -S 0x6b 0 557771" /mnt/bar

  # Now deduplicate the entire second file into a range of the first file
  # that also has all bytes with the value 0x6b. The destination range's
  # end offset must not be aligned to the sector size and must be less
  # then the offset of the first byte with the value 0xae (byte at offset
  # 2518890).
  $ xfs_io -c "dedupe /mnt/bar 0 1957888 557771" /mnt/foo

  # The bytes in the range starting at offset 2515659 (end of the
  # deduplication range) and ending at offset 2519040 (start offset
  # rounded up to the block size) must all have the value 0xae (and not
  # replaced with 0x00 values). In other words, we should have exactly
  # the same data we had before we asked for deduplication.
  $ od -t x1 /mnt/foo
  0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
  *
  11467540 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b ae ae ae ae ae ae
  11467560 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae
  *
  11777540 ae ae ae ae ae ae ae ae
  11777550

  # Unmount the filesystem and mount it again. This guarantees any file
  # data in the page cache is dropped.
  $ umount /dev/sdb
  $ mount /dev/sdb /mnt

  $ od -t x1 /mnt/foo
  0000000 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b
  *
  11461300 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 6b 00 00 00 00 00
  11461320 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
  *
  11470000 ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae ae
  *
  11777540 ae ae ae ae ae ae ae ae
  11777550

  # The bytes in range 2515659 to 2519040 have a value of 0x00 and not a
  # value of 0xae, data corruption happened due to the deduplication
  # operation.

So fix this by rounding down, to the sector size, the length used for the
deduplication when the following conditions are met:

  1) Source file's range ends at its i_size;
  2) Source file's i_size is not aligned to the sector size;
  3) Destination range does not cross the i_size of the destination file.

Fixes: e1d227a42ea2 ("btrfs: Handle unaligned length in extent_same")
CC: stable@vger.kernel.org # 4.2+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ioctl.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 85c4284bb2cfa..011ddfcc96e2d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3469,6 +3469,25 @@ static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 olen,
 
 		same_lock_start = min_t(u64, loff, dst_loff);
 		same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start;
+	} else {
+		/*
+		 * If the source and destination inodes are different, the
+		 * source's range end offset matches the source's i_size, that
+		 * i_size is not a multiple of the sector size, and the
+		 * destination range does not go past the destination's i_size,
+		 * we must round down the length to the nearest sector size
+		 * multiple. If we don't do this adjustment we end replacing
+		 * with zeroes the bytes in the range that starts at the
+		 * deduplication range's end offset and ends at the next sector
+		 * size multiple.
+		 */
+		if (loff + olen == i_size_read(src) &&
+		    dst_loff + len < i_size_read(dst)) {
+			const u64 sz = BTRFS_I(src)->root->fs_info->sectorsize;
+
+			len = round_down(i_size_read(src), sz) - loff;
+			olen = len;
+		}
 	}
 
 again:

From a5b7f4295eeae8b05ca91f6d145cd8773b08de9e Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Thu, 9 Aug 2018 09:46:04 +0800
Subject: [PATCH 010/257] btrfs: fix qgroup_free wrong num_bytes in
 btrfs_subvolume_reserve_metadata

After btrfs_qgroup_reserve_meta_prealloc(), num_bytes will be assigned
again by btrfs_calc_trans_metadata_size(). Once block_rsv fails, we
can't properly free the num_bytes of the previous qgroup_reserve. Use a
separate variable to store the num_bytes of the qgroup_reserve.

Delete the comment for the qgroup_reserved that does not exist and add a
comment about use_global_rsv.

Fixes: c4c129db5da8 ("btrfs: drop unused parameter qgroup_reserved")
CC: stable@vger.kernel.org # 4.18+
Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/extent-tree.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index de6f75f5547bd..2d9074295d7f0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5800,7 +5800,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
  * root: the root of the parent directory
  * rsv: block reservation
  * items: the number of items that we need do reservation
- * qgroup_reserved: used to return the reserved size in qgroup
+ * use_global_rsv: allow fallback to the global block reservation
  *
  * This function is used to reserve the space for snapshot/subvolume
  * creation and deletion. Those operations are different with the
@@ -5810,10 +5810,10 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
  * the space reservation mechanism in start_transaction().
  */
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
-				     struct btrfs_block_rsv *rsv,
-				     int items,
+				     struct btrfs_block_rsv *rsv, int items,
 				     bool use_global_rsv)
 {
+	u64 qgroup_num_bytes = 0;
 	u64 num_bytes;
 	int ret;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -5821,12 +5821,11 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 
 	if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
 		/* One for parent inode, two for dir entries */
-		num_bytes = 3 * fs_info->nodesize;
-		ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+		qgroup_num_bytes = 3 * fs_info->nodesize;
+		ret = btrfs_qgroup_reserve_meta_prealloc(root,
+				qgroup_num_bytes, true);
 		if (ret)
 			return ret;
-	} else {
-		num_bytes = 0;
 	}
 
 	num_bytes = btrfs_calc_trans_metadata_size(fs_info, items);
@@ -5838,8 +5837,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
 	if (ret == -ENOSPC && use_global_rsv)
 		ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes, 1);
 
-	if (ret && num_bytes)
-		btrfs_qgroup_free_meta_prealloc(root, num_bytes);
+	if (ret && qgroup_num_bytes)
+		btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
 
 	return ret;
 }

From 801660b040d132f67fac6a95910ad307c5929b49 Mon Sep 17 00:00:00 2001
From: Anand Jain <anand.jain@oracle.com>
Date: Mon, 6 Aug 2018 18:12:37 +0800
Subject: [PATCH 011/257] btrfs: btrfs_shrink_device should call commit
 transaction at the end

Test case btrfs/164 reports use-after-free:

[ 6712.084324] general protection fault: 0000 [#1] PREEMPT SMP
..
[ 6712.195423]  btrfs_update_commit_device_size+0x75/0xf0 [btrfs]
[ 6712.201424]  btrfs_commit_transaction+0x57d/0xa90 [btrfs]
[ 6712.206999]  btrfs_rm_device+0x627/0x850 [btrfs]
[ 6712.211800]  btrfs_ioctl+0x2b03/0x3120 [btrfs]

Reason for this is that btrfs_shrink_device adds the resized device to
the fs_devices::resized_devices after it has called the last commit
transaction.

So the list fs_devices::resized_devices is not empty when
btrfs_shrink_device returns.  Now the parent function
btrfs_rm_device calls:

        btrfs_close_bdev(device);
        call_rcu(&device->rcu, free_device_rcu);

and then does the transactio ncommit. It goes through the
fs_devices::resized_devices in btrfs_update_commit_device_size and
leads to use-after-free.

Fix this by making sure btrfs_shrink_device calls the last needed
btrfs_commit_transaction before the return. This is consistent with what
the grow counterpart does and this makes sure the on-disk state is
persistent when the function returns.

Reported-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Tested-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Signed-off-by: Anand Jain <anand.jain@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ update changelog ]
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/volumes.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index da86706123ffa..f4405e430da6e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -4491,7 +4491,12 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 
 	/* Now btrfs_update_device() will change the on-disk size. */
 	ret = btrfs_update_device(trans, device);
-	btrfs_end_transaction(trans);
+	if (ret < 0) {
+		btrfs_abort_transaction(trans, ret);
+		btrfs_end_transaction(trans);
+	} else {
+		ret = btrfs_commit_transaction(trans);
+	}
 done:
 	btrfs_free_path(path);
 	if (ret) {

From b9b8a41adeff5666b402996020b698504c927353 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 20 Aug 2018 11:25:33 +0300
Subject: [PATCH 012/257] btrfs: use after free in btrfs_quota_enable

The issue here is that btrfs_commit_transaction() frees "trans" on both
the error and the success path.  So the problem would be if
btrfs_commit_transaction() succeeds, and then qgroup_rescan_init()
fails.  That means that "ret" is non-zero and "trans" is non-NULL and it
leads to a use after free inside the btrfs_end_transaction() macro.

Fixes: 340f1aa27f36 ("btrfs: qgroups: Move transaction management inside btrfs_quota_enable/disable")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/qgroup.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 4353bb69bb867..d4917c0cddf57 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1019,10 +1019,9 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 	spin_unlock(&fs_info->qgroup_lock);
 
 	ret = btrfs_commit_transaction(trans);
-	if (ret) {
-		trans = NULL;
+	trans = NULL;
+	if (ret)
 		goto out_free_path;
-	}
 
 	ret = qgroup_rescan_init(fs_info, 0, 1);
 	if (!ret) {

From b6fdfbff078975c53383fc146a2a54985eab6b6d Mon Sep 17 00:00:00 2001
From: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Date: Fri, 24 Aug 2018 11:35:28 +0900
Subject: [PATCH 013/257] btrfs: Fix suspicious RCU usage warning in
 btrfs_debug_in_rcu

Commit 672d599041c8 ("btrfs: Use wrapper macro for rcu string to remove
duplicate code") replaces some open coded RCU string handling with macro.

It turns out that btrfs_debug_in_rcu() is used for the first time and
the macro lacks lock/unlock of RCU string for non-debug case (i.e. when
the message is not printed), leading to suspicious RCU usage warning
when CONFIG_PROVE_RCU is on.

Fix this by adding a wrapper to call lock/unlock for the non-debug case
too.

Fixes: 672d599041c8 ("btrfs: Use wrapper macro for rcu string to remove duplicate code")
Reported-by: David Howells <dhowells@redhat.com>
Tested-by: David Howells <dhowells@redhat.com>
Signed-off-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/ctree.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a67cc190a84b4..0b856da2fd3b3 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3390,9 +3390,9 @@ do {									\
 #define btrfs_debug(fs_info, fmt, args...) \
 	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_in_rcu(fs_info, fmt, args...) \
-	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+	btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_rl_in_rcu(fs_info, fmt, args...) \
-	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
+	btrfs_no_printk_in_rcu(fs_info, KERN_DEBUG fmt, ##args)
 #define btrfs_debug_rl(fs_info, fmt, args...) \
 	btrfs_no_printk(fs_info, KERN_DEBUG fmt, ##args)
 #endif
@@ -3404,6 +3404,13 @@ do {							\
 	rcu_read_unlock();				\
 } while (0)
 
+#define btrfs_no_printk_in_rcu(fs_info, fmt, args...)	\
+do {							\
+	rcu_read_lock();				\
+	btrfs_no_printk(fs_info, fmt, ##args);		\
+	rcu_read_unlock();				\
+} while (0)
+
 #define btrfs_printk_ratelimited(fs_info, fmt, args...)		\
 do {								\
 	static DEFINE_RATELIMIT_STATE(_rs,			\

From d177c8b61d6b4ef360b1c2682e4d8e3bae01738b Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <icenowy@aosc.io>
Date: Thu, 26 Jul 2018 12:48:05 +0800
Subject: [PATCH 014/257] arm64: allwinner: dts: h6: fix Pine H64 MMC bus width

Currently the enabled MMC controllers on Pine H64 do not have bus-width
set, which make them fall back to 1-bit mode and become quite slow.

Fix this by add the corresponding bus-width properties.

Fixes: ecbd611882a1 ("arm64: allwinner: h6: enable MMC0/2 on Pine H64")
Signed-off-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Maxime Ripard <maxime.ripard@bootlin.com>
---
 arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
index ceffc40810eec..48daec7f78ba7 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-pine-h64.dts
@@ -46,6 +46,7 @@
 	pinctrl-0 = <&mmc0_pins>;
 	vmmc-supply = <&reg_cldo1>;
 	cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>;
+	bus-width = <4>;
 	status = "okay";
 };
 
@@ -56,6 +57,7 @@
 	vqmmc-supply = <&reg_bldo2>;
 	non-removable;
 	cap-mmc-hw-reset;
+	bus-width = <8>;
 	status = "okay";
 };
 

From 4051c323c59b535cfb7dc10b349544b4ad49c21e Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Thu, 2 Aug 2018 11:41:07 +0300
Subject: [PATCH 015/257] ARC: configs: cleanup

 - Remove CONFIG_DEFAULT_HOSTNAME from defconfigs

   There's no reason to set the same hostname to all ARC boards
   by default. It usually gets overwritten by init scripts anyways.

 - Remove disabled CONFIG_DEVKMEM from defconfigs

   It is disabled by default

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/configs/axs101_defconfig          | 2 --
 arch/arc/configs/axs103_defconfig          | 2 --
 arch/arc/configs/axs103_smp_defconfig      | 2 --
 arch/arc/configs/haps_hs_defconfig         | 2 --
 arch/arc/configs/haps_hs_smp_defconfig     | 2 --
 arch/arc/configs/hsdk_defconfig            | 1 -
 arch/arc/configs/nps_defconfig             | 1 -
 arch/arc/configs/nsim_700_defconfig        | 2 --
 arch/arc/configs/nsim_hs_defconfig         | 2 --
 arch/arc/configs/nsim_hs_smp_defconfig     | 2 --
 arch/arc/configs/nsimosci_defconfig        | 2 --
 arch/arc/configs/nsimosci_hs_defconfig     | 2 --
 arch/arc/configs/nsimosci_hs_smp_defconfig | 2 --
 arch/arc/configs/tb10x_defconfig           | 1 -
 arch/arc/configs/vdk_hs38_defconfig        | 2 --
 arch/arc/configs/vdk_hs38_smp_defconfig    | 1 -
 16 files changed, 28 deletions(-)

diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index a635ea972304e..41a97eb7598dd 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -63,7 +62,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 CONFIG_MOUSE_SERIAL=y
 CONFIG_MOUSE_SYNAPTICS_USB=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index aa507e423075b..d8e2ca2385ccc 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -64,7 +63,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 CONFIG_MOUSE_SERIAL=y
 CONFIG_MOUSE_SYNAPTICS_USB=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index eba07f4686545..1e729b9726cd3 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -65,7 +64,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 CONFIG_MOUSE_SERIAL=y
 CONFIG_MOUSE_SYNAPTICS_USB=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index 098b19fbaa51f..240dd2cd51485 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -57,7 +56,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index 0104c404d8970..14ae7e5acc7c9 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -60,7 +59,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig
index 6491be0ddbc9e..1dec2b4bc5e6e 100644
--- a/arch/arc/configs/hsdk_defconfig
+++ b/arch/arc/configs/hsdk_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 CONFIG_SYSVIPC=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_NO_HZ_IDLE=y
diff --git a/arch/arc/configs/nps_defconfig b/arch/arc/configs/nps_defconfig
index 7c9c706ae7f66..31ba224bbfb47 100644
--- a/arch/arc/configs/nps_defconfig
+++ b/arch/arc/configs/nps_defconfig
@@ -59,7 +59,6 @@ CONFIG_NETCONSOLE=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 99e05cf63fca2..8e0b8b134cd9e 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -44,7 +43,6 @@ CONFIG_LXT_PHY=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_ARC=y
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index 0dc4f9b737e7a..739b90e5e8931 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
@@ -45,7 +44,6 @@ CONFIG_DEVTMPFS=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_ARC=y
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index be3c30a15e54c..b5895bdf3a939 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
@@ -44,7 +43,6 @@ CONFIG_DEVTMPFS=y
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_ARC=y
 CONFIG_SERIAL_ARC_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 3a74b9b217723..f14eeff7d3084 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
@@ -48,7 +47,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index ea2834b4dc1da..025298a483056 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
@@ -47,7 +46,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 80a5a1b4924bc..df7b77b13b823 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -1,4 +1,3 @@
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
@@ -58,7 +57,6 @@ CONFIG_MOUSE_PS2_TOUCHKIT=y
 # CONFIG_SERIO_SERPORT is not set
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/tb10x_defconfig b/arch/arc/configs/tb10x_defconfig
index 2cc87f909747c..a7f65313f84a5 100644
--- a/arch/arc/configs/tb10x_defconfig
+++ b/arch/arc/configs/tb10x_defconfig
@@ -57,7 +57,6 @@ CONFIG_STMMAC_ETH=y
 # CONFIG_SERIO is not set
 # CONFIG_VT is not set
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_NR_UARTS=1
diff --git a/arch/arc/configs/vdk_hs38_defconfig b/arch/arc/configs/vdk_hs38_defconfig
index f629493929ea6..db47c3541f159 100644
--- a/arch/arc/configs/vdk_hs38_defconfig
+++ b/arch/arc/configs/vdk_hs38_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y
@@ -53,7 +52,6 @@ CONFIG_NATIONAL_PHY=y
 CONFIG_MOUSE_PS2_TOUCHKIT=y
 CONFIG_SERIO_ARC_PS2=y
 # CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
 CONFIG_SERIAL_8250_DW=y
diff --git a/arch/arc/configs/vdk_hs38_smp_defconfig b/arch/arc/configs/vdk_hs38_smp_defconfig
index 21f0ca26a05d5..a8ac5e917d9a5 100644
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@@ -1,5 +1,4 @@
 # CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
 # CONFIG_CROSS_MEMORY_ATTACH is not set
 CONFIG_HIGH_RES_TIMERS=y
 CONFIG_IKCONFIG=y

From 5c0920897af59779546e9ea0e89c5db45c8aff33 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Thu, 2 Aug 2018 13:19:37 +0300
Subject: [PATCH 016/257] ARC: [plat-axs*/plat-hsdk]: Allow U-Boot to pass
 MAC-address to the kernel

Otherwise kernel uses random MAC which is not very conveniet.
With that change in place use might set desired MAC in U-Boot
with "setenv ethaddr 11:22:33:44:55:66", save environment and
then from boot to boot the same MAC will be used by the kernel.

One other note for this to happen it's required to pass
board's .dtb in U-Boot's "bootm" command like that:
------------------->8-----------------
bootm 0x82000000 - 0x84000000
------------------->8-----------------

Here 0x82000000 is location of uImage while
0x80000000 is location of either axs10x.dtb or hsdk.dtb
previously loaded from SD-card, USB storage or TFTP server.

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: Rob Herring <robh+dt@kernel.org>
Cc: stable@vger.kernel.org # 4.14
Cc: devicetree@vger.kernel.org
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/dts/axs10x_mb.dtsi | 7 ++++++-
 arch/arc/boot/dts/hsdk.dts       | 7 ++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi
index 47b74fbc403c2..37bafd44e36d0 100644
--- a/arch/arc/boot/dts/axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/axs10x_mb.dtsi
@@ -9,6 +9,10 @@
  */
 
 / {
+	aliases {
+		ethernet = &gmac;
+	};
+
 	axs10x_mb {
 		compatible = "simple-bus";
 		#address-cells = <1>;
@@ -68,7 +72,7 @@
 			};
 		};
 
-		ethernet@0x18000 {
+		gmac: ethernet@0x18000 {
 			#interrupt-cells = <1>;
 			compatible = "snps,dwmac";
 			reg = < 0x18000 0x2000 >;
@@ -81,6 +85,7 @@
 			max-speed = <100>;
 			resets = <&creg_rst 5>;
 			reset-names = "stmmaceth";
+			mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */
 		};
 
 		ehci@0x40000 {
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 006aa3de5348f..d00f283094d31 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -25,6 +25,10 @@
 		bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
 	};
 
+	aliases {
+		ethernet = &gmac;
+	};
+
 	cpus {
 		#address-cells = <1>;
 		#size-cells = <0>;
@@ -163,7 +167,7 @@
 			#clock-cells = <0>;
 		};
 
-		ethernet@8000 {
+		gmac: ethernet@8000 {
 			#interrupt-cells = <1>;
 			compatible = "snps,dwmac";
 			reg = <0x8000 0x2000>;
@@ -176,6 +180,7 @@
 			phy-handle = <&phy0>;
 			resets = <&cgu_rst HSDK_ETH_RESET>;
 			reset-names = "stmmaceth";
+			mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */
 
 			mdio {
 				#address-cells = <1>;

From c83532fb0fe053d2e43e9387354cb1b52ba26427 Mon Sep 17 00:00:00 2001
From: Alexey Brodkin <abrodkin@synopsys.com>
Date: Thu, 2 Aug 2018 11:50:16 +0300
Subject: [PATCH 017/257] ARC: [plat-axs*]: Enable SWAP

SWAP support on ARC was fixed earlier by
commit 6e3761145a9b ("ARC: Fix CONFIG_SWAP")
so now we may safely enable it on platforms that
have external media like USB and SD-card.

Note: it was already allowed for HSDK

Signed-off-by: Alexey Brodkin <abrodkin@synopsys.com>
Cc: stable@vger.kernel.org # 6e3761145a9b: ARC: Fix CONFIG_SWAP
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/configs/axs101_defconfig     | 1 -
 arch/arc/configs/axs103_defconfig     | 1 -
 arch/arc/configs/axs103_smp_defconfig | 1 -
 3 files changed, 3 deletions(-)

diff --git a/arch/arc/configs/axs101_defconfig b/arch/arc/configs/axs101_defconfig
index 41a97eb7598dd..41bc08be6a3b4 100644
--- a/arch/arc/configs/axs101_defconfig
+++ b/arch/arc/configs/axs101_defconfig
@@ -1,4 +1,3 @@
-# CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/axs103_defconfig b/arch/arc/configs/axs103_defconfig
index d8e2ca2385ccc..1e1c4a8011b52 100644
--- a/arch/arc/configs/axs103_defconfig
+++ b/arch/arc/configs/axs103_defconfig
@@ -1,4 +1,3 @@
-# CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set
diff --git a/arch/arc/configs/axs103_smp_defconfig b/arch/arc/configs/axs103_smp_defconfig
index 1e729b9726cd3..6b0c0cfd5c304 100644
--- a/arch/arc/configs/axs103_smp_defconfig
+++ b/arch/arc/configs/axs103_smp_defconfig
@@ -1,4 +1,3 @@
-# CONFIG_SWAP is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 # CONFIG_CROSS_MEMORY_ATTACH is not set

From 1e3bece2ded71eb85ac297a43002a942964e381d Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Mon, 6 Aug 2018 19:44:23 +0300
Subject: [PATCH 018/257] ARC: cleanup show_faulting_vma()

 - Remove unused variables
 - check return value of file_path

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/kernel/troubleshoot.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c
index 783b20354f8bf..e8d9fb4523462 100644
--- a/arch/arc/kernel/troubleshoot.c
+++ b/arch/arc/kernel/troubleshoot.c
@@ -83,9 +83,6 @@ static void print_task_path_n_nm(struct task_struct *tsk, char *buf)
 static void show_faulting_vma(unsigned long address, char *buf)
 {
 	struct vm_area_struct *vma;
-	struct inode *inode;
-	unsigned long ino = 0;
-	dev_t dev = 0;
 	char *nm = buf;
 	struct mm_struct *active_mm = current->active_mm;
 
@@ -99,12 +96,10 @@ static void show_faulting_vma(unsigned long address, char *buf)
 	 * if the container VMA is not found
 	 */
 	if (vma && (vma->vm_start <= address)) {
-		struct file *file = vma->vm_file;
-		if (file) {
-			nm = file_path(file, buf, PAGE_SIZE - 1);
-			inode = file_inode(vma->vm_file);
-			dev = inode->i_sb->s_dev;
-			ino = inode->i_ino;
+		if (vma->vm_file) {
+			nm = file_path(vma->vm_file, buf, PAGE_SIZE - 1);
+			if (IS_ERR(nm))
+				nm = "?";
 		}
 		pr_info("    @off 0x%lx in [%s]\n"
 			"    VMA: 0x%08lx to 0x%08lx\n",

From c27d0e9045bbbabffdde2bdba74e9971c4194ac4 Mon Sep 17 00:00:00 2001
From: Vineet Gupta <vgupta@synopsys.com>
Date: Thu, 16 Aug 2018 10:20:33 -0700
Subject: [PATCH 019/257] ARC: sort Kconfig

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Kconfig | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 6d5eb8267e429..b4441b0764d71 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -9,6 +9,7 @@
 config ARC
 	def_bool y
 	select ARC_TIMERS
+	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SYNC_DMA_FOR_CPU
 	select ARCH_HAS_SYNC_DMA_FOR_DEVICE
 	select ARCH_HAS_SG_CHAIN
@@ -28,8 +29,12 @@ config ARC
 	select GENERIC_SMP_IDLE_THREAD
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_FUTEX_CMPXCHG if FUTEX
+	select HAVE_GENERIC_DMA_COHERENT
 	select HAVE_IOREMAP_PROT
+	select HAVE_KERNEL_GZIP
+	select HAVE_KERNEL_LZMA
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
 	select HAVE_MEMBLOCK
@@ -44,11 +49,6 @@ config ARC
 	select OF_EARLY_FLATTREE
 	select OF_RESERVED_MEM
 	select PERF_USE_VMALLOC if ARC_CACHE_VIPT_ALIASING
-	select HAVE_DEBUG_STACKOVERFLOW
-	select HAVE_GENERIC_DMA_COHERENT
-	select HAVE_KERNEL_GZIP
-	select HAVE_KERNEL_LZMA
-	select ARCH_HAS_PTE_SPECIAL
 
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y

From 3fba68fa35a2cbda157c6f49f26eefccb2e10043 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 21 Aug 2018 13:44:07 +0200
Subject: [PATCH 020/257] scsi: core: Update SCSI_MQ_DEFAULT help text to match
 default

The default was changed, but the help text was not updated.

Fix grammar (s/the option/this option/) while at it.

[mkp: drop "new" as suggested by John Garry]

Fixes: d5038a13eca72fb2 ("scsi: core: switch to scsi-mq by default")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/Kconfig | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 8fc851a9e1162..7c097006c54db 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -52,12 +52,12 @@ config SCSI_MQ_DEFAULT
 	default y
 	depends on SCSI
 	---help---
-	  This option enables the new blk-mq based I/O path for SCSI
-	  devices by default.  With the option the scsi_mod.use_blk_mq
-	  module/boot option defaults to Y, without it to N, but it can
-	  still be overridden either way.
+	  This option enables the blk-mq based I/O path for SCSI devices by
+	  default.  With this option the scsi_mod.use_blk_mq module/boot
+	  option defaults to Y, without it to N, but it can still be
+	  overridden either way.
 
-	  If unsure say N.
+	  If unsure say Y.
 
 config SCSI_PROC_FS
 	bool "legacy /proc/scsi/ support"

From a7ccd92c8d2ac4eb168b621e086be2dc9b8344f6 Mon Sep 17 00:00:00 2001
From: John Pittman <jpittman@redhat.com>
Date: Thu, 23 Aug 2018 15:49:18 -0400
Subject: [PATCH 021/257] scsi: documentation: add scsi_mod.use_blk_mq to
 scsi-parameters

Kernel line argument scsi_mod.use_blk_mq is missing from file
Documentation/scsi/scsi-parameters.txt.  Add this option, providing
mention of config setting and format.

[mkp: clarified where to look]

Signed-off-by: John Pittman <jpittman@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/scsi/scsi-parameters.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/scsi/scsi-parameters.txt b/Documentation/scsi/scsi-parameters.txt
index 25a4b4cf04a6d..92999d4e0cb80 100644
--- a/Documentation/scsi/scsi-parameters.txt
+++ b/Documentation/scsi/scsi-parameters.txt
@@ -97,6 +97,11 @@ parameters may be changed at runtime by the command
 			allowing boot to proceed.  none ignores them, expecting
 			user space to do the scan.
 
+	scsi_mod.use_blk_mq=
+			[SCSI] use blk-mq I/O path by default
+			See SCSI_MQ_DEFAULT in drivers/scsi/Kconfig.
+			Format: <y/n>
+
 	sim710=		[SCSI,HW]
 			See header of drivers/scsi/sim710.c.
 

From 89809b028b6f54187b7d81a0c69b35d394c52e62 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sat, 11 Aug 2018 21:03:58 +0530
Subject: [PATCH 022/257] scsi: csiostor: add a check for NULL pointer after
 kmalloc()

Reported-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/csiostor/csio_hw.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 23d07e9f87d0d..6ff7c5580fcb6 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -2364,8 +2364,8 @@ static int csio_hw_prep_fw(struct csio_hw *hw, struct fw_info *fw_info,
 }
 
 /*
- * Returns -EINVAL if attempts to flash the firmware failed
- * else returns 0,
+ * Returns -EINVAL if attempts to flash the firmware failed,
+ * -ENOMEM if memory allocation failed else returns 0,
  * if flashing was not attempted because the card had the
  * latest firmware ECANCELED is returned
  */
@@ -2393,6 +2393,13 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset)
 		return -EINVAL;
 	}
 
+	/* allocate memory to read the header of the firmware on the
+	 * card
+	 */
+	card_fw = kmalloc(sizeof(*card_fw), GFP_KERNEL);
+	if (!card_fw)
+		return -ENOMEM;
+
 	if (csio_is_t5(pci_dev->device & CSIO_HW_CHIP_MASK))
 		fw_bin_file = FW_FNAME_T5;
 	else
@@ -2406,11 +2413,6 @@ csio_hw_flash_fw(struct csio_hw *hw, int *reset)
 		fw_size = fw->size;
 	}
 
-	/* allocate memory to read the header of the firmware on the
-	 * card
-	 */
-	card_fw = kmalloc(sizeof(*card_fw), GFP_KERNEL);
-
 	/* upgrade FW logic */
 	ret = csio_hw_prep_fw(hw, fw_info, fw_data, fw_size, card_fw,
 			 hw->fw_state, reset);

From 68bdc630721c40e908d22cffe07b5ca225a69f6e Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sat, 11 Aug 2018 21:14:08 +0530
Subject: [PATCH 023/257] scsi: csiostor: fix incorrect port capabilities

 - use be32_to_cpu() instead of ntohs() for 32 bit port capabilities.

 - add a new function fwcaps32_to_caps16() to convert 32 bit port
   capabilities to 16 bit port capabilities.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/csiostor/csio_hw.c | 55 ++++++++++++++++++++++++++-------
 drivers/scsi/csiostor/csio_hw.h |  1 +
 drivers/scsi/csiostor/csio_mb.c |  6 ++--
 3 files changed, 48 insertions(+), 14 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_hw.c b/drivers/scsi/csiostor/csio_hw.c
index 6ff7c5580fcb6..e519238864758 100644
--- a/drivers/scsi/csiostor/csio_hw.c
+++ b/drivers/scsi/csiostor/csio_hw.c
@@ -1601,6 +1601,46 @@ fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16)
 	return caps32;
 }
 
+/**
+ *	fwcaps32_to_caps16 - convert 32-bit Port Capabilities to 16-bits
+ *	@caps32: a 32-bit Port Capabilities value
+ *
+ *	Returns the equivalent 16-bit Port Capabilities value.  Note that
+ *	not all 32-bit Port Capabilities can be represented in the 16-bit
+ *	Port Capabilities and some fields/values may not make it.
+ */
+fw_port_cap16_t fwcaps32_to_caps16(fw_port_cap32_t caps32)
+{
+	fw_port_cap16_t caps16 = 0;
+
+	#define CAP32_TO_CAP16(__cap) \
+		do { \
+			if (caps32 & FW_PORT_CAP32_##__cap) \
+				caps16 |= FW_PORT_CAP_##__cap; \
+		} while (0)
+
+	CAP32_TO_CAP16(SPEED_100M);
+	CAP32_TO_CAP16(SPEED_1G);
+	CAP32_TO_CAP16(SPEED_10G);
+	CAP32_TO_CAP16(SPEED_25G);
+	CAP32_TO_CAP16(SPEED_40G);
+	CAP32_TO_CAP16(SPEED_100G);
+	CAP32_TO_CAP16(FC_RX);
+	CAP32_TO_CAP16(FC_TX);
+	CAP32_TO_CAP16(802_3_PAUSE);
+	CAP32_TO_CAP16(802_3_ASM_DIR);
+	CAP32_TO_CAP16(ANEG);
+	CAP32_TO_CAP16(FORCE_PAUSE);
+	CAP32_TO_CAP16(MDIAUTO);
+	CAP32_TO_CAP16(MDISTRAIGHT);
+	CAP32_TO_CAP16(FEC_RS);
+	CAP32_TO_CAP16(FEC_BASER_RS);
+
+	#undef CAP32_TO_CAP16
+
+	return caps16;
+}
+
 /**
  *      lstatus_to_fwcap - translate old lstatus to 32-bit Port Capabilities
  *      @lstatus: old FW_PORT_ACTION_GET_PORT_INFO lstatus value
@@ -1759,7 +1799,7 @@ csio_enable_ports(struct csio_hw *hw)
 			val = 1;
 
 			csio_mb_params(hw, mbp, CSIO_MB_DEFAULT_TMO,
-				       hw->pfn, 0, 1, &param, &val, false,
+				       hw->pfn, 0, 1, &param, &val, true,
 				       NULL);
 
 			if (csio_mb_issue(hw, mbp)) {
@@ -1769,16 +1809,9 @@ csio_enable_ports(struct csio_hw *hw)
 				return -EINVAL;
 			}
 
-			csio_mb_process_read_params_rsp(hw, mbp, &retval, 1,
-							&val);
-			if (retval != FW_SUCCESS) {
-				csio_err(hw, "FW_PARAMS_CMD(r) port:%d failed: 0x%x\n",
-					 portid, retval);
-				mempool_free(mbp, hw->mb_mempool);
-				return -EINVAL;
-			}
-
-			fw_caps = val;
+			csio_mb_process_read_params_rsp(hw, mbp, &retval,
+							0, NULL);
+			fw_caps = retval ? FW_CAPS16 : FW_CAPS32;
 		}
 
 		/* Read PORT information */
diff --git a/drivers/scsi/csiostor/csio_hw.h b/drivers/scsi/csiostor/csio_hw.h
index 9e73ef771eb73..e351af6e7c81c 100644
--- a/drivers/scsi/csiostor/csio_hw.h
+++ b/drivers/scsi/csiostor/csio_hw.h
@@ -639,6 +639,7 @@ int csio_handle_intr_status(struct csio_hw *, unsigned int,
 
 fw_port_cap32_t fwcap_to_fwspeed(fw_port_cap32_t acaps);
 fw_port_cap32_t fwcaps16_to_caps32(fw_port_cap16_t caps16);
+fw_port_cap16_t fwcaps32_to_caps16(fw_port_cap32_t caps32);
 fw_port_cap32_t lstatus_to_fwcap(u32 lstatus);
 
 int csio_hw_start(struct csio_hw *);
diff --git a/drivers/scsi/csiostor/csio_mb.c b/drivers/scsi/csiostor/csio_mb.c
index c026417269c3c..6f13673d6aa05 100644
--- a/drivers/scsi/csiostor/csio_mb.c
+++ b/drivers/scsi/csiostor/csio_mb.c
@@ -368,7 +368,7 @@ csio_mb_port(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
 			FW_CMD_LEN16_V(sizeof(*cmdp) / 16));
 
 	if (fw_caps == FW_CAPS16)
-		cmdp->u.l1cfg.rcap = cpu_to_be32(fc);
+		cmdp->u.l1cfg.rcap = cpu_to_be32(fwcaps32_to_caps16(fc));
 	else
 		cmdp->u.l1cfg32.rcap32 = cpu_to_be32(fc);
 }
@@ -395,8 +395,8 @@ csio_mb_process_read_port_rsp(struct csio_hw *hw, struct csio_mb *mbp,
 			*pcaps = fwcaps16_to_caps32(ntohs(rsp->u.info.pcap));
 			*acaps = fwcaps16_to_caps32(ntohs(rsp->u.info.acap));
 		} else {
-			*pcaps = ntohs(rsp->u.info32.pcaps32);
-			*acaps = ntohs(rsp->u.info32.acaps32);
+			*pcaps = be32_to_cpu(rsp->u.info32.pcaps32);
+			*acaps = be32_to_cpu(rsp->u.info32.acaps32);
 		}
 	}
 }

From 9abd9990e9779dc9c548c3599aaca7e3505ab19d Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Tue, 14 Aug 2018 12:55:05 -0700
Subject: [PATCH 024/257] scsi: lpfc: Default fdmi_on to on

Change default behavior for fdmi registration to on.

[mkp: patch was mangled]

Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_attr.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 5a25553415f8b..057a60abe664d 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -5122,16 +5122,16 @@ LPFC_ATTR_R(enable_SmartSAN, 0, 0, 1, "Enable SmartSAN functionality");
 
 /*
 # lpfc_fdmi_on: Controls FDMI support.
-#       0       No FDMI support (default)
-#       1       Traditional FDMI support
+#       0       No FDMI support
+#       1       Traditional FDMI support (default)
 # Traditional FDMI support means the driver will assume FDMI-2 support;
 # however, if that fails, it will fallback to FDMI-1.
 # If lpfc_enable_SmartSAN is set to 1, the driver ignores lpfc_fdmi_on.
 # If lpfc_enable_SmartSAN is set 0, the driver uses the current value of
 # lpfc_fdmi_on.
-# Value range [0,1]. Default value is 0.
+# Value range [0,1]. Default value is 1.
 */
-LPFC_ATTR_R(fdmi_on, 0, 0, 1, "Enable FDMI support");
+LPFC_ATTR_R(fdmi_on, 1, 0, 1, "Enable FDMI support");
 
 /*
 # Specifies the maximum number of ELS cmds we can have outstanding (for

From 53e13ee087a80e8d4fc95436318436e5c2c1f8c2 Mon Sep 17 00:00:00 2001
From: James Smart <jsmart2021@gmail.com>
Date: Thu, 16 Aug 2018 16:04:05 -0700
Subject: [PATCH 025/257] scsi: lpfc: Correct MDS diag and nvmet configuration

A recent change added some MDS processing in the lpfc_drain_txq routine
that relies on the fcp_wq being allocated. For nvmet operation the fcp_wq
is not allocated because it can only be an nvme-target.  When the original
MDS support was added LS_MDS_LOOPBACK was defined wrong, (0x16) it should
have been 0x10 (decimal value used for hex setting). This incorrect value
allowed MDS_LOOPBACK to be set simultaneously with LS_NPIV_FAB_SUPPORTED,
causing the driver to crash when it accesses the non-existent fcp_wq.

Correct the bad value setting for LS_MDS_LOOPBACK.

Fixes: 	ae9e28f36a6c  ("lpfc: Add MDS Diagnostic support.")
Cc: <stable@vger.kernel.org> # v4.12+
Signed-off-by: Dick Kennedy <dick.kennedy@broadcom.com>
Signed-off-by: James Smart <james.smart@broadcom.com>
Tested-by: Ewan D. Milne <emilne@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index e0d0da5f43d61..43732e8d13473 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -672,7 +672,7 @@ struct lpfc_hba {
 #define LS_NPIV_FAB_SUPPORTED 0x2	/* Fabric supports NPIV */
 #define LS_IGNORE_ERATT       0x4	/* intr handler should ignore ERATT */
 #define LS_MDS_LINK_DOWN      0x8	/* MDS Diagnostics Link Down */
-#define LS_MDS_LOOPBACK      0x16	/* MDS Diagnostics Link Up (Loopback) */
+#define LS_MDS_LOOPBACK      0x10	/* MDS Diagnostics Link Up (Loopback) */
 
 	uint32_t hba_flag;	/* hba generic flags */
 #define HBA_ERATT_HANDLED	0x1 /* This flag is set when eratt handled */

From eb53a3ea3e009578a388f106620b22b1707cf2f6 Mon Sep 17 00:00:00 2001
From: Martin Wilck <mwilck@suse.com>
Date: Wed, 22 Aug 2018 13:25:44 +0200
Subject: [PATCH 026/257] scsi: hpsa: limit transfer length to 1MB, not 512kB

e2c7b43 was supposed to limit transfer length to 1MB, but got the unit of
max_sectors wrong.

Fixes: e2c7b433f729 ("scsi: hpsa: limit transfer length to 1MB")
Signed-off-by: Martin Wilck <mwilck@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 58bb70b886d70..c120929d4ffe5 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -976,7 +976,7 @@ static struct scsi_host_template hpsa_driver_template = {
 #endif
 	.sdev_attrs = hpsa_sdev_attrs,
 	.shost_attrs = hpsa_shost_attrs,
-	.max_sectors = 1024,
+	.max_sectors = 2048,
 	.no_write_same = 1,
 };
 

From cedefa8544c6be216b4710575065e3a11065f8d0 Mon Sep 17 00:00:00 2001
From: Varun Prakash <varun@chelsio.com>
Date: Sat, 11 Aug 2018 21:10:29 +0530
Subject: [PATCH 027/257] scsi: target: iscsi: cxgbit: use pr_debug() instead
 of pr_info()

DDP programming happens in data path and it can fail because of lack of
resources so use pr_debug() instead of pr_info() for this case.

Signed-off-by: Varun Prakash <varun@chelsio.com>
Reviewed-by: Mike Christie <mchristi@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/iscsi/cxgbit/cxgbit_ddp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
index 768cce0ccb807..76a262674c8dc 100644
--- a/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
+++ b/drivers/target/iscsi/cxgbit/cxgbit_ddp.c
@@ -207,8 +207,8 @@ cxgbit_ddp_reserve(struct cxgbit_sock *csk, struct cxgbi_task_tag_info *ttinfo,
 	ret = dma_map_sg(&ppm->pdev->dev, sgl, sgcnt, DMA_FROM_DEVICE);
 	sgl->offset = sg_offset;
 	if (!ret) {
-		pr_info("%s: 0x%x, xfer %u, sgl %u dma mapping err.\n",
-			__func__, 0, xferlen, sgcnt);
+		pr_debug("%s: 0x%x, xfer %u, sgl %u dma mapping err.\n",
+			 __func__, 0, xferlen, sgcnt);
 		goto rel_ppods;
 	}
 
@@ -250,8 +250,8 @@ cxgbit_get_r2t_ttt(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 
 	ret = cxgbit_ddp_reserve(csk, ttinfo, cmd->se_cmd.data_length);
 	if (ret < 0) {
-		pr_info("csk 0x%p, cmd 0x%p, xfer len %u, sgcnt %u no ddp.\n",
-			csk, cmd, cmd->se_cmd.data_length, ttinfo->nents);
+		pr_debug("csk 0x%p, cmd 0x%p, xfer len %u, sgcnt %u no ddp.\n",
+			 csk, cmd, cmd->se_cmd.data_length, ttinfo->nents);
 
 		ttinfo->sgl = NULL;
 		ttinfo->nents = 0;

From 4e8065aa6c6f50765290be27ab8a64a4e44cb009 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 23 Aug 2018 23:23:06 +0200
Subject: [PATCH 028/257] scsi: libata: Add missing newline at end of file

With gcc 4.1.2:

    drivers/ata/libata-core.c:7396:33: warning: no newline at end of file

Fixes: 2fa4a32613c9182b ("scsi: libsas: dynamically allocate and free ata host")
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ata/libata-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 172e32840256b..599e01bcdef22 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -7394,4 +7394,4 @@ EXPORT_SYMBOL_GPL(ata_cable_unknown);
 EXPORT_SYMBOL_GPL(ata_cable_ignore);
 EXPORT_SYMBOL_GPL(ata_cable_sata);
 EXPORT_SYMBOL_GPL(ata_host_get);
-EXPORT_SYMBOL_GPL(ata_host_put);
\ No newline at end of file
+EXPORT_SYMBOL_GPL(ata_host_put);

From 23aa8e69f2c6ceb0bdca52f4450ad1f45675ca73 Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 27 Aug 2018 15:24:42 +0800
Subject: [PATCH 029/257] Revert "scsi: core: fix scsi_host_queue_ready"

This reverts commit 265d59aacbce7e50bdc1f5d25033c38dd70b3767.

There is fundamental issue in commit 328728630d9f2bf1 (scsi: core: avoid
host-wide host_busy counter for scsi_mq) because SCSI's host busy counter
may not be same with counter of blk-mq's inflight tags, especially in case
of none io scheduler.

So revert this commit first.

Cc: Omar Sandoval <osandov@fb.com>,
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>,
Cc: James Bottomley <james.bottomley@hansenpartnership.com>,
Cc: Christoph Hellwig <hch@lst.de>,
Cc: Don Brace <don.brace@microsemi.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Jens Axboe <axboe@kernel.dk>
Reported-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 0adfb3bce0fd6..1046679f5473c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1611,7 +1611,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 	else
 		busy = 0;
 	if (atomic_read(&shost->host_blocked) > 0) {
-		if (busy)
+		if (busy || scsi_host_busy(shost))
 			goto starved;
 
 		/*

From d772a65d8a6c45c376a8200a38f7f82fb480af6a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 27 Aug 2018 15:24:43 +0800
Subject: [PATCH 030/257] Revert "scsi: core: avoid host-wide host_busy counter
 for scsi_mq"

This reverts commit 328728630d9f2bf14b82ca30b5e47489beefe361.

There is fundamental issue in commit 328728630d9f2bf1 (scsi: core: avoid
host-wide host_busy counter for scsi_mq) because SCSI's host busy counter
may not be same with counter of blk-mq's inflight tags, especially in case
of none io scheduler.

We may switch to other approach for addressing this scsi_mq's performance
issue, such as percpu counter or kind of ways, so revert this commit first
for fixing this kind of issue in EH path, as reported by Jens.

Cc: Omar Sandoval <osandov@fb.com>,
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>,
Cc: James Bottomley <james.bottomley@hansenpartnership.com>,
Cc: Christoph Hellwig <hch@lst.de>,
Cc: Don Brace <don.brace@microsemi.com>
Cc: Kashyap Desai <kashyap.desai@broadcom.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Reported-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c    | 24 +-----------------------
 drivers/scsi/scsi_lib.c | 23 ++++++-----------------
 2 files changed, 7 insertions(+), 40 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index f02dcc875a09a..ea4b0bb0c1cd4 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -563,35 +563,13 @@ struct Scsi_Host *scsi_host_get(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL(scsi_host_get);
 
-struct scsi_host_mq_in_flight {
-	int cnt;
-};
-
-static void scsi_host_check_in_flight(struct request *rq, void *data,
-		bool reserved)
-{
-	struct scsi_host_mq_in_flight *in_flight = data;
-
-	if (blk_mq_request_started(rq))
-		in_flight->cnt++;
-}
-
 /**
  * scsi_host_busy - Return the host busy counter
  * @shost:	Pointer to Scsi_Host to inc.
  **/
 int scsi_host_busy(struct Scsi_Host *shost)
 {
-	struct scsi_host_mq_in_flight in_flight = {
-		.cnt = 0,
-	};
-
-	if (!shost->use_blk_mq)
-		return atomic_read(&shost->host_busy);
-
-	blk_mq_tagset_busy_iter(&shost->tag_set, scsi_host_check_in_flight,
-			&in_flight);
-	return in_flight.cnt;
+	return atomic_read(&shost->host_busy);
 }
 EXPORT_SYMBOL(scsi_host_busy);
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1046679f5473c..eb97d2dd36516 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -345,8 +345,7 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost)
 	unsigned long flags;
 
 	rcu_read_lock();
-	if (!shost->use_blk_mq)
-		atomic_dec(&shost->host_busy);
+	atomic_dec(&shost->host_busy);
 	if (unlikely(scsi_host_in_recovery(shost))) {
 		spin_lock_irqsave(shost->host_lock, flags);
 		if (shost->host_failed || shost->host_eh_scheduled)
@@ -445,12 +444,7 @@ static inline bool scsi_target_is_busy(struct scsi_target *starget)
 
 static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
 {
-	/*
-	 * blk-mq can handle host queue busy efficiently via host-wide driver
-	 * tag allocation
-	 */
-
-	if (!shost->use_blk_mq && shost->can_queue > 0 &&
+	if (shost->can_queue > 0 &&
 	    atomic_read(&shost->host_busy) >= shost->can_queue)
 		return true;
 	if (atomic_read(&shost->host_blocked) > 0)
@@ -1606,12 +1600,9 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 	if (scsi_host_in_recovery(shost))
 		return 0;
 
-	if (!shost->use_blk_mq)
-		busy = atomic_inc_return(&shost->host_busy) - 1;
-	else
-		busy = 0;
+	busy = atomic_inc_return(&shost->host_busy) - 1;
 	if (atomic_read(&shost->host_blocked) > 0) {
-		if (busy || scsi_host_busy(shost))
+		if (busy)
 			goto starved;
 
 		/*
@@ -1625,7 +1616,7 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 				     "unblocking host at zero depth\n"));
 	}
 
-	if (!shost->use_blk_mq && shost->can_queue > 0 && busy >= shost->can_queue)
+	if (shost->can_queue > 0 && busy >= shost->can_queue)
 		goto starved;
 	if (shost->host_self_blocked)
 		goto starved;
@@ -1711,9 +1702,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q)
 	 * with the locks as normal issue path does.
 	 */
 	atomic_inc(&sdev->device_busy);
-
-	if (!shost->use_blk_mq)
-		atomic_inc(&shost->host_busy);
+	atomic_inc(&shost->host_busy);
 	if (starget->can_queue > 0)
 		atomic_inc(&starget->target_busy);
 

From b9eb3b14f1dbf16bf27b6c1ffe6b8c00ec945c9b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Mon, 27 Aug 2018 12:23:01 +0300
Subject: [PATCH 031/257] scsi: aacraid: fix a signedness bug

The problem is that ->reset_state is a u8 but it can be set to -1 or -2 in
aac_tmf_callback() and the error handling in aac_eh_target_reset() relies
on it to be signed.

[mkp: fixed typo]

Fixes: 0d643ff3c353 ("scsi: aacraid: use aac_tmf_callback for reset fib")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/aacraid/aacraid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index 29bf1e60f5428..39eb415987fc9 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -1346,7 +1346,7 @@ struct fib {
 struct aac_hba_map_info {
 	__le32	rmw_nexus;		/* nexus for native HBA devices */
 	u8		devtype;	/* device type */
-	u8		reset_state;	/* 0 - no reset, 1..x - */
+	s8		reset_state;	/* 0 - no reset, 1..x - */
 					/* after xth TM LUN reset */
 	u16		qd_limit;
 	u32		scan_counter;

From bab1be79a5169ac748d8292b20c86d874022d7ba Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Aug 2018 18:38:31 +0800
Subject: [PATCH 032/257] sctp: hold transport before accessing its asoc in
 sctp_transport_get_next

As Marcelo noticed, in sctp_transport_get_next, it is iterating over
transports but then also accessing the association directly, without
checking any refcnts before that, which can cause an use-after-free
Read.

So fix it by holding transport before accessing the association. With
that, sctp_transport_hold calls can be removed in the later places.

Fixes: 626d16f50f39 ("sctp: export some apis or variables for sctp_diag and reuse some for proc")
Reported-by: syzbot+fe62a0c9aa6a85c6de16@syzkaller.appspotmail.com
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/proc.c   |  4 ----
 net/sctp/socket.c | 22 +++++++++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index ef5c9a82d4e89..4d6f1c8d66596 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -264,8 +264,6 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v)
 	}
 
 	transport = (struct sctp_transport *)v;
-	if (!sctp_transport_hold(transport))
-		return 0;
 	assoc = transport->asoc;
 	epb = &assoc->base;
 	sk = epb->sk;
@@ -322,8 +320,6 @@ static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
 	}
 
 	transport = (struct sctp_transport *)v;
-	if (!sctp_transport_hold(transport))
-		return 0;
 	assoc = transport->asoc;
 
 	list_for_each_entry_rcu(tsp, &assoc->peer.transport_addr_list,
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index e96b15a66abaa..aa76586a1a1c8 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5005,9 +5005,14 @@ struct sctp_transport *sctp_transport_get_next(struct net *net,
 			break;
 		}
 
+		if (!sctp_transport_hold(t))
+			continue;
+
 		if (net_eq(sock_net(t->asoc->base.sk), net) &&
 		    t->asoc->peer.primary_path == t)
 			break;
+
+		sctp_transport_put(t);
 	}
 
 	return t;
@@ -5017,13 +5022,18 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net,
 					      struct rhashtable_iter *iter,
 					      int pos)
 {
-	void *obj = SEQ_START_TOKEN;
+	struct sctp_transport *t;
 
-	while (pos && (obj = sctp_transport_get_next(net, iter)) &&
-	       !IS_ERR(obj))
-		pos--;
+	if (!pos)
+		return SEQ_START_TOKEN;
 
-	return obj;
+	while ((t = sctp_transport_get_next(net, iter)) && !IS_ERR(t)) {
+		if (!--pos)
+			break;
+		sctp_transport_put(t);
+	}
+
+	return t;
 }
 
 int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *),
@@ -5082,8 +5092,6 @@ int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *),
 
 	tsp = sctp_transport_get_idx(net, &hti, *pos + 1);
 	for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) {
-		if (!sctp_transport_hold(tsp))
-			continue;
 		ret = cb(tsp, p);
 		if (ret)
 			break;

From 834539e69a5fe2aab33cc777ccfd4a4fcc5b9770 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Aug 2018 18:40:18 +0800
Subject: [PATCH 033/257] sctp: remove useless start_fail from sctp_ht_iter in
 proc

After changing rhashtable_walk_start to return void, start_fail would
never be set other value than 0, and the checking for start_fail is
pointless, so remove it.

Fixes: 97a6ec4ac021 ("rhashtable: Change rhashtable_walk_start to return void")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Acked-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/proc.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 4d6f1c8d66596..a644292f9fafd 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -215,7 +215,6 @@ static const struct seq_operations sctp_eps_ops = {
 struct sctp_ht_iter {
 	struct seq_net_private p;
 	struct rhashtable_iter hti;
-	int start_fail;
 };
 
 static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
@@ -224,7 +223,6 @@ static void *sctp_transport_seq_start(struct seq_file *seq, loff_t *pos)
 
 	sctp_transport_walk_start(&iter->hti);
 
-	iter->start_fail = 0;
 	return sctp_transport_get_idx(seq_file_net(seq), &iter->hti, *pos);
 }
 
@@ -232,8 +230,6 @@ static void sctp_transport_seq_stop(struct seq_file *seq, void *v)
 {
 	struct sctp_ht_iter *iter = seq->private;
 
-	if (iter->start_fail)
-		return;
 	sctp_transport_walk_stop(&iter->hti);
 }
 

From 84581bdae9587023cea1d139523f0ef0f28bd88d Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 27 Aug 2018 18:41:32 +0800
Subject: [PATCH 034/257] erspan: set erspan_ver to 1 by default when adding an
 erspan dev

After erspan_ver is introudced, if erspan_ver is not set in iproute, its
value will be left 0 by default. Since Commit 02f99df1875c ("erspan: fix
invalid erspan version."), it has broken the traffic due to the version
check in erspan_xmit if users are not aware of 'erspan_ver' param, like
using an old version of iproute.

To fix this compatibility problem, it sets erspan_ver to 1 by default
when adding an erspan dev in erspan_setup. Note that we can't do it in
ipgre_netlink_parms, as this function is also used by ipgre_changelink.

Fixes: 02f99df1875c ("erspan: fix invalid erspan version.")
Reported-by: Jianlin Shi <jishi@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c  | 3 +++
 net/ipv6/ip6_gre.c | 1 +
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 51a5d06085ac4..ae714aecc31c0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1508,11 +1508,14 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
 static void erspan_setup(struct net_device *dev)
 {
+	struct ip_tunnel *t = netdev_priv(dev);
+
 	ether_setup(dev);
 	dev->netdev_ops = &erspan_netdev_ops;
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	ip_tunnel_setup(dev, erspan_net_id);
+	t->erspan_ver = 1;
 }
 
 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 18a3794b0f52e..e493b041d4ac9 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1778,6 +1778,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
 	if (data[IFLA_GRE_COLLECT_METADATA])
 		parms->collect_md = true;
 
+	parms->erspan_ver = 1;
 	if (data[IFLA_GRE_ERSPAN_VER])
 		parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
 

From d5ed72a55bc0a321ef33d272e2b0bf0d2b06d1fe Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Aug 2018 20:58:43 +0200
Subject: [PATCH 035/257] net: sched: fix extack error message when chain is
 failed to be created

Instead "Cannot find" say "Cannot create".

Fixes: c35a4acc2985 ("net: sched: cls_api: handle generic cls errors")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 31bd1439cf605..2d41c5b21b487 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1252,7 +1252,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 	}
 	chain = tcf_chain_get(block, chain_index, true);
 	if (!chain) {
-		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
 		err = -ENOMEM;
 		goto errout;
 	}

From b7b4247d553939ccf02ff597ec60f41a2f93ee8e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Aug 2018 20:58:44 +0200
Subject: [PATCH 036/257] net: sched: return -ENOENT when trying to remove
 filter from non-existent chain

When chain 0 was implicitly created, removal of non-existent filter from
chain 0 gave -ENOENT. Once chain 0 became non-implicit, the same call is
giving -EINVAL. Fix this by returning -ENOENT in that case.

Reported-by: Roman Mashak <mrv@mojatatu.com>
Fixes: f71e0ca4db18 ("net: sched: Avoid implicit chain 0 creation")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 2d41c5b21b487..1a67af8a6e8ce 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1399,7 +1399,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
 			goto errout;
 		}
 		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
-		err = -EINVAL;
+		err = -ENOENT;
 		goto errout;
 	}
 

From 30935198b7d0be12b1c45c328b66a7fdefb16256 Mon Sep 17 00:00:00 2001
From: Haiqing Bai <Haiqing.Bai@windriver.com>
Date: Mon, 27 Aug 2018 09:32:26 +0800
Subject: [PATCH 037/257] tipc: fix the big/little endian issue in tipc_dest

In function tipc_dest_push, the 32bit variables 'node' and 'port'
are stored separately in uppper and lower part of 64bit 'value'.
Then this value is assigned to dst->value which is a union like:
union
{
  struct {
    u32 port;
    u32 node;
  };
  u64 value;
}
This works on little-endian machines like x86 but fails on big-endian
machines.

The fix remove the 'value' stack parameter and even the 'value'
member of the union in tipc_dest, assign the 'node' and 'port' member
directly with the input parameter to avoid the endian issue.

Fixes: a80ae5306a73 ("tipc: improve destination linked list")
Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Haiqing Bai <Haiqing.Bai@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/name_table.c | 10 ++++------
 net/tipc/name_table.h |  9 ++-------
 2 files changed, 6 insertions(+), 13 deletions(-)

diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 88f027b502f6f..66d5b2c5987ad 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -980,20 +980,17 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb)
 
 struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port)
 {
-	u64 value = (u64)node << 32 | port;
 	struct tipc_dest *dst;
 
 	list_for_each_entry(dst, l, list) {
-		if (dst->value != value)
-			continue;
-		return dst;
+		if (dst->node == node && dst->port == port)
+			return dst;
 	}
 	return NULL;
 }
 
 bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
 {
-	u64 value = (u64)node << 32 | port;
 	struct tipc_dest *dst;
 
 	if (tipc_dest_find(l, node, port))
@@ -1002,7 +999,8 @@ bool tipc_dest_push(struct list_head *l, u32 node, u32 port)
 	dst = kmalloc(sizeof(*dst), GFP_ATOMIC);
 	if (unlikely(!dst))
 		return false;
-	dst->value = value;
+	dst->node = node;
+	dst->port = port;
 	list_add(&dst->list, l);
 	return true;
 }
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index 0febba41da86e..892bd750b85fa 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -133,13 +133,8 @@ void tipc_nametbl_stop(struct net *net);
 
 struct tipc_dest {
 	struct list_head list;
-	union {
-		struct {
-			u32 port;
-			u32 node;
-		};
-		u64 value;
-	};
+	u32 port;
+	u32 node;
 };
 
 struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port);

From ad8619864f0c9bd89e14d957afa3fd8aaf0720da Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 27 Aug 2018 00:20:11 +0200
Subject: [PATCH 038/257] net: dsa: Drop GPIO includes

Commit 52638f71fcff ("dsa: Move gpio reset into switch driver")
moved the GPIO handling into the switch drivers but forgot
to remove the GPIO header includes.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/dsa.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index e63c554e0623e..9f3209ff7ffde 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -19,12 +19,10 @@
 #include <linux/of_mdio.h>
 #include <linux/of_platform.h>
 #include <linux/of_net.h>
-#include <linux/of_gpio.h>
 #include <linux/netdevice.h>
 #include <linux/sysfs.h>
 #include <linux/phy_fixed.h>
 #include <linux/ptp_classify.h>
-#include <linux/gpio/consumer.h>
 #include <linux/etherdevice.h>
 
 #include "dsa_priv.h"

From 53ae914d898e5dd5984d352d5fa0b23410f966a0 Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@oracle.com>
Date: Sat, 25 Aug 2018 15:19:05 +0800
Subject: [PATCH 039/257] net/rds: Use rdma_read_gids to get connection
 SGID/DGID in IPv6

In IPv4, the newly introduced rdma_read_gids is used to read the SGID/DGID
for the connection which returns GID correctly for RoCE transport as well.

In IPv6, rdma_read_gids is also used. The following are why rdma_read_gids
is introduced.

rdma_addr_get_dgid() for RoCE for client side connections returns MAC
address, instead of DGID.
rdma_addr_get_sgid() for RoCE doesn't return correct SGID for IPv6 and
when more than one IP address is assigned to the netdevice.

So the transport agnostic rdma_read_gids() API is provided by rdma_cm
module.

Signed-off-by: Zhu Yanjun <yanjun.zhu@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/rds/ib.c b/net/rds/ib.c
index c1d97640c0be7..eba75c1ba3594 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -341,15 +341,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn,
 
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
-		struct rdma_dev_addr *dev_addr;
 
 		ic = conn->c_transport_data;
-		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
-		rdma_addr_get_sgid(dev_addr,
-				   (union ib_gid *)&iinfo6->src_gid);
-		rdma_addr_get_dgid(dev_addr,
-				   (union ib_gid *)&iinfo6->dst_gid);
-
+		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid,
+			       (union ib_gid *)&iinfo6->dst_gid);
 		rds_ibdev = ic->rds_ibdev;
 		iinfo6->max_send_wr = ic->i_send_ring.w_nr;
 		iinfo6->max_recv_wr = ic->i_recv_ring.w_nr;

From e06fa9c16ce4b740996189fa5610eabcee734e6c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 24 Aug 2018 22:08:50 +0200
Subject: [PATCH 040/257] bpf, sockmap: fix potential use after free in
 bpf_tcp_close

bpf_tcp_close() we pop the psock linkage to a map via psock_map_pop().
A parallel update on the sock hash map can happen between psock_map_pop()
and lookup_elem_raw() where we override the element under link->hash /
link->key. In bpf_tcp_close()'s lookup_elem_raw() we subsequently only
test whether an element is present, but we do not test whether the
element is infact the element we were looking for.

We lock the sock in bpf_tcp_close() during that time, so do we hold
the lock in sock_hash_update_elem(). However, the latter locks the
sock which is newly updated, not the one we're purging from the hash
table. This means that while one CPU is doing the lookup from bpf_tcp_close(),
another CPU is doing the map update in parallel, dropped our sock from
the hlist and released the psock.

Subsequently the first CPU will find the new sock and attempts to drop
and release the old sock yet another time. Fix is that we need to check
the elements for a match after lookup, similar as we do in the sock map.
Note that the hash tab elems are freed via RCU, so access to their
link->hash / link->key is fine since we're under RCU read side there.

Fixes: e9db4ef6bf4c ("bpf: sockhash fix omitted bucket lock in sock_close")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/sockmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index cf5195c7c3317..01879e4d599a5 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -369,7 +369,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 			/* If another thread deleted this object skip deletion.
 			 * The refcnt on psock may or may not be zero.
 			 */
-			if (l) {
+			if (l && l == link) {
 				hlist_del_rcu(&link->hash_node);
 				smap_release_sock(psock, link->sk);
 				free_htab_elem(htab, link);

From 15c480efab01197c965ce0562a43ffedd852b8f9 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 24 Aug 2018 22:08:51 +0200
Subject: [PATCH 041/257] bpf, sockmap: fix psock refcount leak in
 bpf_tcp_recvmsg

In bpf_tcp_recvmsg() we first took a reference on the psock, however
once we find that there are skbs in the normal socket's receive queue
we return with processing them through tcp_recvmsg(). Problem is that
we leak the taken reference on the psock in that path. Given we don't
really do anything with the psock at this point, move the skb_queue_empty()
test before we fetch the psock to fix this case.

Fixes: 8934ce2fd081 ("bpf: sockmap redirect ingress support")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/sockmap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 01879e4d599a5..26d8a3053407a 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -912,6 +912,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 	if (unlikely(flags & MSG_ERRQUEUE))
 		return inet_recv_error(sk, msg, len, addr_len);
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
 
 	rcu_read_lock();
 	psock = smap_psock_sk(sk);
@@ -922,9 +924,6 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		goto out;
 	rcu_read_unlock();
 
-	if (!skb_queue_empty(&sk->sk_receive_queue))
-		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
-
 	lock_sock(sk);
 bytes_ready:
 	while (copied != len) {

From 3f6e138d41ddff196f452993528cfe75762ede0f Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Mon, 27 Aug 2018 21:30:42 +0200
Subject: [PATCH 042/257] bpf: fix build error with clang

Building the newly introduced BPF_PROG_TYPE_SK_REUSEPORT leads to
a compile time error when building with clang:
net/core/filter.o: In function `sk_reuseport_convert_ctx_access':
  ../net/core/filter.c:7284: undefined reference to `__compiletime_assert_7284'

It seems that clang has issues resolving hweight_long at compile
time. Since SK_FL_PROTO_MASK is a constant, we can use the interface
for known constant arguments which works fine with clang.

Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT")
Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index c25eb36f13204..7a2430945c719 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7281,7 +7281,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct sk_reuseport_md, ip_protocol):
-		BUILD_BUG_ON(hweight_long(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
+		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
 		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
 						    BPF_W, 0);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);

From 501ca81760c204ec59b73e4a00bee5971fc0f1b1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 24 Aug 2018 17:37:00 -0700
Subject: [PATCH 043/257] bpf: sockmap, decrement copied count correctly in
 redirect error case

Currently, when a redirect occurs in sockmap and an error occurs in
the redirect call we unwind the scatterlist once in the error path
of bpf_tcp_sendmsg_do_redirect() and then again in sendmsg(). Then
in the error path of sendmsg we decrement the copied count by the
send size.

However, its possible we partially sent data before the error was
generated. This can happen if do_tcp_sendpages() partially sends the
scatterlist before encountering a memory pressure error. If this
happens we need to decrement the copied value (the value tracking
how many bytes were actually sent to TCP stack) by the number of
remaining bytes _not_ the entire send size. Otherwise we risk
confusing userspace.

Also we don't need two calls to free the scatterlist one is
good enough. So remove the one in bpf_tcp_sendmsg_do_redirect() and
then properly reduce copied by the number of remaining bytes which
may in fact be the entire send size if no bytes were sent.

To do this use bool to indicate if free_start_sg() should do mem
accounting or not.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 45 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 23 deletions(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 26d8a3053407a..ce63e58017468 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -236,7 +236,7 @@ static int bpf_tcp_init(struct sock *sk)
 }
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md);
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge);
 
 static void bpf_tcp_release(struct sock *sk)
 {
@@ -248,7 +248,7 @@ static void bpf_tcp_release(struct sock *sk)
 		goto out;
 
 	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork);
+		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
 		psock->cork = NULL;
 	}
@@ -330,14 +330,14 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	close_fun = psock->save_close;
 
 	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork);
+		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
 		psock->cork = NULL;
 	}
 
 	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
 		list_del(&md->list);
-		free_start_sg(psock->sock, md);
+		free_start_sg(psock->sock, md, true);
 		kfree(md);
 	}
 
@@ -570,14 +570,16 @@ static void free_bytes_sg(struct sock *sk, int bytes,
 	md->sg_start = i;
 }
 
-static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
+static int free_sg(struct sock *sk, int start,
+		   struct sk_msg_buff *md, bool charge)
 {
 	struct scatterlist *sg = md->sg_data;
 	int i = start, free = 0;
 
 	while (sg[i].length) {
 		free += sg[i].length;
-		sk_mem_uncharge(sk, sg[i].length);
+		if (charge)
+			sk_mem_uncharge(sk, sg[i].length);
 		if (!md->skb)
 			put_page(sg_page(&sg[i]));
 		sg[i].length = 0;
@@ -594,9 +596,9 @@ static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md)
 	return free;
 }
 
-static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
+static int free_start_sg(struct sock *sk, struct sk_msg_buff *md, bool charge)
 {
-	int free = free_sg(sk, md->sg_start, md);
+	int free = free_sg(sk, md->sg_start, md, charge);
 
 	md->sg_start = md->sg_end;
 	return free;
@@ -604,7 +606,7 @@ static int free_start_sg(struct sock *sk, struct sk_msg_buff *md)
 
 static int free_curr_sg(struct sock *sk, struct sk_msg_buff *md)
 {
-	return free_sg(sk, md->sg_curr, md);
+	return free_sg(sk, md->sg_curr, md, true);
 }
 
 static int bpf_map_msg_verdict(int _rc, struct sk_msg_buff *md)
@@ -718,7 +720,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
 		list_add_tail(&r->list, &psock->ingress);
 		sk->sk_data_ready(sk);
 	} else {
-		free_start_sg(sk, r);
+		free_start_sg(sk, r, true);
 		kfree(r);
 	}
 
@@ -752,14 +754,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 		release_sock(sk);
 	}
 	smap_release_sock(psock, sk);
-	if (unlikely(err))
-		goto out;
-	return 0;
+	return err;
 out_rcu:
 	rcu_read_unlock();
-out:
-	free_bytes_sg(NULL, send, md, false);
-	return err;
+	return 0;
 }
 
 static inline void bpf_md_init(struct smap_psock *psock)
@@ -822,7 +820,7 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
 	case __SK_PASS:
 		err = bpf_tcp_push(sk, send, m, flags, true);
 		if (unlikely(err)) {
-			*copied -= free_start_sg(sk, m);
+			*copied -= free_start_sg(sk, m, true);
 			break;
 		}
 
@@ -845,16 +843,17 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
 		lock_sock(sk);
 
 		if (unlikely(err < 0)) {
-			free_start_sg(sk, m);
+			int free = free_start_sg(sk, m, false);
+
 			psock->sg_size = 0;
 			if (!cork)
-				*copied -= send;
+				*copied -= free;
 		} else {
 			psock->sg_size -= send;
 		}
 
 		if (cork) {
-			free_start_sg(sk, m);
+			free_start_sg(sk, m, true);
 			psock->sg_size = 0;
 			kfree(m);
 			m = NULL;
@@ -1121,7 +1120,7 @@ static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 		err = sk_stream_wait_memory(sk, &timeo);
 		if (err) {
 			if (m && m != psock->cork)
-				free_start_sg(sk, m);
+				free_start_sg(sk, m, true);
 			goto out_err;
 		}
 	}
@@ -1580,13 +1579,13 @@ static void smap_gc_work(struct work_struct *w)
 		bpf_prog_put(psock->bpf_tx_msg);
 
 	if (psock->cork) {
-		free_start_sg(psock->sock, psock->cork);
+		free_start_sg(psock->sock, psock->cork, true);
 		kfree(psock->cork);
 	}
 
 	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
 		list_del(&md->list);
-		free_start_sg(psock->sock, md);
+		free_start_sg(psock->sock, md, true);
 		kfree(md);
 	}
 

From 67d1ba8a6dc83d90cd58b89fa6cbf9ae35a0cf7f Mon Sep 17 00:00:00 2001
From: Danek Duvall <duvall@comfychair.org>
Date: Wed, 22 Aug 2018 16:01:04 -0700
Subject: [PATCH 044/257] mac80211: correct use of IEEE80211_VHT_CAP_RXSTBC_X

The mod mask for VHT capabilities intends to say that you can override
the number of STBC receive streams, and it does, but only by accident.
The IEEE80211_VHT_CAP_RXSTBC_X aren't bits to be set, but values (albeit
left-shifted).  ORing the bits together gets the right answer, but we
should use the _MASK macro here instead.

Signed-off-by: Danek Duvall <duvall@comfychair.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0358f20b675f2..27cd64acaf006 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -470,10 +470,7 @@ static const struct ieee80211_vht_cap mac80211_vht_capa_mod_mask = {
 		cpu_to_le32(IEEE80211_VHT_CAP_RXLDPC |
 			    IEEE80211_VHT_CAP_SHORT_GI_80 |
 			    IEEE80211_VHT_CAP_SHORT_GI_160 |
-			    IEEE80211_VHT_CAP_RXSTBC_1 |
-			    IEEE80211_VHT_CAP_RXSTBC_2 |
-			    IEEE80211_VHT_CAP_RXSTBC_3 |
-			    IEEE80211_VHT_CAP_RXSTBC_4 |
+			    IEEE80211_VHT_CAP_RXSTBC_MASK |
 			    IEEE80211_VHT_CAP_TXSTBC |
 			    IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
 			    IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |

From d7c863a2f65e48f442379f4ee1846d52e0c5d24d Mon Sep 17 00:00:00 2001
From: Danek Duvall <duvall@comfychair.org>
Date: Wed, 22 Aug 2018 16:01:05 -0700
Subject: [PATCH 045/257] mac80211_hwsim: correct use of
 IEEE80211_VHT_CAP_RXSTBC_X

The mac80211_hwsim driver intends to say that it supports up to four
STBC receive streams, but instead it ends up saying something undefined.
The IEEE80211_VHT_CAP_RXSTBC_X macros aren't independent bits that can
be ORed together, but values.  In this case, _4 is the appropriate one
to use.

Signed-off-by: Danek Duvall <duvall@comfychair.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index fe1b0108f06de..7d0b460868f91 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2699,9 +2699,6 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 				IEEE80211_VHT_CAP_SHORT_GI_80 |
 				IEEE80211_VHT_CAP_SHORT_GI_160 |
 				IEEE80211_VHT_CAP_TXSTBC |
-				IEEE80211_VHT_CAP_RXSTBC_1 |
-				IEEE80211_VHT_CAP_RXSTBC_2 |
-				IEEE80211_VHT_CAP_RXSTBC_3 |
 				IEEE80211_VHT_CAP_RXSTBC_4 |
 				IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
 			sband->vht_cap.vht_mcs.rx_mcs_map =

From 38cb87ee47fb825f6c9d645c019f75b3905c0ab2 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 22 Aug 2018 13:52:21 +0200
Subject: [PATCH 046/257] cfg80211: make wmm_rule part of the reg_rule
 structure

Make wmm_rule be part of the reg_rule structure. This simplifies the
code a lot at the cost of having bigger memory usage. However in most
cases we have only few reg_rule's and when we do have many like in
iwlwifi we do not save memory as it allocates a separate wmm_rule for
each channel anyway.

This also fixes a bug reported in various places where somewhere the
pointers were corrupted and we ended up doing a null-dereference.

Fixes: 230ebaa189af ("cfg80211: read wmm rules from regulatory database")
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
[rephrase commit message slightly]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 .../wireless/intel/iwlwifi/iwl-nvm-parse.c    | 50 ++---------
 include/net/cfg80211.h                        |  4 +-
 include/net/regulatory.h                      |  4 +-
 net/mac80211/util.c                           |  8 +-
 net/wireless/nl80211.c                        | 10 +--
 net/wireless/reg.c                            | 90 +++----------------
 6 files changed, 31 insertions(+), 135 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index b815ba38dbdb1..88121548eb9fe 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -877,15 +877,12 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 	const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ?
 			     iwl_ext_nvm_channels : iwl_nvm_channels;
 	struct ieee80211_regdomain *regd, *copy_rd;
-	int size_of_regd, regd_to_copy, wmms_to_copy;
-	int size_of_wmms = 0;
+	int size_of_regd, regd_to_copy;
 	struct ieee80211_reg_rule *rule;
-	struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm;
 	struct regdb_ptrs *regdb_ptrs;
 	enum nl80211_band band;
 	int center_freq, prev_center_freq = 0;
-	int valid_rules = 0, n_wmms = 0;
-	int i;
+	int valid_rules = 0;
 	bool new_rule;
 	int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ?
 			 IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS;
@@ -904,11 +901,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 		sizeof(struct ieee80211_regdomain) +
 		num_of_ch * sizeof(struct ieee80211_reg_rule);
 
-	if (geo_info & GEO_WMM_ETSI_5GHZ_INFO)
-		size_of_wmms =
-			num_of_ch * sizeof(struct ieee80211_wmm_rule);
-
-	regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
+	regd = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!regd)
 		return ERR_PTR(-ENOMEM);
 
@@ -922,8 +915,6 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 	regd->alpha2[0] = fw_mcc >> 8;
 	regd->alpha2[1] = fw_mcc & 0xff;
 
-	wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
-
 	for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
 		ch_flags = (u16)__le32_to_cpup(channels + ch_idx);
 		band = (ch_idx < NUM_2GHZ_CHANNELS) ?
@@ -977,26 +968,10 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 		    band == NL80211_BAND_2GHZ)
 			continue;
 
-		if (!reg_query_regdb_wmm(regd->alpha2, center_freq,
-					 &regdb_ptrs[n_wmms].token, wmm_rule)) {
-			/* Add only new rules */
-			for (i = 0; i < n_wmms; i++) {
-				if (regdb_ptrs[i].token ==
-				    regdb_ptrs[n_wmms].token) {
-					rule->wmm_rule = regdb_ptrs[i].rule;
-					break;
-				}
-			}
-			if (i == n_wmms) {
-				rule->wmm_rule = wmm_rule;
-				regdb_ptrs[n_wmms++].rule = wmm_rule;
-				wmm_rule++;
-			}
-		}
+		reg_query_regdb_wmm(regd->alpha2, center_freq, rule);
 	}
 
 	regd->n_reg_rules = valid_rules;
-	regd->n_wmm_rules = n_wmms;
 
 	/*
 	 * Narrow down regdom for unused regulatory rules to prevent hole
@@ -1005,28 +980,13 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 	regd_to_copy = sizeof(struct ieee80211_regdomain) +
 		valid_rules * sizeof(struct ieee80211_reg_rule);
 
-	wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms;
-
-	copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL);
+	copy_rd = kzalloc(regd_to_copy, GFP_KERNEL);
 	if (!copy_rd) {
 		copy_rd = ERR_PTR(-ENOMEM);
 		goto out;
 	}
 
 	memcpy(copy_rd, regd, regd_to_copy);
-	memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd,
-	       wmms_to_copy);
-
-	d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy);
-	s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
-
-	for (i = 0; i < regd->n_reg_rules; i++) {
-		if (!regd->reg_rules[i].wmm_rule)
-			continue;
-
-		copy_rd->reg_rules[i].wmm_rule = d_wmm +
-			(regd->reg_rules[i].wmm_rule - s_wmm);
-	}
 
 out:
 	kfree(regdb_ptrs);
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 1beb3ead03856..7229c186d199c 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4763,8 +4763,8 @@ const char *reg_initiator_name(enum nl80211_reg_initiator initiator);
  *
  * Return: 0 on success. -ENODATA.
  */
-int reg_query_regdb_wmm(char *alpha2, int freq, u32 *ptr,
-			struct ieee80211_wmm_rule *rule);
+int reg_query_regdb_wmm(char *alpha2, int freq,
+			struct ieee80211_reg_rule *rule);
 
 /*
  * callbacks for asynchronous cfg80211 methods, notification
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 60f8cc86a4470..3469750df0f44 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -217,15 +217,15 @@ struct ieee80211_wmm_rule {
 struct ieee80211_reg_rule {
 	struct ieee80211_freq_range freq_range;
 	struct ieee80211_power_rule power_rule;
-	struct ieee80211_wmm_rule *wmm_rule;
+	struct ieee80211_wmm_rule wmm_rule;
 	u32 flags;
 	u32 dfs_cac_ms;
+	bool has_wmm;
 };
 
 struct ieee80211_regdomain {
 	struct rcu_head rcu_head;
 	u32 n_reg_rules;
-	u32 n_wmm_rules;
 	char alpha2[3];
 	enum nl80211_dfs_regions dfs_region;
 	struct ieee80211_reg_rule reg_rules[];
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index d02fbfec37835..c80187d6e6bb4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1120,7 +1120,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 {
 	struct ieee80211_chanctx_conf *chanctx_conf;
 	const struct ieee80211_reg_rule *rrule;
-	struct ieee80211_wmm_ac *wmm_ac;
+	const struct ieee80211_wmm_ac *wmm_ac;
 	u16 center_freq = 0;
 
 	if (sdata->vif.type != NL80211_IFTYPE_AP &&
@@ -1139,15 +1139,15 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 
 	rrule = freq_reg_info(sdata->wdev.wiphy, MHZ_TO_KHZ(center_freq));
 
-	if (IS_ERR_OR_NULL(rrule) || !rrule->wmm_rule) {
+	if (IS_ERR_OR_NULL(rrule) || !rrule->has_wmm) {
 		rcu_read_unlock();
 		return;
 	}
 
 	if (sdata->vif.type == NL80211_IFTYPE_AP)
-		wmm_ac = &rrule->wmm_rule->ap[ac];
+		wmm_ac = &rrule->wmm_rule.ap[ac];
 	else
-		wmm_ac = &rrule->wmm_rule->client[ac];
+		wmm_ac = &rrule->wmm_rule.client[ac];
 	qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min);
 	qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max);
 	qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 80bc986c79e5a..e3dcffd96919c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -667,13 +667,13 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg,
 			goto nla_put_failure;
 
 		if (nla_put_u16(msg, NL80211_WMMR_CW_MIN,
-				rule->wmm_rule->client[j].cw_min) ||
+				rule->wmm_rule.client[j].cw_min) ||
 		    nla_put_u16(msg, NL80211_WMMR_CW_MAX,
-				rule->wmm_rule->client[j].cw_max) ||
+				rule->wmm_rule.client[j].cw_max) ||
 		    nla_put_u8(msg, NL80211_WMMR_AIFSN,
-			       rule->wmm_rule->client[j].aifsn) ||
+			       rule->wmm_rule.client[j].aifsn) ||
 		    nla_put_u8(msg, NL80211_WMMR_TXOP,
-			       rule->wmm_rule->client[j].cot))
+			       rule->wmm_rule.client[j].cot))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_wmm_rule);
@@ -766,7 +766,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
 		const struct ieee80211_reg_rule *rule =
 			freq_reg_info(wiphy, chan->center_freq);
 
-		if (!IS_ERR(rule) && rule->wmm_rule) {
+		if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) {
 			if (nl80211_msg_put_wmm_rules(msg, rule))
 				goto nla_put_failure;
 		}
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 283902974fbfa..2f702adf29121 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -425,35 +425,23 @@ static const struct ieee80211_regdomain *
 reg_copy_regd(const struct ieee80211_regdomain *src_regd)
 {
 	struct ieee80211_regdomain *regd;
-	int size_of_regd, size_of_wmms;
+	int size_of_regd;
 	unsigned int i;
-	struct ieee80211_wmm_rule *d_wmm, *s_wmm;
 
 	size_of_regd =
 		sizeof(struct ieee80211_regdomain) +
 		src_regd->n_reg_rules * sizeof(struct ieee80211_reg_rule);
-	size_of_wmms = src_regd->n_wmm_rules *
-		sizeof(struct ieee80211_wmm_rule);
 
-	regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL);
+	regd = kzalloc(size_of_regd, GFP_KERNEL);
 	if (!regd)
 		return ERR_PTR(-ENOMEM);
 
 	memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain));
 
-	d_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd);
-	s_wmm = (struct ieee80211_wmm_rule *)((u8 *)src_regd + size_of_regd);
-	memcpy(d_wmm, s_wmm, size_of_wmms);
-
-	for (i = 0; i < src_regd->n_reg_rules; i++) {
+	for (i = 0; i < src_regd->n_reg_rules; i++)
 		memcpy(&regd->reg_rules[i], &src_regd->reg_rules[i],
 		       sizeof(struct ieee80211_reg_rule));
-		if (!src_regd->reg_rules[i].wmm_rule)
-			continue;
 
-		regd->reg_rules[i].wmm_rule = d_wmm +
-			(src_regd->reg_rules[i].wmm_rule - s_wmm);
-	}
 	return regd;
 }
 
@@ -859,9 +847,10 @@ static bool valid_regdb(const u8 *data, unsigned int size)
 	return true;
 }
 
-static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
+static void set_wmm_rule(struct ieee80211_reg_rule *rrule,
 			 struct fwdb_wmm_rule *wmm)
 {
+	struct ieee80211_wmm_rule *rule = &rrule->wmm_rule;
 	unsigned int i;
 
 	for (i = 0; i < IEEE80211_NUM_ACS; i++) {
@@ -875,11 +864,13 @@ static void set_wmm_rule(struct ieee80211_wmm_rule *rule,
 		rule->ap[i].aifsn = wmm->ap[i].aifsn;
 		rule->ap[i].cot = 1000 * be16_to_cpu(wmm->ap[i].cot);
 	}
+
+	rrule->has_wmm = true;
 }
 
 static int __regdb_query_wmm(const struct fwdb_header *db,
 			     const struct fwdb_country *country, int freq,
-			     u32 *dbptr, struct ieee80211_wmm_rule *rule)
+			     struct ieee80211_reg_rule *rule)
 {
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
@@ -900,8 +891,6 @@ static int __regdb_query_wmm(const struct fwdb_header *db,
 			wmm_ptr = be16_to_cpu(rrule->wmm_ptr) << 2;
 			wmm = (void *)((u8 *)db + wmm_ptr);
 			set_wmm_rule(rule, wmm);
-			if (dbptr)
-				*dbptr = wmm_ptr;
 			return 0;
 		}
 	}
@@ -909,8 +898,7 @@ static int __regdb_query_wmm(const struct fwdb_header *db,
 	return -ENODATA;
 }
 
-int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
-			struct ieee80211_wmm_rule *rule)
+int reg_query_regdb_wmm(char *alpha2, int freq, struct ieee80211_reg_rule *rule)
 {
 	const struct fwdb_header *hdr = regdb;
 	const struct fwdb_country *country;
@@ -924,8 +912,7 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
 	country = &hdr->country[0];
 	while (country->coll_ptr) {
 		if (alpha2_equal(alpha2, country->alpha2))
-			return __regdb_query_wmm(regdb, country, freq, dbptr,
-						 rule);
+			return __regdb_query_wmm(regdb, country, freq, rule);
 
 		country++;
 	}
@@ -934,32 +921,13 @@ int reg_query_regdb_wmm(char *alpha2, int freq, u32 *dbptr,
 }
 EXPORT_SYMBOL(reg_query_regdb_wmm);
 
-struct wmm_ptrs {
-	struct ieee80211_wmm_rule *rule;
-	u32 ptr;
-};
-
-static struct ieee80211_wmm_rule *find_wmm_ptr(struct wmm_ptrs *wmm_ptrs,
-					       u32 wmm_ptr, int n_wmms)
-{
-	int i;
-
-	for (i = 0; i < n_wmms; i++) {
-		if (wmm_ptrs[i].ptr == wmm_ptr)
-			return wmm_ptrs[i].rule;
-	}
-	return NULL;
-}
-
 static int regdb_query_country(const struct fwdb_header *db,
 			       const struct fwdb_country *country)
 {
 	unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2;
 	struct fwdb_collection *coll = (void *)((u8 *)db + ptr);
 	struct ieee80211_regdomain *regdom;
-	struct ieee80211_regdomain *tmp_rd;
-	unsigned int size_of_regd, i, n_wmms = 0;
-	struct wmm_ptrs *wmm_ptrs;
+	unsigned int size_of_regd, i;
 
 	size_of_regd = sizeof(struct ieee80211_regdomain) +
 		coll->n_rules * sizeof(struct ieee80211_reg_rule);
@@ -968,12 +936,6 @@ static int regdb_query_country(const struct fwdb_header *db,
 	if (!regdom)
 		return -ENOMEM;
 
-	wmm_ptrs = kcalloc(coll->n_rules, sizeof(*wmm_ptrs), GFP_KERNEL);
-	if (!wmm_ptrs) {
-		kfree(regdom);
-		return -ENOMEM;
-	}
-
 	regdom->n_reg_rules = coll->n_rules;
 	regdom->alpha2[0] = country->alpha2[0];
 	regdom->alpha2[1] = country->alpha2[1];
@@ -1012,37 +974,11 @@ static int regdb_query_country(const struct fwdb_header *db,
 				1000 * be16_to_cpu(rule->cac_timeout);
 		if (rule->len >= offsetofend(struct fwdb_rule, wmm_ptr)) {
 			u32 wmm_ptr = be16_to_cpu(rule->wmm_ptr) << 2;
-			struct ieee80211_wmm_rule *wmm_pos =
-				find_wmm_ptr(wmm_ptrs, wmm_ptr, n_wmms);
-			struct fwdb_wmm_rule *wmm;
-			struct ieee80211_wmm_rule *wmm_rule;
-
-			if (wmm_pos) {
-				rrule->wmm_rule = wmm_pos;
-				continue;
-			}
-			wmm = (void *)((u8 *)db + wmm_ptr);
-			tmp_rd = krealloc(regdom, size_of_regd + (n_wmms + 1) *
-					  sizeof(struct ieee80211_wmm_rule),
-					  GFP_KERNEL);
-
-			if (!tmp_rd) {
-				kfree(regdom);
-				kfree(wmm_ptrs);
-				return -ENOMEM;
-			}
-			regdom = tmp_rd;
-
-			wmm_rule = (struct ieee80211_wmm_rule *)
-				((u8 *)regdom + size_of_regd + n_wmms *
-				sizeof(struct ieee80211_wmm_rule));
+			struct fwdb_wmm_rule *wmm = (void *)((u8 *)db + wmm_ptr);
 
-			set_wmm_rule(wmm_rule, wmm);
-			wmm_ptrs[n_wmms].ptr = wmm_ptr;
-			wmm_ptrs[n_wmms++].rule = wmm_rule;
+			set_wmm_rule(rrule, wmm);
 		}
 	}
-	kfree(wmm_ptrs);
 
 	return reg_schedule_apply(regdom);
 }

From 20932750d9c78d307e4f2f18f9c6a32b82b1e0e8 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Mon, 20 Aug 2018 13:56:07 +0300
Subject: [PATCH 047/257] mac80211: don't update the PM state of a peer upon a
 multicast frame

I changed the way mac80211 updates the PM state of the peer.
I forgot that we could also have multicast frames from the
peer and that those frame should of course not change the
PM state of the peer: A peer goes to power save when it
needs to scan, but it won't send the broadcast Probe Request
with the PM bit set.

This made us mark the peer as awake when it wasn't and then
Intel's firmware would fail to transmit because the peer is
asleep according to its database. The driver warned about
this and it looked like this:

 WARNING: CPU: 0 PID: 184 at /usr/src/linux-4.16.14/drivers/net/wireless/intel/iwlwifi/mvm/tx.c:1369 iwl_mvm_rx_tx_cmd+0x53b/0x860
 CPU: 0 PID: 184 Comm: irq/124-iwlwifi Not tainted 4.16.14 #1
 RIP: 0010:iwl_mvm_rx_tx_cmd+0x53b/0x860
 Call Trace:
  iwl_pcie_rx_handle+0x220/0x880
  iwl_pcie_irq_handler+0x6c9/0xa20
  ? irq_forced_thread_fn+0x60/0x60
  ? irq_thread_dtor+0x90/0x90

The relevant code that spits the WARNING is:

        case TX_STATUS_FAIL_DEST_PS:
                /* the FW should have stopped the queue and not
                 * return this status
                 */
                WARN_ON(1);
                info->flags |= IEEE80211_TX_STAT_TX_FILTERED;

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199967.

Fixes: 9fef65443388 ("mac80211: always update the PM state of a peer on MGMT / DATA frames")
Cc: <stable@vger.kernel.org>   #4.16+
Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 932985ca4e668..3f80a5ca40504 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1612,6 +1612,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	 */
 	if (!ieee80211_hw_check(&sta->local->hw, AP_LINK_PS) &&
 	    !ieee80211_has_morefrags(hdr->frame_control) &&
+	    !is_multicast_ether_addr(hdr->addr1) &&
 	    (ieee80211_is_mgmt(hdr->frame_control) ||
 	     ieee80211_is_data(hdr->frame_control)) &&
 	    !(status->rx_flags & IEEE80211_RX_DEFERRED_RELEASE) &&

From 3a2af7cccbbaf2362db9053a946a6084e12bfa73 Mon Sep 17 00:00:00 2001
From: Jinbum Park <jinb.park7@gmail.com>
Date: Tue, 31 Jul 2018 23:10:40 +0900
Subject: [PATCH 048/257] mac80211_hwsim: Fix possible Spectre-v1 for
 hwsim_world_regdom_custom

User controls @idx which to be used as index of hwsim_world_regdom_custom.
So, It can be exploited via Spectre-like attack. (speculative execution)

This kind of attack leaks address of hwsim_world_regdom_custom,
It leads an attacker to bypass security mechanism such as KASLR.

So sanitize @idx before using it to prevent attack.

I leveraged strategy [1] to find and exploit this gadget.

[1] https://github.com/jinb-park/linux-exploit/tree/master/exploit-remaining-spectre-gadget/

Signed-off-by: Jinbum Park <jinb.park7@gmail.com>
[johannes: unwrap URL]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 7d0b460868f91..80e2c8595c7c8 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -33,6 +33,7 @@
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <linux/rhashtable.h>
+#include <linux/nospec.h>
 #include "mac80211_hwsim.h"
 
 #define WARN_QUEUE 100
@@ -3229,6 +3230,9 @@ static int hwsim_new_radio_nl(struct sk_buff *msg, struct genl_info *info)
 			kfree(hwname);
 			return -EINVAL;
 		}
+
+		idx = array_index_nospec(idx,
+					 ARRAY_SIZE(hwsim_world_regdom_custom));
 		param.regd = hwsim_world_regdom_custom[idx];
 	}
 

From d3c89bbc7491d5e288ca2993e999d24ba9ff52ad Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Tue, 21 Aug 2018 09:22:19 +0300
Subject: [PATCH 049/257] nl80211: Fix nla_put_u8 to u16 for NL80211_WMMR_TXOP

TXOP (also known as Channel Occupancy Time) is u16 and should be
added using nla_put_u16 instead of u8, fix that.

Fixes: 50f32718e125 ("nl80211: Add wmm rule attribute to NL80211_CMD_GET_WIPHY dump command")
Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e3dcffd96919c..3f7ffbe6c6349 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -672,8 +672,8 @@ static int nl80211_msg_put_wmm_rules(struct sk_buff *msg,
 				rule->wmm_rule.client[j].cw_max) ||
 		    nla_put_u8(msg, NL80211_WMMR_AIFSN,
 			       rule->wmm_rule.client[j].aifsn) ||
-		    nla_put_u8(msg, NL80211_WMMR_TXOP,
-			       rule->wmm_rule.client[j].cot))
+		    nla_put_u16(msg, NL80211_WMMR_TXOP,
+			        rule->wmm_rule.client[j].cot))
 			goto nla_put_failure;
 
 		nla_nest_end(msg, nl_wmm_rule);

From b88d26d97c41680f7327e5fb8061ad0037877f40 Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Tue, 21 Aug 2018 09:22:20 +0300
Subject: [PATCH 050/257] nl80211: Pass center frequency in kHz instead of MHz

freq_reg_info expects to get the frequency in kHz. Instead we
accidently pass it in MHz.  Thus, currently the function always
return ERR rule. Fix that.

Fixes: 50f32718e125 ("nl80211: Add wmm rule attribute to NL80211_CMD_GET_WIPHY dump command")
Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
[fix kHz/MHz in commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3f7ffbe6c6349..ce0149a86c139 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -764,7 +764,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
 
 	if (large) {
 		const struct ieee80211_reg_rule *rule =
-			freq_reg_info(wiphy, chan->center_freq);
+			freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq));
 
 		if (!IS_ERR_OR_NULL(rule) && rule->has_wmm) {
 			if (nl80211_msg_put_wmm_rules(msg, rule))

From 5b24109b0563d45094c470684c1f8cea1af269f8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 28 Aug 2018 16:15:35 +0200
Subject: [PATCH 051/257] bpf: fix several offset tests in bpf_msg_pull_data

While recently going over bpf_msg_pull_data(), I noticed three
issues which are fixed in here:

1) When we attempt to find the first scatterlist element (sge)
   for the start offset, we add len to the offset before we check
   for start < offset + len, whereas it should come after when
   we iterate to the next sge to accumulate the offsets. For
   example, given a start offset of 12 with a sge length of 8
   for the first sge in the list would lead us to determine this
   sge as the first sge thinking it covers first 16 bytes where
   start is located, whereas start sits in subsequent sges so
   we would end up pulling in the wrong data.

2) After figuring out the starting sge, we have a short-cut test
   in !msg->sg_copy[i] && bytes <= len. This checks whether it's
   not needed to make the page at the sge private where we can
   just exit by updating msg->data and msg->data_end. However,
   the length test is not fully correct. bytes <= len checks
   whether the requested bytes (end - start offsets) fit into the
   sge's length. The part that is missing is that start must not
   be sge length aligned. Meaning, the start offset into the sge
   needs to be accounted as well on top of the requested bytes
   as otherwise we can access the sge out of bounds. For example
   the sge could have length of 8, our requested bytes could have
   length of 8, but at a start offset of 4, so we also would need
   to pull in 4 bytes of the next sge, when we jump to the out
   label we do set msg->data to sg_virt(&sg[i]) + start - offset
   and msg->data_end to msg->data + bytes which would be oob.

3) The subsequent bytes < copy test for finding the last sge has
   the same issue as in point 2) but also it tests for less than
   rather than less or equal to. Meaning if the sge length is of
   8 and requested bytes of 8 while having the start aligned with
   the sge, we would unnecessarily go and pull in the next sge as
   well to make it private.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 7a2430945c719..ec4d67c0cf0cd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2286,10 +2286,10 @@ BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
 	unsigned int len = 0, offset = 0, copy = 0;
+	int bytes = end - start, bytes_sg_total;
 	struct scatterlist *sg = msg->sg_data;
 	int first_sg, last_sg, i, shift;
 	unsigned char *p, *to, *from;
-	int bytes = end - start;
 	struct page *page;
 
 	if (unlikely(flags || end <= start))
@@ -2299,9 +2299,9 @@ BPF_CALL_4(bpf_msg_pull_data,
 	i = msg->sg_start;
 	do {
 		len = sg[i].length;
-		offset += len;
 		if (start < offset + len)
 			break;
+		offset += len;
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
@@ -2310,7 +2310,11 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
-	if (!msg->sg_copy[i] && bytes <= len)
+	/* The start may point into the sg element so we need to also
+	 * account for the headroom.
+	 */
+	bytes_sg_total = start - offset + bytes;
+	if (!msg->sg_copy[i] && bytes_sg_total <= len)
 		goto out;
 
 	first_sg = i;
@@ -2330,12 +2334,12 @@ BPF_CALL_4(bpf_msg_pull_data,
 		i++;
 		if (i == MAX_SKB_FRAGS)
 			i = 0;
-		if (bytes < copy)
+		if (bytes_sg_total <= copy)
 			break;
 	} while (i != msg->sg_end);
 	last_sg = i;
 
-	if (unlikely(copy < end - start))
+	if (unlikely(bytes_sg_total > copy))
 		return -EINVAL;
 
 	page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));

From 6537886cdc9a637711fd6da980dbb87c2c87c9aa Mon Sep 17 00:00:00 2001
From: Michael Hennerich <michael.hennerich@analog.com>
Date: Mon, 13 Aug 2018 15:57:44 +0200
Subject: [PATCH 052/257] gpio: adp5588: Fix sleep-in-atomic-context bug

This fixes:
[BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug
                          in adp5588_gpio_write()
[BUG] gpio: gpio-adp5588: A possible sleep-in-atomic-context bug
                          in adp5588_gpio_direction_input()

Reported-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-adp5588.c | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpio/gpio-adp5588.c b/drivers/gpio/gpio-adp5588.c
index 3530ccd17e044..da9781a2ef4ad 100644
--- a/drivers/gpio/gpio-adp5588.c
+++ b/drivers/gpio/gpio-adp5588.c
@@ -41,6 +41,8 @@ struct adp5588_gpio {
 	uint8_t int_en[3];
 	uint8_t irq_mask[3];
 	uint8_t irq_stat[3];
+	uint8_t int_input_en[3];
+	uint8_t int_lvl_cached[3];
 };
 
 static int adp5588_gpio_read(struct i2c_client *client, u8 reg)
@@ -173,12 +175,28 @@ static void adp5588_irq_bus_sync_unlock(struct irq_data *d)
 	struct adp5588_gpio *dev = irq_data_get_irq_chip_data(d);
 	int i;
 
-	for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++)
+	for (i = 0; i <= ADP5588_BANK(ADP5588_MAXGPIO); i++) {
+		if (dev->int_input_en[i]) {
+			mutex_lock(&dev->lock);
+			dev->dir[i] &= ~dev->int_input_en[i];
+			dev->int_input_en[i] = 0;
+			adp5588_gpio_write(dev->client, GPIO_DIR1 + i,
+					   dev->dir[i]);
+			mutex_unlock(&dev->lock);
+		}
+
+		if (dev->int_lvl_cached[i] != dev->int_lvl[i]) {
+			dev->int_lvl_cached[i] = dev->int_lvl[i];
+			adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + i,
+					   dev->int_lvl[i]);
+		}
+
 		if (dev->int_en[i] ^ dev->irq_mask[i]) {
 			dev->int_en[i] = dev->irq_mask[i];
 			adp5588_gpio_write(dev->client, GPIO_INT_EN1 + i,
 					   dev->int_en[i]);
 		}
+	}
 
 	mutex_unlock(&dev->irq_lock);
 }
@@ -221,9 +239,7 @@ static int adp5588_irq_set_type(struct irq_data *d, unsigned int type)
 	else
 		return -EINVAL;
 
-	adp5588_gpio_direction_input(&dev->gpio_chip, gpio);
-	adp5588_gpio_write(dev->client, GPIO_INT_LVL1 + bank,
-			   dev->int_lvl[bank]);
+	dev->int_input_en[bank] |= bit;
 
 	return 0;
 }

From 1f631c3201fe5491808df143d8fcba81b3197ffd Mon Sep 17 00:00:00 2001
From: Yuan-Chi Pang <fu3mo6goo@gmail.com>
Date: Wed, 29 Aug 2018 09:30:08 +0800
Subject: [PATCH 053/257] mac80211: mesh: fix HWMP sequence numbering to follow
 standard

IEEE 802.11-2016 14.10.8.3 HWMP sequence numbering says:
If it is a target mesh STA, it shall update its own HWMP SN to
maximum (current HWMP SN, target HWMP SN in the PREQ element) + 1
immediately before it generates a PREP element in response to a
PREQ element.

Signed-off-by: Yuan-Chi Pang <fu3mo6goo@gmail.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh_hwmp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 35ad3983ae4b6..daf9db3c8f24f 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -572,6 +572,10 @@ static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata,
 		forward = false;
 		reply = true;
 		target_metric = 0;
+
+		if (SN_GT(target_sn, ifmsh->sn))
+			ifmsh->sn = target_sn;
+
 		if (time_after(jiffies, ifmsh->last_sn_update +
 					net_traversal_jiffies(sdata)) ||
 		    time_before(jiffies, ifmsh->last_sn_update)) {

From 166ac9d55b0ab70b644e429be1f217fe8393cbd7 Mon Sep 17 00:00:00 2001
From: Sara Sharon <sara.sharon@intel.com>
Date: Wed, 29 Aug 2018 08:57:02 +0200
Subject: [PATCH 054/257] mac80211: avoid kernel panic when building AMSDU from
 non-linear SKB

When building building AMSDU from non-linear SKB, we hit a
kernel panic when trying to push the padding to the tail.
Instead, put the padding at the head of the next subframe.
This also fixes the A-MSDU subframes to not have the padding
accounted in the length field and not have pad at all for
the last subframe, both required by the spec.

Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support")
Signed-off-by: Sara Sharon <sara.sharon@intel.com>
Reviewed-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index fa1f1e63a2640..667a73d6eb5cf 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3073,27 +3073,18 @@ void ieee80211_clear_fast_xmit(struct sta_info *sta)
 }
 
 static bool ieee80211_amsdu_realloc_pad(struct ieee80211_local *local,
-					struct sk_buff *skb, int headroom,
-					int *subframe_len)
+					struct sk_buff *skb, int headroom)
 {
-	int amsdu_len = *subframe_len + sizeof(struct ethhdr);
-	int padding = (4 - amsdu_len) & 3;
-
-	if (skb_headroom(skb) < headroom || skb_tailroom(skb) < padding) {
+	if (skb_headroom(skb) < headroom) {
 		I802_DEBUG_INC(local->tx_expand_skb_head);
 
-		if (pskb_expand_head(skb, headroom, padding, GFP_ATOMIC)) {
+		if (pskb_expand_head(skb, headroom, 0, GFP_ATOMIC)) {
 			wiphy_debug(local->hw.wiphy,
 				    "failed to reallocate TX buffer\n");
 			return false;
 		}
 	}
 
-	if (padding) {
-		*subframe_len += padding;
-		skb_put_zero(skb, padding);
-	}
-
 	return true;
 }
 
@@ -3117,8 +3108,7 @@ static bool ieee80211_amsdu_prepare_head(struct ieee80211_sub_if_data *sdata,
 	if (info->control.flags & IEEE80211_TX_CTRL_AMSDU)
 		return true;
 
-	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr),
-					 &subframe_len))
+	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(*amsdu_hdr)))
 		return false;
 
 	data = skb_push(skb, sizeof(*amsdu_hdr));
@@ -3184,7 +3174,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	void *data;
 	bool ret = false;
 	unsigned int orig_len;
-	int n = 1, nfrags;
+	int n = 1, nfrags, pad = 0;
+	u16 hdrlen;
 
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))
 		return false;
@@ -3235,8 +3226,19 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (max_frags && nfrags > max_frags)
 		goto out;
 
-	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) + 2,
-					 &subframe_len))
+	/*
+	 * Pad out the previous subframe to a multiple of 4 by adding the
+	 * padding to the next one, that's being added. Note that head->len
+	 * is the length of the full A-MSDU, but that works since each time
+	 * we add a new subframe we pad out the previous one to a multiple
+	 * of 4 and thus it no longer matters in the next round.
+	 */
+	hdrlen = fast_tx->hdr_len - sizeof(rfc1042_header);
+	if ((head->len - hdrlen) & 3)
+		pad = 4 - ((head->len - hdrlen) & 3);
+
+	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) +
+						     2 + pad))
 		goto out;
 
 	ret = true;
@@ -3248,6 +3250,8 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	memcpy(data, &len, 2);
 	memcpy(data + 2, rfc1042_header, sizeof(rfc1042_header));
 
+	memset(skb_push(skb, pad), 0, pad);
+
 	head->len += skb->len;
 	head->data_len += skb->len;
 	*frag_tail = skb;

From 993b9bc5c47fda86f8ab4e53d68c6fea5ff2764a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 13 Aug 2018 19:00:27 +0300
Subject: [PATCH 055/257] gpiolib: acpi: Switch to cansleep version of GPIO
 library call

The commit ca876c7483b6

  ("gpiolib-acpi: make sure we trigger edge events at least once on boot")

added a initial value check for pin which is about to be locked as IRQ.
Unfortunately, not all GPIO drivers can do that atomically. Thus,
switch to cansleep version of the call. Otherwise we have a warning:

...
  WARNING: CPU: 2 PID: 1408 at drivers/gpio/gpiolib.c:2883 gpiod_get_value+0x46/0x50
...
  RIP: 0010:gpiod_get_value+0x46/0x50
...

The change tested on Intel Broxton with Whiskey Cove PMIC GPIO controller.

Fixes: ca876c7483b6 ("gpiolib-acpi: make sure we trigger edge events at least once on boot")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Hans de Goede <hdegoede@redhat.com>
Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index c48ed9d89ff5f..f9134f23c7e80 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -186,7 +186,7 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 
 	gpiod_direction_input(desc);
 
-	value = gpiod_get_value(desc);
+	value = gpiod_get_value_cansleep(desc);
 
 	ret = gpiochip_lock_as_irq(chip, pin);
 	if (ret) {

From 78d3a92edbfb02e8cb83173cad84c3f2d5e1f070 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 14 Aug 2018 16:07:03 +0200
Subject: [PATCH 056/257] gpiolib-acpi: Register GpioInt ACPI event handlers
 from a late_initcall

GpioInt ACPI event handlers may see there IRQ triggered immediately
after requesting the IRQ (esp. level triggered ones). This means that they
may run before any other (builtin) drivers have had a chance to register
their OpRegion handlers, leading to errors like this:

[    1.133274] ACPI Error: No handler for Region [PMOP] ((____ptrval____)) [UserDefinedRegion] (20180531/evregion-132)
[    1.133286] ACPI Error: Region UserDefinedRegion (ID=141) has no handler (20180531/exfldio-265)
[    1.133297] ACPI Error: Method parse/execution failed \_SB.GPO2._L01, AE_NOT_EXIST (20180531/psparse-516)

We already defer the manual initial trigger of edge triggered interrupts
by running it from a late_initcall handler, this commit replaces this with
deferring the entire acpi_gpiochip_request_interrupts() call till then,
fixing the problem of some OpRegions not being registered yet.

Note that this removes the need to have a list of edge triggered handlers
which need to run, since the entire acpi_gpiochip_request_interrupts() call
is now delayed, acpi_gpiochip_request_interrupt() can call these directly
now.

Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 84 +++++++++++++++++++++----------------
 1 file changed, 49 insertions(+), 35 deletions(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index f9134f23c7e80..8b9d7e42c600b 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -25,7 +25,6 @@
 
 struct acpi_gpio_event {
 	struct list_head node;
-	struct list_head initial_sync_list;
 	acpi_handle handle;
 	unsigned int pin;
 	unsigned int irq;
@@ -49,10 +48,19 @@ struct acpi_gpio_chip {
 	struct mutex conn_lock;
 	struct gpio_chip *chip;
 	struct list_head events;
+	struct list_head deferred_req_irqs_list_entry;
 };
 
-static LIST_HEAD(acpi_gpio_initial_sync_list);
-static DEFINE_MUTEX(acpi_gpio_initial_sync_list_lock);
+/*
+ * For gpiochips which call acpi_gpiochip_request_interrupts() before late_init
+ * (so builtin drivers) we register the ACPI GpioInt event handlers from a
+ * late_initcall_sync handler, so that other builtin drivers can register their
+ * OpRegions before the event handlers can run.  This list contains gpiochips
+ * for which the acpi_gpiochip_request_interrupts() has been deferred.
+ */
+static DEFINE_MUTEX(acpi_gpio_deferred_req_irqs_lock);
+static LIST_HEAD(acpi_gpio_deferred_req_irqs_list);
+static bool acpi_gpio_deferred_req_irqs_done;
 
 static int acpi_gpiochip_find(struct gpio_chip *gc, void *data)
 {
@@ -89,21 +97,6 @@ static struct gpio_desc *acpi_get_gpiod(char *path, int pin)
 	return gpiochip_get_desc(chip, pin);
 }
 
-static void acpi_gpio_add_to_initial_sync_list(struct acpi_gpio_event *event)
-{
-	mutex_lock(&acpi_gpio_initial_sync_list_lock);
-	list_add(&event->initial_sync_list, &acpi_gpio_initial_sync_list);
-	mutex_unlock(&acpi_gpio_initial_sync_list_lock);
-}
-
-static void acpi_gpio_del_from_initial_sync_list(struct acpi_gpio_event *event)
-{
-	mutex_lock(&acpi_gpio_initial_sync_list_lock);
-	if (!list_empty(&event->initial_sync_list))
-		list_del_init(&event->initial_sync_list);
-	mutex_unlock(&acpi_gpio_initial_sync_list_lock);
-}
-
 static irqreturn_t acpi_gpio_irq_handler(int irq, void *data)
 {
 	struct acpi_gpio_event *event = data;
@@ -229,7 +222,6 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 	event->irq = irq;
 	event->pin = pin;
 	event->desc = desc;
-	INIT_LIST_HEAD(&event->initial_sync_list);
 
 	ret = request_threaded_irq(event->irq, NULL, handler, irqflags,
 				   "ACPI:Event", event);
@@ -251,10 +243,9 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares,
 	 * may refer to OperationRegions from other (builtin) drivers which
 	 * may be probed after us.
 	 */
-	if (handler == acpi_gpio_irq_handler &&
-	    (((irqflags & IRQF_TRIGGER_RISING) && value == 1) ||
-	     ((irqflags & IRQF_TRIGGER_FALLING) && value == 0)))
-		acpi_gpio_add_to_initial_sync_list(event);
+	if (((irqflags & IRQF_TRIGGER_RISING) && value == 1) ||
+	    ((irqflags & IRQF_TRIGGER_FALLING) && value == 0))
+		handler(event->irq, event);
 
 	return AE_OK;
 
@@ -283,6 +274,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
 	struct acpi_gpio_chip *acpi_gpio;
 	acpi_handle handle;
 	acpi_status status;
+	bool defer;
 
 	if (!chip->parent || !chip->to_irq)
 		return;
@@ -295,6 +287,16 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
 	if (ACPI_FAILURE(status))
 		return;
 
+	mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
+	defer = !acpi_gpio_deferred_req_irqs_done;
+	if (defer)
+		list_add(&acpi_gpio->deferred_req_irqs_list_entry,
+			 &acpi_gpio_deferred_req_irqs_list);
+	mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
+
+	if (defer)
+		return;
+
 	acpi_walk_resources(handle, "_AEI",
 			    acpi_gpiochip_request_interrupt, acpi_gpio);
 }
@@ -325,11 +327,14 @@ void acpi_gpiochip_free_interrupts(struct gpio_chip *chip)
 	if (ACPI_FAILURE(status))
 		return;
 
+	mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
+	if (!list_empty(&acpi_gpio->deferred_req_irqs_list_entry))
+		list_del_init(&acpi_gpio->deferred_req_irqs_list_entry);
+	mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
+
 	list_for_each_entry_safe_reverse(event, ep, &acpi_gpio->events, node) {
 		struct gpio_desc *desc;
 
-		acpi_gpio_del_from_initial_sync_list(event);
-
 		if (irqd_is_wakeup_set(irq_get_irq_data(event->irq)))
 			disable_irq_wake(event->irq);
 
@@ -1052,6 +1057,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
 
 	acpi_gpio->chip = chip;
 	INIT_LIST_HEAD(&acpi_gpio->events);
+	INIT_LIST_HEAD(&acpi_gpio->deferred_req_irqs_list_entry);
 
 	status = acpi_attach_data(handle, acpi_gpio_chip_dh, acpi_gpio);
 	if (ACPI_FAILURE(status)) {
@@ -1198,20 +1204,28 @@ bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id)
 	return con_id == NULL;
 }
 
-/* Sync the initial state of handlers after all builtin drivers have probed */
-static int acpi_gpio_initial_sync(void)
+/* Run deferred acpi_gpiochip_request_interrupts() */
+static int acpi_gpio_handle_deferred_request_interrupts(void)
 {
-	struct acpi_gpio_event *event, *ep;
+	struct acpi_gpio_chip *acpi_gpio, *tmp;
+
+	mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
+	list_for_each_entry_safe(acpi_gpio, tmp,
+				 &acpi_gpio_deferred_req_irqs_list,
+				 deferred_req_irqs_list_entry) {
+		acpi_handle handle;
 
-	mutex_lock(&acpi_gpio_initial_sync_list_lock);
-	list_for_each_entry_safe(event, ep, &acpi_gpio_initial_sync_list,
-				 initial_sync_list) {
-		acpi_evaluate_object(event->handle, NULL, NULL, NULL);
-		list_del_init(&event->initial_sync_list);
+		handle = ACPI_HANDLE(acpi_gpio->chip->parent);
+		acpi_walk_resources(handle, "_AEI",
+				    acpi_gpiochip_request_interrupt, acpi_gpio);
+
+		list_del_init(&acpi_gpio->deferred_req_irqs_list_entry);
 	}
-	mutex_unlock(&acpi_gpio_initial_sync_list_lock);
+
+	acpi_gpio_deferred_req_irqs_done = true;
+	mutex_unlock(&acpi_gpio_deferred_req_irqs_lock);
 
 	return 0;
 }
 /* We must use _sync so that this runs after the first deferred_probe run */
-late_initcall_sync(acpi_gpio_initial_sync);
+late_initcall_sync(acpi_gpio_handle_deferred_request_interrupts);

From a618cf4800970d260871c159b7eec014a1da2e81 Mon Sep 17 00:00:00 2001
From: Alexey Khoroshilov <khoroshilov@ispras.ru>
Date: Tue, 28 Aug 2018 23:40:26 +0300
Subject: [PATCH 057/257] gpio: dwapb: Fix error handling in dwapb_gpio_probe()

If dwapb_gpio_add_port() fails in dwapb_gpio_probe(),
gpio->clk is left undisabled.

Found by Linux Driver Verification project (linuxtesting.org).

Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpio-dwapb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpio-dwapb.c b/drivers/gpio/gpio-dwapb.c
index 28da700f5f525..044888fd96a1f 100644
--- a/drivers/gpio/gpio-dwapb.c
+++ b/drivers/gpio/gpio-dwapb.c
@@ -728,6 +728,7 @@ static int dwapb_gpio_probe(struct platform_device *pdev)
 out_unregister:
 	dwapb_gpio_unregister(gpio);
 	dwapb_irq_teardown(gpio);
+	clk_disable_unprepare(gpio->clk);
 
 	return err;
 }

From 0e06b227c5221dd51b5569de93f3b9f532be4a32 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:34 +0200
Subject: [PATCH 058/257] bpf: fix msg->data/data_end after sg shift repair in
 bpf_msg_pull_data

In the current code, msg->data is set as sg_virt(&sg[i]) + start - offset
and msg->data_end relative to it as msg->data + bytes. Using iterator i
to point to the updated starting scatterlist element holds true for some
cases, however not for all where we'd end up pointing out of bounds. It
is /correct/ for these ones:

1) When first finding the starting scatterlist element (sge) where we
   find that the page is already privately owned by the msg and where
   the requested bytes and headroom fit into the sge's length.

However, it's /incorrect/ for the following ones:

2) After we made the requested area private and updated the newly allocated
   page into first_sg slot of the scatterlist ring; when we find that no
   shift repair of the ring is needed where we bail out updating msg->data
   and msg->data_end. At that point i will point to last_sg, which in this
   case is the next elem of first_sg in the ring. The sge at that point
   might as well be invalid (e.g. i == msg->sg_end), which we use for
   setting the range of sg_virt(&sg[i]). The correct one would have been
   first_sg.

3) Similar as in 2) but when we find that a shift repair of the ring is
   needed. In this case we fix up all sges and stop once we've reached the
   end. In this case i will point to will point to the new msg->sg_end,
   and the sge at that point will be invalid. Again here the requested
   range sits in first_sg.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index ec4d67c0cf0cd..b9225c55926ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2310,6 +2310,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
+	first_sg = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
@@ -2317,8 +2318,6 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (!msg->sg_copy[i] && bytes_sg_total <= len)
 		goto out;
 
-	first_sg = i;
-
 	/* At this point we need to linearize multiple scatterlist
 	 * elements or a single shared page. Either way we need to
 	 * copy into a linear buffer exclusively owned by BPF. Then
@@ -2400,7 +2399,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (msg->sg_end < 0)
 		msg->sg_end += MAX_SKB_FRAGS;
 out:
-	msg->data = sg_virt(&sg[i]) + start - offset;
+	msg->data = sg_virt(&sg[first_sg]) + start - offset;
 	msg->data_end = msg->data + bytes;
 
 	return 0;

From 2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:35 +0200
Subject: [PATCH 059/257] bpf: fix shift upon scatterlist ring wrap-around in
 bpf_msg_pull_data

If first_sg and last_sg wraps around in the scatterlist ring, then we
need to account for that in the shift as well. E.g. crafting such msgs
where this is the case leads to a hang as shift becomes negative. E.g.
consider the following scenario:

  first_sg := 14     |=>    shift := -12     msg->sg_start := 10
  last_sg  :=  3     |                       msg->sg_end   :=  5

round  1:  i := 15, move_from :=   3, sg[15] := sg[  3]
round  2:  i :=  0, move_from := -12, sg[ 0] := sg[-12]
round  3:  i :=  1, move_from := -11, sg[ 1] := sg[-11]
round  4:  i :=  2, move_from := -10, sg[ 2] := sg[-10]
[...]
round 13:  i := 11, move_from :=  -1, sg[ 2] := sg[ -1]
round 14:  i := 12, move_from :=   0, sg[ 2] := sg[  0]
round 15:  i := 13, move_from :=   1, sg[ 2] := sg[  1]
round 16:  i := 14, move_from :=   2, sg[ 2] := sg[  2]
round 17:  i := 15, move_from :=   3, sg[ 2] := sg[  3]
[...]

This means we will loop forever and never hit the msg->sg_end condition
to break out of the loop. When we see that the ring wraps around, then
the shift should be MAX_SKB_FRAGS - first_sg + last_sg - 1. Meaning,
the remainder slots from the tail of the ring and the head until last_sg
combined.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index b9225c55926ac..43ba5f8c38cac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2370,7 +2370,10 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	shift = last_sg - first_sg - 1;
+	WARN_ON_ONCE(last_sg == first_sg);
+	shift = last_sg > first_sg ?
+		last_sg - first_sg - 1 :
+		MAX_SKB_FRAGS - first_sg + last_sg - 1;
 	if (!shift)
 		goto out;
 

From a8cf76a9023bc6709b1361d06bb2fae5227b9d68 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:36 +0200
Subject: [PATCH 060/257] bpf: fix sg shift repair start offset in
 bpf_msg_pull_data

When we perform the sg shift repair for the scatterlist ring, we
currently start out at i = first_sg + 1. However, this is not
correct since the first_sg could point to the sge sitting at slot
MAX_SKB_FRAGS - 1, and a subsequent i = MAX_SKB_FRAGS will access
the scatterlist ring (sg) out of bounds. Add the sk_msg_iter_var()
helper for iterating through the ring, and apply the same rule
for advancing to the next ring element as we do elsewhere. Later
work will use this helper also in other places.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 43ba5f8c38cac..2c7801f6737ad 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2282,6 +2282,13 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+#define sk_msg_iter_var(var)			\
+	do {					\
+		var++;				\
+		if (var == MAX_SKB_FRAGS)	\
+			var = 0;		\
+	} while (0)
+
 BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
@@ -2302,9 +2309,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		if (start < offset + len)
 			break;
 		offset += len;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (i != msg->sg_end);
 
 	if (unlikely(start >= offset + len))
@@ -2330,9 +2335,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 */
 	do {
 		copy += sg[i].length;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 		if (bytes_sg_total <= copy)
 			break;
 	} while (i != msg->sg_end);
@@ -2358,9 +2361,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		sg[i].length = 0;
 		put_page(sg_page(&sg[i]));
 
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (i != last_sg);
 
 	sg[first_sg].length = copy;
@@ -2377,7 +2378,8 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (!shift)
 		goto out;
 
-	i = first_sg + 1;
+	i = first_sg;
+	sk_msg_iter_var(i);
 	do {
 		int move_from;
 
@@ -2394,9 +2396,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		sg[move_from].page_link = 0;
 		sg[move_from].offset = 0;
 
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (1);
 	msg->sg_end -= shift;
 	if (msg->sg_end < 0)

From 9f2895461439fda2801a7906fb4c5fb3dbb37a0a Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 23 Aug 2018 19:49:54 +0300
Subject: [PATCH 061/257] vti6: remove !skb->ignore_df check from vti6_xmit()

Before the commit d6990976af7c ("vti6: fix PMTU caching and reporting
on xmit") '!skb->ignore_df' check was always true because the function
skb_scrub_packet() was called before it, resetting ignore_df to zero.

In the commit, skb_scrub_packet() was moved below, and now this check
can be false for the packet, e.g. when sending it in the two fragments,
this prevents successful PMTU updates in such case. The next attempts
to send the packet lead to the same tx error. Moreover, vti6 initial
MTU value relies on PMTU adjustments.

This issue can be reproduced with the following LTP test script:
    udp_ipsec_vti.sh -6 -p ah -m tunnel -s 2000

Fixes: ccd740cbc6e0 ("vti6: Add pmtu handling to vti6_xmit.")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_vti.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5095367c72049..eeaf7455d51e5 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -481,7 +481,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
 	}
 
 	mtu = dst_mtu(dst);
-	if (!skb->ignore_df && skb->len > mtu) {
+	if (skb->len > mtu) {
 		skb_dst_update_pmtu(skb, mtu);
 
 		if (skb->protocol == htons(ETH_P_IPV6)) {

From bd583fe30427500a2d0abe25724025b1cb5e2636 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 23 Aug 2018 16:19:44 -0700
Subject: [PATCH 062/257] tipc: fix a missing rhashtable_walk_exit()

rhashtable_walk_exit() must be paired with rhashtable_walk_enter().

Fixes: 40f9f4397060 ("tipc: Fix tipc_sk_reinit race conditions")
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c1e93c9515bca..c9a50b62c738d 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2672,6 +2672,8 @@ void tipc_sk_reinit(struct net *net)
 
 		rhashtable_walk_stop(&iter);
 	} while (tsk == ERR_PTR(-EAGAIN));
+
+	rhashtable_walk_exit(&iter);
 }
 
 static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid)

From e5133f2f1261f8ab412e7fc5e3694c9f84328f89 Mon Sep 17 00:00:00 2001
From: Jerome Brunet <jbrunet@baylibre.com>
Date: Fri, 24 Aug 2018 11:04:40 +0200
Subject: [PATCH 063/257] Revert "net: stmmac: Do not keep rearming the
 coalesce timer in stmmac_xmit"

This reverts commit 4ae0169fd1b3c792b66be58995b7e6b629919ecf.

This change in the handling of the coalesce timer is causing regression on
(at least) amlogic platforms.

Network will break down very quickly (a few seconds) after starting
a download. This can easily be reproduced using iperf3 for example.

The problem has been reported on the S805, S905, S912 and A113 SoCs
(Realtek and Micrel PHYs) and it is likely impacting all Amlogics
platforms using Gbit ethernet

No problem was seen with the platform using 10/100 only PHYs (GXL internal)

Reverting change brings things back to normal and allows to use network
again until we better understand the problem with the coalesce timer.

Cc: Jose Abreu <joabreu@synopsys.com>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: Vitor Soares <soares@synopsys.com>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Cc: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Jerome Brunet <jbrunet@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      | 1 -
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 76649adf8fb06..c0a855b7ab3b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -112,7 +112,6 @@ struct stmmac_priv {
 	u32 tx_count_frames;
 	u32 tx_coal_frames;
 	u32 tx_coal_timer;
-	bool tx_timer_armed;
 
 	int tx_coalesce;
 	int hwts_tx_en;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ff1ffb46198a7..9f458bb16f2a6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3147,16 +3147,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * element in case of no SG.
 	 */
 	priv->tx_count_frames += nfrags + 1;
-	if (likely(priv->tx_coal_frames > priv->tx_count_frames) &&
-	    !priv->tx_timer_armed) {
+	if (likely(priv->tx_coal_frames > priv->tx_count_frames)) {
 		mod_timer(&priv->txtimer,
 			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
-		priv->tx_timer_armed = true;
 	} else {
 		priv->tx_count_frames = 0;
 		stmmac_set_tx_ic(priv, desc);
 		priv->xstats.tx_set_ic_bit++;
-		priv->tx_timer_armed = false;
 	}
 
 	skb_tx_timestamp(skb);

From 9a07efa9aea2f4a59f35da0785a4e6a6b5a96192 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Fri, 24 Aug 2018 12:28:06 -0700
Subject: [PATCH 064/257] tipc: switch to rhashtable iterator

syzbot reported a use-after-free in tipc_group_fill_sock_diag(),
where tipc_group_fill_sock_diag() still reads tsk->group meanwhile
tipc_group_delete() just deletes it in tipc_release().

tipc_nl_sk_walk() aims to lock this sock when walking each sock
in the hash table to close race conditions with sock changes like
this one, by acquiring tsk->sk.sk_lock.slock spinlock, unfortunately
this doesn't work at all. All non-BH call path should take
lock_sock() instead to make it work.

tipc_nl_sk_walk() brutally iterates with raw rht_for_each_entry_rcu()
where RCU read lock is required, this is the reason why lock_sock()
can't be taken on this path. This could be resolved by switching to
rhashtable iterator API's, where taking a sleepable lock is possible.
Also, the iterator API's are friendly for restartable calls like
diag dump, the last position is remembered behind the scence,
all we need to do here is saving the iterator into cb->args[].

I tested this with parallel tipc diag dump and thousands of tipc
socket creation and release, no crash or memory leak.

Reported-by: syzbot+b9c8f3ab2994b7cd1625@syzkaller.appspotmail.com
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Ying Xue <ying.xue@windriver.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/diag.c    |  2 ++
 net/tipc/netlink.c |  2 ++
 net/tipc/socket.c  | 76 ++++++++++++++++++++++++++++++----------------
 net/tipc/socket.h  |  2 ++
 4 files changed, 56 insertions(+), 26 deletions(-)

diff --git a/net/tipc/diag.c b/net/tipc/diag.c
index aaabb0b776dda..73137f4aeb68f 100644
--- a/net/tipc/diag.c
+++ b/net/tipc/diag.c
@@ -84,7 +84,9 @@ static int tipc_sock_diag_handler_dump(struct sk_buff *skb,
 
 	if (h->nlmsg_flags & NLM_F_DUMP) {
 		struct netlink_dump_control c = {
+			.start = tipc_dump_start,
 			.dump = tipc_diag_dump,
+			.done = tipc_dump_done,
 		};
 		netlink_dump_start(net->diag_nlsk, skb, h, &c);
 		return 0;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 6ff2254088f64..99ee419210baf 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -167,7 +167,9 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 	},
 	{
 		.cmd	= TIPC_NL_SOCK_GET,
+		.start = tipc_dump_start,
 		.dumpit	= tipc_nl_sk_dump,
+		.done	= tipc_dump_done,
 		.policy = tipc_nl_policy,
 	},
 	{
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index c9a50b62c738d..ab7a2a7178f71 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -3229,45 +3229,69 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 				       struct netlink_callback *cb,
 				       struct tipc_sock *tsk))
 {
-	struct net *net = sock_net(skb->sk);
-	struct tipc_net *tn = tipc_net(net);
-	const struct bucket_table *tbl;
-	u32 prev_portid = cb->args[1];
-	u32 tbl_id = cb->args[0];
-	struct rhash_head *pos;
+	struct rhashtable_iter *iter = (void *)cb->args[0];
 	struct tipc_sock *tsk;
 	int err;
 
-	rcu_read_lock();
-	tbl = rht_dereference_rcu((&tn->sk_rht)->tbl, &tn->sk_rht);
-	for (; tbl_id < tbl->size; tbl_id++) {
-		rht_for_each_entry_rcu(tsk, pos, tbl, tbl_id, node) {
-			spin_lock_bh(&tsk->sk.sk_lock.slock);
-			if (prev_portid && prev_portid != tsk->portid) {
-				spin_unlock_bh(&tsk->sk.sk_lock.slock);
+	rhashtable_walk_start(iter);
+	while ((tsk = rhashtable_walk_next(iter)) != NULL) {
+		if (IS_ERR(tsk)) {
+			err = PTR_ERR(tsk);
+			if (err == -EAGAIN) {
+				err = 0;
 				continue;
 			}
+			break;
+		}
 
-			err = skb_handler(skb, cb, tsk);
-			if (err) {
-				prev_portid = tsk->portid;
-				spin_unlock_bh(&tsk->sk.sk_lock.slock);
-				goto out;
-			}
-
-			prev_portid = 0;
-			spin_unlock_bh(&tsk->sk.sk_lock.slock);
+		sock_hold(&tsk->sk);
+		rhashtable_walk_stop(iter);
+		lock_sock(&tsk->sk);
+		err = skb_handler(skb, cb, tsk);
+		if (err) {
+			release_sock(&tsk->sk);
+			sock_put(&tsk->sk);
+			goto out;
 		}
+		release_sock(&tsk->sk);
+		rhashtable_walk_start(iter);
+		sock_put(&tsk->sk);
 	}
+	rhashtable_walk_stop(iter);
 out:
-	rcu_read_unlock();
-	cb->args[0] = tbl_id;
-	cb->args[1] = prev_portid;
-
 	return skb->len;
 }
 EXPORT_SYMBOL(tipc_nl_sk_walk);
 
+int tipc_dump_start(struct netlink_callback *cb)
+{
+	struct rhashtable_iter *iter = (void *)cb->args[0];
+	struct net *net = sock_net(cb->skb->sk);
+	struct tipc_net *tn = tipc_net(net);
+
+	if (!iter) {
+		iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+		if (!iter)
+			return -ENOMEM;
+
+		cb->args[0] = (long)iter;
+	}
+
+	rhashtable_walk_enter(&tn->sk_rht, iter);
+	return 0;
+}
+EXPORT_SYMBOL(tipc_dump_start);
+
+int tipc_dump_done(struct netlink_callback *cb)
+{
+	struct rhashtable_iter *hti = (void *)cb->args[0];
+
+	rhashtable_walk_exit(hti);
+	kfree(hti);
+	return 0;
+}
+EXPORT_SYMBOL(tipc_dump_done);
+
 int tipc_sk_fill_sock_diag(struct sk_buff *skb, struct netlink_callback *cb,
 			   struct tipc_sock *tsk, u32 sk_filter_state,
 			   u64 (*tipc_diag_gen_cookie)(struct sock *sk))
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index aff9b2ae5a1f4..d43032e26532c 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -68,4 +68,6 @@ int tipc_nl_sk_walk(struct sk_buff *skb, struct netlink_callback *cb,
 		    int (*skb_handler)(struct sk_buff *skb,
 				       struct netlink_callback *cb,
 				       struct tipc_sock *tsk));
+int tipc_dump_start(struct netlink_callback *cb);
+int tipc_dump_done(struct netlink_callback *cb);
 #endif

From 05212ba8132b42047ab5d63d759c6f9c28e7eab5 Mon Sep 17 00:00:00 2001
From: Azat Khuzhin <a3at.mail@gmail.com>
Date: Sun, 26 Aug 2018 17:03:09 +0300
Subject: [PATCH 065/257] r8169: set RxConfig after tx/rx is enabled for
 RTL8169sb/8110sb devices

I have two Ethernet adapters:
  r8169 0000:03:01.0 eth0: RTL8169sb/8110sb, 00:14:d1:14:2d:49, XID 10000000, IRQ 18
  r8169 0000:01:00.0 eth0: RTL8168e/8111e, 64:66:b3:11:14:5d, XID 2c200000, IRQ 30
And after upgrading from linux 4.15 [1] to linux 4.18+ [2] RTL8169sb failed to
receive any packets. tcpdump shows a lot of checksum mismatch.

  [1]: a0f79386a4968b4925da6db2d1daffd0605a4402
  [2]: 0519359784328bfa92bf0931bf0cff3b58c16932 (4.19 merge window opened)

I started bisecting and the found that [3] breaks it. According to [4]:
  "For 8110S, 8110SB, and 8110SC series, the initial value of RxConfig
  needs to be set after the tx/rx is enabled."
So I moved rtl_init_rxcfg() after enabling tx/rs and now my adapter works
(RTL8168e works too).

  [3]: 3559d81e76bfe3803e89f2e04cf6ef7ab4f3aace
  [4]: e542a2269f232d61270ceddd42b73a4348dee2bb ("r8169: adjust the RxConfig
settings.")

Also drop "rx" from rtl_set_rx_tx_config_registers(), since it does nothing
with it already.

Fixes: 3559d81e76bfe3803e89f2e04cf6ef7ab4f3aace ("r8169: simplify
rtl_hw_start_8169")

Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org
Cc: Realtek linux nic maintainers <nic_swsd@realtek.com>
Signed-off-by: Azat Khuzhin <a3at.mail@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index 0efa977c422dd..ac306797590ec 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -4522,7 +4522,7 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 	rtl_hw_reset(tp);
 }
 
-static void rtl_set_rx_tx_config_registers(struct rtl8169_private *tp)
+static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
 {
 	/* Set DMA burst size and Interframe Gap Time */
 	RTL_W32(tp, TxConfig, (TX_DMA_BURST << TxDMAShift) |
@@ -4633,12 +4633,14 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
 
 	rtl_set_rx_max_size(tp);
 	rtl_set_rx_tx_desc_registers(tp);
-	rtl_set_rx_tx_config_registers(tp);
+	rtl_set_tx_config_registers(tp);
 	RTL_W8(tp, Cfg9346, Cfg9346_Lock);
 
 	/* Initially a 10 us delay. Turned it into a PCI commit. - FR */
 	RTL_R8(tp, IntrMask);
 	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
+	rtl_init_rxcfg(tp);
+
 	rtl_set_rx_mode(tp->dev);
 	/* no early-rx interrupts */
 	RTL_W16(tp, MultiIntr, RTL_R16(tp, MultiIntr) & 0xf000);

From 31fabbee8f5c658c3fa1603c66e9e4f51ea8c2c6 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 27 Aug 2018 09:59:29 +0800
Subject: [PATCH 066/257] net: hns: add the code for cleaning pkt in chip

If there are packets in hardware when changing the speed
or duplex, it may cause hardware hang up.

This patch adds the code for waiting chip to clean the all
pkts(TX & RX) in chip when the driver uses the function named
"adjust link".

This patch cleans the pkts as follows:
1) close rx of chip, close tx of protocol stack.
2) wait rcb, ppe, mac to clean.
3) adjust link
4) open rx of chip, open tx of protocol stack.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hnae.h     |  2 +
 .../net/ethernet/hisilicon/hns/hns_ae_adapt.c | 67 ++++++++++++++++++-
 .../ethernet/hisilicon/hns/hns_dsaf_gmac.c    | 36 ++++++++++
 .../net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 44 ++++++++++++
 .../net/ethernet/hisilicon/hns/hns_dsaf_mac.h |  8 +++
 .../ethernet/hisilicon/hns/hns_dsaf_main.c    | 29 ++++++++
 .../ethernet/hisilicon/hns/hns_dsaf_main.h    |  3 +
 .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.c | 23 +++++++
 .../net/ethernet/hisilicon/hns/hns_dsaf_ppe.h |  1 +
 .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.c | 23 +++++++
 .../net/ethernet/hisilicon/hns/hns_dsaf_rcb.h |  1 +
 .../net/ethernet/hisilicon/hns/hns_dsaf_reg.h |  1 +
 drivers/net/ethernet/hisilicon/hns/hns_enet.c | 21 +++++-
 13 files changed, 255 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index cad52bd331f7b..08a750fb60c49 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -486,6 +486,8 @@ struct hnae_ae_ops {
 			u8 *auto_neg, u16 *speed, u8 *duplex);
 	void (*toggle_ring_irq)(struct hnae_ring *ring, u32 val);
 	void (*adjust_link)(struct hnae_handle *handle, int speed, int duplex);
+	bool (*need_adjust_link)(struct hnae_handle *handle,
+				 int speed, int duplex);
 	int (*set_loopback)(struct hnae_handle *handle,
 			    enum hnae_loop loop_mode, int en);
 	void (*get_ring_bdnum_limit)(struct hnae_queue *queue,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index e6aad30e7e69c..b52029e26d153 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -155,6 +155,41 @@ static void hns_ae_put_handle(struct hnae_handle *handle)
 		hns_ae_get_ring_pair(handle->qs[i])->used_by_vf = 0;
 }
 
+static int hns_ae_wait_flow_down(struct hnae_handle *handle)
+{
+	struct dsaf_device *dsaf_dev;
+	struct hns_ppe_cb *ppe_cb;
+	struct hnae_vf_cb *vf_cb;
+	int ret;
+	int i;
+
+	for (i = 0; i < handle->q_num; i++) {
+		ret = hns_rcb_wait_tx_ring_clean(handle->qs[i]);
+		if (ret)
+			return ret;
+	}
+
+	ppe_cb = hns_get_ppe_cb(handle);
+	ret = hns_ppe_wait_tx_fifo_clean(ppe_cb);
+	if (ret)
+		return ret;
+
+	dsaf_dev = hns_ae_get_dsaf_dev(handle->dev);
+	if (!dsaf_dev)
+		return -EINVAL;
+	ret = hns_dsaf_wait_pkt_clean(dsaf_dev, handle->dport_id);
+	if (ret)
+		return ret;
+
+	vf_cb = hns_ae_get_vf_cb(handle);
+	ret = hns_mac_wait_fifo_clean(vf_cb->mac_cb);
+	if (ret)
+		return ret;
+
+	mdelay(10);
+	return 0;
+}
+
 static void hns_ae_ring_enable_all(struct hnae_handle *handle, int val)
 {
 	int q_num = handle->q_num;
@@ -399,12 +434,41 @@ static int hns_ae_get_mac_info(struct hnae_handle *handle,
 	return hns_mac_get_port_info(mac_cb, auto_neg, speed, duplex);
 }
 
+static bool hns_ae_need_adjust_link(struct hnae_handle *handle, int speed,
+				    int duplex)
+{
+	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
+
+	return hns_mac_need_adjust_link(mac_cb, speed, duplex);
+}
+
 static void hns_ae_adjust_link(struct hnae_handle *handle, int speed,
 			       int duplex)
 {
 	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
 
-	hns_mac_adjust_link(mac_cb, speed, duplex);
+	switch (mac_cb->dsaf_dev->dsaf_ver) {
+	case AE_VERSION_1:
+		hns_mac_adjust_link(mac_cb, speed, duplex);
+		break;
+
+	case AE_VERSION_2:
+		/* chip need to clear all pkt inside */
+		hns_mac_disable(mac_cb, MAC_COMM_MODE_RX);
+		if (hns_ae_wait_flow_down(handle)) {
+			hns_mac_enable(mac_cb, MAC_COMM_MODE_RX);
+			break;
+		}
+
+		hns_mac_adjust_link(mac_cb, speed, duplex);
+		hns_mac_enable(mac_cb, MAC_COMM_MODE_RX);
+		break;
+
+	default:
+		break;
+	}
+
+	return;
 }
 
 static void hns_ae_get_ring_bdnum_limit(struct hnae_queue *queue,
@@ -902,6 +966,7 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 	.get_status = hns_ae_get_link_status,
 	.get_info = hns_ae_get_mac_info,
 	.adjust_link = hns_ae_adjust_link,
+	.need_adjust_link = hns_ae_need_adjust_link,
 	.set_loopback = hns_ae_config_loopback,
 	.get_ring_bdnum_limit = hns_ae_get_ring_bdnum_limit,
 	.get_pauseparam = hns_ae_get_pauseparam,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 5488c6e89f211..09e4061d1fa60 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -257,6 +257,16 @@ static void hns_gmac_get_pausefrm_cfg(void *mac_drv, u32 *rx_pause_en,
 	*tx_pause_en = dsaf_get_bit(pause_en, GMAC_PAUSE_EN_TX_FDFC_B);
 }
 
+static bool hns_gmac_need_adjust_link(void *mac_drv, enum mac_speed speed,
+				      int duplex)
+{
+	struct mac_driver *drv = (struct mac_driver *)mac_drv;
+	struct hns_mac_cb *mac_cb = drv->mac_cb;
+
+	return (mac_cb->speed != speed) ||
+		(mac_cb->half_duplex == duplex);
+}
+
 static int hns_gmac_adjust_link(void *mac_drv, enum mac_speed speed,
 				u32 full_duplex)
 {
@@ -309,6 +319,30 @@ static void hns_gmac_set_promisc(void *mac_drv, u8 en)
 		hns_gmac_set_uc_match(mac_drv, en);
 }
 
+int hns_gmac_wait_fifo_clean(void *mac_drv)
+{
+	struct mac_driver *drv = (struct mac_driver *)mac_drv;
+	int wait_cnt;
+	u32 val;
+
+	wait_cnt = 0;
+	while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+		val = dsaf_read_dev(drv, GMAC_FIFO_STATE_REG);
+		/* bit5~bit0 is not send complete pkts */
+		if ((val & 0x3f) == 0)
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+		dev_err(drv->dev,
+			"hns ge %d fifo was not idle.\n", drv->mac_id);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 static void hns_gmac_init(void *mac_drv)
 {
 	u32 port;
@@ -690,6 +724,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->mac_disable = hns_gmac_disable;
 	mac_drv->mac_free = hns_gmac_free;
 	mac_drv->adjust_link = hns_gmac_adjust_link;
+	mac_drv->need_adjust_link = hns_gmac_need_adjust_link;
 	mac_drv->set_tx_auto_pause_frames = hns_gmac_set_tx_auto_pause_frames;
 	mac_drv->config_max_frame_length = hns_gmac_config_max_frame_length;
 	mac_drv->mac_pausefrm_cfg = hns_gmac_pause_frm_cfg;
@@ -717,6 +752,7 @@ void *hns_gmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
 	mac_drv->get_strings = hns_gmac_get_strings;
 	mac_drv->update_stats = hns_gmac_update_stats;
 	mac_drv->set_promiscuous = hns_gmac_set_promisc;
+	mac_drv->wait_fifo_clean = hns_gmac_wait_fifo_clean;
 
 	return (void *)mac_drv;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 1c2326bd76e24..6ed6f142427e4 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -114,6 +114,26 @@ int hns_mac_get_port_info(struct hns_mac_cb *mac_cb,
 	return 0;
 }
 
+/**
+ *hns_mac_is_adjust_link - check is need change mac speed and duplex register
+ *@mac_cb: mac device
+ *@speed: phy device speed
+ *@duplex:phy device duplex
+ *
+ */
+bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex)
+{
+	struct mac_driver *mac_ctrl_drv;
+
+	mac_ctrl_drv = (struct mac_driver *)(mac_cb->priv.mac);
+
+	if (mac_ctrl_drv->need_adjust_link)
+		return mac_ctrl_drv->need_adjust_link(mac_ctrl_drv,
+			(enum mac_speed)speed, duplex);
+	else
+		return true;
+}
+
 void hns_mac_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex)
 {
 	int ret;
@@ -430,6 +450,16 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
 	return 0;
 }
 
+int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb)
+{
+	struct mac_driver *drv = hns_mac_get_drv(mac_cb);
+
+	if (drv->wait_fifo_clean)
+		return drv->wait_fifo_clean(drv);
+
+	return 0;
+}
+
 void hns_mac_reset(struct hns_mac_cb *mac_cb)
 {
 	struct mac_driver *drv = hns_mac_get_drv(mac_cb);
@@ -998,6 +1028,20 @@ static int hns_mac_get_max_port_num(struct dsaf_device *dsaf_dev)
 		return  DSAF_MAX_PORT_NUM;
 }
 
+void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode)
+{
+	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
+
+	mac_ctrl_drv->mac_enable(mac_cb->priv.mac, mode);
+}
+
+void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode)
+{
+	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
+
+	mac_ctrl_drv->mac_disable(mac_cb->priv.mac, mode);
+}
+
 /**
  * hns_mac_init - init mac
  * @dsaf_dev: dsa fabric device struct pointer
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index bbc0a98e7ca32..fbc75341bef76 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -356,6 +356,9 @@ struct mac_driver {
 	/*adjust mac mode of port,include speed and duplex*/
 	int (*adjust_link)(void *mac_drv, enum mac_speed speed,
 			   u32 full_duplex);
+	/* need adjust link */
+	bool (*need_adjust_link)(void *mac_drv, enum mac_speed speed,
+				 int duplex);
 	/* config autoegotaite mode of port*/
 	void (*set_an_mode)(void *mac_drv, u8 enable);
 	/* config loopbank mode */
@@ -394,6 +397,7 @@ struct mac_driver {
 	void (*get_info)(void *mac_drv, struct mac_info *mac_info);
 
 	void (*update_stats)(void *mac_drv);
+	int (*wait_fifo_clean)(void *mac_drv);
 
 	enum mac_mode mac_mode;
 	u8 mac_id;
@@ -427,6 +431,7 @@ void *hns_xgmac_config(struct hns_mac_cb *mac_cb,
 
 int hns_mac_init(struct dsaf_device *dsaf_dev);
 void mac_adjust_link(struct net_device *net_dev);
+bool hns_mac_need_adjust_link(struct hns_mac_cb *mac_cb, int speed, int duplex);
 void hns_mac_get_link_status(struct hns_mac_cb *mac_cb,	u32 *link_status);
 int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb, u32 vmid, char *addr);
 int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
@@ -463,5 +468,8 @@ int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
 int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
 		       const unsigned char *addr);
 int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn);
+void hns_mac_enable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode);
+void hns_mac_disable(struct hns_mac_cb *mac_cb, enum mac_commom_mode mode);
+int hns_mac_wait_fifo_clean(struct hns_mac_cb *mac_cb);
 
 #endif /* _HNS_DSAF_MAC_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index ca50c2553a9cb..e557a4ef5996c 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -2727,6 +2727,35 @@ void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
 	soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX;
 }
 
+int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port)
+{
+	u32 val, val_tmp;
+	int wait_cnt;
+
+	if (port >= DSAF_SERVICE_NW_NUM)
+		return 0;
+
+	wait_cnt = 0;
+	while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+		val = dsaf_read_dev(dsaf_dev, DSAF_VOQ_IN_PKT_NUM_0_REG +
+			(port + DSAF_XGE_NUM) * 0x40);
+		val_tmp = dsaf_read_dev(dsaf_dev, DSAF_VOQ_OUT_PKT_NUM_0_REG +
+			(port + DSAF_XGE_NUM) * 0x40);
+		if (val == val_tmp)
+			break;
+
+		usleep_range(100, 200);
+	}
+
+	if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+		dev_err(dsaf_dev->dev, "hns dsaf clean wait timeout(%u - %u).\n",
+			val, val_tmp);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 /**
  * dsaf_probe - probo dsaf dev
  * @pdev: dasf platform device
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index 4507e8222683c..0e1cd99831a60 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -44,6 +44,8 @@ struct hns_mac_cb;
 #define DSAF_ROCE_CREDIT_CHN	8
 #define DSAF_ROCE_CHAN_MODE	3
 
+#define HNS_MAX_WAIT_CNT 10000
+
 enum dsaf_roce_port_mode {
 	DSAF_ROCE_6PORT_MODE,
 	DSAF_ROCE_4PORT_MODE,
@@ -463,5 +465,6 @@ int hns_dsaf_rm_mac_addr(
 
 int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev,
 			     u8 mac_id, u8 port_num);
+int hns_dsaf_wait_pkt_clean(struct dsaf_device *dsaf_dev, int port);
 
 #endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
index d160d8c9e45ba..0942e4916d9d0 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.c
@@ -275,6 +275,29 @@ static void hns_ppe_exc_irq_en(struct hns_ppe_cb *ppe_cb, int en)
 	dsaf_write_dev(ppe_cb, PPE_INTEN_REG, msk_vlue & vld_msk);
 }
 
+int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb)
+{
+	int wait_cnt;
+	u32 val;
+
+	wait_cnt = 0;
+	while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+		val = dsaf_read_dev(ppe_cb, PPE_CURR_TX_FIFO0_REG) & 0x3ffU;
+		if (!val)
+			break;
+
+		usleep_range(100, 200);
+	}
+
+	if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+		dev_err(ppe_cb->dev, "hns ppe tx fifo clean wait timeout, still has %u pkt.\n",
+			val);
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 /**
  * ppe_init_hw - init ppe
  * @ppe_cb: ppe device
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
index 9d8e643e8aa6f..f670e63a5a018 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_ppe.h
@@ -100,6 +100,7 @@ struct ppe_common_cb {
 
 };
 
+int hns_ppe_wait_tx_fifo_clean(struct hns_ppe_cb *ppe_cb);
 int hns_ppe_init(struct dsaf_device *dsaf_dev);
 
 void hns_ppe_uninit(struct dsaf_device *dsaf_dev);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
index 9d76e2e54f9df..5d64519b9b1dc 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.c
@@ -66,6 +66,29 @@ void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag)
 			"queue(%d) wait fbd(%d) clean fail!!\n", i, fbd_num);
 }
 
+int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs)
+{
+	u32 head, tail;
+	int wait_cnt;
+
+	tail = dsaf_read_dev(&qs->tx_ring, RCB_REG_TAIL);
+	wait_cnt = 0;
+	while (wait_cnt++ < HNS_MAX_WAIT_CNT) {
+		head = dsaf_read_dev(&qs->tx_ring, RCB_REG_HEAD);
+		if (tail == head)
+			break;
+
+		usleep_range(100, 200);
+	}
+
+	if (wait_cnt >= HNS_MAX_WAIT_CNT) {
+		dev_err(qs->dev->dev, "rcb wait timeout, head not equal to tail.\n");
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
 /**
  *hns_rcb_reset_ring_hw - ring reset
  *@q: ring struct pointer
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
index 602816498c8dd..2319b772a271e 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_rcb.h
@@ -136,6 +136,7 @@ void hns_rcbv2_int_clr_hw(struct hnae_queue *q, u32 flag);
 void hns_rcb_init_hw(struct ring_pair_cb *ring);
 void hns_rcb_reset_ring_hw(struct hnae_queue *q);
 void hns_rcb_wait_fbd_clean(struct hnae_queue **qs, int q_num, u32 flag);
+int hns_rcb_wait_tx_ring_clean(struct hnae_queue *qs);
 u32 hns_rcb_get_rx_coalesced_frames(
 	struct rcb_common_cb *rcb_common, u32 port_idx);
 u32 hns_rcb_get_tx_coalesced_frames(
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 886cbbf25761a..74d935d82cbc6 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -464,6 +464,7 @@
 #define RCB_RING_INTMSK_TX_OVERTIME_REG		0x000C4
 #define RCB_RING_INTSTS_TX_OVERTIME_REG		0x000C8
 
+#define GMAC_FIFO_STATE_REG			0x0000UL
 #define GMAC_DUPLEX_TYPE_REG			0x0008UL
 #define GMAC_FD_FC_TYPE_REG			0x000CUL
 #define GMAC_TX_WATER_LINE_REG			0x0010UL
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index 02a0ba20fad55..f56855e63c961 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -1112,11 +1112,26 @@ static void hns_nic_adjust_link(struct net_device *ndev)
 	struct hnae_handle *h = priv->ae_handle;
 	int state = 1;
 
+	/* If there is no phy, do not need adjust link */
 	if (ndev->phydev) {
-		h->dev->ops->adjust_link(h, ndev->phydev->speed,
-					 ndev->phydev->duplex);
-		state = ndev->phydev->link;
+		/* When phy link down, do nothing */
+		if (ndev->phydev->link == 0)
+			return;
+
+		if (h->dev->ops->need_adjust_link(h, ndev->phydev->speed,
+						  ndev->phydev->duplex)) {
+			/* because Hi161X chip don't support to change gmac
+			 * speed and duplex with traffic. Delay 200ms to
+			 * make sure there is no more data in chip FIFO.
+			 */
+			netif_carrier_off(ndev);
+			msleep(200);
+			h->dev->ops->adjust_link(h, ndev->phydev->speed,
+						 ndev->phydev->duplex);
+			netif_carrier_on(ndev);
+		}
 	}
+
 	state = state && h->dev->ops->get_status(h);
 
 	if (state != priv->link) {

From 455c4401fe7a538facaffb35b906ce19f1ece474 Mon Sep 17 00:00:00 2001
From: Peng Li <lipeng321@huawei.com>
Date: Mon, 27 Aug 2018 09:59:30 +0800
Subject: [PATCH 067/257] net: hns: add netif_carrier_off before change speed
 and duplex

If there are packets in hardware when changing the speed
or duplex, it may cause hardware hang up.

This patch adds netif_carrier_off before change speed and
duplex in ethtool_ops.set_link_ksettings, and adds
netif_carrier_on after complete the change.

Signed-off-by: Peng Li <lipeng321@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 08f3c4743f747..774beda040a16 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -243,7 +243,9 @@ static int hns_nic_set_link_ksettings(struct net_device *net_dev,
 	}
 
 	if (h->dev->ops->adjust_link) {
+		netif_carrier_off(net_dev);
 		h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex);
+		netif_carrier_on(net_dev);
 		return 0;
 	}
 

From 6e0bb04d0e4f597d8d8f4f21401a9636f2809fd1 Mon Sep 17 00:00:00 2001
From: Chris Brandt <chris.brandt@renesas.com>
Date: Mon, 27 Aug 2018 12:42:02 -0500
Subject: [PATCH 068/257] sh_eth: Add R7S9210 support

Add support for the R7S9210 which is part of the RZ/A2 series.

Signed-off-by: Chris Brandt <chris.brandt@renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/sh_eth.txt        |  1 +
 drivers/net/ethernet/renesas/sh_eth.c         | 36 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt
index 76db9f13ad96c..abc36274227c7 100644
--- a/Documentation/devicetree/bindings/net/sh_eth.txt
+++ b/Documentation/devicetree/bindings/net/sh_eth.txt
@@ -16,6 +16,7 @@ Required properties:
 	      "renesas,ether-r8a7794"  if the device is a part of R8A7794 SoC.
 	      "renesas,gether-r8a77980" if the device is a part of R8A77980 SoC.
 	      "renesas,ether-r7s72100" if the device is a part of R7S72100 SoC.
+	      "renesas,ether-r7s9210" if the device is a part of R7S9210 SoC.
 	      "renesas,rcar-gen1-ether" for a generic R-Car Gen1 device.
 	      "renesas,rcar-gen2-ether" for a generic R-Car Gen2 or RZ/G1
 	                                device.
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index ad4433d592377..f27a0dc8c5633 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -798,6 +798,41 @@ static struct sh_eth_cpu_data r8a77980_data = {
 	.magic		= 1,
 	.cexcr		= 1,
 };
+
+/* R7S9210 */
+static struct sh_eth_cpu_data r7s9210_data = {
+	.soft_reset	= sh_eth_soft_reset,
+
+	.set_duplex	= sh_eth_set_duplex,
+	.set_rate	= sh_eth_set_rate_rcar,
+
+	.register_type	= SH_ETH_REG_FAST_SH4,
+
+	.edtrr_trns	= EDTRR_TRNS_ETHER,
+	.ecsr_value	= ECSR_ICD,
+	.ecsipr_value	= ECSIPR_ICDIP,
+	.eesipr_value	= EESIPR_TWBIP | EESIPR_TABTIP | EESIPR_RABTIP |
+			  EESIPR_RFCOFIP | EESIPR_ECIIP | EESIPR_FTCIP |
+			  EESIPR_TDEIP | EESIPR_TFUFIP | EESIPR_FRIP |
+			  EESIPR_RDEIP | EESIPR_RFOFIP | EESIPR_CNDIP |
+			  EESIPR_DLCIP | EESIPR_CDIP | EESIPR_TROIP |
+			  EESIPR_RMAFIP | EESIPR_RRFIP | EESIPR_RTLFIP |
+			  EESIPR_RTSFIP | EESIPR_PREIP | EESIPR_CERFIP,
+
+	.tx_check	= EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_TRO,
+	.eesr_err_check	= EESR_TWB | EESR_TABT | EESR_RABT | EESR_RFE |
+			  EESR_RDE | EESR_RFRMER | EESR_TFE | EESR_TDE,
+
+	.fdr_value	= 0x0000070f,
+
+	.apr		= 1,
+	.mpr		= 1,
+	.tpauser	= 1,
+	.hw_swap	= 1,
+	.rpadir		= 1,
+	.no_ade		= 1,
+	.xdfar_rw	= 1,
+};
 #endif /* CONFIG_OF */
 
 static void sh_eth_set_rate_sh7724(struct net_device *ndev)
@@ -3121,6 +3156,7 @@ static const struct of_device_id sh_eth_match_table[] = {
 	{ .compatible = "renesas,ether-r8a7794", .data = &rcar_gen2_data },
 	{ .compatible = "renesas,gether-r8a77980", .data = &r8a77980_data },
 	{ .compatible = "renesas,ether-r7s72100", .data = &r7s72100_data },
+	{ .compatible = "renesas,ether-r7s9210", .data = &r7s9210_data },
 	{ .compatible = "renesas,rcar-gen1-ether", .data = &rcar_gen1_data },
 	{ .compatible = "renesas,rcar-gen2-ether", .data = &rcar_gen2_data },
 	{ }

From 85eb9af182243ce9a8b72410d5321c440ac5f8d7 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Mon, 27 Aug 2018 22:56:22 +0200
Subject: [PATCH 069/257] net/sched: act_pedit: fix dump of extended layered op

in the (rare) case of failure in nla_nest_start(), missing NULL checks in
tcf_pedit_key_ex_dump() can make the following command

 # tc action add action pedit ex munge ip ttl set 64

dereference a NULL pointer:

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 PGD 800000007d1cd067 P4D 800000007d1cd067 PUD 7acd3067 PMD 0
 Oops: 0002 [#1] SMP PTI
 CPU: 0 PID: 3336 Comm: tc Tainted: G            E     4.18.0.pedit+ #425
 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
 RIP: 0010:tcf_pedit_dump+0x19d/0x358 [act_pedit]
 Code: be 02 00 00 00 48 89 df 66 89 44 24 20 e8 9b b1 fd e0 85 c0 75 46 8b 83 c8 00 00 00 49 83 c5 08 48 03 83 d0 00 00 00 4d 39 f5 <66> 89 04 25 00 00 00 00 0f 84 81 01 00 00 41 8b 45 00 48 8d 4c 24
 RSP: 0018:ffffb5d4004478a8 EFLAGS: 00010246
 RAX: ffff8880fcda2070 RBX: ffff8880fadd2900 RCX: 0000000000000000
 RDX: 0000000000000002 RSI: ffffb5d4004478ca RDI: ffff8880fcda206e
 RBP: ffff8880fb9cb900 R08: 0000000000000008 R09: ffff8880fcda206e
 R10: ffff8880fadd2900 R11: 0000000000000000 R12: ffff8880fd26cf40
 R13: ffff8880fc957430 R14: ffff8880fc957430 R15: ffff8880fb9cb988
 FS:  00007f75a537a740(0000) GS:ffff8880fda00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000000 CR3: 000000007a2fa005 CR4: 00000000001606f0
 Call Trace:
  ? __nla_reserve+0x38/0x50
  tcf_action_dump_1+0xd2/0x130
  tcf_action_dump+0x6a/0xf0
  tca_get_fill.constprop.31+0xa3/0x120
  tcf_action_add+0xd1/0x170
  tc_ctl_action+0x137/0x150
  rtnetlink_rcv_msg+0x263/0x2d0
  ? _cond_resched+0x15/0x40
  ? rtnl_calcit.isra.30+0x110/0x110
  netlink_rcv_skb+0x4d/0x130
  netlink_unicast+0x1a3/0x250
  netlink_sendmsg+0x2ae/0x3a0
  sock_sendmsg+0x36/0x40
  ___sys_sendmsg+0x26f/0x2d0
  ? do_wp_page+0x8e/0x5f0
  ? handle_pte_fault+0x6c3/0xf50
  ? __handle_mm_fault+0x38e/0x520
  ? __sys_sendmsg+0x5e/0xa0
  __sys_sendmsg+0x5e/0xa0
  do_syscall_64+0x5b/0x180
  entry_SYSCALL_64_after_hwframe+0x44/0xa9
 RIP: 0033:0x7f75a4583ba0
 Code: c3 48 8b 05 f2 62 2c 00 f7 db 64 89 18 48 83 cb ff eb dd 0f 1f 80 00 00 00 00 83 3d fd c3 2c 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ae cc 00 00 48 89 04 24
 RSP: 002b:00007fff60ee7418 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
 RAX: ffffffffffffffda RBX: 00007fff60ee7540 RCX: 00007f75a4583ba0
 RDX: 0000000000000000 RSI: 00007fff60ee7490 RDI: 0000000000000003
 RBP: 000000005b842d3e R08: 0000000000000002 R09: 0000000000000000
 R10: 00007fff60ee6ea0 R11: 0000000000000246 R12: 0000000000000000
 R13: 00007fff60ee7554 R14: 0000000000000001 R15: 000000000066c100
 Modules linked in: act_pedit(E) ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul ext4 crc32_pclmul mbcache ghash_clmulni_intel jbd2 pcbc snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd snd_timer cryptd glue_helper snd joydev pcspkr soundcore virtio_balloon i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c ata_generic pata_acpi virtio_net net_failover virtio_blk virtio_console failover qxl crc32c_intel drm_kms_helper syscopyarea serio_raw sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix virtio_pci libata virtio_ring i2c_core virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_pedit]
 CR2: 0000000000000000

Like it's done for other TC actions, give up dumping pedit rules and return
an error if nla_nest_start() returns NULL.

Fixes: 71d0ed7079df ("net/act_pedit: Support using offset relative to the conventional network headers")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_pedit.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 1070340700197..ad99a99f11f6d 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -109,16 +109,18 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 {
 	struct nlattr *keys_start = nla_nest_start(skb, TCA_PEDIT_KEYS_EX);
 
+	if (!keys_start)
+		goto nla_failure;
 	for (; n > 0; n--) {
 		struct nlattr *key_start;
 
 		key_start = nla_nest_start(skb, TCA_PEDIT_KEY_EX);
+		if (!key_start)
+			goto nla_failure;
 
 		if (nla_put_u16(skb, TCA_PEDIT_KEY_EX_HTYPE, keys_ex->htype) ||
-		    nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd)) {
-			nlmsg_trim(skb, keys_start);
-			return -EINVAL;
-		}
+		    nla_put_u16(skb, TCA_PEDIT_KEY_EX_CMD, keys_ex->cmd))
+			goto nla_failure;
 
 		nla_nest_end(skb, key_start);
 
@@ -128,6 +130,9 @@ static int tcf_pedit_key_ex_dump(struct sk_buff *skb,
 	nla_nest_end(skb, keys_start);
 
 	return 0;
+nla_failure:
+	nla_nest_cancel(skb, keys_start);
+	return -EINVAL;
 }
 
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
@@ -418,7 +423,10 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
 	opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
 
 	if (p->tcfp_keys_ex) {
-		tcf_pedit_key_ex_dump(skb, p->tcfp_keys_ex, p->tcfp_nkeys);
+		if (tcf_pedit_key_ex_dump(skb,
+					  p->tcfp_keys_ex,
+					  p->tcfp_nkeys))
+			goto nla_put_failure;
 
 		if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
 			goto nla_put_failure;

From afe49de44c27a89e8e9631c44b5ffadf6ace65e2 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 28 Aug 2018 13:40:51 +0200
Subject: [PATCH 070/257] ipv6: fix cleanup ordering for ip6_mr failure

Commit 15e668070a64 ("ipv6: reorder icmpv6_init() and ip6_mr_init()")
moved the cleanup label for ipmr_fail, but should have changed the
contents of the cleanup labels as well. Now we can end up cleaning up
icmpv6 even though it hasn't been initialized (jump to icmp_fail or
ipmr_fail).

Simply undo things in the reverse order of their initialization.

Example of panic (triggered by faking a failure of icmpv6_init):

    kasan: GPF could be caused by NULL-ptr deref or user memory access
    general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI
    [...]
    RIP: 0010:__list_del_entry_valid+0x79/0x160
    [...]
    Call Trace:
     ? lock_release+0x8a0/0x8a0
     unregister_pernet_operations+0xd4/0x560
     ? ops_free_list+0x480/0x480
     ? down_write+0x91/0x130
     ? unregister_pernet_subsys+0x15/0x30
     ? down_read+0x1b0/0x1b0
     ? up_read+0x110/0x110
     ? kmem_cache_create_usercopy+0x1b4/0x240
     unregister_pernet_subsys+0x1d/0x30
     icmpv6_cleanup+0x1d/0x30
     inet6_init+0x1b5/0x23f

Fixes: 15e668070a64 ("ipv6: reorder icmpv6_init() and ip6_mr_init()")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 673bba31eb180..e5da133c6932f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1113,11 +1113,11 @@ static int __init inet6_init(void)
 igmp_fail:
 	ndisc_cleanup();
 ndisc_fail:
-	ip6_mr_cleanup();
+	icmpv6_cleanup();
 icmp_fail:
-	unregister_pernet_subsys(&inet6_net_ops);
+	ip6_mr_cleanup();
 ipmr_fail:
-	icmpv6_cleanup();
+	unregister_pernet_subsys(&inet6_net_ops);
 register_pernet_fail:
 	sock_unregister(PF_INET6);
 	rtnl_unregister_all(PF_INET6);

From a03dc36bdca6b614651fedfcd8559cf914d2d21d Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 28 Aug 2018 13:40:52 +0200
Subject: [PATCH 071/257] ipv6: fix cleanup ordering for pingv6 registration

Commit 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.")
contains an error in the cleanup path of inet6_init(): when
proto_register(&pingv6_prot, 1) fails, we try to unregister
&pingv6_prot. When rawv6_init() fails, we skip unregistering
&pingv6_prot.

Example of panic (triggered by faking a failure of
 proto_register(&pingv6_prot, 1)):

    general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI
    [...]
    RIP: 0010:__list_del_entry_valid+0x79/0x160
    [...]
    Call Trace:
     proto_unregister+0xbb/0x550
     ? trace_preempt_on+0x6f0/0x6f0
     ? sock_no_shutdown+0x10/0x10
     inet6_init+0x153/0x1b8

Fixes: 6d0bfe226116 ("net: ipv6: Add IPv6 support to the ping socket.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/af_inet6.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e5da133c6932f..9a4261e502727 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -938,14 +938,14 @@ static int __init inet6_init(void)
 
 	err = proto_register(&pingv6_prot, 1);
 	if (err)
-		goto out_unregister_ping_proto;
+		goto out_unregister_raw_proto;
 
 	/* We MUST register RAW sockets before we create the ICMP6,
 	 * IGMP6, or NDISC control sockets.
 	 */
 	err = rawv6_init();
 	if (err)
-		goto out_unregister_raw_proto;
+		goto out_unregister_ping_proto;
 
 	/* Register the family here so that the init calls below will
 	 * be able to create sockets. (?? is this dangerous ??)

From f707ef61e17261f2bb18c3e4871c6f135ab3aba9 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Tue, 28 Aug 2018 13:40:53 +0200
Subject: [PATCH 072/257] net: rtnl: return early from rtnl_unregister_all when
 protocol isn't registered

rtnl_unregister_all(PF_INET6) gets called from inet6_init in cases when
no handler has been registered for PF_INET6 yet, for example if
ip6_mr_init() fails. Abort and avoid a NULL pointer deref in that case.

Example of panic (triggered by faking a failure of
 register_pernet_subsys):

    general protection fault: 0000 [#1] PREEMPT SMP KASAN PTI
    [...]
    RIP: 0010:rtnl_unregister_all+0x17e/0x2a0
    [...]
    Call Trace:
     ? rtnetlink_net_init+0x250/0x250
     ? sock_unregister+0x103/0x160
     ? kernel_getsockopt+0x200/0x200
     inet6_init+0x197/0x20d

Fixes: e2fddf5e96df ("[IPV6]: Make af_inet6 to check ip6_route_init return value.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 24431e578310c..60c928894a786 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -324,6 +324,10 @@ void rtnl_unregister_all(int protocol)
 
 	rtnl_lock();
 	tab = rtnl_msg_handlers[protocol];
+	if (!tab) {
+		rtnl_unlock();
+		return;
+	}
 	RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL);
 	for (msgindex = 0; msgindex < RTM_NR_MSGTYPES; msgindex++) {
 		link = tab[msgindex];

From c305660b325463cdf3d944492f96155dc931b448 Mon Sep 17 00:00:00 2001
From: Dinh Nguyen <dinguyen@kernel.org>
Date: Tue, 28 Aug 2018 10:19:20 -0500
Subject: [PATCH 073/257] net: stmmac: build the dwmac-socfpga platform driver
 for Stratix10

The Stratix10 SoC is an AARCH64 based platform that shares the same ethernet
controller that is on other SoCFPGA platforms. Build the platform driver.

Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index bf4acebb6bcdd..324049eebb9b1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -110,7 +110,7 @@ config DWMAC_ROCKCHIP
 
 config DWMAC_SOCFPGA
 	tristate "SOCFPGA dwmac support"
-	default ARCH_SOCFPGA
+	default (ARCH_SOCFPGA || ARCH_STRATIX10)
 	depends on OF && (ARCH_SOCFPGA || ARCH_STRATIX10 || COMPILE_TEST)
 	select MFD_SYSCON
 	help

From c3c397c1f16c51601a3fac4fe0c63ad8aa85a904 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Tue, 28 Aug 2018 12:33:15 -0700
Subject: [PATCH 074/257] net: bcmgenet: use MAC link status for fixed phy

When using the fixed PHY with GENET (e.g. MOCA) the PHY link
status can be determined from the internal link status captured
by the MAC. This allows the PHY state machine to use the correct
link state with the fixed PHY even if MAC link event interrupts
are missed when the net device is opened.

Fixes: 8d88c6ebb34c ("net: bcmgenet: enable MoCA link state change detection")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.h |  3 +++
 drivers/net/ethernet/broadcom/genet/bcmmii.c   | 10 ++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index b773bc07edf7c..14b49612aa863 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -186,6 +186,9 @@ struct bcmgenet_mib_counters {
 #define UMAC_MAC1			0x010
 #define UMAC_MAX_FRAME_LEN		0x014
 
+#define UMAC_MODE			0x44
+#define  MODE_LINK_STATUS		(1 << 5)
+
 #define UMAC_EEE_CTRL			0x064
 #define  EN_LPI_RX_PAUSE		(1 << 0)
 #define  EN_LPI_TX_PFC			(1 << 1)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 5333274a283cb..4241ae928d4ab 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -115,8 +115,14 @@ void bcmgenet_mii_setup(struct net_device *dev)
 static int bcmgenet_fixed_phy_link_update(struct net_device *dev,
 					  struct fixed_phy_status *status)
 {
-	if (dev && dev->phydev && status)
-		status->link = dev->phydev->link;
+	struct bcmgenet_priv *priv;
+	u32 reg;
+
+	if (dev && dev->phydev && status) {
+		priv = netdev_priv(dev);
+		reg = bcmgenet_umac_readl(priv, UMAC_MODE);
+		status->link = !!(reg & MODE_LINK_STATUS);
+	}
 
 	return 0;
 }

From 9174c1d6196d612799808009ec2796df021ab625 Mon Sep 17 00:00:00 2001
From: Xiaolin Zhang <xiaolin.zhang@intel.com>
Date: Tue, 7 Aug 2018 20:39:16 +0800
Subject: [PATCH 075/257] drm/i915/gvt: emulate gen9 dbuf ctl register access

there is below call track at boot time when booting guest
with kabylake vgpu with specifal configuration and this try to fix it.

[drm:gen9_dbuf_enable [i915]] *ERROR* DBuf power enable timeout
------------[ cut here ]------------
WARNING: gen9_dc_off_power_well_enable+0x224/0x230 [i915]
Unexpected DBuf power power state (0x8000000a)
Hardware name: Red Hat KVM, BIOS 1.11.0-2.el7 04/01/2014
Call Trace:
 [<ffffffff99d24408>] dump_stack+0x19/0x1b
 [<ffffffff996926d8>] __warn+0xd8/0x100
 [<ffffffff9969275f>] warn_slowpath_fmt+0x5f/0x80
 [<ffffffffc07bbae4>] gen9_dc_off_power_well_enable+0x224/0x230 [i915]
 [<ffffffffc07ba9d2>] intel_power_well_enable+0x42/0x50 [i915]
 [<ffffffffc07baa6a>] __intel_display_power_get_domain+0x8a/0xb0 [i915]
 [<ffffffffc07bdb93>] intel_display_power_get+0x33/0x50 [i915]
 [<ffffffffc07bdf95>] intel_display_set_init_power+0x45/0x50 [i915]
 [<ffffffffc07be003>] intel_power_domains_init_hw+0x63/0x8a0 [i915]
 [<ffffffffc07995c3>] i915_driver_load+0xae3/0x1760 [i915]
 [<ffffffff99bd6580>] ? nvmem_register+0x500/0x500
 [<ffffffffc07a476c>] i915_pci_probe+0x2c/0x50 [i915]
 [<ffffffff9999cfea>] local_pci_probe+0x4a/0xb0
 [<ffffffff9999e729>] pci_device_probe+0x109/0x160
 [<ffffffff99a79aa5>] driver_probe_device+0xc5/0x3e0
 [<ffffffff99a79ea3>] __driver_attach+0x93/0xa0
 [<ffffffff99a79e10>] ? __device_attach+0x50/0x50
 [<ffffffff99a77645>] bus_for_each_dev+0x75/0xc0
 [<ffffffff99a7941e>] driver_attach+0x1e/0x20
 [<ffffffff99a78ec0>] bus_add_driver+0x200/0x2d0
 [<ffffffff99a7a534>] driver_register+0x64/0xf0
 [<ffffffff9999df65>] __pci_register_driver+0xa5/0xc0
 [<ffffffffc0929000>] ? 0xffffffffc0928fff
 [<ffffffffc0929059>] i915_init+0x59/0x5c [i915]
 [<ffffffff9960210a>] do_one_initcall+0xba/0x240
 [<ffffffff9971108c>] load_module+0x272c/0x2bc0
 [<ffffffff9997b990>] ? ddebug_proc_write+0xf0/0xf0
 [<ffffffff997115e5>] SyS_init_module+0xc5/0x110
 [<ffffffff99d36795>] system_call_fastpath+0x1c/0x21

Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>

Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 7a58ca5551977..450e730743a13 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1296,6 +1296,19 @@ static int power_well_ctl_mmio_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
+static int gen9_dbuf_ctl_mmio_write(struct intel_vgpu *vgpu,
+		unsigned int offset, void *p_data, unsigned int bytes)
+{
+	write_vreg(vgpu, offset, p_data, bytes);
+
+	if (vgpu_vreg(vgpu, offset) & DBUF_POWER_REQUEST)
+		vgpu_vreg(vgpu, offset) |= DBUF_POWER_STATE;
+	else
+		vgpu_vreg(vgpu, offset) &= ~DBUF_POWER_STATE;
+
+	return 0;
+}
+
 static int fpga_dbg_mmio_write(struct intel_vgpu *vgpu,
 	unsigned int offset, void *p_data, unsigned int bytes)
 {
@@ -2812,6 +2825,8 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(HSW_PWR_WELL_CTL_DRIVER(SKL_DISP_PW_MISC_IO), D_SKL_PLUS, NULL,
 		skl_power_well_ctl_write);
 
+	MMIO_DH(DBUF_CTL, D_SKL_PLUS, NULL, gen9_dbuf_ctl_mmio_write);
+
 	MMIO_D(_MMIO(0xa210), D_SKL_PLUS);
 	MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
@@ -2987,8 +3002,6 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 		NULL, gen9_trtte_write);
 	MMIO_DH(_MMIO(0x4dfc), D_SKL_PLUS, NULL, gen9_trtt_chicken_write);
 
-	MMIO_D(_MMIO(0x45008), D_SKL_PLUS);
-
 	MMIO_D(_MMIO(0x46430), D_SKL_PLUS);
 
 	MMIO_D(_MMIO(0x46520), D_SKL_PLUS);

From c8ab5ac30ccc20a31672ab0f8938a6271dfe4122 Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Mon, 20 Aug 2018 16:46:34 +0800
Subject: [PATCH 076/257] drm/i915/gvt: Make correct handling to vreg
 BXT_PHY_CTL_FAMILY

Guest kernel will write to BXT_PHY_CTL_FAMILY to reset DDI PHY
and pull BXT_PHY_CTL to check PHY status. Previous handling will
set/reset BXT_PHY_CTL of all PHYs at same time on receiving vreg
write to some BXT_PHY_CTL_FAMILY. If some BXT_PHY_CTL is already
enabled, following reset to another BXT_PHY_CTL_FAMILY will clear
the enabled BXT_PHY_CTL, which result in guest kernel print:

-----------------------------------
[drm:intel_ddi_get_hw_state [i915]]
*ERROR* Port B enabled but PHY powered down? (PHY_CTL 00000000)
-----------------------------------

The correct handling should operate BXT_PHY_CTL_FAMILY and
BXT_PHY_CTL on the same DDI.

v2: Use correct reg define. The naming looks confusing, however
    current i915_reg.h bind DPIO_PHY0 to _PHY_CTL_FAMILY_DDI and
    bind DPIO_PHY1 to _PHY_CTL_FAMILY_EDP, pairing to
    _BXT_PHY_CTL_DDI_A and _BXT_PHY_CTL_DDI_B respectively.
v3: v2 incorrectly map _PHY_CTL_FAMILY_EDP to _BXT_PHY_CTL_DDI_A.
    BXT_PHY_CTL() looks up DDI using PORTx but not PHYx. Based on
    DPIO_PHY to DDI mapping, make correct vreg handle to BXT_PHY_CTL
    on receiving vreg write to BXT_PHY_CTL_FAMILY. (He, Min)

Current mapping according to bxt_power_wells:
dpio-common-a:
    >>> DPIO_PHY1
    >>> BXT_DPIO_CMN_A_POWER_DOMAINS
    >>> POWER_DOMAIN_PORT_DDI_A_LANES
    >>> PORT_A

dpio-common-bc:
    >>> DPIO_PHY0
    >>> BXT_DPIO_CMN_BC_POWER_DOMAINS
    >>> POWER_DOMAIN_PORT_DDI_B_LANES | POWER_DOMAIN_PORT_DDI_C_LANES
    >>> PORT_B or PORT_C

Signed-off-by: Colin Xu <colin.xu@intel.com>
Reviewed-by: He, Min <min.he@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 450e730743a13..d0db55a796276 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -1538,9 +1538,15 @@ static int bxt_phy_ctl_family_write(struct intel_vgpu *vgpu,
 	u32 v = *(u32 *)p_data;
 	u32 data = v & COMMON_RESET_DIS ? BXT_PHY_LANE_ENABLED : 0;
 
-	vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_A) = data;
-	vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_B) = data;
-	vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_C) = data;
+	switch (offset) {
+	case _PHY_CTL_FAMILY_EDP:
+		vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_A) = data;
+		break;
+	case _PHY_CTL_FAMILY_DDI:
+		vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_B) = data;
+		vgpu_vreg(vgpu, _BXT_PHY_CTL_DDI_C) = data;
+		break;
+	}
 
 	vgpu_vreg(vgpu, offset) = v;
 

From b9b824a55876275f8506c1c187558ab22d879f73 Mon Sep 17 00:00:00 2001
From: Colin Xu <colin.xu@intel.com>
Date: Fri, 17 Aug 2018 16:42:24 +0800
Subject: [PATCH 077/257] drm/i915/gvt: Handle GEN9_WM_CHICKEN3 with
 F_CMD_ACCESS.

Recent patch introduce strict check on scanning cmd:
Commit 8d458ea0ec33 ("drm/i915/gvt: return error on cmd access")

Before 8d458ea0ec33, if cmd_reg_handler() checks that a cmd access a mmio
that not marked as F_CMD_ACCESS, it simply returns 0 and log an error.
Now it will return -EBADRQC which will cause the workload fail to submit.

On BXT, i915 applies WaClearHIZ_WM_CHICKEN3 which will program
GEN9_WM_CHICKEN3 by LRI when init wa ctx. If it has no F_CMD_ACCESS flag,
vgpu will fail to start. Also add F_MODE_MASK since it's mode mask reg.

v2: Refresh commit message to elaborate issue symptom in detail.
v3: Make SKL_PLUS share same handling since GEN9_WM_CHICKEN3 should be
    F_CMD_ACCESS from HW aspect. (yan, zhenyu)

Signed-off-by: Colin Xu <colin.xu@intel.com>
Acked-by: Zhao Yan <yan.y.zhao@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/handlers.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index d0db55a796276..72afa518edd91 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -3044,7 +3044,9 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(_MMIO(0x44500), D_SKL_PLUS);
 	MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
-		NULL, NULL);
+		 NULL, NULL);
+	MMIO_DFH(GEN9_WM_CHICKEN3, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS,
+		 NULL, NULL);
 
 	MMIO_D(_MMIO(0x4ab8), D_KBL);
 	MMIO_D(_MMIO(0x2248), D_KBL | D_SKL);

From b2b599fb54f90ae395ddc51f0d49e4f28244a8f8 Mon Sep 17 00:00:00 2001
From: Hang Yuan <hang.yuan@linux.intel.com>
Date: Wed, 29 Aug 2018 17:15:56 +0800
Subject: [PATCH 078/257] drm/i915/gvt: move intel_runtime_pm_get out of
 spin_lock in stop_schedule

pm_runtime_get_sync in intel_runtime_pm_get might sleep if i915
device is not active. When stop vgpu schedule, the device may be
inactive. So need to move runtime_pm_get out of spin_lock/unlock.

Fixes: b24881e0b0b6("drm/i915/gvt: Add runtime_pm_get/put into gvt_switch_mmio
Cc: <stable@vger.kernel.org>
Signed-off-by: Hang Yuan <hang.yuan@linux.intel.com>
Signed-off-by: Xiong Zhang <xiong.y.zhang@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/mmio_context.c | 2 --
 drivers/gpu/drm/i915/gvt/sched_policy.c | 3 +++
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 42e1e6bdcc2cf..e872f4847fbe0 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -562,11 +562,9 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
 	 * performace for batch mmio read/write, so we need
 	 * handle forcewake mannually.
 	 */
-	intel_runtime_pm_get(dev_priv);
 	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
 	switch_mmio(pre, next, ring_id);
 	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
-	intel_runtime_pm_put(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 09d7bb72b4ff3..985fe81794ddc 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -426,6 +426,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		&vgpu->gvt->scheduler;
 	int ring_id;
 	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
 	if (!vgpu_data->active)
 		return;
@@ -444,6 +445,7 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		scheduler->current_vgpu = NULL;
 	}
 
+	intel_runtime_pm_get(dev_priv);
 	spin_lock_bh(&scheduler->mmio_context_lock);
 	for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) {
 		if (scheduler->engine_owner[ring_id] == vgpu) {
@@ -452,5 +454,6 @@ void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu)
 		}
 	}
 	spin_unlock_bh(&scheduler->mmio_context_lock);
+	intel_runtime_pm_put(dev_priv);
 	mutex_unlock(&vgpu->gvt->sched_lock);
 }

From b244ffa15c8b1aabdc117c0b6008086df7b668b7 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Thu, 30 Aug 2018 10:50:36 +0800
Subject: [PATCH 079/257] drm/i915/gvt: Fix drm_format_mod value for vGPU plane

Physical plane's tiling mode value is given directly as
drm_format_mod for plane query, which is not correct fourcc
code. Fix it by using correct intel tiling fourcc mod definition.

Current qemu seems also doesn't correctly utilize drm_format_mod
for plane object setting. Anyway this is required to fix the usage.

v3: use DRM_FORMAT_MOD_LINEAR, fix comment

v2: Fix missed old 'tiled' use for stride calculation

Fixes: e546e281d33d ("drm/i915/gvt: Dmabuf support for GVT-g")
Cc: Tina Zhang <tina.zhang@intel.com>
Cc: Gerd Hoffmann <kraxel@redhat.com>
Cc: Colin Xu <Colin.Xu@intel.com>
Reviewed-by: Colin Xu <Colin.Xu@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/dmabuf.c     | 33 +++++++++++++++++++++------
 drivers/gpu/drm/i915/gvt/fb_decoder.c |  5 ++--
 drivers/gpu/drm/i915/gvt/fb_decoder.h |  2 +-
 3 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c
index 6e3f56684f4ec..51ed99a378033 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.c
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.c
@@ -170,20 +170,22 @@ static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev,
 		unsigned int tiling_mode = 0;
 		unsigned int stride = 0;
 
-		switch (info->drm_format_mod << 10) {
-		case PLANE_CTL_TILED_LINEAR:
+		switch (info->drm_format_mod) {
+		case DRM_FORMAT_MOD_LINEAR:
 			tiling_mode = I915_TILING_NONE;
 			break;
-		case PLANE_CTL_TILED_X:
+		case I915_FORMAT_MOD_X_TILED:
 			tiling_mode = I915_TILING_X;
 			stride = info->stride;
 			break;
-		case PLANE_CTL_TILED_Y:
+		case I915_FORMAT_MOD_Y_TILED:
+		case I915_FORMAT_MOD_Yf_TILED:
 			tiling_mode = I915_TILING_Y;
 			stride = info->stride;
 			break;
 		default:
-			gvt_dbg_core("not supported tiling mode\n");
+			gvt_dbg_core("invalid drm_format_mod %llx for tiling\n",
+				     info->drm_format_mod);
 		}
 		obj->tiling_and_stride = tiling_mode | stride;
 	} else {
@@ -222,9 +224,26 @@ static int vgpu_get_plane_info(struct drm_device *dev,
 		info->height = p.height;
 		info->stride = p.stride;
 		info->drm_format = p.drm_format;
-		info->drm_format_mod = p.tiled;
+
+		switch (p.tiled) {
+		case PLANE_CTL_TILED_LINEAR:
+			info->drm_format_mod = DRM_FORMAT_MOD_LINEAR;
+			break;
+		case PLANE_CTL_TILED_X:
+			info->drm_format_mod = I915_FORMAT_MOD_X_TILED;
+			break;
+		case PLANE_CTL_TILED_Y:
+			info->drm_format_mod = I915_FORMAT_MOD_Y_TILED;
+			break;
+		case PLANE_CTL_TILED_YF:
+			info->drm_format_mod = I915_FORMAT_MOD_Yf_TILED;
+			break;
+		default:
+			gvt_vgpu_err("invalid tiling mode: %x\n", p.tiled);
+		}
+
 		info->size = (((p.stride * p.height * p.bpp) / 8) +
-				(PAGE_SIZE - 1)) >> PAGE_SHIFT;
+			      (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 	} else if (plane_id == DRM_PLANE_TYPE_CURSOR) {
 		ret = intel_vgpu_decode_cursor_plane(vgpu, &c);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c
index face664be3e8e..481896fb712ab 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.c
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -220,8 +220,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
 	if (IS_SKYLAKE(dev_priv)
 		|| IS_KABYLAKE(dev_priv)
 		|| IS_BROXTON(dev_priv)) {
-		plane->tiled = (val & PLANE_CTL_TILED_MASK) >>
-		_PLANE_CTL_TILED_SHIFT;
+		plane->tiled = val & PLANE_CTL_TILED_MASK;
 		fmt = skl_format_to_drm(
 			val & PLANE_CTL_FORMAT_MASK,
 			val & PLANE_CTL_ORDER_RGBX,
@@ -260,7 +259,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
 		return  -EINVAL;
 	}
 
-	plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10),
+	plane->stride = intel_vgpu_get_stride(vgpu, pipe, plane->tiled,
 		(IS_SKYLAKE(dev_priv)
 		|| IS_KABYLAKE(dev_priv)
 		|| IS_BROXTON(dev_priv)) ?
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h
index cb055f3c81a29..60c155085029c 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.h
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h
@@ -101,7 +101,7 @@ struct intel_gvt;
 /* color space conversion and gamma correction are not included */
 struct intel_vgpu_primary_plane_format {
 	u8	enabled;	/* plane is enabled */
-	u8	tiled;		/* X-tiled */
+	u32	tiled;		/* tiling mode: linear, X-tiled, Y tiled, etc */
 	u8	bpp;		/* bits per pixel */
 	u32	hw_format;	/* format field in the PRI_CTL register */
 	u32	drm_format;	/* format in DRM definition */

From c4053ef322081554765e1b708d6cdd8855e1d72d Mon Sep 17 00:00:00 2001
From: Baruch Siach <baruch@tkos.co.il>
Date: Wed, 29 Aug 2018 09:44:39 +0300
Subject: [PATCH 080/257] net: mvpp2: initialize port of_node pointer

Without a valid of_node in struct device we can't find the mvpp2 port
device by its DT node. Specifically, this breaks
of_find_net_device_by_node().

For example, the Armada 8040 based Clearfog GT-8K uses Marvell 88E6141
switch connected to the &cp1_eth2 port:

&cp1_mdio {
	...

	switch0: switch0@4 {
		compatible = "marvell,mv88e6085";
		...

		ports {
			...

			port@5 {
				reg = <5>;
				label = "cpu";
				ethernet = <&cp1_eth2>;
			};
		};
	};
};

Without this patch, dsa_register_switch() returns -EPROBE_DEFER because
of_find_net_device_by_node() can't find the device_node of the &cp1_eth2
device.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
index 32d785b616e1e..28500417843ed 100644
--- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
+++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
@@ -4803,6 +4803,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->min_mtu = ETH_MIN_MTU;
 	/* 9704 == 9728 - 20 and rounding to 8 */
 	dev->max_mtu = MVPP2_BM_JUMBO_PKT_SIZE;
+	dev->dev.of_node = port_node;
 
 	/* Phylink isn't used w/ ACPI as of now */
 	if (port_node) {

From 97763dc0f4010bc20e2969a6bf9a40a2551c4f79 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 29 Aug 2018 10:22:33 +0200
Subject: [PATCH 081/257] net_sched: reject unknown tcfa_action values

After the commit 802bfb19152c ("net/sched: user-space can't set
unknown tcfa_action values"), unknown tcfa_action values are
converted to TC_ACT_UNSPEC, but the common agreement is instead
rejecting such configurations.

This change also introduces a helper to simplify the destruction
of a single action, avoiding code duplication.

v1 -> v2:
 - helper is now static and renamed according to act_* convention
 - updated extack message, according to the new behavior

Fixes: 802bfb19152c ("net/sched: user-space can't set unknown tcfa_action values")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index db83dac1e7f48..316c98bb87e4e 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -662,6 +662,13 @@ int tcf_action_destroy(struct tc_action *actions[], int bind)
 	return ret;
 }
 
+static int tcf_action_destroy_1(struct tc_action *a, int bind)
+{
+	struct tc_action *actions[] = { a, NULL };
+
+	return tcf_action_destroy(actions, bind);
+}
+
 static int tcf_action_put(struct tc_action *p)
 {
 	return __tcf_action_put(p, false);
@@ -881,17 +888,16 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
 		err = tcf_action_goto_chain_init(a, tp);
 		if (err) {
-			struct tc_action *actions[] = { a, NULL };
-
-			tcf_action_destroy(actions, bind);
+			tcf_action_destroy_1(a, bind);
 			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
 			return ERR_PTR(err);
 		}
 	}
 
 	if (!tcf_action_valid(a->tcfa_action)) {
-		NL_SET_ERR_MSG(extack, "invalid action value, using TC_ACT_UNSPEC instead");
-		a->tcfa_action = TC_ACT_UNSPEC;
+		tcf_action_destroy_1(a, bind);
+		NL_SET_ERR_MSG(extack, "Invalid control action value");
+		return ERR_PTR(-EINVAL);
 	}
 
 	return a;

From 25a8238f4cc8425d4aade4f9041be468d0e8aa2e Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Wed, 29 Aug 2018 10:22:34 +0200
Subject: [PATCH 082/257] tc-testing: add test-cases for numeric and invalid
 control action

Only the police action allows us to specify an arbitrary numeric value
for the control action. This change introduces an explicit test case
for the above feature and then leverage it for testing the kernel behavior
for invalid control actions (reject).

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/police.json   | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index f03763d816172..30f9b54bd6668 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -312,6 +312,54 @@
             "$TC actions flush action police"
         ]
     },
+    {
+        "id": "6aaf",
+        "name": "Add police actions with conform-exceed control pass/pipe [with numeric values]",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 0/3 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action police index 1",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action pass/pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
+    {
+        "id": "29b1",
+        "name": "Add police actions with conform-exceed control <invalid>/drop",
+        "category": [
+            "actions",
+            "police"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action police",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action police rate 3mbit burst 250k conform-exceed 10/drop index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action police",
+        "matchPattern": "action order [0-9]*:  police 0x1 rate 3Mbit burst 250Kb mtu 2Kb action ",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action police"
+        ]
+    },
     {
         "id": "c26f",
         "name": "Add police action with invalid peakrate value",

From 4f0223bfe9c3e62d8f45a85f1ef1b18a8a263ef9 Mon Sep 17 00:00:00 2001
From: Arunk Khandavalli <akhandav@codeaurora.org>
Date: Thu, 30 Aug 2018 00:40:16 +0300
Subject: [PATCH 083/257] cfg80211: nl80211_update_ft_ies() to validate
 NL80211_ATTR_IE

nl80211_update_ft_ies() tried to validate NL80211_ATTR_IE with
is_valid_ie_attr() before dereferencing it, but that helper function
returns true in case of NULL pointer (i.e., attribute not included).
This can result to dereferencing a NULL pointer. Fix that by explicitly
checking that NL80211_ATTR_IE is included.

Fixes: 355199e02b83 ("cfg80211: Extend support for IEEE 802.11r Fast BSS Transition")
Signed-off-by: Arunk Khandavalli <akhandav@codeaurora.org>
Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/nl80211.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index ce0149a86c139..733ccf8679728 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -12099,6 +12099,7 @@ static int nl80211_update_ft_ies(struct sk_buff *skb, struct genl_info *info)
 		return -EOPNOTSUPP;
 
 	if (!info->attrs[NL80211_ATTR_MDID] ||
+	    !info->attrs[NL80211_ATTR_IE] ||
 	    !is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
 

From 1eb507903665442360a959136dfa3234c43db085 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Wed, 29 Aug 2018 21:03:25 +0200
Subject: [PATCH 084/257] mac80211: do not convert to A-MSDU if frag/subframe
 limited

Do not start to aggregate packets in a A-MSDU frame (converting the
first subframe to A-MSDU, adding the header) if max_tx_fragments or
max_amsdu_subframes limits are already exceeded by it. In particular,
this happens when drivers set the limit to 1 to avoid A-MSDUs at all.

Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
[reword commit message to be more precise]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 667a73d6eb5cf..1aac5e3c7eeea 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3208,9 +3208,6 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (skb->len + head->len > max_amsdu_len)
 		goto out;
 
-	if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
-		goto out;
-
 	nfrags = 1 + skb_shinfo(skb)->nr_frags;
 	nfrags += 1 + skb_shinfo(head)->nr_frags;
 	frag_tail = &skb_shinfo(head)->frag_list;
@@ -3226,6 +3223,9 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	if (max_frags && nfrags > max_frags)
 		goto out;
 
+	if (!ieee80211_amsdu_prepare_head(sdata, fast_tx, head))
+		goto out;
+
 	/*
 	 * Pad out the previous subframe to a multiple of 4 by adding the
 	 * padding to the next one, that's being added. Note that head->len

From aa58acf325b4aadeecae2bfc90658273b47dbace Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Aug 2018 10:55:49 +0200
Subject: [PATCH 085/257] mac80211: always account for A-MSDU header changes

In the error path of changing the SKB headroom of the second
A-MSDU subframe, we would not account for the already-changed
length of the first frame that just got converted to be in
A-MSDU format and thus is a bit longer now.

Fix this by doing the necessary accounting.

It would be possible to reorder the operations, but that would
make the code more complex (to calculate the necessary pad),
and the headroom expansion should not fail frequently enough
to make that worthwhile.

Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 1aac5e3c7eeea..6ca0865de9451 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3239,7 +3239,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 
 	if (!ieee80211_amsdu_realloc_pad(local, skb, sizeof(rfc1042_header) +
 						     2 + pad))
-		goto out;
+		goto out_recalc;
 
 	ret = true;
 	data = skb_push(skb, ETH_ALEN + 2);
@@ -3256,11 +3256,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	head->data_len += skb->len;
 	*frag_tail = skb;
 
-	flow->backlog += head->len - orig_len;
-	tin->backlog_bytes += head->len - orig_len;
-
-	fq_recalc_backlog(fq, tin, flow);
+out_recalc:
+	if (head->len != orig_len) {
+		flow->backlog += head->len - orig_len;
+		tin->backlog_bytes += head->len - orig_len;
 
+		fq_recalc_backlog(fq, tin, flow);
+	}
 out:
 	spin_unlock_bh(&fq->lock);
 

From 16037643969e095509cd8446a3f8e406a6dc3a2c Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 30 Aug 2018 15:13:16 +0200
Subject: [PATCH 086/257] ALSA: hda - Fix cancel_work_sync() stall from
 jackpoll work

On AMD/ATI controllers, the HD-audio controller driver allows a bus
reset upon the error recovery, and its procedure includes the
cancellation of pending jack polling work as found in
snd_hda_bus_codec_reset().  This works usually fine, but it becomes a
problem when the reset happens from the jack poll work itself; then
calling cancel_work_sync() from the work being processed tries to wait
the finish endlessly.

As a workaround, this patch adds the check of current_work() and
applies the cancel_work_sync() only when it's not from the
jackpoll_work.

This doesn't fix the root cause of the reported error below, but at
least, it eases the unexpected stall of the whole system.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200937
Cc: <stable@vger.kernel.org>
Cc: Lukas Wunner <lukas@wunner.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/pci/hda/hda_codec.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 0a50855370348..26d348b47867d 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -3935,7 +3935,8 @@ void snd_hda_bus_reset_codecs(struct hda_bus *bus)
 
 	list_for_each_codec(codec, bus) {
 		/* FIXME: maybe a better way needed for forced reset */
-		cancel_delayed_work_sync(&codec->jackpoll_work);
+		if (current_work() != &codec->jackpoll_work.work)
+			cancel_delayed_work_sync(&codec->jackpoll_work);
 #ifdef CONFIG_PM
 		if (hda_codec_is_power_on(codec)) {
 			hda_call_codec_suspend(codec);

From b871da4a7778eda2e700ae8bf5f86e74a004c1ec Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Thu, 23 Aug 2018 18:24:24 +0200
Subject: [PATCH 087/257] KVM: nVMX: avoid redundant double assignment of
 nested_run_pending
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

nested_run_pending is set 20 lines above and check_vmentry_prereqs()/
check_vmentry_postreqs() don't seem to be resetting it (the later, however,
checks it).

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Eduardo Valentin <eduval@amazon.com>
Reviewed-by: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d26f3c4985ba..92f027745842c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -13988,9 +13988,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
 	    check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
 		return -EINVAL;
 
-	if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
-		vmx->nested.nested_run_pending = 1;
-
 	vmx->nested.dirty_vmcs12 = true;
 	ret = enter_vmx_non_root_mode(vcpu, NULL);
 	if (ret)

From 0186ec823280f5db55ab2e6291933bebe2e92772 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 28 Aug 2018 16:22:28 +0100
Subject: [PATCH 088/257] KVM: SVM: remove unused variable dst_vaddr_end
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Variable dst_vaddr_end is being assigned but is never used hence it is
redundant and can be removed.

Cleans up clang warning:
variable 'dst_vaddr_end' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/svm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6276140044d08..4339fc4715fa9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -6747,7 +6747,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
 static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 {
 	unsigned long vaddr, vaddr_end, next_vaddr;
-	unsigned long dst_vaddr, dst_vaddr_end;
+	unsigned long dst_vaddr;
 	struct page **src_p, **dst_p;
 	struct kvm_sev_dbg debug;
 	unsigned long n;
@@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
 	size = debug.len;
 	vaddr_end = vaddr + size;
 	dst_vaddr = debug.dst_uaddr;
-	dst_vaddr_end = dst_vaddr + size;
 
 	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
 		int len, s_off, d_off;

From c4409905cd6eb42cfd06126e9226b0150e05a715 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:46 -0700
Subject: [PATCH 089/257] KVM: VMX: Do not allow reexecute_instruction() when
 skipping MMIO instr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Re-execution after an emulation decode failure is only intended to
handle a case where two or vCPUs race to write a shadowed page, i.e.
we should never re-execute an instruction as part of MMIO emulation.
As handle_ept_misconfig() is only used for MMIO emulation, it should
pass EMULTYPE_NO_REEXECUTE when using the emulator to skip an instr
in the fast-MMIO case where VM_EXIT_INSTRUCTION_LEN is invalid.

And because the cr2 value passed to x86_emulate_instruction() is only
destined for use when retrying or reexecuting, we can simply call
emulate_instruction().

Fixes: d391f1207067 ("x86/kvm/vmx: do not use vm-exit instruction length
                      for fast MMIO when running nested")
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 92f027745842c..b345ad91809c0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 		if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 			return kvm_skip_emulated_instruction(vcpu);
 		else
-			return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
-						       NULL, 0) == EMULATE_DONE;
+			return emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+								EMULATE_DONE;
 	}
 
 	return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);

From 35be0aded76b54a24dc8aa678a71bca22273e8d8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:47 -0700
Subject: [PATCH 090/257] KVM: x86: SVM: Set EMULTYPE_NO_REEXECUTE for RSM
 emulation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Re-execution after an emulation decode failure is only intended to
handle a case where two or vCPUs race to write a shadowed page, i.e.
we should never re-execute an instruction as part of RSM emulation.

Add a new helper, kvm_emulate_instruction_from_buffer(), to support
emulating from a pre-defined buffer.  This eliminates the last direct
call to x86_emulate_instruction() outside of kvm_mmu_page_fault(),
which means x86_emulate_instruction() can be unexported in a future
patch.

Fixes: 7607b7174405 ("KVM: SVM: install RSM intercept")
Cc: Brijesh Singh <brijesh.singh@amd.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 7 +++++++
 arch/x86/kvm/svm.c              | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00ddb0c9e612a..3b220b86c8e48 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1251,6 +1251,13 @@ static inline int emulate_instruction(struct kvm_vcpu *vcpu,
 			emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
 }
 
+static inline int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+						      void *insn, int insn_len)
+{
+	return x86_emulate_instruction(vcpu, 0, EMULTYPE_NO_REEXECUTE,
+				       insn, insn_len);
+}
+
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
 int kvm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 4339fc4715fa9..b3cdfde8ff5e4 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3874,8 +3874,8 @@ static int emulate_on_interception(struct vcpu_svm *svm)
 
 static int rsm_interception(struct vcpu_svm *svm)
 {
-	return x86_emulate_instruction(&svm->vcpu, 0, 0,
-				       rsm_ins_bytes, 2) == EMULATE_DONE;
+	return kvm_emulate_instruction_from_buffer(&svm->vcpu,
+					rsm_ins_bytes, 2) == EMULATE_DONE;
 }
 
 static int rdpmc_interception(struct vcpu_svm *svm)

From 8065dbd1ee0ef04321d80da7999b4f0086e0a407 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:48 -0700
Subject: [PATCH 091/257] KVM: x86: Invert emulation re-execute behavior to
 make it opt-in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Re-execution of an instruction after emulation decode failure is
intended to be used only when emulating shadow page accesses.  Invert
the flag to make allowing re-execution opt-in since that behavior is
by far in the minority.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 8 +++-----
 arch/x86/kvm/mmu.c              | 2 +-
 arch/x86/kvm/x86.c              | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3b220b86c8e48..a69ea11f3bab6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1238,7 +1238,7 @@ enum emulation_result {
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
 #define EMULTYPE_RETRY		    (1 << 3)
-#define EMULTYPE_NO_REEXECUTE	    (1 << 4)
+#define EMULTYPE_ALLOW_REEXECUTE    (1 << 4)
 #define EMULTYPE_NO_UD_ON_FAIL	    (1 << 5)
 #define EMULTYPE_VMWARE		    (1 << 6)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
@@ -1247,15 +1247,13 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 static inline int emulate_instruction(struct kvm_vcpu *vcpu,
 			int emulation_type)
 {
-	return x86_emulate_instruction(vcpu, 0,
-			emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
+	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
 }
 
 static inline int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
 						      void *insn, int insn_len)
 {
-	return x86_emulate_instruction(vcpu, 0, EMULTYPE_NO_REEXECUTE,
-				       insn, insn_len);
+	return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
 }
 
 void kvm_enable_efer_bits(u64);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a282321329b51..4508c34eef200 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		       void *insn, int insn_len)
 {
-	int r, emulation_type = EMULTYPE_RETRY;
+	int r, emulation_type = EMULTYPE_RETRY | EMULTYPE_ALLOW_REEXECUTE;
 	enum emulation_result er;
 	bool direct = vcpu->arch.mmu.direct_map;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 506bd2b4b8bb7..d6f85ea231019 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5870,7 +5870,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	gpa_t gpa = cr2;
 	kvm_pfn_t pfn;
 
-	if (emulation_type & EMULTYPE_NO_REEXECUTE)
+	if (!(emulation_type & EMULTYPE_ALLOW_REEXECUTE))
 		return false;
 
 	if (!vcpu->arch.mmu.direct_map) {

From 384bf2218e96f57118270945b1841e4dbbe9e352 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:49 -0700
Subject: [PATCH 092/257] KVM: x86: Merge EMULTYPE_RETRY and
 EMULTYPE_ALLOW_REEXECUTE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

retry_instruction() and reexecute_instruction() are a package deal,
i.e. there is no scenario where one is allowed and the other is not.
Merge their controlling emulation type flags to enforce this in code.
Name the combined flag EMULTYPE_ALLOW_RETRY to make it abundantly
clear that we are allowing re{try,execute} to occur, as opposed to
explicitly requesting retry of a previously failed instruction.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 7 +++----
 arch/x86/kvm/mmu.c              | 2 +-
 arch/x86/kvm/x86.c              | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a69ea11f3bab6..35e03b13edcb5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1237,10 +1237,9 @@ enum emulation_result {
 #define EMULTYPE_NO_DECODE	    (1 << 0)
 #define EMULTYPE_TRAP_UD	    (1 << 1)
 #define EMULTYPE_SKIP		    (1 << 2)
-#define EMULTYPE_RETRY		    (1 << 3)
-#define EMULTYPE_ALLOW_REEXECUTE    (1 << 4)
-#define EMULTYPE_NO_UD_ON_FAIL	    (1 << 5)
-#define EMULTYPE_VMWARE		    (1 << 6)
+#define EMULTYPE_ALLOW_RETRY	    (1 << 3)
+#define EMULTYPE_NO_UD_ON_FAIL	    (1 << 4)
+#define EMULTYPE_VMWARE		    (1 << 5)
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 4508c34eef200..0246a1ea7f55e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		       void *insn, int insn_len)
 {
-	int r, emulation_type = EMULTYPE_RETRY | EMULTYPE_ALLOW_REEXECUTE;
+	int r, emulation_type = EMULTYPE_ALLOW_RETRY;
 	enum emulation_result er;
 	bool direct = vcpu->arch.mmu.direct_map;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d6f85ea231019..924ce28723c43 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5870,7 +5870,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	gpa_t gpa = cr2;
 	kvm_pfn_t pfn;
 
-	if (!(emulation_type & EMULTYPE_ALLOW_REEXECUTE))
+	if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
 		return false;
 
 	if (!vcpu->arch.mmu.direct_map) {
@@ -5958,7 +5958,7 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	 */
 	vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
 
-	if (!(emulation_type & EMULTYPE_RETRY))
+	if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
 		return false;
 
 	if (x86_page_table_writing_insn(ctxt))

From 472faffacd9032164f611f56329d0025ddca55b5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:50 -0700
Subject: [PATCH 093/257] KVM: x86: Default to not allowing emulation retry in
 kvm_mmu_page_fault
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Effectively force kvm_mmu_page_fault() to opt-in to allowing retry to
make it more obvious when and why it allows emulation to be retried.
Previously this approach was less convenient due to retry and
re-execute behavior being controlled by separate flags that were also
inverted in their implementations (opt-in versus opt-out).

Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/mmu.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 0246a1ea7f55e..01fb8701ccd02 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5217,7 +5217,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		       void *insn, int insn_len)
 {
-	int r, emulation_type = EMULTYPE_ALLOW_RETRY;
+	int r, emulation_type = 0;
 	enum emulation_result er;
 	bool direct = vcpu->arch.mmu.direct_map;
 
@@ -5230,10 +5230,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	r = RET_PF_INVALID;
 	if (unlikely(error_code & PFERR_RSVD_MASK)) {
 		r = handle_mmio_page_fault(vcpu, cr2, direct);
-		if (r == RET_PF_EMULATE) {
-			emulation_type = 0;
+		if (r == RET_PF_EMULATE)
 			goto emulate;
-		}
 	}
 
 	if (r == RET_PF_INVALID) {
@@ -5260,8 +5258,16 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 		return 1;
 	}
 
-	if (mmio_info_in_cache(vcpu, cr2, direct))
-		emulation_type = 0;
+	/*
+	 * vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
+	 * optimistically try to just unprotect the page and let the processor
+	 * re-execute the instruction that caused the page fault.  Do not allow
+	 * retrying MMIO emulation, as it's not only pointless but could also
+	 * cause us to enter an infinite loop because the processor will keep
+	 * faulting on the non-existent MMIO address.
+	 */
+	if (!mmio_info_in_cache(vcpu, cr2, direct))
+		emulation_type = EMULTYPE_ALLOW_RETRY;
 emulate:
 	/*
 	 * On AMD platforms, under certain conditions insn_len may be zero on #NPF.

From 6c3dfeb6a48b1562bd5b8ec5f3317ef34d0134ef Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:51 -0700
Subject: [PATCH 094/257] KVM: x86: Do not re-{try,execute} after failed
 emulation in L2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit a6f177efaa58 ("KVM: Reenter guest after emulation failure if
due to access to non-mmio address") added reexecute_instruction() to
handle the scenario where two (or more) vCPUS race to write a shadowed
page, i.e. reexecute_instruction() is intended to return true if and
only if the instruction being emulated was accessing a shadowed page.
As L0 is only explicitly shadowing L1 tables, an emulation failure of
a nested VM instruction cannot be due to a race to write a shadowed
page and so should never be re-executed.

This fixes an issue where an "MMIO" emulation failure[1] in L2 is all
but guaranteed to result in an infinite loop when TDP is enabled.
Because "cr2" is actually an L2 GPA when TDP is enabled, calling
kvm_mmu_gva_to_gpa_write() to translate cr2 in the non-direct mapped
case (L2 is never direct mapped) will almost always yield UNMAPPED_GVA
and cause reexecute_instruction() to immediately return true.  The
!mmio_info_in_cache() check in kvm_mmu_page_fault() doesn't catch this
case because mmio_info_in_cache() returns false for a nested MMU (the
MMIO caching currently handles L1 only, e.g. to cache nested guests'
GPAs we'd have to manually flush the cache when switching between
VMs and when L1 updated its page tables controlling the nested guest).

Way back when, commit 68be0803456b ("KVM: x86: never re-execute
instruction with enabled tdp") changed reexecute_instruction() to
always return false when using TDP under the assumption that KVM would
only get into the emulator for MMIO.  Commit 95b3cf69bdf8 ("KVM: x86:
let reexecute_instruction work for tdp") effectively reverted that
behavior in order to handle the scenario where emulation failed due to
an access from L1 to the shadow page tables for L2, but it didn't
account for the case where emulation failed in L2 with TDP enabled.

All of the above logic also applies to retry_instruction(), added by
commit 1cb3f3ae5a38 ("KVM: x86: retry non-page-table writing
instructions").  An indefinite loop in retry_instruction() should be
impossible as it protects against retrying the same instruction over
and over, but it's still correct to not retry an L2 instruction in
the first place.

Fix the immediate issue by adding a check for a nested guest when
determining whether or not to allow retry in kvm_mmu_page_fault().
In addition to fixing the immediate bug, add WARN_ON_ONCE in the
retry functions since they are not designed to handle nested cases,
i.e. they need to be modified even if there is some scenario in the
future where we want to allow retrying a nested guest.

[1] This issue was encountered after commit 3a2936dedd20 ("kvm: mmu:
    Don't expose private memslots to L2") changed the page fault path
    to return KVM_PFN_NOSLOT when translating an L2 access to a
    prive memslot.  Returning KVM_PFN_NOSLOT is semantically correct
    when we want to hide a memslot from L2, i.e. there effectively is
    no defined memory region for L2, but it has the unfortunate side
    effect of making KVM think the GFN is a MMIO page, thus triggering
    emulation.  The failure occurred with in-development code that
    deliberately exposed a private memslot to L2, which L2 accessed
    with an instruction that is not emulated by KVM.

Fixes: 95b3cf69bdf8 ("KVM: x86: let reexecute_instruction work for tdp")
Fixes: 1cb3f3ae5a38 ("KVM: x86: retry non-page-table writing instructions")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Jim Mattson <jmattson@google.com>
Cc: Krish Sadhukhan <krish.sadhukhan@oracle.com>
Cc: Xiao Guangrong <xiaoguangrong@tencent.com>
Cc: stable@vger.kernel.org
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/mmu.c | 7 +++++--
 arch/x86/kvm/x86.c | 6 ++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 01fb8701ccd02..f7e83b1e0eb27 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -5264,9 +5264,12 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
 	 * re-execute the instruction that caused the page fault.  Do not allow
 	 * retrying MMIO emulation, as it's not only pointless but could also
 	 * cause us to enter an infinite loop because the processor will keep
-	 * faulting on the non-existent MMIO address.
+	 * faulting on the non-existent MMIO address.  Retrying an instruction
+	 * from a nested guest is also pointless and dangerous as we are only
+	 * explicitly shadowing L1's page tables, i.e. unprotecting something
+	 * for L1 isn't going to magically fix whatever issue cause L2 to fail.
 	 */
-	if (!mmio_info_in_cache(vcpu, cr2, direct))
+	if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
 		emulation_type = EMULTYPE_ALLOW_RETRY;
 emulate:
 	/*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 924ce28723c43..cbe2921e972b6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5873,6 +5873,9 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
 	if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
 		return false;
 
+	if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+		return false;
+
 	if (!vcpu->arch.mmu.direct_map) {
 		/*
 		 * Write permission should be allowed since only
@@ -5961,6 +5964,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
 		return false;
 
+	if (WARN_ON_ONCE(is_guest_mode(vcpu)))
+		return false;
+
 	if (x86_page_table_writing_insn(ctxt))
 		return false;
 

From 0ce97a2b627c5e26347aee298f571ddf925e5fe4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:52 -0700
Subject: [PATCH 095/257] KVM: x86: Rename emulate_instruction() to
 kvm_emulate_instruction()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lack of the kvm_ prefix gives the impression that it's a VMX or SVM
specific function, and there's no conflict that prevents adding the
kvm_ prefix.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/svm.c              | 12 ++++++------
 arch/x86/kvm/vmx.c              | 16 ++++++++--------
 arch/x86/kvm/x86.c              |  4 ++--
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 35e03b13edcb5..8c9023661351a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1243,7 +1243,7 @@ enum emulation_result {
 int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
 			    int emulation_type, void *insn, int insn_len);
 
-static inline int emulate_instruction(struct kvm_vcpu *vcpu,
+static inline int kvm_emulate_instruction(struct kvm_vcpu *vcpu,
 			int emulation_type)
 {
 	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index b3cdfde8ff5e4..89c4c5aa15f16 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
 	}
 
 	if (!svm->next_rip) {
-		if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
+		if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
 				EMULATE_DONE)
 			printk(KERN_DEBUG "%s: NOP\n", __func__);
 		return;
@@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm)
 
 	WARN_ON_ONCE(!enable_vmware_backdoor);
 
-	er = emulate_instruction(vcpu,
+	er = kvm_emulate_instruction(vcpu,
 		EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
 	if (er == EMULATE_USER_EXIT)
 		return 0;
@@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm)
 	string = (io_info & SVM_IOIO_STR_MASK) != 0;
 	in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
 	if (string)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+		return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = io_info >> 16;
 	size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm)
 static int invlpg_interception(struct vcpu_svm *svm)
 {
 	if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
-		return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+		return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
 	kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
 	return kvm_skip_emulated_instruction(&svm->vcpu);
@@ -3869,7 +3869,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
 
 static int emulate_on_interception(struct vcpu_svm *svm)
 {
-	return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 }
 
 static int rsm_interception(struct vcpu_svm *svm)
@@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
 		ret = avic_unaccel_trap_write(svm);
 	} else {
 		/* Handling Fault */
-		ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
+		ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
 	}
 
 	return ret;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b345ad91809c0..f910d33858d95 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 	 * Cause the #SS fault with 0 error code in VM86 mode.
 	 */
 	if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
-		if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
+		if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
 			if (vcpu->arch.halt_request) {
 				vcpu->arch.halt_request = 0;
 				return kvm_vcpu_halt(vcpu);
@@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 
 	if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
 		WARN_ON_ONCE(!enable_vmware_backdoor);
-		er = emulate_instruction(vcpu,
+		er = kvm_emulate_instruction(vcpu,
 			EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
 		if (er == EMULATE_USER_EXIT)
 			return 0;
@@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
 	++vcpu->stat.io_exits;
 
 	if (string)
-		return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+		return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 
 	port = exit_qualification >> 16;
 	size = (exit_qualification & 7) + 1;
@@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
 static int handle_desc(struct kvm_vcpu *vcpu)
 {
 	WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_cr(struct kvm_vcpu *vcpu)
@@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
 
 static int handle_invd(struct kvm_vcpu *vcpu)
 {
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_invlpg(struct kvm_vcpu *vcpu)
@@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
 			return kvm_skip_emulated_instruction(vcpu);
 		}
 	}
-	return emulate_instruction(vcpu, 0) == EMULATE_DONE;
+	return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
 }
 
 static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@@ -7704,7 +7704,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
 		if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
 			return kvm_skip_emulated_instruction(vcpu);
 		else
-			return emulate_instruction(vcpu, EMULTYPE_SKIP) ==
+			return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
 								EMULATE_DONE;
 	}
 
@@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
 		if (kvm_test_request(KVM_REQ_EVENT, vcpu))
 			return 1;
 
-		err = emulate_instruction(vcpu, 0);
+		err = kvm_emulate_instruction(vcpu, 0);
 
 		if (err == EMULATE_USER_EXIT) {
 			++vcpu->stat.mmio_exits;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cbe2921e972b6..915002c7f07e8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
 		emul_type = 0;
 	}
 
-	er = emulate_instruction(vcpu, emul_type);
+	er = kvm_emulate_instruction(vcpu, emul_type);
 	if (er == EMULATE_USER_EXIT)
 		return 0;
 	if (er != EMULATE_DONE)
@@ -7740,7 +7740,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
 {
 	int r;
 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
 	if (r != EMULATE_DONE)
 		return 0;

From c60658d1d983641fcdbb16f86bc2f3806d88bab4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Thu, 23 Aug 2018 13:56:53 -0700
Subject: [PATCH 096/257] KVM: x86: Unexport x86_emulate_instruction()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Allowing x86_emulate_instruction() to be called directly has led to
subtle bugs being introduced, e.g. not setting EMULTYPE_NO_REEXECUTE
in the emulation type.  While most of the blame lies on re-execute
being opt-out, exporting x86_emulate_instruction() also exposes its
cr2 parameter, which may have contributed to commit d391f1207067
("x86/kvm/vmx: do not use vm-exit instruction length for fast MMIO
when running nested") using x86_emulate_instruction() instead of
emulate_instruction() because "hey, I have a cr2!", which in turn
introduced its EMULTYPE_NO_REEXECUTE bug.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 17 +++--------------
 arch/x86/kvm/x86.c              | 14 +++++++++++++-
 arch/x86/kvm/x86.h              |  2 ++
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 8c9023661351a..e12916e7c2fbb 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1240,20 +1240,9 @@ enum emulation_result {
 #define EMULTYPE_ALLOW_RETRY	    (1 << 3)
 #define EMULTYPE_NO_UD_ON_FAIL	    (1 << 4)
 #define EMULTYPE_VMWARE		    (1 << 5)
-int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
-			    int emulation_type, void *insn, int insn_len);
-
-static inline int kvm_emulate_instruction(struct kvm_vcpu *vcpu,
-			int emulation_type)
-{
-	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
-}
-
-static inline int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
-						      void *insn, int insn_len)
-{
-	return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
-}
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+					void *insn, int insn_len);
 
 void kvm_enable_efer_bits(u64);
 bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 915002c7f07e8..542f6315444d7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6282,7 +6282,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 
 	return r;
 }
-EXPORT_SYMBOL_GPL(x86_emulate_instruction);
+
+int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
+{
+	return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
+
+int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
+					void *insn, int insn_len)
+{
+	return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
 
 static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
 			    unsigned short port)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 257f27620bc27..67b9568613f34 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
 bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
 					  int page_num);
 bool kvm_vector_hashing_enabled(void);
+int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
+			    int emulation_type, void *insn, int insn_len);
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
 				| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \

From 58f33cfe73076b6497bada4f7b5bda961ed68083 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:03:55 +0200
Subject: [PATCH 097/257] tools/kvm_stat: fix python3 issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Python3 returns a float for a regular division - switch to a division
operator that returns an integer.
Furthermore, filters return a generator object instead of the actual
list - wrap result in yet another list, which makes it still work in
both, Python2 and 3.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 56c4b3f8a01be..e10b90a8917a5 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -759,7 +759,7 @@ class DebugfsProvider(Provider):
             if len(vms) == 0:
                 self.do_read = False
 
-            self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
+            self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms))
 
         else:
             self.paths = []
@@ -1219,10 +1219,10 @@ class Tui(object):
         (x, term_width) = self.screen.getmaxyx()
         row = 2
         for line in text:
-            start = (term_width - len(line)) / 2
+            start = (term_width - len(line)) // 2
             self.screen.addstr(row, start, line)
             row += 1
-        self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
+        self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
                            curses.A_STANDOUT)
         self.screen.getkey()
 

From 617c66b9f236d20f11cecbb3f45e6d5675b2fae1 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:03:56 +0200
Subject: [PATCH 098/257] tools/kvm_stat: fix handling of invalid paths in
 debugfs provider
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When filtering by guest, kvm_stat displays garbage when the guest is
destroyed - see sample output below.
We add code to remove the invalid paths from the providers, so at least
no more garbage is displayed.
Here's a sample output to illustrate:

  kvm statistics - pid 13986 (foo)

   Event                                         Total %Total CurAvg/s
   diagnose_258                                     -2    0.0        0
   deliver_program_interruption                     -3    0.0        0
   diagnose_308                                     -4    0.0        0
   halt_poll_invalid                               -91    0.0       -6
   deliver_service_signal                         -244    0.0      -16
   halt_successful_poll                           -250    0.1      -17
   exit_pei                                       -285    0.1      -19
   exit_external_request                          -312    0.1      -21
   diagnose_9c                                    -328    0.1      -22
   userspace_handled                              -713    0.1      -47
   halt_attempted_poll                            -939    0.2      -62
   deliver_emergency_signal                      -3126    0.6     -208
   halt_wakeup                                   -7199    1.5     -481
   exit_wait_state                               -7379    1.5     -493
   diagnose_500                                 -56499   11.5    -3757
   exit_null                                    -85491   17.4    -5685
   diagnose_44                                 -133300   27.1    -8874
   exit_instruction                            -195898   39.8   -13037
   Total                                       -492063

Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index e10b90a8917a5..b9e8d0def1ab3 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -766,6 +766,13 @@ class DebugfsProvider(Provider):
             self.do_read = True
         self.reset()
 
+    def _verify_paths(self):
+        """Remove invalid paths"""
+        for path in self.paths:
+            if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)):
+                self.paths.remove(path)
+                continue
+
     def read(self, reset=0, by_guest=0):
         """Returns a dict with format:'file name / field -> current value'.
 
@@ -780,6 +787,7 @@ class DebugfsProvider(Provider):
         # If no debugfs filtering support is available, then don't read.
         if not self.do_read:
             return results
+        self._verify_paths()
 
         paths = self.paths
         if self._pid == 0:

From 710ab11ad9329d2d4b044405e328c994b19a2aa9 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:03:57 +0200
Subject: [PATCH 099/257] tools/kvm_stat: fix updates for dead guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With pid filtering active, when a guest is removed e.g. via virsh shutdown,
successive updates produce garbage.
Therefore, we add code to detect this case and prevent further body updates.
Note that when displaying the help dialog via 'h' in this case, once we exit
we're stuck with the 'Collecting data...' message till we remove the filter.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index b9e8d0def1ab3..7c92545931e3f 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1170,6 +1170,9 @@ class Tui(object):
 
             return sorted_items
 
+        if not self._is_running_guest(self.stats.pid_filter):
+            # leave final data on screen
+            return
         row = 3
         self.screen.move(row, 0)
         self.screen.clrtobot()
@@ -1327,6 +1330,12 @@ class Tui(object):
                 msg = '"' + str(val) + '": Invalid value'
         self._refresh_header()
 
+    def _is_running_guest(self, pid):
+        """Check if pid is still a running process."""
+        if not pid:
+            return True
+        return os.path.isdir(os.path.join('/proc/', str(pid)))
+
     def _show_vm_selection_by_guest(self):
         """Draws guest selection mask.
 
@@ -1354,7 +1363,7 @@ class Tui(object):
             if not guest or guest == '0':
                 break
             if guest.isdigit():
-                if not os.path.isdir(os.path.join('/proc/', guest)):
+                if not self._is_running_guest(guest):
                     msg = '"' + guest + '": Not a running process'
                     continue
                 pid = int(guest)

From 0db8b3102368755242b44f2b30f93302c70e8e82 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:03:58 +0200
Subject: [PATCH 100/257] tools/kvm_stat: don't reset stats when setting PID
 filter for debugfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When setting a PID filter in debugfs, we unnecessarily reset the
statistics, although there is no reason to do so. This behavior was
merely introduced with commit 9f114a03c6854f "tools/kvm_stat: add
interactive command 'r'", most likely to mimic the behavior of
the tracepoints provider in this respect. However, there are plenty
of differences between the two providers, so there is no reason not
to take advantage of the possibility to filter by PID without
resetting the statistics.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 7c92545931e3f..62fbb8802f603 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -764,7 +764,6 @@ class DebugfsProvider(Provider):
         else:
             self.paths = []
             self.do_read = True
-        self.reset()
 
     def _verify_paths(self):
         """Remove invalid paths"""

From 29c39f38e4e8dbf0497e81db6c985ee59259f002 Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:03:59 +0200
Subject: [PATCH 101/257] tools/kvm_stat: handle guest removals more gracefully
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When running with the DebugFS provider, removal of a guest can result in a
negative CurAvg/s, which looks rather confusing.
If so, suppress the body refresh and print a message instead.
To reproduce, have at least one guest A completely booted. Then start
another guest B (which generates a huge amount of events), then destroy B.
On the next refresh, kvm_stat should display a whole lot of negative values
in the CurAvg/s column.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 62fbb8802f603..bd620579eb6fc 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1194,6 +1194,7 @@ class Tui(object):
         # print events
         tavg = 0
         tcur = 0
+        guest_removed = False
         for key, values in get_sorted_events(self, stats):
             if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
                 break
@@ -1201,7 +1202,10 @@ class Tui(object):
                 key = self.get_gname_from_pid(key)
                 if not key:
                     continue
-            cur = int(round(values.delta / sleeptime)) if values.delta else ''
+            cur = int(round(values.delta / sleeptime)) if values.delta else 0
+            if cur < 0:
+                guest_removed = True
+                continue
             if key[0] != ' ':
                 if values.delta:
                     tcur += values.delta
@@ -1214,7 +1218,10 @@ class Tui(object):
                                values.value * 100 / float(ltotal), cur))
             row += 1
         if row == 3:
-            self.screen.addstr(4, 1, 'No matching events reported yet')
+            if guest_removed:
+                self.screen.addstr(4, 1, 'Guest removed, updating...')
+            else:
+                self.screen.addstr(4, 1, 'No matching events reported yet')
         if row > 4:
             tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
             self.screen.addstr(row, 1, '%-40s %10d        %8s' %

From 404517e40867aef60554ef497d5cf8d089a5b9cf Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:04:00 +0200
Subject: [PATCH 102/257] tools/kvm_stat: indicate dead guests as such
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For destroyed guests, kvm_stat essentially freezes with the last data
displayed. This is acceptable for users, in case they want to inspect the
final data. But it looks a bit irritating. Therefore, detect this situation
and display a respective indicator in the header.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index bd620579eb6fc..5c2422b0f2f8f 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1108,10 +1108,10 @@ class Tui(object):
                                    if len(gname) > MAX_GUEST_NAME_LEN
                                    else gname))
         if pid > 0:
-            self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
-                               .format(pid, gname), curses.A_BOLD)
+            self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
         else:
-            self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
+            self._headline = 'kvm statistics - summary'
+        self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
         if self.stats.fields_filter:
             regex = self.stats.fields_filter
             if len(regex) > MAX_REGEX_LEN:
@@ -1170,6 +1170,7 @@ class Tui(object):
             return sorted_items
 
         if not self._is_running_guest(self.stats.pid_filter):
+            self._display_guest_dead()
             # leave final data on screen
             return
         row = 3
@@ -1228,6 +1229,11 @@ class Tui(object):
                                ('Total', total, tavg), curses.A_BOLD)
         self.screen.refresh()
 
+    def _display_guest_dead(self):
+        marker = '   Guest is DEAD   '
+        y = min(len(self._headline), 80 - len(marker))
+        self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
+
     def _show_msg(self, text):
         """Display message centered text and exit on key press"""
         hint = 'Press any key to continue'

From c012a0f2677529a0ae8f53a15bd7c61dc4ca5b5e Mon Sep 17 00:00:00 2001
From: Stefan Raspl <stefan.raspl@de.ibm.com>
Date: Fri, 24 Aug 2018 14:04:01 +0200
Subject: [PATCH 103/257] tools/kvm_stat: re-animate display of dead guests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When filtering by guest (interactive commands 'p'/'g'), and the respective
guest was destroyed, detect when the guest is up again through the guest
name if possible.
I.e. when displaying events for a specific guest, it is not necessary
anymore to restart kvm_stat in case the guest is restarted.

Signed-off-by: Stefan Raspl <raspl@linux.ibm.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 tools/kvm/kvm_stat/kvm_stat | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 5c2422b0f2f8f..439b8a27488d3 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -1103,6 +1103,7 @@ class Tui(object):
             pid = self.stats.pid_filter
         self.screen.erase()
         gname = self.get_gname_from_pid(pid)
+        self._gname = gname
         if gname:
             gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
                                    if len(gname) > MAX_GUEST_NAME_LEN
@@ -1170,6 +1171,15 @@ class Tui(object):
             return sorted_items
 
         if not self._is_running_guest(self.stats.pid_filter):
+            if self._gname:
+                try: # ...to identify the guest by name in case it's back
+                    pids = self.get_pid_from_gname(self._gname)
+                    if len(pids) == 1:
+                        self._refresh_header(pids[0])
+                        self._update_pid(pids[0])
+                        return
+                except:
+                    pass
             self._display_guest_dead()
             # leave final data on screen
             return

From 2b52e2a67c86b0714eeae0d2030cb7fc14737626 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 27 Aug 2018 12:07:37 +0900
Subject: [PATCH 104/257] arc: remove redundant GCC version checks

Commit cafa0010cd51 ("Raise the minimum required gcc version to 4.6")
bumped the minimum GCC version to 4.6 for all architectures.

With GCC >= 4.6 assumed, 'upto_gcc44' is empty, 'atleast_gcc44' is y.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/Makefile | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index fb026196aaaba..99cce77ab98f2 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -43,10 +43,7 @@ ifdef CONFIG_ARC_CURR_IN_REG
 LINUXINCLUDE	+=  -include ${src}/arch/arc/include/asm/current.h
 endif
 
-upto_gcc44    :=  $(call cc-ifversion, -le, 0404, y)
-atleast_gcc44 :=  $(call cc-ifversion, -ge, 0404, y)
-
-cflags-$(atleast_gcc44)			+= -fsection-anchors
+cflags-y				+= -fsection-anchors
 
 cflags-$(CONFIG_ARC_HAS_LLSC)		+= -mlock
 cflags-$(CONFIG_ARC_HAS_SWAPE)		+= -mswape
@@ -82,11 +79,6 @@ cflags-$(disable_small_data)		+= -mno-sdata -fcall-used-gp
 cflags-$(CONFIG_CPU_BIG_ENDIAN)		+= -mbig-endian
 ldflags-$(CONFIG_CPU_BIG_ENDIAN)	+= -EB
 
-# STAR 9000518362: (fixed with binutils shipping with gcc 4.8)
-# arc-linux-uclibc-ld (buildroot) or arceb-elf32-ld (EZChip) don't accept
-# --build-id w/o "-marclinux". Default arc-elf32-ld is OK
-ldflags-$(upto_gcc44)			+= -marclinux
-
 LIBGCC	:= $(shell $(CC) $(cflags-y) --print-libgcc-file-name)
 
 # Modules with short calls might break for calls into builtin-kernel

From d49b48f088c323dbacae44dfbe56d9c985c8a2a1 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Fri, 31 Aug 2018 09:04:18 +0200
Subject: [PATCH 105/257] gpio: Fix crash due to registration race

gpiochip_add_data_with_key() adds the gpiochip to the gpio_devices list
before of_gpiochip_add() is called, but it's only the latter which sets
the ->of_xlate function pointer.  gpiochip_find() can be called by
someone else between these two actions, and it can find the chip and
call of_gpiochip_match_node_and_xlate() which leads to the following
crash due to a NULL ->of_xlate().

 Unhandled prefetch abort: page domain fault (0x01b) at 0x00000000
 Modules linked in: leds_gpio(+) gpio_generic(+)
 CPU: 0 PID: 830 Comm: insmod Not tainted 4.18.0+ #43
 Hardware name: ARM-Versatile Express
 PC is at   (null)
 LR is at of_gpiochip_match_node_and_xlate+0x2c/0x38
 Process insmod (pid: 830, stack limit = 0x(ptrval))
  (of_gpiochip_match_node_and_xlate) from  (gpiochip_find+0x48/0x84)
  (gpiochip_find) from  (of_get_named_gpiod_flags+0xa8/0x238)
  (of_get_named_gpiod_flags) from  (gpiod_get_from_of_node+0x2c/0xc8)
  (gpiod_get_from_of_node) from  (devm_fwnode_get_index_gpiod_from_child+0xb8/0x144)
  (devm_fwnode_get_index_gpiod_from_child) from  (gpio_led_probe+0x208/0x3c4 [leds_gpio])
  (gpio_led_probe [leds_gpio]) from  (platform_drv_probe+0x48/0x9c)
  (platform_drv_probe) from  (really_probe+0x1d0/0x3d4)
  (really_probe) from  (driver_probe_device+0x78/0x1c0)
  (driver_probe_device) from  (__driver_attach+0x120/0x13c)
  (__driver_attach) from  (bus_for_each_dev+0x68/0xb4)
  (bus_for_each_dev) from  (bus_add_driver+0x1a8/0x268)
  (bus_add_driver) from  (driver_register+0x78/0x10c)
  (driver_register) from  (do_one_initcall+0x54/0x1fc)
  (do_one_initcall) from  (do_init_module+0x64/0x1f4)
  (do_init_module) from  (load_module+0x2198/0x26ac)
  (load_module) from  (sys_finit_module+0xe0/0x110)
  (sys_finit_module) from  (ret_fast_syscall+0x0/0x54)

One way to fix this would be to rework the hairy registration sequence
in gpiochip_add_data_with_key(), but since I'd probably introduce a
couple of new bugs if I attempted that, simply add a check for a
non-NULL of_xlate function pointer in
of_gpiochip_match_node_and_xlate().  This works since the driver looking
for the gpio will simply fail to find the gpio and defer its probe and
be reprobed when the driver which is registering the gpiochip has fully
completed its probe.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index a4f1157d6aa08..d4e7a09598fae 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -31,6 +31,7 @@ static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data)
 	struct of_phandle_args *gpiospec = data;
 
 	return chip->gpiodev->dev.of_node == gpiospec->np &&
+				chip->of_xlate &&
 				chip->of_xlate(chip, gpiospec, NULL) >= 0;
 }
 

From 6147b1cf19651c7de297e69108b141fb30aa2349 Mon Sep 17 00:00:00 2001
From: Genki Sky <sky@genki.is>
Date: Tue, 28 Aug 2018 23:26:24 -0400
Subject: [PATCH 106/257] scripts/setlocalversion: git: Make -dirty check more
 robust

$(git diff-index) relies on the index being refreshed. This refreshing
of the index used to happen, but was removed in cdf2bc632ebc
("scripts/setlocalversion on write-protected source tree", 2013-06-14)
due to issues with a read-only filesystem.

If the index is not refreshed, one runs into problems. E.g. as
described in [0], git stores the uid in its index, so even if just the
uid has changed (or git is tricked into thinking so), then we will
think the tree is dirty. So as in [1], if you package linux-git with a
system that uses fakeroot(1), you get a "-dirty" version. Unless you
manually $(git update-index --refresh) themselves.

The simplest solution seems to be $(git status --porcelain), with an
additional flag saying "ignore untracked files". It seems clearer
about what it does, and avoids issues regarding cached indexes and
writable filesystems, but still has stable output for scripting.

[0]: https://public-inbox.org/git/0190ae30-b6c8-2a8b-b1fb-fd9d84e6dfdf@oracle.com/
[1]: https://bbs.archlinux.org/viewtopic.php?id=236702

Signed-off-by: Genki Sky <sky@genki.is>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/setlocalversion | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index 71f39410691b6..79f7dd57d571e 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -74,7 +74,7 @@ scm_version()
 		fi
 
 		# Check for uncommitted changes
-		if git diff-index --name-only HEAD | grep -qv "^scripts/package"; then
+		if git status -uno --porcelain | grep -qv '^.. scripts/package'; then
 			printf '%s' -dirty
 		fi
 

From bc8d2e20a3eb2f8d268ad7bbca878cf3acdcf389 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 30 Aug 2018 11:18:42 +0200
Subject: [PATCH 107/257] kconfig: remove a spurious self-assignment

The self assignment was probably introduced by an automated code
refactoring in
commit 694c49a7c01c ("kconfig: drop localization support").

The issue was identified by a self-assign warning when running
make menuconfig with clang.

Fixes: 694c49a7c01c ("kconfig: drop localization support")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/kconfig/mconf.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 83b5836615fb0..143c05fec1614 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -490,7 +490,6 @@ static void build_conf(struct menu *menu)
 			switch (prop->type) {
 			case P_MENU:
 				child_count++;
-				prompt = prompt;
 				if (single_menu_mode) {
 					item_make("%s%*c%s",
 						  menu->data ? "-->" : "++>",

From 0f02cfbc3d9e413d450d8d0fd660077c23f67eff Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Thu, 30 Aug 2018 11:01:21 -0700
Subject: [PATCH 108/257] MIPS: VDSO: Match data page cache colouring when D$
 aliases

When a system suffers from dcache aliasing a user program may observe
stale VDSO data from an aliased cache line. Notably this can break the
expectation that clock_gettime(CLOCK_MONOTONIC, ...) is, as its name
suggests, monotonic.

In order to ensure that users observe updates to the VDSO data page as
intended, align the user mappings of the VDSO data page such that their
cache colouring matches that of the virtual address range which the
kernel will use to update the data page - typically its unmapped address
within kseg0.

This ensures that we don't introduce aliasing cache lines for the VDSO
data page, and therefore that userland will observe updates without
requiring cache invalidation.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Reported-by: Hauke Mehrtens <hauke@hauke-m.de>
Reported-by: Rene Nielsen <rene.nielsen@microsemi.com>
Reported-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Fixes: ebb5e78cc634 ("MIPS: Initial implementation of a VDSO")
Patchwork: https://patchwork.linux-mips.org/patch/20344/
Tested-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Tested-by: Hauke Mehrtens <hauke@hauke-m.de>
Cc: James Hogan <jhogan@kernel.org>
Cc: linux-mips@linux-mips.org
Cc: stable@vger.kernel.org # v4.4+
---
 arch/mips/kernel/vdso.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 019035d7225c4..8f845f6e5f426 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -13,6 +13,7 @@
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -20,6 +21,7 @@
 
 #include <asm/abi.h>
 #include <asm/mips-cps.h>
+#include <asm/page.h>
 #include <asm/vdso.h>
 
 /* Kernel-provided data used by the VDSO. */
@@ -128,12 +130,30 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	vvar_size = gic_size + PAGE_SIZE;
 	size = vvar_size + image->size;
 
+	/*
+	 * Find a region that's large enough for us to perform the
+	 * colour-matching alignment below.
+	 */
+	if (cpu_has_dc_aliases)
+		size += shm_align_mask + 1;
+
 	base = get_unmapped_area(NULL, 0, size, 0, 0);
 	if (IS_ERR_VALUE(base)) {
 		ret = base;
 		goto out;
 	}
 
+	/*
+	 * If we suffer from dcache aliasing, ensure that the VDSO data page
+	 * mapping is coloured the same as the kernel's mapping of that memory.
+	 * This ensures that when the kernel updates the VDSO data userland
+	 * will observe it without requiring cache invalidations.
+	 */
+	if (cpu_has_dc_aliases) {
+		base = __ALIGN_MASK(base, shm_align_mask);
+		base += ((unsigned long)&vdso_data - gic_size) & shm_align_mask;
+	}
+
 	data_addr = base + gic_size;
 	vdso_addr = data_addr + PAGE_SIZE;
 

From 3fcbb8260a87efb691d837e8cd24e81f65b3eb70 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 30 Aug 2018 13:52:38 -0700
Subject: [PATCH 109/257] ARC: atomics: unbork atomic_fetch_##op()

In 4.19-rc1, Eugeniy reported weird boot and IO errors on ARC HSDK

| INFO: task syslogd:77 blocked for more than 10 seconds.
|       Not tainted 4.19.0-rc1-00007-gf213acea4e88 #40
| "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this
| message.
| syslogd         D    0    77     76 0x00000000
|
| Stack Trace:
|  __switch_to+0x0/0xac
|  __schedule+0x1b2/0x730
|  io_schedule+0x5c/0xc0
|  __lock_page+0x98/0xdc
|  find_lock_entry+0x38/0x100
|  shmem_getpage_gfp.isra.3+0x82/0xbfc
|  shmem_fault+0x46/0x138
|  handle_mm_fault+0x5bc/0x924
|  do_page_fault+0x100/0x2b8
|  ret_from_exception+0x0/0x8

He bisected to 84c6591103db ("locking/atomics,
asm-generic/bitops/lock.h: Rewrite using atomic_fetch_*()")

This commit however only unmasked the real issue introduced by commit
4aef66c8ae9 ("locking/atomic, arch/arc: Fix build") which missed the
retry-if-scond-failed branch in atomic_fetch_##op() macros.

The bisected commit started using atomic_fetch_##op() macros for building
the rest of atomics.

Fixes: 4aef66c8ae9 ("locking/atomic, arch/arc: Fix build")
Reported-by: Eugeniy Paltsev <paltsev@synopsys.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
[vgupta: wrote changelog]
---
 arch/arc/include/asm/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arc/include/asm/atomic.h b/arch/arc/include/asm/atomic.h
index 4e00727302412..158af079838d0 100644
--- a/arch/arc/include/asm/atomic.h
+++ b/arch/arc/include/asm/atomic.h
@@ -84,7 +84,7 @@ static inline int atomic_fetch_##op(int i, atomic_t *v)			\
 	"1:	llock   %[orig], [%[ctr]]		\n"		\
 	"	" #asm_op " %[val], %[orig], %[i]	\n"		\
 	"	scond   %[val], [%[ctr]]		\n"		\
-	"						\n"		\
+	"	bnz     1b				\n"		\
 	: [val]	"=&r"	(val),						\
 	  [orig] "=&r" (orig)						\
 	: [ctr]	"r"	(&v->counter),					\

From 678c8110d23c0ac7d69fda558992c31be64e7507 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Mon, 30 Jul 2018 19:26:33 +0300
Subject: [PATCH 110/257] ARC: dma [IOC]: mark DMA devices connected as
 dma-coherent

Mark DMA devices on AXS103 and HSDK boards connected through IOC
port as dma-coherent.

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/boot/dts/axc003.dtsi     | 26 ++++++++++++++++++++++++++
 arch/arc/boot/dts/axc003_idu.dtsi | 26 ++++++++++++++++++++++++++
 arch/arc/boot/dts/hsdk.dts        |  4 ++++
 3 files changed, 56 insertions(+)

diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi
index dc91c663bcc02..d75d65ddf8e31 100644
--- a/arch/arc/boot/dts/axc003.dtsi
+++ b/arch/arc/boot/dts/axc003.dtsi
@@ -93,6 +93,32 @@
 		};
 	};
 
+	/*
+	 * Mark DMA peripherals connected via IOC port as dma-coherent. We do
+	 * it via overlay because peripherals defined in axs10x_mb.dtsi are
+	 * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so
+	 * only AXS103 board has HW-coherent DMA peripherals)
+	 * We don't need to mark pgu@17000 as dma-coherent because it uses
+	 * external DMA buffer located outside of IOC aperture.
+	 */
+	axs10x_mb {
+		ethernet@0x18000 {
+			dma-coherent;
+		};
+
+		ehci@0x40000 {
+			dma-coherent;
+		};
+
+		ohci@0x60000 {
+			dma-coherent;
+		};
+
+		mmc@0x15000 {
+			dma-coherent;
+		};
+	};
+
 	/*
 	 * The DW APB ICTL intc on MB is connected to CPU intc via a
 	 * DT "invisible" DW APB GPIO block, configured to simply pass thru
diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi
index 69ff4895f2ba4..a05bb737ea639 100644
--- a/arch/arc/boot/dts/axc003_idu.dtsi
+++ b/arch/arc/boot/dts/axc003_idu.dtsi
@@ -100,6 +100,32 @@
 		};
 	};
 
+	/*
+	 * Mark DMA peripherals connected via IOC port as dma-coherent. We do
+	 * it via overlay because peripherals defined in axs10x_mb.dtsi are
+	 * used for both AXS101 and AXS103 boards and only AXS103 has IOC (so
+	 * only AXS103 board has HW-coherent DMA peripherals)
+	 * We don't need to mark pgu@17000 as dma-coherent because it uses
+	 * external DMA buffer located outside of IOC aperture.
+	 */
+	axs10x_mb {
+		ethernet@0x18000 {
+			dma-coherent;
+		};
+
+		ehci@0x40000 {
+			dma-coherent;
+		};
+
+		ohci@0x60000 {
+			dma-coherent;
+		};
+
+		mmc@0x15000 {
+			dma-coherent;
+		};
+	};
+
 	/*
 	 * This INTC is actually connected to DW APB GPIO
 	 * which acts as a wire between MB INTC and CPU INTC.
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index d00f283094d31..ef149f59929ae 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -181,6 +181,7 @@
 			resets = <&cgu_rst HSDK_ETH_RESET>;
 			reset-names = "stmmaceth";
 			mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */
+			dma-coherent;
 
 			mdio {
 				#address-cells = <1>;
@@ -199,12 +200,14 @@
 			compatible = "snps,hsdk-v1.0-ohci", "generic-ohci";
 			reg = <0x60000 0x100>;
 			interrupts = <15>;
+			dma-coherent;
 		};
 
 		ehci@40000 {
 			compatible = "snps,hsdk-v1.0-ehci", "generic-ehci";
 			reg = <0x40000 0x100>;
 			interrupts = <15>;
+			dma-coherent;
 		};
 
 		mmc@a000 {
@@ -217,6 +220,7 @@
 			clock-names = "biu", "ciu";
 			interrupts = <12>;
 			bus-width = <4>;
+			dma-coherent;
 		};
 	};
 

From 6b06546206868f723f2061d703a3c3c378dcbf4c Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:42 -0400
Subject: [PATCH 111/257] Revert "blk-throttle: fix race between
 blkcg_bio_issue_check() and cgroup_rmdir()"

This reverts commit 4c6994806f708559c2812b73501406e21ae5dcd0.

Destroying blkgs is tricky because of the nature of the relationship. A
blkg should go away when either a blkcg or a request_queue goes away.
However, blkg's pin the blkcg to ensure they remain valid. To break this
cycle, when a blkcg is offlined, blkgs put back their css ref. This
eventually lets css_free() get called which frees the blkcg.

The above commit (4c6994806f70) breaks this order of events by trying to
destroy blkgs in css_free(). As the blkgs still hold references to the
blkcg, css_free() is never called.

The race between blkcg_bio_issue_check() and cgroup_rmdir() will be
addressed in the following patch by delaying destruction of a blkg until
all writeback associated with the blkcg has been finished.

Fixes: 4c6994806f70 ("blk-throttle: fix race between blkcg_bio_issue_check() and cgroup_rmdir()")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 78 ++++++++------------------------------
 include/linux/blk-cgroup.h |  1 -
 2 files changed, 16 insertions(+), 63 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 694595b29b8fd..2998e4f095d1b 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -310,28 +310,11 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 	}
 }
 
-static void blkg_pd_offline(struct blkcg_gq *blkg)
-{
-	int i;
-
-	lockdep_assert_held(blkg->q->queue_lock);
-	lockdep_assert_held(&blkg->blkcg->lock);
-
-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
-		struct blkcg_policy *pol = blkcg_policy[i];
-
-		if (blkg->pd[i] && !blkg->pd[i]->offline &&
-		    pol->pd_offline_fn) {
-			pol->pd_offline_fn(blkg->pd[i]);
-			blkg->pd[i]->offline = true;
-		}
-	}
-}
-
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
 	struct blkcg_gq *parent = blkg->parent;
+	int i;
 
 	lockdep_assert_held(blkg->q->queue_lock);
 	lockdep_assert_held(&blkcg->lock);
@@ -340,6 +323,13 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
 	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_offline_fn)
+			pol->pd_offline_fn(blkg->pd[i]);
+	}
+
 	if (parent) {
 		blkg_rwstat_add_aux(&parent->stat_bytes, &blkg->stat_bytes);
 		blkg_rwstat_add_aux(&parent->stat_ios, &blkg->stat_ios);
@@ -382,7 +372,6 @@ static void blkg_destroy_all(struct request_queue *q)
 		struct blkcg *blkcg = blkg->blkcg;
 
 		spin_lock(&blkcg->lock);
-		blkg_pd_offline(blkg);
 		blkg_destroy(blkg);
 		spin_unlock(&blkcg->lock);
 	}
@@ -1058,54 +1047,21 @@ static struct cftype blkcg_legacy_files[] = {
  * @css: css of interest
  *
  * This function is called when @css is about to go away and responsible
- * for offlining all blkgs pd and killing all wbs associated with @css.
- * blkgs pd offline should be done while holding both q and blkcg locks.
- * As blkcg lock is nested inside q lock, this function performs reverse
- * double lock dancing.
+ * for shooting down all blkgs associated with @css.  blkgs should be
+ * removed while holding both q and blkcg locks.  As blkcg lock is nested
+ * inside q lock, this function performs reverse double lock dancing.
  *
  * This is the blkcg counterpart of ioc_release_fn().
  */
 static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
-	struct blkcg_gq *blkg;
 
 	spin_lock_irq(&blkcg->lock);
 
-	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) {
-		struct request_queue *q = blkg->q;
-
-		if (spin_trylock(q->queue_lock)) {
-			blkg_pd_offline(blkg);
-			spin_unlock(q->queue_lock);
-		} else {
-			spin_unlock_irq(&blkcg->lock);
-			cpu_relax();
-			spin_lock_irq(&blkcg->lock);
-		}
-	}
-
-	spin_unlock_irq(&blkcg->lock);
-
-	wb_blkcg_offline(blkcg);
-}
-
-/**
- * blkcg_destroy_all_blkgs - destroy all blkgs associated with a blkcg
- * @blkcg: blkcg of interest
- *
- * This function is called when blkcg css is about to free and responsible for
- * destroying all blkgs associated with @blkcg.
- * blkgs should be removed while holding both q and blkcg locks. As blkcg lock
- * is nested inside q lock, this function performs reverse double lock dancing.
- */
-static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
-{
-	spin_lock_irq(&blkcg->lock);
 	while (!hlist_empty(&blkcg->blkg_list)) {
 		struct blkcg_gq *blkg = hlist_entry(blkcg->blkg_list.first,
-						    struct blkcg_gq,
-						    blkcg_node);
+						struct blkcg_gq, blkcg_node);
 		struct request_queue *q = blkg->q;
 
 		if (spin_trylock(q->queue_lock)) {
@@ -1117,7 +1073,10 @@ static void blkcg_destroy_all_blkgs(struct blkcg *blkcg)
 			spin_lock_irq(&blkcg->lock);
 		}
 	}
+
 	spin_unlock_irq(&blkcg->lock);
+
+	wb_blkcg_offline(blkcg);
 }
 
 static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1125,8 +1084,6 @@ static void blkcg_css_free(struct cgroup_subsys_state *css)
 	struct blkcg *blkcg = css_to_blkcg(css);
 	int i;
 
-	blkcg_destroy_all_blkgs(blkcg);
-
 	mutex_lock(&blkcg_pol_mutex);
 
 	list_del(&blkcg->all_blkcgs_node);
@@ -1480,11 +1437,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
 
 	list_for_each_entry(blkg, &q->blkg_list, q_node) {
 		if (blkg->pd[pol->plid]) {
-			if (!blkg->pd[pol->plid]->offline &&
-			    pol->pd_offline_fn) {
+			if (pol->pd_offline_fn)
 				pol->pd_offline_fn(blkg->pd[pol->plid]);
-				blkg->pd[pol->plid]->offline = true;
-			}
 			pol->pd_free_fn(blkg->pd[pol->plid]);
 			blkg->pd[pol->plid] = NULL;
 		}
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 34aec30e06c73..1615cdd4c7979 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -89,7 +89,6 @@ struct blkg_policy_data {
 	/* the blkg and policy id this per-policy data belongs to */
 	struct blkcg_gq			*blkg;
 	int				plid;
-	bool				offline;
 };
 
 /*

From 59b57717fff8b562825d9d25e0180ad7e8048ca9 Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:43 -0400
Subject: [PATCH 112/257] blkcg: delay blkg destruction until after writeback
 has finished

Currently, blkcg destruction relies on a sequence of events:
  1. Destruction starts. blkcg_css_offline() is called and blkgs
     release their reference to the blkcg. This immediately destroys
     the cgwbs (writeback).
  2. With blkgs giving up their reference, the blkcg ref count should
     become zero and eventually call blkcg_css_free() which finally
     frees the blkcg.

Jiufei Xue reported that there is a race between blkcg_bio_issue_check()
and cgroup_rmdir(). To remedy this, blkg destruction becomes contingent
on the completion of all writeback associated with the blkcg. A count of
the number of cgwbs is maintained and once that goes to zero, blkg
destruction can follow. This should prevent premature blkg destruction
related to writeback.

The new process for blkcg cleanup is as follows:
  1. Destruction starts. blkcg_css_offline() is called which offlines
     writeback. Blkg destruction is delayed on the cgwb_refcnt count to
     avoid punting potentially large amounts of outstanding writeback
     to root while maintaining any ongoing policies. Here, the base
     cgwb_refcnt is put back.
  2. When the cgwb_refcnt becomes zero, blkcg_destroy_blkgs() is called
     and handles destruction of blkgs. This is where the css reference
     held by each blkg is released.
  3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
     This finally frees the blkg.

It seems in the past blk-throttle didn't do the most understandable
things with taking data from a blkg while associating with current. So,
the simplification and unification of what blk-throttle is doing caused
this.

Fixes: 08e18eab0c579 ("block: add bi_blkg to the bio for cgroups")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c         | 53 ++++++++++++++++++++++++++++++++------
 include/linux/blk-cgroup.h | 44 +++++++++++++++++++++++++++++++
 mm/backing-dev.c           |  5 ++++
 3 files changed, 94 insertions(+), 8 deletions(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2998e4f095d1b..c19f9078da1ed 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1042,21 +1042,59 @@ static struct cftype blkcg_legacy_files[] = {
 	{ }	/* terminate */
 };
 
+/*
+ * blkcg destruction is a three-stage process.
+ *
+ * 1. Destruction starts.  The blkcg_css_offline() callback is invoked
+ *    which offlines writeback.  Here we tie the next stage of blkg destruction
+ *    to the completion of writeback associated with the blkcg.  This lets us
+ *    avoid punting potentially large amounts of outstanding writeback to root
+ *    while maintaining any ongoing policies.  The next stage is triggered when
+ *    the nr_cgwbs count goes to zero.
+ *
+ * 2. When the nr_cgwbs count goes to zero, blkcg_destroy_blkgs() is called
+ *    and handles the destruction of blkgs.  Here the css reference held by
+ *    the blkg is put back eventually allowing blkcg_css_free() to be called.
+ *    This work may occur in cgwb_release_workfn() on the cgwb_release
+ *    workqueue.  Any submitted ios that fail to get the blkg ref will be
+ *    punted to the root_blkg.
+ *
+ * 3. Once the blkcg ref count goes to zero, blkcg_css_free() is called.
+ *    This finally frees the blkcg.
+ */
+
 /**
  * blkcg_css_offline - cgroup css_offline callback
  * @css: css of interest
  *
- * This function is called when @css is about to go away and responsible
- * for shooting down all blkgs associated with @css.  blkgs should be
- * removed while holding both q and blkcg locks.  As blkcg lock is nested
- * inside q lock, this function performs reverse double lock dancing.
- *
- * This is the blkcg counterpart of ioc_release_fn().
+ * This function is called when @css is about to go away.  Here the cgwbs are
+ * offlined first and only once writeback associated with the blkcg has
+ * finished do we start step 2 (see above).
  */
 static void blkcg_css_offline(struct cgroup_subsys_state *css)
 {
 	struct blkcg *blkcg = css_to_blkcg(css);
 
+	/* this prevents anyone from attaching or migrating to this blkcg */
+	wb_blkcg_offline(blkcg);
+
+	/* put the base cgwb reference allowing step 2 to be triggered */
+	blkcg_cgwb_put(blkcg);
+}
+
+/**
+ * blkcg_destroy_blkgs - responsible for shooting down blkgs
+ * @blkcg: blkcg of interest
+ *
+ * blkgs should be removed while holding both q and blkcg locks.  As blkcg lock
+ * is nested inside q lock, this function performs reverse double lock dancing.
+ * Destroying the blkgs releases the reference held on the blkcg's css allowing
+ * blkcg_css_free to eventually be called.
+ *
+ * This is the blkcg counterpart of ioc_release_fn().
+ */
+void blkcg_destroy_blkgs(struct blkcg *blkcg)
+{
 	spin_lock_irq(&blkcg->lock);
 
 	while (!hlist_empty(&blkcg->blkg_list)) {
@@ -1075,8 +1113,6 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
 	}
 
 	spin_unlock_irq(&blkcg->lock);
-
-	wb_blkcg_offline(blkcg);
 }
 
 static void blkcg_css_free(struct cgroup_subsys_state *css)
@@ -1146,6 +1182,7 @@ blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&blkcg->cgwb_list);
+	refcount_set(&blkcg->cgwb_refcnt, 1);
 #endif
 	list_add_tail(&blkcg->all_blkcgs_node, &all_blkcgs);
 
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1615cdd4c7979..6d766a19f2bbb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -56,6 +56,7 @@ struct blkcg {
 	struct list_head		all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct list_head		cgwb_list;
+	refcount_t			cgwb_refcnt;
 #endif
 };
 
@@ -386,6 +387,49 @@ static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
 	return cpd ? cpd->blkcg : NULL;
 }
 
+extern void blkcg_destroy_blkgs(struct blkcg *blkcg);
+
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+/**
+ * blkcg_cgwb_get - get a reference for blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ */
+static inline void blkcg_cgwb_get(struct blkcg *blkcg)
+{
+	refcount_inc(&blkcg->cgwb_refcnt);
+}
+
+/**
+ * blkcg_cgwb_put - put a reference for @blkcg->cgwb_list
+ * @blkcg: blkcg of interest
+ *
+ * This is used to track the number of active wb's related to a blkcg.
+ * When this count goes to zero, all active wb has finished so the
+ * blkcg can continue destruction by calling blkcg_destroy_blkgs().
+ * This work may occur in cgwb_release_workfn() on the cgwb_release
+ * workqueue.
+ */
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+	if (refcount_dec_and_test(&blkcg->cgwb_refcnt))
+		blkcg_destroy_blkgs(blkcg);
+}
+
+#else
+
+static inline void blkcg_cgwb_get(struct blkcg *blkcg) { }
+
+static inline void blkcg_cgwb_put(struct blkcg *blkcg)
+{
+	/* wb isn't being accounted, so trigger destruction right away */
+	blkcg_destroy_blkgs(blkcg);
+}
+
+#endif
+
 /**
  * blkg_path - format cgroup path of blkg
  * @blkg: blkg of interest
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f5981e9d6ae2e..8a8bb8796c6c4 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -491,6 +491,7 @@ static void cgwb_release_workfn(struct work_struct *work)
 {
 	struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
 						release_work);
+	struct blkcg *blkcg = css_to_blkcg(wb->blkcg_css);
 
 	mutex_lock(&wb->bdi->cgwb_release_mutex);
 	wb_shutdown(wb);
@@ -499,6 +500,9 @@ static void cgwb_release_workfn(struct work_struct *work)
 	css_put(wb->blkcg_css);
 	mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
+	/* triggers blkg destruction if cgwb_refcnt becomes zero */
+	blkcg_cgwb_put(blkcg);
+
 	fprop_local_destroy_percpu(&wb->memcg_completions);
 	percpu_ref_exit(&wb->refcnt);
 	wb_exit(wb);
@@ -597,6 +601,7 @@ static int cgwb_create(struct backing_dev_info *bdi,
 			list_add_tail_rcu(&wb->bdi_node, &bdi->wb_list);
 			list_add(&wb->memcg_node, memcg_cgwb_list);
 			list_add(&wb->blkcg_node, blkcg_cgwb_list);
+			blkcg_cgwb_get(blkcg);
 			css_get(memcg_css);
 			css_get(blkcg_css);
 		}

From 3111885015b458c97b4cf272e2a87f1d6f0ed06a Mon Sep 17 00:00:00 2001
From: "Dennis Zhou (Facebook)" <dennisszhou@gmail.com>
Date: Fri, 31 Aug 2018 16:22:44 -0400
Subject: [PATCH 113/257] blkcg: use tryget logic when associating a blkg with
 a bio

There is a very small change a bio gets caught up in a really
unfortunate race between a task migration, cgroup exiting, and itself
trying to associate with a blkg. This is due to css offlining being
performed after the css->refcnt is killed which triggers removal of
blkgs that reach their blkg->refcnt of 0.

To avoid this, association with a blkg should use tryget and fallback to
using the root_blkg.

Fixes: 08e18eab0c579 ("block: add bi_blkg to the bio for cgroups")
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Cc: Jiufei Xue <jiufei.xue@linux.alibaba.com>
Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c          | 3 ++-
 block/blk-throttle.c | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index b12966e415d3f..8c680a776171c 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -2015,7 +2015,8 @@ int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
 	if (unlikely(bio->bi_blkg))
 		return -EBUSY;
-	blkg_get(blkg);
+	if (!blkg_try_get(blkg))
+		return -ENODEV;
 	bio->bi_blkg = blkg;
 	return 0;
 }
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index a3eede00d3020..01d0620a4e4a5 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2129,8 +2129,9 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
 static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
 {
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW
-	if (bio->bi_css)
-		bio_associate_blkg(bio, tg_to_blkg(tg));
+	/* fallback to root_blkg if we fail to get a blkg ref */
+	if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
+		bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
 	bio_issue_init(&bio->bi_issue, bio_sectors(bio));
 #endif
 }

From e254de6bcf3f5b6e78a92ac95fb91acef8adfe1a Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@fb.com>
Date: Wed, 29 Aug 2018 11:05:42 -0700
Subject: [PATCH 114/257] md/raid5-cache: disable reshape completely

We don't support reshape yet if an array supports log device. Previously we
determine the fact by checking ->log. However, ->log could be NULL after a log
device is removed, but the array is still marked to support log device. Don't
allow reshape in this case too. User can disable log device support by setting
'consistency_policy' to 'resync' then do reshape.

Reported-by: Xiao Ni <xni@redhat.com>
Tested-by: Xiao Ni <xni@redhat.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid5-log.h | 5 +++++
 drivers/md/raid5.c     | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index a001808a2b77d..bfb8114070614 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -46,6 +46,11 @@ extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
 extern void ppl_quiesce(struct r5conf *conf, int quiesce);
 extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);
 
+static inline bool raid5_has_log(struct r5conf *conf)
+{
+	return test_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
+}
+
 static inline bool raid5_has_ppl(struct r5conf *conf)
 {
 	return test_bit(MD_HAS_PPL, &conf->mddev->flags);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 4ce0d7502fad8..e4e98f47865de 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -733,7 +733,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
 {
 	struct r5conf *conf = sh->raid_conf;
 
-	if (conf->log || raid5_has_ppl(conf))
+	if (raid5_has_log(conf) || raid5_has_ppl(conf))
 		return false;
 	return test_bit(STRIPE_BATCH_READY, &sh->state) &&
 		!test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
@@ -7737,7 +7737,7 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors)
 	sector_t newsize;
 	struct r5conf *conf = mddev->private;
 
-	if (conf->log || raid5_has_ppl(conf))
+	if (raid5_has_log(conf) || raid5_has_ppl(conf))
 		return -EINVAL;
 	sectors &= ~((sector_t)conf->chunk_sectors - 1);
 	newsize = raid5_size(mddev, sectors, mddev->raid_disks);
@@ -7788,7 +7788,7 @@ static int check_reshape(struct mddev *mddev)
 {
 	struct r5conf *conf = mddev->private;
 
-	if (conf->log || raid5_has_ppl(conf))
+	if (raid5_has_log(conf) || raid5_has_ppl(conf))
 		return -EINVAL;
 	if (mddev->delta_disks == 0 &&
 	    mddev->new_layout == mddev->layout &&

From 1d0ffd264204eba1861865560f1f7f7a92919384 Mon Sep 17 00:00:00 2001
From: Xiao Ni <xni@redhat.com>
Date: Thu, 30 Aug 2018 15:57:09 +0800
Subject: [PATCH 115/257] RAID10 BUG_ON in raise_barrier when force is true and
 conf->barrier is 0

In raid10 reshape_request it gets max_sectors in read_balance. If the underlayer disks
have bad blocks, the max_sectors is less than last. It will call goto read_more many
times. It calls raise_barrier(conf, sectors_done != 0) every time. In this condition
sectors_done is not 0. So the value passed to the argument force of raise_barrier is
true.

In raise_barrier it checks conf->barrier when force is true. If force is true and
conf->barrier is 0, it panic. In this case reshape_request submits bio to under layer
disks. And in the callback function of the bio it calls lower_barrier. If the bio
finishes before calling raise_barrier again, it can trigger the BUG_ON.

Add one pair of raise_barrier/lower_barrier to fix this bug.

Signed-off-by: Xiao Ni <xni@redhat.com>
Suggested-by: Neil Brown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/raid10.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9818980494914..d6f7978b4449e 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4529,11 +4529,12 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 		allow_barrier(conf);
 	}
 
+	raise_barrier(conf, 0);
 read_more:
 	/* Now schedule reads for blocks from sector_nr to last */
 	r10_bio = raid10_alloc_init_r10buf(conf);
 	r10_bio->state = 0;
-	raise_barrier(conf, sectors_done != 0);
+	raise_barrier(conf, 1);
 	atomic_set(&r10_bio->remaining, 0);
 	r10_bio->mddev = mddev;
 	r10_bio->sector = sector_nr;
@@ -4629,6 +4630,8 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
 	if (sector_nr <= last)
 		goto read_more;
 
+	lower_barrier(conf);
+
 	/* Now that we have done the whole section we can
 	 * update reshape_progress
 	 */

From 41a95041126522a921fb73df22cbdd520dfdebad Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <gqjiang@suse.com>
Date: Fri, 31 Aug 2018 10:05:57 +0800
Subject: [PATCH 116/257] md-cluster: release RESYNC lock after the last resync
 message

All the RESYNC messages are sent with resync lock held, the only
exception is resync_finish which releases resync_lockres before
send the last resync message, this should be changed as well.
Otherwise, we can see deadlock issue as follows:

clustermd2-gqjiang2:~ # cat /proc/mdstat
Personalities : [raid10] [raid1]
md0 : active raid1 sdg[0] sdf[1]
      134144 blocks super 1.2 [2/2] [UU]
      [===================>.]  resync = 99.6% (134144/134144) finish=0.0min speed=26K/sec
      bitmap: 1/1 pages [4KB], 65536KB chunk

unused devices: <none>
clustermd2-gqjiang2:~ # ps aux|grep md|grep D
root     20497  0.0  0.0      0     0 ?        D    16:00   0:00 [md0_raid1]
clustermd2-gqjiang2:~ # cat /proc/20497/stack
[<ffffffffc05ff51e>] dlm_lock_sync+0x8e/0xc0 [md_cluster]
[<ffffffffc05ff7e8>] __sendmsg+0x98/0x130 [md_cluster]
[<ffffffffc05ff900>] sendmsg+0x20/0x30 [md_cluster]
[<ffffffffc05ffc35>] resync_info_update+0xb5/0xc0 [md_cluster]
[<ffffffffc0593e84>] md_reap_sync_thread+0x134/0x170 [md_mod]
[<ffffffffc059514c>] md_check_recovery+0x28c/0x510 [md_mod]
[<ffffffffc060c882>] raid1d+0x42/0x800 [raid1]
[<ffffffffc058ab61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9a0a5b3f>] kthread+0xff/0x140
[<ffffffff9a800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff

clustermd-gqjiang1:~ # ps aux|grep md|grep D
root     20531  0.0  0.0      0     0 ?        D    16:00   0:00 [md0_raid1]
root     20537  0.0  0.0      0     0 ?        D    16:00   0:00 [md0_cluster_rec]
root     20676  0.0  0.0      0     0 ?        D    16:01   0:00 [md0_resync]
clustermd-gqjiang1:~ # cat /proc/mdstat
Personalities : [raid10] [raid1]
md0 : active raid1 sdf[1] sdg[0]
      134144 blocks super 1.2 [2/2] [UU]
      [===================>.]  resync = 97.3% (131072/134144) finish=8076.8min speed=0K/sec
      bitmap: 1/1 pages [4KB], 65536KB chunk

unused devices: <none>
clustermd-gqjiang1:~ # cat /proc/20531/stack
[<ffffffffc080974d>] metadata_update_start+0xcd/0xd0 [md_cluster]
[<ffffffffc079c897>] md_update_sb.part.61+0x97/0x820 [md_mod]
[<ffffffffc079f15b>] md_check_recovery+0x29b/0x510 [md_mod]
[<ffffffffc0816882>] raid1d+0x42/0x800 [raid1]
[<ffffffffc0794b61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9e0a5b3f>] kthread+0xff/0x140
[<ffffffff9e800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff
clustermd-gqjiang1:~ # cat /proc/20537/stack
[<ffffffffc0813222>] freeze_array+0xf2/0x140 [raid1]
[<ffffffffc080a56e>] recv_daemon+0x41e/0x580 [md_cluster]
[<ffffffffc0794b61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9e0a5b3f>] kthread+0xff/0x140
[<ffffffff9e800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff
clustermd-gqjiang1:~ # cat /proc/20676/stack
[<ffffffffc080951e>] dlm_lock_sync+0x8e/0xc0 [md_cluster]
[<ffffffffc080957f>] lock_token+0x2f/0xa0 [md_cluster]
[<ffffffffc0809622>] lock_comm+0x32/0x90 [md_cluster]
[<ffffffffc08098f5>] sendmsg+0x15/0x30 [md_cluster]
[<ffffffffc0809c0a>] resync_info_update+0x8a/0xc0 [md_cluster]
[<ffffffffc08130ba>] raid1_sync_request+0xa9a/0xb10 [raid1]
[<ffffffffc079b8ea>] md_do_sync+0xbaa/0xf90 [md_mod]
[<ffffffffc0794b61>] md_thread+0x121/0x150 [md_mod]
[<ffffffff9e0a5b3f>] kthread+0xff/0x140
[<ffffffff9e800235>] ret_from_fork+0x35/0x40
[<ffffffffffffffff>] 0xffffffffffffffff

Reviewed-by: NeilBrown <neilb@suse.com>
Signed-off-by: Guoqing Jiang <gqjiang@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
---
 drivers/md/md-cluster.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 94329e03001ec..0b2af6e74fc37 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1276,18 +1276,18 @@ static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
 static int resync_finish(struct mddev *mddev)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
+	int ret = 0;
 
 	clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
-	dlm_unlock_sync(cinfo->resync_lockres);
 
 	/*
 	 * If resync thread is interrupted so we can't say resync is finished,
 	 * another node will launch resync thread to continue.
 	 */
-	if (test_bit(MD_CLOSING, &mddev->flags))
-		return 0;
-	else
-		return resync_info_update(mddev, 0, 0);
+	if (!test_bit(MD_CLOSING, &mddev->flags))
+		ret = resync_info_update(mddev, 0, 0);
+	dlm_unlock_sync(cinfo->resync_lockres);
+	return ret;
 }
 
 static int area_resyncing(struct mddev *mddev, int direction,

From 3a7ad0634f0986d807772ba74f66f7c3a73612e5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 29 Aug 2018 11:50:12 -0700
Subject: [PATCH 117/257] Revert "packet: switch kvzalloc to allocate memory"

This reverts commit 71e41286203c017d24f041a7cd71abea7ca7b1e0.

mmap()/munmap() can not be backed by kmalloced pages :

We fault in :

    VM_BUG_ON_PAGE(PageSlab(page), page);

    unmap_single_vma+0x8a/0x110
    unmap_vmas+0x4b/0x90
    unmap_region+0xc9/0x140
    do_munmap+0x274/0x360
    vm_munmap+0x81/0xc0
    SyS_munmap+0x2b/0x40
    do_syscall_64+0x13e/0x1c0
    entry_SYSCALL_64_after_hwframe+0x42/0xb7

Fixes: 71e41286203c ("packet: switch kvzalloc to allocate memory")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: John Sperbeck <jsperbeck@google.com>
Bisected-by: John Sperbeck <jsperbeck@google.com>
Cc: Zhang Yu <zhangyu31@baidu.com>
Cc: Li RongQing <lirongqing@baidu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/packet/af_packet.c | 44 +++++++++++++++++++++++++++++-------------
 net/packet/internal.h  |  1 +
 2 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 5610061e7f2e0..75c92a87e7b24 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4137,36 +4137,52 @@ static const struct vm_operations_struct packet_mmap_ops = {
 	.close	=	packet_mm_close,
 };
 
-static void free_pg_vec(struct pgv *pg_vec, unsigned int len)
+static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
+			unsigned int len)
 {
 	int i;
 
 	for (i = 0; i < len; i++) {
 		if (likely(pg_vec[i].buffer)) {
-			kvfree(pg_vec[i].buffer);
+			if (is_vmalloc_addr(pg_vec[i].buffer))
+				vfree(pg_vec[i].buffer);
+			else
+				free_pages((unsigned long)pg_vec[i].buffer,
+					   order);
 			pg_vec[i].buffer = NULL;
 		}
 	}
 	kfree(pg_vec);
 }
 
-static char *alloc_one_pg_vec_page(unsigned long size)
+static char *alloc_one_pg_vec_page(unsigned long order)
 {
 	char *buffer;
+	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
+			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
 
-	buffer = kvzalloc(size, GFP_KERNEL);
+	buffer = (char *) __get_free_pages(gfp_flags, order);
 	if (buffer)
 		return buffer;
 
-	buffer = kvzalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+	/* __get_free_pages failed, fall back to vmalloc */
+	buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
+	if (buffer)
+		return buffer;
 
-	return buffer;
+	/* vmalloc failed, lets dig into swap here */
+	gfp_flags &= ~__GFP_NORETRY;
+	buffer = (char *) __get_free_pages(gfp_flags, order);
+	if (buffer)
+		return buffer;
+
+	/* complete and utter failure */
+	return NULL;
 }
 
-static struct pgv *alloc_pg_vec(struct tpacket_req *req)
+static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
 {
 	unsigned int block_nr = req->tp_block_nr;
-	unsigned long size = req->tp_block_size;
 	struct pgv *pg_vec;
 	int i;
 
@@ -4175,7 +4191,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req)
 		goto out;
 
 	for (i = 0; i < block_nr; i++) {
-		pg_vec[i].buffer = alloc_one_pg_vec_page(size);
+		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
 		if (unlikely(!pg_vec[i].buffer))
 			goto out_free_pgvec;
 	}
@@ -4184,7 +4200,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req)
 	return pg_vec;
 
 out_free_pgvec:
-	free_pg_vec(pg_vec, block_nr);
+	free_pg_vec(pg_vec, order, block_nr);
 	pg_vec = NULL;
 	goto out;
 }
@@ -4194,9 +4210,9 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 {
 	struct pgv *pg_vec = NULL;
 	struct packet_sock *po = pkt_sk(sk);
+	int was_running, order = 0;
 	struct packet_ring_buffer *rb;
 	struct sk_buff_head *rb_queue;
-	int was_running;
 	__be16 num;
 	int err = -EINVAL;
 	/* Added to avoid minimal code churn */
@@ -4258,7 +4274,8 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 			goto out;
 
 		err = -ENOMEM;
-		pg_vec = alloc_pg_vec(req);
+		order = get_order(req->tp_block_size);
+		pg_vec = alloc_pg_vec(req, order);
 		if (unlikely(!pg_vec))
 			goto out;
 		switch (po->tp_version) {
@@ -4312,6 +4329,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 		rb->frame_size = req->tp_frame_size;
 		spin_unlock_bh(&rb_queue->lock);
 
+		swap(rb->pg_vec_order, order);
 		swap(rb->pg_vec_len, req->tp_block_nr);
 
 		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
@@ -4337,7 +4355,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
 	}
 
 	if (pg_vec)
-		free_pg_vec(pg_vec, req->tp_block_nr);
+		free_pg_vec(pg_vec, order, req->tp_block_nr);
 out:
 	return err;
 }
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 8f50036f62f05..3bb7c5fb3bff2 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -64,6 +64,7 @@ struct packet_ring_buffer {
 	unsigned int		frame_size;
 	unsigned int		frame_max;
 
+	unsigned int		pg_vec_order;
 	unsigned int		pg_vec_pages;
 	unsigned int		pg_vec_len;
 

From 9ad716b95fd6c6be46a4f2d5936e514b5bcd744d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <jakub.kicinski@netronome.com>
Date: Wed, 29 Aug 2018 12:46:08 -0700
Subject: [PATCH 118/257] nfp: wait for posted reconfigs when disabling the
 device

To avoid leaking a running timer we need to wait for the
posted reconfigs after netdev is unregistered.  In common
case the process of deinitializing the device will perform
synchronous reconfigs which wait for posted requests, but
especially with VXLAN ports being actively added and removed
there can be a race condition leaving a timer running after
adapter structure is freed leading to a crash.

Add an explicit flush after deregistering and for a good
measure a warning to check if timer is running just before
structures are freed.

Fixes: 3d780b926a12 ("nfp: add async reconfiguration mechanism")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Dirk van der Merwe <dirk.vandermerwe@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/netronome/nfp/nfp_net_common.c   | 48 +++++++++++++------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index a8b9fbab5f733..253bdaef15055 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -229,29 +229,16 @@ static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
 	spin_unlock_bh(&nn->reconfig_lock);
 }
 
-/**
- * nfp_net_reconfig() - Reconfigure the firmware
- * @nn:      NFP Net device to reconfigure
- * @update:  The value for the update field in the BAR config
- *
- * Write the update word to the BAR and ping the reconfig queue.  The
- * poll until the firmware has acknowledged the update by zeroing the
- * update word.
- *
- * Return: Negative errno on error, 0 on success
- */
-int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+static void nfp_net_reconfig_sync_enter(struct nfp_net *nn)
 {
 	bool cancelled_timer = false;
 	u32 pre_posted_requests;
-	int ret;
 
 	spin_lock_bh(&nn->reconfig_lock);
 
 	nn->reconfig_sync_present = true;
 
 	if (nn->reconfig_timer_active) {
-		del_timer(&nn->reconfig_timer);
 		nn->reconfig_timer_active = false;
 		cancelled_timer = true;
 	}
@@ -260,14 +247,43 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update)
 
 	spin_unlock_bh(&nn->reconfig_lock);
 
-	if (cancelled_timer)
+	if (cancelled_timer) {
+		del_timer_sync(&nn->reconfig_timer);
 		nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);
+	}
 
 	/* Run the posted reconfigs which were issued before we started */
 	if (pre_posted_requests) {
 		nfp_net_reconfig_start(nn, pre_posted_requests);
 		nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
 	}
+}
+
+static void nfp_net_reconfig_wait_posted(struct nfp_net *nn)
+{
+	nfp_net_reconfig_sync_enter(nn);
+
+	spin_lock_bh(&nn->reconfig_lock);
+	nn->reconfig_sync_present = false;
+	spin_unlock_bh(&nn->reconfig_lock);
+}
+
+/**
+ * nfp_net_reconfig() - Reconfigure the firmware
+ * @nn:      NFP Net device to reconfigure
+ * @update:  The value for the update field in the BAR config
+ *
+ * Write the update word to the BAR and ping the reconfig queue.  The
+ * poll until the firmware has acknowledged the update by zeroing the
+ * update word.
+ *
+ * Return: Negative errno on error, 0 on success
+ */
+int nfp_net_reconfig(struct nfp_net *nn, u32 update)
+{
+	int ret;
+
+	nfp_net_reconfig_sync_enter(nn);
 
 	nfp_net_reconfig_start(nn, update);
 	ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
@@ -3633,6 +3649,7 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev,
  */
 void nfp_net_free(struct nfp_net *nn)
 {
+	WARN_ON(timer_pending(&nn->reconfig_timer) || nn->reconfig_posted);
 	if (nn->dp.netdev)
 		free_netdev(nn->dp.netdev);
 	else
@@ -3920,4 +3937,5 @@ void nfp_net_clean(struct nfp_net *nn)
 		return;
 
 	unregister_netdev(nn->dp.netdev);
+	nfp_net_reconfig_wait_posted(nn);
 }

From e04e7a7bbd4bbabef4e1a58367e5fc9b2edc3b10 Mon Sep 17 00:00:00 2001
From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 30 Aug 2018 05:42:13 +0000
Subject: [PATCH 119/257] hv_netvsc: Fix a deadlock by getting rtnl lock
 earlier in netvsc_probe()

This patch fixes the race between netvsc_probe() and
rndis_set_subchannel(), which can cause a deadlock.

These are the related 3 paths which show the deadlock:

path #1:
    Workqueue: hv_vmbus_con vmbus_onmessage_work [hv_vmbus]
    Call Trace:
     schedule
     schedule_preempt_disabled
     __mutex_lock
     __device_attach
     bus_probe_device
     device_add
     vmbus_device_register
     vmbus_onoffer
     vmbus_onmessage_work
     process_one_work
     worker_thread
     kthread
     ret_from_fork

path #2:
    schedule
     schedule_preempt_disabled
     __mutex_lock
     netvsc_probe
     vmbus_probe
     really_probe
     __driver_attach
     bus_for_each_dev
     driver_attach_async
     async_run_entry_fn
     process_one_work
     worker_thread
     kthread
     ret_from_fork

path #3:
    Workqueue: events netvsc_subchan_work [hv_netvsc]
    Call Trace:
     schedule
     rndis_set_subchannel
     netvsc_subchan_work
     process_one_work
     worker_thread
     kthread
     ret_from_fork

Before path #1 finishes, path #2 can start to run, because just before
the "bus_probe_device(dev);" in device_add() in path #1, there is a line
"object_uevent(&dev->kobj, KOBJ_ADD);", so systemd-udevd can
immediately try to load hv_netvsc and hence path #2 can start to run.

Next, path #2 offloads the subchannal's initialization to a workqueue,
i.e. path #3, so we can end up in a deadlock situation like this:

Path #2 gets the device lock, and is trying to get the rtnl lock;
Path #3 gets the rtnl lock and is waiting for all the subchannel messages
to be processed;
Path #1 is trying to get the device lock, but since #2 is not releasing
the device lock, path #1 has to sleep; since the VMBus messages are
processed one by one, this means the sub-channel messages can't be
procedded, so #3 has to sleep with the rtnl lock held, and finally #2
has to sleep... Now all the 3 paths are sleeping and we hit the deadlock.

With the patch, we can make sure #2 gets both the device lock and the
rtnl lock together, gets its job done, and releases the locks, so #1
and #3 will not be blocked for ever.

Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
Signed-off-by: Dexuan Cui <decui@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: K. Y. Srinivasan <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/hyperv/netvsc_drv.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 1121a1ec407cd..70921bbe0e28b 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2206,6 +2206,16 @@ static int netvsc_probe(struct hv_device *dev,
 
 	memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
 
+	/* We must get rtnl lock before scheduling nvdev->subchan_work,
+	 * otherwise netvsc_subchan_work() can get rtnl lock first and wait
+	 * all subchannels to show up, but that may not happen because
+	 * netvsc_probe() can't get rtnl lock and as a result vmbus_onoffer()
+	 * -> ... -> device_add() -> ... -> __device_attach() can't get
+	 * the device lock, so all the subchannels can't be processed --
+	 * finally netvsc_subchan_work() hangs for ever.
+	 */
+	rtnl_lock();
+
 	if (nvdev->num_chn > 1)
 		schedule_work(&nvdev->subchan_work);
 
@@ -2224,7 +2234,6 @@ static int netvsc_probe(struct hv_device *dev,
 	else
 		net->max_mtu = ETH_DATA_LEN;
 
-	rtnl_lock();
 	ret = register_netdevice(net);
 	if (ret != 0) {
 		pr_err("Unable to register netdev.\n");

From b0e0b0abbd5e52568e3848b0ba54a9efa3a11547 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 30 Aug 2018 13:30:03 +0200
Subject: [PATCH 120/257] net/rds: RDS is not Radio Data System

Getting prompt "The RDS Protocol" (RDS) is not too helpful, and it is
easily confused with Radio Data System (which we may want to support
in kernel, too).

Signed-off-by: Pavel Machek <pavel@ucw.cz>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 01b3bd6a3708d..b9092111bc459 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -1,6 +1,6 @@
 
 config RDS
-	tristate "The RDS Protocol"
+	tristate "The Reliable Datagram Sockets Protocol"
 	depends on INET
 	---help---
 	  The RDS (Reliable Datagram Sockets) protocol provides reliable,

From 63cc357f7bba6729869565a12df08441a5995d9a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 30 Aug 2018 14:24:29 +0200
Subject: [PATCH 121/257] tcp: do not restart timewait timer on rst reception

RFC 1337 says:
 ''Ignore RST segments in TIME-WAIT state.
   If the 2 minute MSL is enforced, this fix avoids all three hazards.''

So with net.ipv4.tcp_rfc1337=1, expected behaviour is to have TIME-WAIT sk
expire rather than removing it instantly when a reset is received.

However, Linux will also re-start the TIME-WAIT timer.

This causes connect to fail when tying to re-use ports or very long
delays (until syn retry interval exceeds MSL).

packetdrill test case:
// Demonstrate bogus rearming of TIME-WAIT timer in rfc1337 mode.
`sysctl net.ipv4.tcp_rfc1337=1`

0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
0.000 bind(3, ..., ...) = 0
0.000 listen(3, 1) = 0

0.100 < S 0:0(0) win 29200 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.100 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>
0.200 < . 1:1(0) ack 1 win 257
0.200 accept(3, ..., ...) = 4

// Receive first segment
0.310 < P. 1:1001(1000) ack 1 win 46

// Send one ACK
0.310 > . 1:1(0) ack 1001

// read 1000 byte
0.310 read(4, ..., 1000) = 1000

// Application writes 100 bytes
0.350 write(4, ..., 100) = 100
0.350 > P. 1:101(100) ack 1001

// ACK
0.500 < . 1001:1001(0) ack 101 win 257

// close the connection
0.600 close(4) = 0
0.600 > F. 101:101(0) ack 1001 win 244

// Our side is in FIN_WAIT_1 & waits for ack to fin
0.7 < . 1001:1001(0) ack 102 win 244

// Our side is in FIN_WAIT_2 with no outstanding data.
0.8 < F. 1001:1001(0) ack 102 win 244
0.8 > . 102:102(0) ack 1002 win 244

// Our side is now in TIME_WAIT state, send ack for fin.
0.9 < F. 1002:1002(0) ack 102 win 244
0.9 > . 102:102(0) ack 1002 win 244

// Peer reopens with in-window SYN:
1.000 < S 1000:1000(0) win 9200 <mss 1460,nop,nop,sackOK,nop,wscale 7>

// Therefore, reply with ACK.
1.000 > . 102:102(0) ack 1002 win 244

// Peer sends RST for this ACK.  Normally this RST results
// in tw socket removal, but rfc1337=1 setting prevents this.
1.100 < R 1002:1002(0) win 244

// second syn. Due to rfc1337=1 expect another pure ACK.
31.0 < S 1000:1000(0) win 9200 <mss 1460,nop,nop,sackOK,nop,wscale 7>
31.0 > . 102:102(0) ack 1002 win 244

// .. and another RST from peer.
31.1 < R 1002:1002(0) win 244
31.2 `echo no timer restart;ss -m -e -a -i -n -t -o state TIME-WAIT`

// third syn after one minute.  Time-Wait socket should have expired by now.
63.0 < S 1000:1000(0) win 9200 <mss 1460,nop,nop,sackOK,nop,wscale 7>

// so we expect a syn-ack & 3whs to proceed from here on.
63.0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 7>

Without this patch, 'ss' shows restarts of tw timer and last packet is
thus just another pure ack, more than one minute later.

This restores the original code from commit 283fd6cf0be690a83
("Merge in ANK networking jumbo patch") in netdev-vger-cvs.git .

For some reason the else branch was removed/lost in 1f28b683339f7
("Merge in TCP/UDP optimizations and [..]") and timer restart became
unconditional.

Reported-by: Michal Tesar <mtesar@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_minisocks.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 75ef332a7caf4..12affb7864d98 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -184,8 +184,9 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
 				inet_twsk_deschedule_put(tw);
 				return TCP_TW_SUCCESS;
 			}
+		} else {
+			inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
 		}
-		inet_twsk_reschedule(tw, TCP_TIMEWAIT_LEN);
 
 		if (tmp_opt.saw_tstamp) {
 			tcptw->tw_ts_recent	  = tmp_opt.rcv_tsval;

From 902b5417f28d955cdb4898df6ffaab15f56c5cff Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 30 Aug 2018 16:01:17 +0200
Subject: [PATCH 122/257] selftests: pmtu: maximum MTU for vti4 is 2^16-1-20

Since commit 82612de1c98e ("ip_tunnel: restore binding to ifaces with a
large mtu"), the maximum MTU for vti4 is based on IP_MAX_MTU instead of
the mysterious constant 0xFFF8.  This makes this selftest fail.

Fixes: 82612de1c98e ("ip_tunnel: restore binding to ifaces with a large mtu")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Stefano Brivio <sbrivio@redhat.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f8cc38afffa2e..0ecf2609b9a4f 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -334,7 +334,7 @@ test_pmtu_vti4_link_add_mtu() {
 	fail=0
 
 	min=68
-	max=$((65528 - 20))
+	max=$((65535 - 20))
 	# Check invalid values first
 	for v in $((min - 1)) $((max + 1)); do
 		${ns_a} ip link add vti4_a mtu ${v} type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 2>/dev/null

From c81c7012e0c769b5704c2b07bd5224965e76fb70 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Thu, 30 Aug 2018 16:01:18 +0200
Subject: [PATCH 123/257] selftests: pmtu: detect correct binary to ping ipv6
 addresses

Some systems don't have the ping6 binary anymore, and use ping for
everything. Detect the absence of ping6 and try to use ping instead.

Fixes: d1f1b9cbf34c ("selftests: net: Introduce first PMTU test")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Acked-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 0ecf2609b9a4f..32a194e3e07a5 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -46,6 +46,9 @@
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
+# Some systems don't have a ping6 binary anymore
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
 tests="
 	pmtu_vti6_exception		vti6: PMTU exceptions
 	pmtu_vti4_exception		vti4: PMTU exceptions
@@ -274,7 +277,7 @@ test_pmtu_vti6_exception() {
 	mtu "${ns_b}" veth_b 4000
 	mtu "${ns_a}" vti6_a 5000
 	mtu "${ns_b}" vti6_b 5000
-	${ns_a} ping6 -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
+	${ns_a} ${ping6} -q -i 0.1 -w 2 -s 60000 ${vti6_b_addr} > /dev/null
 
 	# Check that exception was created
 	if [ "$(route_get_dst_pmtu_from_exception "${ns_a}" ${vti6_b_addr})" = "" ]; then

From f611a5b4a51fa36a0aa792be474f5d6aacaef7e3 Mon Sep 17 00:00:00 2001
From: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Date: Thu, 30 Aug 2018 13:19:53 -0500
Subject: [PATCH 124/257] ibmvnic: Include missing return code checks in reset
 function

Check the return codes of these functions and halt reset
in case of failure. The driver will remain in a dormant state
until the next reset event, when device initialization will be
re-attempted.

Signed-off-by: Thomas Falcon <tlfalcon@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/ibmvnic.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index dafdd4ade705b..4f0daf67b18df 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -1823,11 +1823,17 @@ static int do_reset(struct ibmvnic_adapter *adapter,
 			adapter->map_id = 1;
 			release_rx_pools(adapter);
 			release_tx_pools(adapter);
-			init_rx_pools(netdev);
-			init_tx_pools(netdev);
+			rc = init_rx_pools(netdev);
+			if (rc)
+				return rc;
+			rc = init_tx_pools(netdev);
+			if (rc)
+				return rc;
 
 			release_napi(adapter);
-			init_napi(adapter);
+			rc = init_napi(adapter);
+			if (rc)
+				return rc;
 		} else {
 			rc = reset_tx_pools(adapter);
 			if (rc)

From 9b25436662d5fb4c66eb527ead53cab15f596ee0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 27 Aug 2018 14:51:54 -0700
Subject: [PATCH 125/257] random: make CPU trust a boot parameter

Instead of forcing a distro or other system builder to choose
at build time whether the CPU is trusted for CRNG seeding via
CONFIG_RANDOM_TRUST_CPU, provide a boot-time parameter for end users to
control the choice. The CONFIG will set the default state instead.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 Documentation/admin-guide/kernel-parameters.txt |  6 ++++++
 drivers/char/Kconfig                            |  4 ++--
 drivers/char/random.c                           | 11 ++++++++---
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0c8f7889efa1f..227c5c6fa4c17 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3390,6 +3390,12 @@
 	ramdisk_size=	[RAM] Sizes of RAM disks in kilobytes
 			See Documentation/blockdev/ramdisk.txt.
 
+	random.trust_cpu={on,off}
+			[KNL] Enable or disable trusting the use of the
+			CPU's random number generator (if available) to
+			fully seed the kernel's CRNG. Default is controlled
+			by CONFIG_RANDOM_TRUST_CPU.
+
 	ras=option[,option,...]	[KNL] RAS-specific options
 
 		cec_disable	[X86]
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index ce277ee0a28a2..40728491f37b6 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -566,5 +566,5 @@ config RANDOM_TRUST_CPU
 	that CPU manufacturer (perhaps with the insistence or mandate
 	of a Nation State's intelligence or law enforcement agencies)
 	has not installed a hidden back door to compromise the CPU's
-	random number generation facilities.
-
+	random number generation facilities. This can also be configured
+	at boot with "random.trust_cpu=on/off".
diff --git a/drivers/char/random.c b/drivers/char/random.c
index bf5f99fc36f1b..c75b6cdf00533 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -779,6 +779,13 @@ static struct crng_state **crng_node_pool __read_mostly;
 
 static void invalidate_batched_entropy(void);
 
+static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static int __init parse_trust_cpu(char *arg)
+{
+	return kstrtobool(arg, &trust_cpu);
+}
+early_param("random.trust_cpu", parse_trust_cpu);
+
 static void crng_initialize(struct crng_state *crng)
 {
 	int		i;
@@ -799,12 +806,10 @@ static void crng_initialize(struct crng_state *crng)
 		}
 		crng->state[i] ^= rv;
 	}
-#ifdef CONFIG_RANDOM_TRUST_CPU
-	if (arch_init) {
+	if (trust_cpu && arch_init) {
 		crng_init = 2;
 		pr_notice("random: crng done (trusting CPU's manufacturer)\n");
 	}
-#endif
 	crng->init_time = jiffies - CRNG_RESEED_INTERVAL - 1;
 }
 

From 93bbadd6e0a2a58e49d265b9b1aa58e621b60a26 Mon Sep 17 00:00:00 2001
From: Alexey Kodanev <alexey.kodanev@oracle.com>
Date: Thu, 30 Aug 2018 19:11:24 +0300
Subject: [PATCH 126/257] ipv6: don't get lwtstate twice in ip6_rt_copy_init()

Commit 80f1a0f4e0cd ("net/ipv6: Put lwtstate when destroying fib6_info")
partially fixed the kmemleak [1], lwtstate can be copied from fib6_info,
with ip6_rt_copy_init(), and it should be done only once there.

rt->dst.lwtstate is set by ip6_rt_init_dst(), at the start of the function
ip6_rt_copy_init(), so there is no need to get it again at the end.

With this patch, lwtstate also isn't copied from RTF_REJECT routes.

[1]:
unreferenced object 0xffff880b6aaa14e0 (size 64):
  comm "ip", pid 10577, jiffies 4295149341 (age 1273.903s)
  hex dump (first 32 bytes):
    01 00 04 00 04 00 00 00 10 00 00 00 00 00 00 00  ................
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000018664623>] lwtunnel_build_state+0x1bc/0x420
    [<00000000b73aa29a>] ip6_route_info_create+0x9f7/0x1fd0
    [<00000000ee2c5d1f>] ip6_route_add+0x14/0x70
    [<000000008537b55c>] inet6_rtm_newroute+0xd9/0xe0
    [<000000002acc50f5>] rtnetlink_rcv_msg+0x66f/0x8e0
    [<000000008d9cd381>] netlink_rcv_skb+0x268/0x3b0
    [<000000004c893c76>] netlink_unicast+0x417/0x5a0
    [<00000000f2ab1afb>] netlink_sendmsg+0x70b/0xc30
    [<00000000890ff0aa>] sock_sendmsg+0xb1/0xf0
    [<00000000a2e7b66f>] ___sys_sendmsg+0x659/0x950
    [<000000001e7426c8>] __sys_sendmsg+0xde/0x170
    [<00000000fe411443>] do_syscall_64+0x9f/0x4a0
    [<000000001be7b28b>] entry_SYSCALL_64_after_hwframe+0x49/0xbe
    [<000000006d21f353>] 0xffffffffffffffff

Fixes: 6edb3c96a5f0 ("net/ipv6: Defer initialization of dst to data path")
Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c4ea13e8360b9..18e00ce1719a3 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -996,7 +996,6 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 	rt->rt6i_src = ort->fib6_src;
 #endif
 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
-	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate);
 }
 
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,

From 370a132bb2227ff76278f98370e0e701d86ff752 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Tue, 31 Jul 2018 07:27:39 -0400
Subject: [PATCH 127/257] x86/microcode: Make sure boot_cpu_data.microcode is
 up-to-date

When preparing an MCE record for logging, boot_cpu_data.microcode is used
to read out the microcode revision on the box.

However, on systems where late microcode update has happened, the microcode
revision output in a MCE log record is wrong because
boot_cpu_data.microcode is not updated when the microcode gets updated.

But, the microcode revision saved in boot_cpu_data's microcode member
should be kept up-to-date, regardless, for consistency.

Make it so.

Fixes: fa94d0c6e0f3 ("x86/MCE: Save microcode revision in machine check records")
Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Tony Luck <tony.luck@intel.com>
Cc: sironi@amazon.de
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/20180731112739.32338-1-prarit@redhat.com
---
 arch/x86/kernel/cpu/microcode/amd.c   | 4 ++++
 arch/x86/kernel/cpu/microcode/intel.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 0624957aa0681..602f17134103c 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -537,6 +537,10 @@ static enum ucode_state apply_microcode_amd(int cpu)
 	uci->cpu_sig.rev = mc_amd->hdr.patch_id;
 	c->microcode = mc_amd->hdr.patch_id;
 
+	/* Update boot_cpu_data's revision too, if we're on the BSP: */
+	if (c->cpu_index == boot_cpu_data.cpu_index)
+		boot_cpu_data.microcode = mc_amd->hdr.patch_id;
+
 	return UCODE_UPDATED;
 }
 
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 97ccf4c3b45be..256d336cbc045 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -851,6 +851,10 @@ static enum ucode_state apply_microcode_intel(int cpu)
 	uci->cpu_sig.rev = rev;
 	c->microcode = rev;
 
+	/* Update boot_cpu_data's revision too, if we're on the BSP: */
+	if (c->cpu_index == boot_cpu_data.cpu_index)
+		boot_cpu_data.microcode = rev;
+
 	return UCODE_UPDATED;
 }
 

From 8da38ebaad23fe1b0c4a205438676f6356607cfc Mon Sep 17 00:00:00 2001
From: Filippo Sironi <sironi@amazon.de>
Date: Tue, 31 Jul 2018 17:29:30 +0200
Subject: [PATCH 128/257] x86/microcode: Update the new microcode revision
 unconditionally

Handle the case where microcode gets loaded on the BSP's hyperthread
sibling first and the boot_cpu_data's microcode revision doesn't get
updated because of early exit due to the siblings sharing a microcode
engine.

For that, simply write the updated revision on all CPUs unconditionally.

Signed-off-by: Filippo Sironi <sironi@amazon.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: prarit@redhat.com
Cc: stable@vger.kernel.org
Link: http://lkml.kernel.org/r/1533050970-14385-1-git-send-email-sironi@amazon.de
---
 arch/x86/kernel/cpu/microcode/amd.c   | 22 +++++++++++++---------
 arch/x86/kernel/cpu/microcode/intel.c | 13 ++++++++-----
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c
index 602f17134103c..07b5fc00b1880 100644
--- a/arch/x86/kernel/cpu/microcode/amd.c
+++ b/arch/x86/kernel/cpu/microcode/amd.c
@@ -504,6 +504,7 @@ static enum ucode_state apply_microcode_amd(int cpu)
 	struct microcode_amd *mc_amd;
 	struct ucode_cpu_info *uci;
 	struct ucode_patch *p;
+	enum ucode_state ret;
 	u32 rev, dummy;
 
 	BUG_ON(raw_smp_processor_id() != cpu);
@@ -521,9 +522,8 @@ static enum ucode_state apply_microcode_amd(int cpu)
 
 	/* need to apply patch? */
 	if (rev >= mc_amd->hdr.patch_id) {
-		c->microcode = rev;
-		uci->cpu_sig.rev = rev;
-		return UCODE_OK;
+		ret = UCODE_OK;
+		goto out;
 	}
 
 	if (__apply_microcode_amd(mc_amd)) {
@@ -531,17 +531,21 @@ static enum ucode_state apply_microcode_amd(int cpu)
 			cpu, mc_amd->hdr.patch_id);
 		return UCODE_ERROR;
 	}
-	pr_info("CPU%d: new patch_level=0x%08x\n", cpu,
-		mc_amd->hdr.patch_id);
 
-	uci->cpu_sig.rev = mc_amd->hdr.patch_id;
-	c->microcode = mc_amd->hdr.patch_id;
+	rev = mc_amd->hdr.patch_id;
+	ret = UCODE_UPDATED;
+
+	pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev);
+
+out:
+	uci->cpu_sig.rev = rev;
+	c->microcode	 = rev;
 
 	/* Update boot_cpu_data's revision too, if we're on the BSP: */
 	if (c->cpu_index == boot_cpu_data.cpu_index)
-		boot_cpu_data.microcode = mc_amd->hdr.patch_id;
+		boot_cpu_data.microcode = rev;
 
-	return UCODE_UPDATED;
+	return ret;
 }
 
 static int install_equiv_cpu_table(const u8 *buf)
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 256d336cbc045..16936a24795c8 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -795,6 +795,7 @@ static enum ucode_state apply_microcode_intel(int cpu)
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct microcode_intel *mc;
+	enum ucode_state ret;
 	static int prev_rev;
 	u32 rev;
 
@@ -817,9 +818,8 @@ static enum ucode_state apply_microcode_intel(int cpu)
 	 */
 	rev = intel_get_microcode_revision();
 	if (rev >= mc->hdr.rev) {
-		uci->cpu_sig.rev = rev;
-		c->microcode = rev;
-		return UCODE_OK;
+		ret = UCODE_OK;
+		goto out;
 	}
 
 	/*
@@ -848,14 +848,17 @@ static enum ucode_state apply_microcode_intel(int cpu)
 		prev_rev = rev;
 	}
 
+	ret = UCODE_UPDATED;
+
+out:
 	uci->cpu_sig.rev = rev;
-	c->microcode = rev;
+	c->microcode	 = rev;
 
 	/* Update boot_cpu_data's revision too, if we're on the BSP: */
 	if (c->cpu_index == boot_cpu_data.cpu_index)
 		boot_cpu_data.microcode = rev;
 
-	return UCODE_UPDATED;
+	return ret;
 }
 
 static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,

From fd65465b7016dc6d9fa5c2f39cc706c231c9a089 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 31 Aug 2018 18:34:55 +0900
Subject: [PATCH 129/257] kconfig: do not require pkg-config on make
 {menu,n}config

Meelis Roos reported a {menu,n}config regression:
 "I have libncurses devel package installed in the default system
  location (as do 99%+ on actual developers probably) and in this
  case, pkg-config is useless.  pkg-config is needed only when
  libraries and headers are installed in non-default locations but
  it is bad to require installation of pkg-config on all the machines
  where make menuconfig would be possibly run."

For {menu,n}config, do not use pkg-config if it is not installed.
For {g,x}config, keep checking pkg-config since we really rely on it
for finding the installation paths of the required packages.

Fixes: 4ab3b80159d4 ("kconfig: check for pkg-config on make {menu,n,g,x}config")
Reported-by: Meelis Roos <mroos@linux.ee>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Tested-by: Meelis Roos <mroos@linux.ee>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
---
 Documentation/process/changes.rst  |  2 +-
 scripts/kconfig/Makefile           |  1 -
 scripts/kconfig/check-pkgconfig.sh |  8 --------
 scripts/kconfig/gconf-cfg.sh       |  7 +++++++
 scripts/kconfig/mconf-cfg.sh       | 25 ++++++++++++++-----------
 scripts/kconfig/nconf-cfg.sh       | 25 ++++++++++++++-----------
 scripts/kconfig/qconf-cfg.sh       |  7 +++++++
 7 files changed, 43 insertions(+), 32 deletions(-)
 delete mode 100644 scripts/kconfig/check-pkgconfig.sh

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 61f918b10a0c7..d1bf143b446f3 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -86,7 +86,7 @@ pkg-config
 
 The build system, as of 4.18, requires pkg-config to check for installed
 kconfig tools and to determine flags settings for use in
-'make {menu,n,g,x}config'.  Previously pkg-config was being used but not
+'make {g,x}config'.  Previously pkg-config was being used but not
 verified or documented.
 
 Flex
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index 4a7bd2192073b..67ed9f6ccdf8f 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -221,7 +221,6 @@ $(obj)/zconf.tab.o: $(obj)/zconf.lex.c
 
 # check if necessary packages are available, and configure build flags
 define filechk_conf_cfg
-	$(CONFIG_SHELL) $(srctree)/scripts/kconfig/check-pkgconfig.sh; \
 	$(CONFIG_SHELL) $<
 endef
 
diff --git a/scripts/kconfig/check-pkgconfig.sh b/scripts/kconfig/check-pkgconfig.sh
deleted file mode 100644
index 7a1c40bfb58cb..0000000000000
--- a/scripts/kconfig/check-pkgconfig.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Check for pkg-config presence
-
-if [ -z $(command -v pkg-config) ]; then
-	echo "'make *config' requires 'pkg-config'. Please install it." 1>&2
-	exit 1
-fi
diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh
index 533b3d8f8f08d..480ecd8b9f415 100755
--- a/scripts/kconfig/gconf-cfg.sh
+++ b/scripts/kconfig/gconf-cfg.sh
@@ -3,6 +3,13 @@
 
 PKG="gtk+-2.0 gmodule-2.0 libglade-2.0"
 
+if [ -z "$(command -v pkg-config)" ]; then
+	echo >&2 "*"
+	echo >&2 "* 'make gconfig' requires 'pkg-config'. Please install it."
+	echo >&2 "*"
+	exit 1
+fi
+
 if ! pkg-config --exists $PKG; then
 	echo >&2 "*"
 	echo >&2 "* Unable to find the GTK+ installation. Please make sure that"
diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh
index e6f9facd00772..c812872d7f9d7 100755
--- a/scripts/kconfig/mconf-cfg.sh
+++ b/scripts/kconfig/mconf-cfg.sh
@@ -4,20 +4,23 @@
 PKG="ncursesw"
 PKG2="ncurses"
 
-if pkg-config --exists $PKG; then
-	echo cflags=\"$(pkg-config --cflags $PKG)\"
-	echo libs=\"$(pkg-config --libs $PKG)\"
-	exit 0
-fi
+if [ -n "$(command -v pkg-config)" ]; then
+	if pkg-config --exists $PKG; then
+		echo cflags=\"$(pkg-config --cflags $PKG)\"
+		echo libs=\"$(pkg-config --libs $PKG)\"
+		exit 0
+	fi
 
-if pkg-config --exists $PKG2; then
-	echo cflags=\"$(pkg-config --cflags $PKG2)\"
-	echo libs=\"$(pkg-config --libs $PKG2)\"
-	exit 0
+	if pkg-config --exists $PKG2; then
+		echo cflags=\"$(pkg-config --cflags $PKG2)\"
+		echo libs=\"$(pkg-config --libs $PKG2)\"
+		exit 0
+	fi
 fi
 
-# Unfortunately, some distributions (e.g. openSUSE) cannot find ncurses
-# by pkg-config.
+# Check the default paths in case pkg-config is not installed.
+# (Even if it is installed, some distributions such as openSUSE cannot
+# find ncurses by pkg-config.)
 if [ -f /usr/include/ncursesw/ncurses.h ]; then
 	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\"
 	echo libs=\"-lncursesw\"
diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh
index 42f5ac73548e4..001559ef0a60b 100644
--- a/scripts/kconfig/nconf-cfg.sh
+++ b/scripts/kconfig/nconf-cfg.sh
@@ -4,20 +4,23 @@
 PKG="ncursesw menuw panelw"
 PKG2="ncurses menu panel"
 
-if pkg-config --exists $PKG; then
-	echo cflags=\"$(pkg-config --cflags $PKG)\"
-	echo libs=\"$(pkg-config --libs $PKG)\"
-	exit 0
-fi
+if [ -n "$(command -v pkg-config)" ]; then
+	if pkg-config --exists $PKG; then
+		echo cflags=\"$(pkg-config --cflags $PKG)\"
+		echo libs=\"$(pkg-config --libs $PKG)\"
+		exit 0
+	fi
 
-if pkg-config --exists $PKG2; then
-	echo cflags=\"$(pkg-config --cflags $PKG2)\"
-	echo libs=\"$(pkg-config --libs $PKG2)\"
-	exit 0
+	if pkg-config --exists $PKG2; then
+		echo cflags=\"$(pkg-config --cflags $PKG2)\"
+		echo libs=\"$(pkg-config --libs $PKG2)\"
+		exit 0
+	fi
 fi
 
-# Unfortunately, some distributions (e.g. openSUSE) cannot find ncurses
-# by pkg-config.
+# Check the default paths in case pkg-config is not installed.
+# (Even if it is installed, some distributions such as openSUSE cannot
+# find ncurses by pkg-config.)
 if [ -f /usr/include/ncursesw/ncurses.h ]; then
 	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\"
 	echo libs=\"-lncursesw -lmenuw -lpanelw\"
diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh
index 0862e15625366..02ccc0ae10318 100755
--- a/scripts/kconfig/qconf-cfg.sh
+++ b/scripts/kconfig/qconf-cfg.sh
@@ -4,6 +4,13 @@
 PKG="Qt5Core Qt5Gui Qt5Widgets"
 PKG2="QtCore QtGui"
 
+if [ -z "$(command -v pkg-config)" ]; then
+	echo >&2 "*"
+	echo >&2 "* 'make xconfig' requires 'pkg-config'. Please install it."
+	echo >&2 "*"
+	exit 1
+fi
+
 if pkg-config --exists $PKG; then
 	echo cflags=\"-std=c++11 -fPIC $(pkg-config --cflags Qt5Core Qt5Gui Qt5Widgets)\"
 	echo libs=\"$(pkg-config --libs $PKG)\"

From 914b087ff9e0e9a399a4927fa30793064afc0178 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 28 Aug 2018 12:59:10 -0700
Subject: [PATCH 130/257] kbuild: make missing $DEPMOD a Warning instead of an
 Error

When $DEPMOD is not found, only print a warning instead of exiting
with an error message and error status:

Warning: 'make modules_install' requires /sbin/depmod. Please install it.
This is probably in the kmod package.

Change the Error to a Warning because "not all build hosts for cross
compiling Linux are Linux systems and are able to provide a working
port of depmod, especially at the file patch /sbin/depmod."

I.e., "make modules_install" may be used to copy/install the
loadable modules files to a target directory on a build system and
then transferred to an embedded device where /sbin/depmod is run
instead of it being run on the build system.

Fixes: 934193a654c1 ("kbuild: verify that $DEPMOD is installed")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: H. Nikolaus Schaller <hns@goldelico.com>
Cc: stable@vger.kernel.org
Cc: Lucas De Marchi <lucas.demarchi@profusion.mobi>
Cc: Lucas De Marchi <lucas.de.marchi@gmail.com>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Jessica Yu <jeyu@kernel.org>
Cc: Chih-Wei Huang <cwhuang@linux.org.tw>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/depmod.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index 999d585eaa735..e5f0aad75b964 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -15,9 +15,9 @@ if ! test -r System.map ; then
 fi
 
 if [ -z $(command -v $DEPMOD) ]; then
-	echo "'make modules_install' requires $DEPMOD. Please install it." >&2
+	echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2
 	echo "This is probably in the kmod package." >&2
-	exit 1
+	exit 0
 fi
 
 # older versions of depmod require the version string to start with three

From 9db39f4d4f94b61e4b64b077f6ddb2bdfb533a88 Mon Sep 17 00:00:00 2001
From: Tushar Dave <tushar.n.dave@oracle.com>
Date: Fri, 31 Aug 2018 23:45:16 +0200
Subject: [PATCH 131/257] bpf: Fix bpf_msg_pull_data()

Helper bpf_msg_pull_data() mistakenly reuses variable 'offset' while
linearizing multiple scatterlist elements. Variable 'offset' is used
to find first starting scatterlist element
    i.e. msg->data = sg_virt(&sg[first_sg]) + start - offset"

Use different variable name while linearizing multiple scatterlist
elements so that value contained in variable 'offset' won't get
overwritten.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Tushar Dave <tushar.n.dave@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 net/core/filter.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 2c7801f6737ad..aecdeba052d3f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2292,7 +2292,7 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
-	unsigned int len = 0, offset = 0, copy = 0;
+	unsigned int len = 0, offset = 0, copy = 0, poffset = 0;
 	int bytes = end - start, bytes_sg_total;
 	struct scatterlist *sg = msg->sg_data;
 	int first_sg, last_sg, i, shift;
@@ -2348,16 +2348,15 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(!page))
 		return -ENOMEM;
 	p = page_address(page);
-	offset = 0;
 
 	i = first_sg;
 	do {
 		from = sg_virt(&sg[i]);
 		len = sg[i].length;
-		to = p + offset;
+		to = p + poffset;
 
 		memcpy(to, from, len);
-		offset += len;
+		poffset += len;
 		sg[i].length = 0;
 		put_page(sg_page(&sg[i]));
 

From 97911e0ccb54c7478b9dac77e6c54c01b449e3a7 Mon Sep 17 00:00:00 2001
From: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Date: Fri, 31 Aug 2018 15:32:42 +0900
Subject: [PATCH 132/257] tools/bpf: bpftool, add xskmap in map types

When listed all maps, bpftool currently shows (null) for xskmap.
Added xskmap type in map_type_name[] to show correct type.

Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Acked-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 tools/bpf/bpftool/map.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b2ec20e562bd5..b455930a3eaf7 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -68,6 +68,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_DEVMAP]		= "devmap",
 	[BPF_MAP_TYPE_SOCKMAP]		= "sockmap",
 	[BPF_MAP_TYPE_CPUMAP]		= "cpumap",
+	[BPF_MAP_TYPE_XSKMAP]           = "xskmap",
 	[BPF_MAP_TYPE_SOCKHASH]		= "sockhash",
 	[BPF_MAP_TYPE_CGROUP_STORAGE]	= "cgroup_storage",
 };

From 597222f72a94118f593e4f32bf58ae7e049a0df1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 30 Aug 2018 21:25:02 -0700
Subject: [PATCH 133/257] bpf: avoid misuse of psock when TCP_ULP_BPF collides
 with another ULP

Currently we check sk_user_data is non NULL to determine if the sk
exists in a map. However, this is not sufficient to ensure the psock
or the ULP ops are not in use by another user, such as kcm or TLS. To
avoid this when adding a sock to a map also verify it is of the
correct ULP type. Additionally, when releasing a psock verify that
it is the TCP_ULP_BPF type before releasing the ULP. The error case
where we abort an update due to ULP collision can cause this error
path.

For example,

  __sock_map_ctx_update_elem()
     [...]
     err = tcp_set_ulp_id(sock, TCP_ULP_BPF) <- collides with TLS
     if (err)                                <- so err out here
        goto out_free
     [...]
  out_free:
     smap_release_sock() <- calling tcp_cleanup_ulp releases the
                            TLS ULP incorrectly.

Fixes: 2f857d04601a ("bpf: sockmap, remove STRPARSER map_flags and add multi-map support")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 kernel/bpf/sockmap.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index ce63e58017468..488ef9663c01f 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -1462,10 +1462,16 @@ static void smap_destroy_psock(struct rcu_head *rcu)
 	schedule_work(&psock->gc_work);
 }
 
+static bool psock_is_smap_sk(struct sock *sk)
+{
+	return inet_csk(sk)->icsk_ulp_ops == &bpf_tcp_ulp_ops;
+}
+
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
 {
 	if (refcount_dec_and_test(&psock->refcnt)) {
-		tcp_cleanup_ulp(sock);
+		if (psock_is_smap_sk(sock))
+			tcp_cleanup_ulp(sock);
 		write_lock_bh(&sock->sk_callback_lock);
 		smap_stop_sock(psock, sock);
 		write_unlock_bh(&sock->sk_callback_lock);
@@ -1892,6 +1898,10 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map,
 	 * doesn't update user data.
 	 */
 	if (psock) {
+		if (!psock_is_smap_sk(sock)) {
+			err = -EBUSY;
+			goto out_progs;
+		}
 		if (READ_ONCE(psock->bpf_parse) && parse) {
 			err = -EBUSY;
 			goto out_progs;

From 4fb7253e4f9a8f06a986a3b317e2f79d9b43d552 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 Aug 2018 18:06:08 +0800
Subject: [PATCH 134/257] igmp: fix incorrect unsolicit report count when join
 group

We should not start timer if im->unsolicit_count equal to 0 after decrease.
Or we will send one more unsolicit report message. i.e. 3 instead of 2 by
default.

Fixes: 1da177e4c3f41 ("Linux-2.6.12-rc2")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index cf75f8944b05e..deb1f8274d71a 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -820,10 +820,9 @@ static void igmp_timer_expire(struct timer_list *t)
 	spin_lock(&im->lock);
 	im->tm_running = 0;
 
-	if (im->unsolicit_count) {
-		im->unsolicit_count--;
+	if (im->unsolicit_count && --im->unsolicit_count)
 		igmp_start_timer(im, unsolicited_report_interval(in_dev));
-	}
+
 	im->reporter = 1;
 	spin_unlock(&im->lock);
 

From ff06525fcb8ae3c302ac1319bf6c07c026dea964 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 29 Aug 2018 18:06:10 +0800
Subject: [PATCH 135/257] igmp: fix incorrect unsolicit report count after link
 down and up

After link down and up, i.e. when call ip_mc_up(), we doesn't init
im->unsolicit_count. So after igmp_timer_expire(), we will not start
timer again and only send one unsolicit report at last.

Fix it by initializing im->unsolicit_count in igmp_group_added(), so
we can respect igmp robustness value.

Fixes: 24803f38a5c0b ("igmp: do not remove igmp souce list info when set link down")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/igmp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index deb1f8274d71a..4da39446da2d8 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1307,6 +1307,8 @@ static void igmp_group_added(struct ip_mc_list *im)
 
 	if (in_dev->dead)
 		return;
+
+	im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
 	if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
 		spin_lock_bh(&im->lock);
 		igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
@@ -1390,9 +1392,6 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 			      unsigned int mode)
 {
 	struct ip_mc_list *im;
-#ifdef CONFIG_IP_MULTICAST
-	struct net *net = dev_net(in_dev->dev);
-#endif
 
 	ASSERT_RTNL();
 
@@ -1419,7 +1418,6 @@ static void __ip_mc_inc_group(struct in_device *in_dev, __be32 addr,
 	spin_lock_init(&im->lock);
 #ifdef CONFIG_IP_MULTICAST
 	timer_setup(&im->timer, igmp_timer_expire, 0);
-	im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
 #endif
 
 	im->next_rcu = in_dev->mc_list;

From 10d7fac4c52618d94a42d701d28f114147291ecc Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 29 Aug 2018 08:00:23 -0700
Subject: [PATCH 136/257] dt-bindings: net: cpsw: Document cpsw-phy-sel usage
 but prefer phandle

The current cpsw usage for cpsw-phy-sel is undocumented but is used for
all the boards using cpsw. And cpsw-phy-sel is not really a child of
the cpsw device, it lives in the system control module instead.

Let's document the existing usage, and improve it a bit where we prefer
to use a phandle instead of a child device for it. That way we can
properly describe the hardware in dts files for things like genpd.

Cc: devicetree@vger.kernel.org
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Murali Karicheri <m-karicheri2@ti.com>
Cc: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/cpsw.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 41089369f8913..b3acebe08eb0a 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -19,6 +19,10 @@ Required properties:
 - slaves		: Specifies number for slaves
 - active_slave		: Specifies the slave to use for time stamping,
 			  ethtool and SIOCGMIIPHY
+- cpsw-phy-sel		: Specifies the phandle to the CPSW phy mode selection
+			  device. See also cpsw-phy-sel.txt for it's binding.
+			  Note that in legacy cases cpsw-phy-sel may be
+			  a child device instead of a phandle.
 
 Optional properties:
 - ti,hwmods		: Must be "cpgmac0"
@@ -75,6 +79,7 @@ Examples:
 		cpts_clock_mult = <0x80000000>;
 		cpts_clock_shift = <29>;
 		syscon = <&cm>;
+		cpsw-phy-sel = <&phy_sel>;
 		cpsw_emac0: slave@0 {
 			phy_id = <&davinci_mdio>, <0>;
 			phy-mode = "rgmii-txid";
@@ -103,6 +108,7 @@ Examples:
 		cpts_clock_mult = <0x80000000>;
 		cpts_clock_shift = <29>;
 		syscon = <&cm>;
+		cpsw-phy-sel = <&phy_sel>;
 		cpsw_emac0: slave@0 {
 			phy_id = <&davinci_mdio>, <0>;
 			phy-mode = "rgmii-txid";

From 18eb8aea7fb2fb4490e578b1b8a1096c34b2fc48 Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 29 Aug 2018 08:00:24 -0700
Subject: [PATCH 137/257] net: ethernet: cpsw-phy-sel: prefer phandle for phy
 sel

The cpsw-phy-sel device is not a child of the cpsw interconnect target
module. It lives in the system control module.

Let's fix this issue by trying to use cpsw-phy-sel phandle first if it
exists and if not fall back to current usage of trying to find the
cpsw-phy-sel child. That way the phy sel driver can be a child of the
system control module where it belongs in the device tree.

Without this fix, we cannot have a proper interconnect target module
hierarchy in device tree for things like genpd.

Note that deferred probe is mostly not supported by cpsw and this patch
does not attempt to fix that. In case deferred probe support is needed,
this could be added to cpsw_slave_open() and phy_connect() so they start
handling and returning errors.

For documenting it, looks like the cpsw-phy-sel is used for all cpsw device
tree nodes. It's missing the related binding documentation, so let's also
update the binding documentation accordingly.

Cc: devicetree@vger.kernel.org
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Murali Karicheri <m-karicheri2@ti.com>
Cc: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw-phy-sel.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index 0c1adad7415da..396e1cd106679 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -170,10 +170,13 @@ void cpsw_phy_sel(struct device *dev, phy_interface_t phy_mode, int slave)
 	struct device_node *node;
 	struct cpsw_phy_sel_priv *priv;
 
-	node = of_get_child_by_name(dev->of_node, "cpsw-phy-sel");
+	node = of_parse_phandle(dev->of_node, "cpsw-phy-sel", 0);
 	if (!node) {
-		dev_err(dev, "Phy mode driver DT not found\n");
-		return;
+		node = of_get_child_by_name(dev->of_node, "cpsw-phy-sel");
+		if (!node) {
+			dev_err(dev, "Phy mode driver DT not found\n");
+			return;
+		}
 	}
 
 	dev = bus_find_device(&platform_bus_type, NULL, node, match);

From 15a81b418e22a9aa4a0504471fdcb0f4ebf69b96 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Thu, 30 Aug 2018 14:15:43 -0700
Subject: [PATCH 138/257] net/ipv6: Only update MTU metric if it set

Jan reported a regression after an update to 4.18.5. In this case ipv6
default route is setup by systemd-networkd based on data from an RA. The
RA contains an MTU of 1492 which is used when the route is first inserted
but then systemd-networkd pushes down updates to the default route
without the mtu set.

Prior to the change to fib6_info, metrics such as MTU were held in the
dst_entry and rt6i_pmtu in rt6_info contained an update to the mtu if
any. ip6_mtu would look at rt6i_pmtu first and use it if set. If not,
the value from the metrics is used if it is set and finally falling
back to the idev value.

After the fib6_info change metrics are contained in the fib6_info struct
and there is no equivalent to rt6i_pmtu. To maintain consistency with
the old behavior the new code should only reset the MTU in the metrics
if the route update has it set.

Fixes: d4ead6b34b67 ("net/ipv6: move metrics from dst to rt6_info")
Reported-by: Jan Janssen <medhefgo@web.de>
Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index c861a6d4671d3..5516f55e214bd 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -989,7 +989,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 					fib6_clean_expires(iter);
 				else
 					fib6_set_expires(iter, rt->expires);
-				fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
+
+				if (rt->fib6_pmtu)
+					fib6_metric_set(iter, RTAX_MTU,
+							rt->fib6_pmtu);
 				return -EEXIST;
 			}
 			/* If we have the same destination and the same metric,

From 38f5d8d8cbb2ffa2b54315118185332329ec891c Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 31 Aug 2018 23:30:47 +0900
Subject: [PATCH 139/257] i2c: uniphier: issue STOP only for last message or
 I2C_M_STOP

This driver currently emits a STOP if the next message is not
I2C_MD_RD.  It should not do it because it disturbs the I2C_RDWR
ioctl, where read/write transactions are combined without STOP
between.

Issue STOP only when the message is the last one _or_ flagged with
I2C_M_STOP.

Fixes: dd6fd4a32793 ("i2c: uniphier: add UniPhier FIFO-less I2C driver")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-uniphier.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-uniphier.c b/drivers/i2c/busses/i2c-uniphier.c
index bb181b0882919..454f914ae66db 100644
--- a/drivers/i2c/busses/i2c-uniphier.c
+++ b/drivers/i2c/busses/i2c-uniphier.c
@@ -248,11 +248,8 @@ static int uniphier_i2c_master_xfer(struct i2c_adapter *adap,
 		return ret;
 
 	for (msg = msgs; msg < emsg; msg++) {
-		/* If next message is read, skip the stop condition */
-		bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD);
-		/* but, force it if I2C_M_STOP is set */
-		if (msg->flags & I2C_M_STOP)
-			stop = true;
+		/* Emit STOP if it is the last message or I2C_M_STOP is set. */
+		bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP);
 
 		ret = uniphier_i2c_master_xfer_one(adap, msg, stop);
 		if (ret)

From 4c85609b08c4761eca0a40fd7beb06bc650f252d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Fri, 31 Aug 2018 23:30:48 +0900
Subject: [PATCH 140/257] i2c: uniphier-f: issue STOP only for last message or
 I2C_M_STOP

This driver currently emits a STOP if the next message is not
I2C_MD_RD.  It should not do it because it disturbs the I2C_RDWR
ioctl, where read/write transactions are combined without STOP
between.

Issue STOP only when the message is the last one _or_ flagged with
I2C_M_STOP.

Fixes: 6a62974b667f ("i2c: uniphier_f: add UniPhier FIFO-builtin I2C driver")
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-uniphier-f.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-uniphier-f.c b/drivers/i2c/busses/i2c-uniphier-f.c
index 9918bdd816196..a403e8579b652 100644
--- a/drivers/i2c/busses/i2c-uniphier-f.c
+++ b/drivers/i2c/busses/i2c-uniphier-f.c
@@ -401,11 +401,8 @@ static int uniphier_fi2c_master_xfer(struct i2c_adapter *adap,
 		return ret;
 
 	for (msg = msgs; msg < emsg; msg++) {
-		/* If next message is read, skip the stop condition */
-		bool stop = !(msg + 1 < emsg && msg[1].flags & I2C_M_RD);
-		/* but, force it if I2C_M_STOP is set */
-		if (msg->flags & I2C_M_STOP)
-			stop = true;
+		/* Emit STOP if it is the last message or I2C_M_STOP is set. */
+		bool stop = (msg + 1 == emsg) || (msg->flags & I2C_M_STOP);
 
 		ret = uniphier_fi2c_master_xfer_one(adap, msg, stop);
 		if (ret)

From f6eb89349078b2ca3e42dbb272ab444bab1b9f42 Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Fri, 31 Aug 2018 10:24:14 -0300
Subject: [PATCH 141/257] dt-bindings: imx-lpi2c: Remove mx8dv compatible entry

mx8dv never entered into production and there is no other place
in the kernel referring to this SoC, so remove it from the
dt bindings documentation.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
index 00e4365d72068..091c8dfd32291 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
+++ b/Documentation/devicetree/bindings/i2c/i2c-imx-lpi2c.txt
@@ -3,7 +3,6 @@
 Required properties:
 - compatible :
   - "fsl,imx7ulp-lpi2c" for LPI2C compatible with the one integrated on i.MX7ULP soc
-  - "fsl,imx8dv-lpi2c" for LPI2C compatible with the one integrated on i.MX8DV soc
 - reg : address and length of the lpi2c master registers
 - interrupts : lpi2c interrupt
 - clocks : lpi2c clock specifier
@@ -11,7 +10,7 @@ Required properties:
 Examples:
 
 lpi2c7: lpi2c7@40a50000 {
-	compatible = "fsl,imx8dv-lpi2c";
+	compatible = "fsl,imx7ulp-lpi2c";
 	reg = <0x40A50000 0x10000>;
 	interrupt-parent = <&intc>;
 	interrupts = <GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>;

From 20fdcd760a63ea66335c58bceb175e7712ab18ff Mon Sep 17 00:00:00 2001
From: Fabio Estevam <fabio.estevam@nxp.com>
Date: Fri, 31 Aug 2018 10:24:15 -0300
Subject: [PATCH 142/257] i2c: imx-lpi2c: Remove mx8dv compatible entry

mx8dv never entered into production and there is no other place
in the kernel referring to this SoC, so remove it from the
driver's compatible entry.

Signed-off-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-imx-lpi2c.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index 6d975f5221ca0..06c4c767af322 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -538,7 +538,6 @@ static const struct i2c_algorithm lpi2c_imx_algo = {
 
 static const struct of_device_id lpi2c_imx_of_match[] = {
 	{ .compatible = "fsl,imx7ulp-lpi2c" },
-	{ .compatible = "fsl,imx8dv-lpi2c" },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, lpi2c_imx_of_match);

From 16fe10cf92783ed9ceb182d6ea2b8adf5e8ec1b8 Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sat, 1 Sep 2018 20:11:05 +0800
Subject: [PATCH 143/257] net: cadence: Fix a sleep-in-atomic-context bug in
 macb_halt_tx()

The kernel module may sleep with holding a spinlock.

The function call paths (from bottom to top) in Linux-4.16 are:

[FUNC] usleep_range
drivers/net/ethernet/cadence/macb_main.c, 648:
	usleep_range in macb_halt_tx
drivers/net/ethernet/cadence/macb_main.c, 730:
	macb_halt_tx in macb_tx_error_task
drivers/net/ethernet/cadence/macb_main.c, 721:
	_raw_spin_lock_irqsave in macb_tx_error_task

To fix this bug, usleep_range() is replaced with udelay().

This bug is found by my static analysis tool DSAC.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cadence/macb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index c6707ea2d7519..16e4ef7d71855 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -649,7 +649,7 @@ static int macb_halt_tx(struct macb *bp)
 		if (!(status & MACB_BIT(TGO)))
 			return 0;
 
-		usleep_range(10, 250);
+		udelay(250);
 	} while (time_before(halt_time, timeout));
 
 	return -ETIMEDOUT;

From 59a03fea131d671a57b8ed3dc446264c61d4b75f Mon Sep 17 00:00:00 2001
From: Vinson Lee <vlee@freedesktop.org>
Date: Sat, 1 Sep 2018 21:20:27 +0000
Subject: [PATCH 144/257] uapi: Fix linux/rds.h userspace compilation errors.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include linux/in6.h for struct in6_addr.

/usr/include/linux/rds.h:156:18: error: field ‘laddr’ has incomplete type
  struct in6_addr laddr;
                  ^~~~~
/usr/include/linux/rds.h:157:18: error: field ‘faddr’ has incomplete type
  struct in6_addr faddr;
                  ^~~~~
/usr/include/linux/rds.h:178:18: error: field ‘laddr’ has incomplete type
  struct in6_addr laddr;
                  ^~~~~
/usr/include/linux/rds.h:179:18: error: field ‘faddr’ has incomplete type
  struct in6_addr faddr;
                  ^~~~~
/usr/include/linux/rds.h:198:18: error: field ‘bound_addr’ has incomplete type
  struct in6_addr bound_addr;
                  ^~~~~~~~~~
/usr/include/linux/rds.h:199:18: error: field ‘connected_addr’ has incomplete type
  struct in6_addr connected_addr;
                  ^~~~~~~~~~~~~~
/usr/include/linux/rds.h:219:18: error: field ‘local_addr’ has incomplete type
  struct in6_addr local_addr;
                  ^~~~~~~~~~
/usr/include/linux/rds.h:221:18: error: field ‘peer_addr’ has incomplete type
  struct in6_addr peer_addr;
                  ^~~~~~~~~
/usr/include/linux/rds.h:245:18: error: field ‘src_addr’ has incomplete type
  struct in6_addr src_addr;
                  ^~~~~~~~
/usr/include/linux/rds.h:246:18: error: field ‘dst_addr’ has incomplete type
  struct in6_addr dst_addr;
                  ^~~~~~~~

Fixes: b7ff8b1036f0 ("rds: Extend RDS API for IPv6 support")
Signed-off-by: Vinson Lee <vlee@freedesktop.org>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rds.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index dc520e1a4123f..8b73cb603c5f3 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -37,6 +37,7 @@
 
 #include <linux/types.h>
 #include <linux/socket.h>		/* For __kernel_sockaddr_storage. */
+#include <linux/in6.h>			/* For struct in6_addr. */
 
 #define RDS_IB_ABI_VERSION		0x301
 

From c90bbce9eedd14f9428388d9442b5b3e98a2a6dd Mon Sep 17 00:00:00 2001
From: Mike Rapoport <rppt@linux.vnet.ibm.com>
Date: Mon, 20 Aug 2018 08:25:37 +0300
Subject: [PATCH 145/257] m68k: fix early memory reservation for ColdFire MMU
 systems

The bootmem to memblock conversion introduced by the commit 1008a11590b9
("m68k: switch to MEMBLOCK + NO_BOOTMEM") made reservation of kernel code
and data to start from a wrong address.

Fix it.

Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com>
Tested-by: Angelo Dureghello <angelo@sysam.it>
Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
---
 arch/m68k/mm/mcfmmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 70dde040779b5..f5453d944ff5e 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -172,7 +172,7 @@ void __init cf_bootmem_alloc(void)
 	high_memory = (void *)_ramend;
 
 	/* Reserve kernel text/data/bss */
-	memblock_reserve(memstart, memstart - _rambase);
+	memblock_reserve(_rambase, memstart - _rambase);
 
 	m68k_virt_to_node_shift = fls(_ramend - 1) - 6;
 	module_fixup(NULL, __start_fixup, __stop_fixup);

From c15e3f19a6d5c89b1209dc94b40e568177cb0921 Mon Sep 17 00:00:00 2001
From: Jon Kuhn <jkuhn@barracuda.com>
Date: Mon, 9 Jul 2018 14:33:14 +0000
Subject: [PATCH 146/257] fs/cifs: don't translate SFM_SLASH (U+F026) to
 backslash

When a Mac client saves an item containing a backslash to a file server
the backslash is represented in the CIFS/SMB protocol as as U+F026.
Before this change, listing a directory containing an item with a
backslash in its name will return that item with the backslash
represented with a true backslash character (U+005C) because
convert_sfm_character mapped U+F026 to U+005C when interpretting the
CIFS/SMB protocol response.  However, attempting to open or stat the
path using a true backslash will result in an error because
convert_to_sfm_char does not map U+005C back to U+F026 causing the
CIFS/SMB request to be made with the backslash represented as U+005C.

This change simply prevents the U+F026 to U+005C conversion from
happenning.  This is analogous to how the code does not do any
translation of UNI_SLASH (U+F000).

Signed-off-by: Jon Kuhn <jkuhn@barracuda.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_unicode.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index b380e0871372d..a2b2355e7f019 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -105,9 +105,6 @@ convert_sfm_char(const __u16 src_char, char *target)
 	case SFM_LESSTHAN:
 		*target = '<';
 		break;
-	case SFM_SLASH:
-		*target = '\\';
-		break;
 	case SFM_SPACE:
 		*target = ' ';
 		break;

From 5e19697b56a64004e2d0ff1bb952ea05493c088f Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Mon, 27 Aug 2018 17:04:13 -0500
Subject: [PATCH 147/257] SMB3: Backup intent flag missing for directory opens
 with backupuid mounts

When "backup intent" is requested on the mount (e.g. backupuid or
backupgid mount options), the corresponding flag needs to be set
on opens of directories (and files) but was missing in some
places causing access denied trying to enumerate and backup
servers.

Fixes kernel bugzilla #200953
https://bugzilla.kernel.org/show_bug.cgi?id=200953

Reported-and-tested-by: <whh@rubrik.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Pavel Shilovsky <pshilov@microsoft.com>
---
 fs/cifs/inode.c   |  2 ++
 fs/cifs/smb2ops.c | 25 ++++++++++++++++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index d32eaa4b24376..6e8765f445086 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -467,6 +467,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path,
 	oparms.cifs_sb = cifs_sb;
 	oparms.desired_access = GENERIC_READ;
 	oparms.create_options = CREATE_NOT_DIR;
+	if (backup_cred(cifs_sb))
+		oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
 	oparms.disposition = FILE_OPEN;
 	oparms.path = path;
 	oparms.fid = &fid;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 247a98e6c856e..b6fada2445279 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -630,7 +630,10 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_READ_ATTRIBUTES;
 	oparms.disposition = FILE_OPEN;
-	oparms.create_options = 0;
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 
@@ -779,7 +782,10 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon,
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_READ_EA;
 	oparms.disposition = FILE_OPEN;
-	oparms.create_options = 0;
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 
@@ -858,7 +864,10 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_WRITE_EA;
 	oparms.disposition = FILE_OPEN;
-	oparms.create_options = 0;
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 
@@ -1453,7 +1462,10 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_READ_ATTRIBUTES | FILE_READ_DATA;
 	oparms.disposition = FILE_OPEN;
-	oparms.create_options = 0;
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
 	oparms.fid = fid;
 	oparms.reconnect = false;
 
@@ -1857,7 +1869,10 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
 	oparms.tcon = tcon;
 	oparms.desired_access = FILE_READ_ATTRIBUTES;
 	oparms.disposition = FILE_OPEN;
-	oparms.create_options = 0;
+	if (backup_cred(cifs_sb))
+		oparms.create_options = CREATE_OPEN_BACKUP_INTENT;
+	else
+		oparms.create_options = 0;
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 

From 25f2573512d7b38bca4c0878109db9600b8b711f Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Wed, 29 Aug 2018 09:22:22 -0500
Subject: [PATCH 148/257] smb3: minor debugging clarifications in rfc1001 len
 processing

I ran into some cases where server was returning the wrong length
on frames but I couldn't easily match them to the command in the
network trace (or server logs) since I need the command and/or
multiplex id to find the offending SMB2/SMB3 command.  Add these
two fields to the log message. In the case of padding too much
it may not be a problem in all cases but might have correlated
to a network disconnect case in some problems we have been
looking at. In the case of frame too short is even more important.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2misc.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index db0453660ff6c..6a9c47541c53d 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -248,16 +248,20 @@ smb2_check_message(char *buf, unsigned int len, struct TCP_Server_Info *srvr)
 		 * MacOS server pads after SMB2.1 write response with 3 bytes
 		 * of junk. Other servers match RFC1001 len to actual
 		 * SMB2/SMB3 frame length (header + smb2 response specific data)
-		 * Some windows servers do too when compounding is used.
-		 * Log the server error (once), but allow it and continue
+		 * Some windows servers also pad up to 8 bytes when compounding.
+		 * If pad is longer than eight bytes, log the server behavior
+		 * (once), since may indicate a problem but allow it and continue
 		 * since the frame is parseable.
 		 */
 		if (clc_len < len) {
-			printk_once(KERN_WARNING
-				"SMB2 server sent bad RFC1001 len %d not %d\n",
-				len, clc_len);
+			pr_warn_once(
+			     "srv rsp padded more than expected. Length %d not %d for cmd:%d mid:%llu\n",
+			     len, clc_len, command, mid);
 			return 0;
 		}
+		pr_warn_once(
+			"srv rsp too short, len %d not %d. cmd:%d mid:%llu\n",
+			len, clc_len, command, mid);
 
 		return 1;
 	}

From f801568332321e2b1e7a8bd26c3e4913a312a2ec Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 31 Aug 2018 15:12:10 -0500
Subject: [PATCH 149/257] smb3: check for and properly advertise directory
 lease support

Although servers will typically ignore unsupported features,
we should advertise the support for directory leases (as
Windows e.g. does) in the negotiate protocol capabilities we
pass to the server, and should check for the server capability
(CAP_DIRECTORY_LEASING) before sending a lease request for an
open of a directory.  This will prevent us from accidentally
sending directory leases to SMB2.1 or SMB2 server for example.

Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
---
 fs/cifs/smb2ops.c | 10 +++++-----
 fs/cifs/smb2pdu.c |  3 +++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index b6fada2445279..d954ce36b4734 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3654,7 +3654,7 @@ struct smb_version_values smb21_values = {
 struct smb_version_values smb3any_values = {
 	.version_string = SMB3ANY_VERSION_STRING,
 	.protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
-	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING,
 	.large_lock_type = 0,
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -3675,7 +3675,7 @@ struct smb_version_values smb3any_values = {
 struct smb_version_values smbdefault_values = {
 	.version_string = SMBDEFAULT_VERSION_STRING,
 	.protocol_id = SMB302_PROT_ID, /* doesn't matter, send protocol array */
-	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING,
 	.large_lock_type = 0,
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -3696,7 +3696,7 @@ struct smb_version_values smbdefault_values = {
 struct smb_version_values smb30_values = {
 	.version_string = SMB30_VERSION_STRING,
 	.protocol_id = SMB30_PROT_ID,
-	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING,
 	.large_lock_type = 0,
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -3717,7 +3717,7 @@ struct smb_version_values smb30_values = {
 struct smb_version_values smb302_values = {
 	.version_string = SMB302_VERSION_STRING,
 	.protocol_id = SMB302_PROT_ID,
-	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING,
 	.large_lock_type = 0,
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
@@ -3738,7 +3738,7 @@ struct smb_version_values smb302_values = {
 struct smb_version_values smb311_values = {
 	.version_string = SMB311_VERSION_STRING,
 	.protocol_id = SMB311_PROT_ID,
-	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION,
+	.req_capabilities = SMB2_GLOBAL_CAP_DFS | SMB2_GLOBAL_CAP_LEASING | SMB2_GLOBAL_CAP_LARGE_MTU | SMB2_GLOBAL_CAP_PERSISTENT_HANDLES | SMB2_GLOBAL_CAP_ENCRYPTION | SMB2_GLOBAL_CAP_DIRECTORY_LEASING,
 	.large_lock_type = 0,
 	.exclusive_lock_type = SMB2_LOCKFLAG_EXCLUSIVE_LOCK,
 	.shared_lock_type = SMB2_LOCKFLAG_SHARED_LOCK,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5740aa809be66..c08acfc77abcf 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2178,6 +2178,9 @@ SMB2_open_init(struct cifs_tcon *tcon, struct smb_rqst *rqst, __u8 *oplock,
 	if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
 	    *oplock == SMB2_OPLOCK_LEVEL_NONE)
 		req->RequestedOplockLevel = *oplock;
+	else if (!(server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING) &&
+		  (oparms->create_options & CREATE_NOT_FILE))
+		req->RequestedOplockLevel = *oplock; /* no srv lease support */
 	else {
 		rc = add_lease_context(server, iov, &n_iov,
 				       oparms->fid->lease_key, oplock);

From 395a2076b4064f97d3fce03af15210ff2a7bb7f9 Mon Sep 17 00:00:00 2001
From: Thomas Werschlein <thomas.werschlein@geo.uzh.ch>
Date: Thu, 30 Aug 2018 18:29:20 +0200
Subject: [PATCH 150/257] cifs: connect to servername instead of IP for IPC$
 share
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch is required allows access to a Microsoft fileserver failover
cluster behind a 1:1 NAT firewall.

The change also provides stronger context for authentication and share
connection (see MS-SMB2 3.3.5.7 and MS-SRVS 3.1.6.8) as noted by
Tom Talpey, and addresses comments about the buffer size for the UNC
made by Aurélien Aptel.

Signed-off-by: Thomas Werschlein <thomas.werschlein@geo.uzh.ch>
Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Tom Talpey <ttalpey@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
CC: Stable <stable@vger.kernel.org>
---
 fs/cifs/connect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index c832a8a1970aa..7aa08dba4719c 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2547,7 +2547,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
 	if (tcon == NULL)
 		return -ENOMEM;
 
-	snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->serverName);
+	snprintf(unc, sizeof(unc), "\\\\%s\\IPC$", ses->server->hostname);
 
 	/* cannot fail */
 	nls_codepage = load_nls_default();

From 54ff01fd0d44b9681615f77c15fe9ea6dfadb501 Mon Sep 17 00:00:00 2001
From: Zhenyu Wang <zhenyuw@linux.intel.com>
Date: Thu, 30 Aug 2018 11:33:43 +0800
Subject: [PATCH 151/257] drm/i915/gvt: Give new born vGPU higher scheduling
 chance

This trys to give new born vGPU with higher scheduling chance
not only with adding to sched list head and also have higher
priority for workload sched for 2 seconds after starting to
schedule it. In order for fast GPU execution during VM boot,
and ensure guest driver setup with required state given in time.

This fixes recent failure seen on one VM with multiple linux VMs
running on kernel with commit 2621cefaa42b3("drm/i915: Provide a timeout to i915_gem_wait_for_idle() on setup"),
which had shorter setup timeout that caused context state init failed.

v2: change to 2s for higher scheduling period

Cc: Yuan Hang <hang.yuan@intel.com>
Reviewed-by: Hang Yuan <hang.yuan@intel.com>
Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
---
 drivers/gpu/drm/i915/gvt/sched_policy.c | 34 ++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 985fe81794ddc..c32e7d5e86291 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -47,11 +47,15 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
 	return false;
 }
 
+/* We give 2 seconds higher prio for vGPU during start */
+#define GVT_SCHED_VGPU_PRI_TIME  2
+
 struct vgpu_sched_data {
 	struct list_head lru_list;
 	struct intel_vgpu *vgpu;
 	bool active;
-
+	bool pri_sched;
+	ktime_t pri_time;
 	ktime_t sched_in_time;
 	ktime_t sched_time;
 	ktime_t left_ts;
@@ -183,6 +187,14 @@ static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
 		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
 			continue;
 
+		if (vgpu_data->pri_sched) {
+			if (ktime_before(ktime_get(), vgpu_data->pri_time)) {
+				vgpu = vgpu_data->vgpu;
+				break;
+			} else
+				vgpu_data->pri_sched = false;
+		}
+
 		/* Return the vGPU only if it has time slice left */
 		if (vgpu_data->left_ts > 0) {
 			vgpu = vgpu_data->vgpu;
@@ -202,6 +214,7 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	struct vgpu_sched_data *vgpu_data;
 	struct intel_vgpu *vgpu = NULL;
+
 	/* no active vgpu or has already had a target */
 	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
 		goto out;
@@ -209,12 +222,13 @@ static void tbs_sched_func(struct gvt_sched_data *sched_data)
 	vgpu = find_busy_vgpu(sched_data);
 	if (vgpu) {
 		scheduler->next_vgpu = vgpu;
-
-		/* Move the last used vGPU to the tail of lru_list */
 		vgpu_data = vgpu->sched_data;
-		list_del_init(&vgpu_data->lru_list);
-		list_add_tail(&vgpu_data->lru_list,
-				&sched_data->lru_runq_head);
+		if (!vgpu_data->pri_sched) {
+			/* Move the last used vGPU to the tail of lru_list */
+			list_del_init(&vgpu_data->lru_list);
+			list_add_tail(&vgpu_data->lru_list,
+				      &sched_data->lru_runq_head);
+		}
 	} else {
 		scheduler->next_vgpu = gvt->idle_vgpu;
 	}
@@ -328,11 +342,17 @@ static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
 {
 	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
 	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
+	ktime_t now;
 
 	if (!list_empty(&vgpu_data->lru_list))
 		return;
 
-	list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head);
+	now = ktime_get();
+	vgpu_data->pri_time = ktime_add(now,
+					ktime_set(GVT_SCHED_VGPU_PRI_TIME, 0));
+	vgpu_data->pri_sched = true;
+
+	list_add(&vgpu_data->lru_list, &sched_data->lru_runq_head);
 
 	if (!hrtimer_active(&sched_data->timer))
 		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),

From 66eb02d839e8495ae6b612e2d09ff599374b80e2 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Date: Fri, 31 Aug 2018 01:04:13 +0200
Subject: [PATCH 152/257] mac80211: fix an off-by-one issue in A-MSDU
 max_subframe computation

Initialize 'n' to 2 in order to take into account also the first
packet in the estimation of max_subframe limit for a given A-MSDU
since frag_tail pointer is NULL when ieee80211_amsdu_aggregate
routine analyzes the second frame.

Fixes: 6e0456b54545 ("mac80211: add A-MSDU tx support")
Signed-off-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 6ca0865de9451..9b3b069e418ab 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3174,7 +3174,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata,
 	void *data;
 	bool ret = false;
 	unsigned int orig_len;
-	int n = 1, nfrags, pad = 0;
+	int n = 2, nfrags, pad = 0;
 	u16 hdrlen;
 
 	if (!ieee80211_hw_check(&local->hw, TX_AMSDU))

From 8442938c3a2177ba16043b3a935f2c78266ad399 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 31 Aug 2018 11:10:55 +0300
Subject: [PATCH 153/257] cfg80211: fix a type issue in
 ieee80211_chandef_to_operating_class()

The "chandef->center_freq1" variable is a u32 but "freq" is a u16 so we
are truncating away the high bits.  I noticed this bug because in commit
9cf0a0b4b64a ("cfg80211: Add support for 60GHz band channels 5 and 6")
we made "freq <= 56160 + 2160 * 6" a valid requency when before it was
only "freq <= 56160 + 2160 * 4" that was valid.  It introduces a static
checker warning:

    net/wireless/util.c:1571 ieee80211_chandef_to_operating_class()
    warn: always true condition '(freq <= 56160 + 2160 * 6) => (0-u16max <= 69120)'

But really we probably shouldn't have been truncating the high bits
away to begin with.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 3c654cd7ba562..908bf5b6d89e3 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -1374,7 +1374,7 @@ bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
 					  u8 *op_class)
 {
 	u8 vht_opclass;
-	u16 freq = chandef->center_freq1;
+	u32 freq = chandef->center_freq1;
 
 	if (freq >= 2412 && freq <= 2472) {
 		if (chandef->width > NL80211_CHAN_WIDTH_40)

From abd76d255d69d70206c01b9cb19ba36a9c1df6a1 Mon Sep 17 00:00:00 2001
From: "Dreyfuss, Haim" <haim.dreyfuss@intel.com>
Date: Fri, 31 Aug 2018 11:31:04 +0300
Subject: [PATCH 154/257] mac80211: fix WMM TXOP calculation

In commit 9236c4523e5b ("mac80211: limit wmm params to comply
with ETSI requirements"), we have limited the WMM parameters to
comply with 802.11 and ETSI standard.  Mistakenly the TXOP value
was caluclated wrong.  Fix it by taking the minimum between
802.11 to ETSI to make sure we are not violating both.

Fixes: e552af058148 ("mac80211: limit wmm params to comply with ETSI requirements")
Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/util.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c80187d6e6bb4..93b5bb849ad71 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1151,8 +1151,7 @@ void ieee80211_regulatory_limit_wmm_params(struct ieee80211_sub_if_data *sdata,
 	qparam->cw_min = max_t(u16, qparam->cw_min, wmm_ac->cw_min);
 	qparam->cw_max = max_t(u16, qparam->cw_max, wmm_ac->cw_max);
 	qparam->aifs = max_t(u8, qparam->aifs, wmm_ac->aifsn);
-	qparam->txop = !qparam->txop ? wmm_ac->cot / 32 :
-		min_t(u16, qparam->txop, wmm_ac->cot / 32);
+	qparam->txop = min_t(u16, qparam->txop, wmm_ac->cot / 32);
 	rcu_read_unlock();
 }
 

From f3ffb6c3a28963657eb8b02a795d75f2ebbd5ef4 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 31 Aug 2018 11:31:06 +0300
Subject: [PATCH 155/257] mac80211: fix a race between restart and CSA flows

We hit a problem with iwlwifi that was caused by a bug in
mac80211. A bug in iwlwifi caused the firwmare to crash in
certain cases in channel switch. Because of that bug,
drv_pre_channel_switch would fail and trigger the restart
flow.
Now we had the hw restart worker which runs on the system's
workqueue and the csa_connection_drop_work worker that runs
on mac80211's workqueue that can run together. This is
obviously problematic since the restart work wants to
reconfigure the connection, while the csa_connection_drop_work
worker does the exact opposite: it tries to disconnect.

Fix this by cancelling the csa_connection_drop_work worker
in the restart worker.

Note that this can sound racy: we could have:

driver   iface_work   CSA_work   restart_work
+++++++++++++++++++++++++++++++++++++++++++++
              |
 <--drv_cs ---|
<FW CRASH!>
-CS FAILED-->
              |                       |
              |                 cancel_work(CSA)
           schedule                   |
           CSA work                   |
                         |            |
                        Race between those 2

But this is not possible because we flush the workqueue
in the restart worker before we cancel the CSA worker.
That would be bullet proof if we could guarantee that
we schedule the CSA worker only from the iface_work
which runs on the workqueue (and not on the system's
workqueue), but unfortunately we do have an instance
in which we schedule the CSA work outside the context
of the workqueue (ieee80211_chswitch_done).

Note also that we should probably cancel other workers
like beacon_connection_loss_work and possibly others
for different types of interfaces, at the very least,
IBSS should suffer from the exact same problem, but for
now, do the minimum to fix the actual bug that was actually
experienced and reproduced.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/main.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 27cd64acaf006..66cbddd65b47f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -255,8 +255,27 @@ static void ieee80211_restart_work(struct work_struct *work)
 
 	flush_work(&local->radar_detected_work);
 	rtnl_lock();
-	list_for_each_entry(sdata, &local->interfaces, list)
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		/*
+		 * XXX: there may be more work for other vif types and even
+		 * for station mode: a good thing would be to run most of
+		 * the iface type's dependent _stop (ieee80211_mg_stop,
+		 * ieee80211_ibss_stop) etc...
+		 * For now, fix only the specific bug that was seen: race
+		 * between csa_connection_drop_work and us.
+		 */
+		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
+			/*
+			 * This worker is scheduled from the iface worker that
+			 * runs on mac80211's workqueue, so we can't be
+			 * scheduling this worker after the cancel right here.
+			 * The exception is ieee80211_chswitch_done.
+			 * Then we can have a race...
+			 */
+			cancel_work_sync(&sdata->u.mgd.csa_connection_drop_work);
+		}
 		flush_delayed_work(&sdata->dec_tailroom_needed_wk);
+	}
 	ieee80211_scan_cancel(local);
 
 	/* make sure any new ROC will consider local->in_reconfig */

From 0007e94355fdb71a1cf5dba0754155cba08f0666 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Fri, 31 Aug 2018 11:31:10 +0300
Subject: [PATCH 156/257] mac80211: Fix station bandwidth setting after channel
 switch

When performing a channel switch flow for a managed interface, the
flow did not update the bandwidth of the AP station and the rate
scale algorithm. In case of a channel width downgrade, this would
result with the rate scale algorithm using a bandwidth that does not
match the interface channel configuration.

Fix this by updating the AP station bandwidth and rate scaling algorithm
before the actual channel change in case of a bandwidth downgrade, or
after the actual channel change in case of a bandwidth upgrade.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 53 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a59187c016e08..22b6994601765 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -978,6 +978,10 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	 */
 
 	if (sdata->reserved_chanctx) {
+		struct ieee80211_supported_band *sband = NULL;
+		struct sta_info *mgd_sta = NULL;
+		enum ieee80211_sta_rx_bandwidth bw = IEEE80211_STA_RX_BW_20;
+
 		/*
 		 * with multi-vif csa driver may call ieee80211_csa_finish()
 		 * many times while waiting for other interfaces to use their
@@ -986,6 +990,48 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 		if (sdata->reserved_ready)
 			goto out;
 
+		if (sdata->vif.bss_conf.chandef.width !=
+		    sdata->csa_chandef.width) {
+			/*
+			 * For managed interface, we need to also update the AP
+			 * station bandwidth and align the rate scale algorithm
+			 * on the bandwidth change. Here we only consider the
+			 * bandwidth of the new channel definition (as channel
+			 * switch flow does not have the full HT/VHT/HE
+			 * information), assuming that if additional changes are
+			 * required they would be done as part of the processing
+			 * of the next beacon from the AP.
+			 */
+			switch (sdata->csa_chandef.width) {
+			case NL80211_CHAN_WIDTH_20_NOHT:
+			case NL80211_CHAN_WIDTH_20:
+			default:
+				bw = IEEE80211_STA_RX_BW_20;
+				break;
+			case NL80211_CHAN_WIDTH_40:
+				bw = IEEE80211_STA_RX_BW_40;
+				break;
+			case NL80211_CHAN_WIDTH_80:
+				bw = IEEE80211_STA_RX_BW_80;
+				break;
+			case NL80211_CHAN_WIDTH_80P80:
+			case NL80211_CHAN_WIDTH_160:
+				bw = IEEE80211_STA_RX_BW_160;
+				break;
+			}
+
+			mgd_sta = sta_info_get(sdata, ifmgd->bssid);
+			sband =
+				local->hw.wiphy->bands[sdata->csa_chandef.chan->band];
+		}
+
+		if (sdata->vif.bss_conf.chandef.width >
+		    sdata->csa_chandef.width) {
+			mgd_sta->sta.bandwidth = bw;
+			rate_control_rate_update(local, sband, mgd_sta,
+						 IEEE80211_RC_BW_CHANGED);
+		}
+
 		ret = ieee80211_vif_use_reserved_context(sdata);
 		if (ret) {
 			sdata_info(sdata,
@@ -996,6 +1042,13 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 			goto out;
 		}
 
+		if (sdata->vif.bss_conf.chandef.width <
+		    sdata->csa_chandef.width) {
+			mgd_sta->sta.bandwidth = bw;
+			rate_control_rate_update(local, sband, mgd_sta,
+						 IEEE80211_RC_BW_CHANGED);
+		}
+
 		goto out;
 	}
 

From 6c18b27d6e5c6a7206364eae2b47bc8d8b2fa68f Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 31 Aug 2018 11:31:12 +0300
Subject: [PATCH 157/257] mac80211: don't Tx a deauth frame if the AP forbade
 Tx

If the driver fails to properly prepare for the channel
switch, mac80211 will disconnect. If the CSA IE had mode
set to 1, it means that the clients are not allowed to send
any Tx on the current channel, and that includes the
deauthentication frame.

Make sure that we don't send the deauthentication frame in
this case.

In iwlwifi, this caused a failure to flush queues since the
firmware already closed the queues after having parsed the
CSA IE. Then mac80211 would wait until the deauthentication
frame would go out (drv_flush(drop=false)) and that would
never happen.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 22b6994601765..b046bf95eb3c6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1270,6 +1270,16 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 					 cbss->beacon_interval));
 	return;
  drop_connection:
+	/*
+	 * This is just so that the disconnect flow will know that
+	 * we were trying to switch channel and failed. In case the
+	 * mode is 1 (we are not allowed to Tx), we will know not to
+	 * send a deauthentication frame. Those two fields will be
+	 * reset when the disconnection worker runs.
+	 */
+	sdata->vif.csa_active = true;
+	sdata->csa_block_tx = csa_ie.mode;
+
 	ieee80211_queue_work(&local->hw, &ifmgd->csa_connection_drop_work);
 	mutex_unlock(&local->chanctx_mtx);
 	mutex_unlock(&local->mtx);
@@ -2453,6 +2463,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN];
+	bool tx;
 
 	sdata_lock(sdata);
 	if (!ifmgd->associated) {
@@ -2460,6 +2471,8 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 		return;
 	}
 
+	tx = !sdata->csa_block_tx;
+
 	/* AP is probably out of range (or not reachable for another reason) so
 	 * remove the bss struct for that AP.
 	 */
@@ -2467,7 +2480,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 
 	ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
 			       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-			       true, frame_buf);
+			       tx, frame_buf);
 	mutex_lock(&local->mtx);
 	sdata->vif.csa_active = false;
 	ifmgd->csa_waiting_bcn = false;
@@ -2478,7 +2491,7 @@ static void __ieee80211_disconnect(struct ieee80211_sub_if_data *sdata)
 	}
 	mutex_unlock(&local->mtx);
 
-	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), true,
+	ieee80211_report_disconnect(sdata, frame_buf, sizeof(frame_buf), tx,
 				    WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY);
 
 	sdata_unlock(sdata);

From c6e57b3896fc76299913b8cfd82d853bee8a2c84 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 31 Aug 2018 11:31:13 +0300
Subject: [PATCH 158/257] mac80211: shorten the IBSS debug messages

When tracing is enabled, all the debug messages are recorded and must
not exceed MAX_MSG_LEN (100) columns. Longer debug messages grant the
user with:

WARNING: CPU: 3 PID: 32642 at /tmp/wifi-core-20180806094828/src/iwlwifi-stack-dev/net/mac80211/./trace_msg.h:32 trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211]
Workqueue: phy1 ieee80211_iface_work [mac80211]
 RIP: 0010:trace_event_raw_event_mac80211_msg_event+0xab/0xc0 [mac80211]
 Call Trace:
  __sdata_dbg+0xbd/0x120 [mac80211]
  ieee80211_ibss_rx_queued_mgmt+0x15f/0x510 [mac80211]
  ieee80211_iface_work+0x21d/0x320 [mac80211]

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ibss.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 6449a1c2283bf..f0f5fedb8caac 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -947,8 +947,8 @@ static void ieee80211_rx_mgmt_deauth_ibss(struct ieee80211_sub_if_data *sdata,
 	if (len < IEEE80211_DEAUTH_FRAME_LEN)
 		return;
 
-	ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM BSSID=%pM (reason: %d)\n",
-		 mgmt->sa, mgmt->da, mgmt->bssid, reason);
+	ibss_dbg(sdata, "RX DeAuth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+	ibss_dbg(sdata, "\tBSSID=%pM (reason: %d)\n", mgmt->bssid, reason);
 	sta_info_destroy_addr(sdata, mgmt->sa);
 }
 
@@ -966,9 +966,9 @@ static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata,
 	auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg);
 	auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction);
 
-	ibss_dbg(sdata,
-		 "RX Auth SA=%pM DA=%pM BSSID=%pM (auth_transaction=%d)\n",
-		 mgmt->sa, mgmt->da, mgmt->bssid, auth_transaction);
+	ibss_dbg(sdata, "RX Auth SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+	ibss_dbg(sdata, "\tBSSID=%pM (auth_transaction=%d)\n",
+		 mgmt->bssid, auth_transaction);
 
 	if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1)
 		return;
@@ -1175,10 +1175,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		rx_timestamp = drv_get_tsf(local, sdata);
 	}
 
-	ibss_dbg(sdata,
-		 "RX beacon SA=%pM BSSID=%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n",
+	ibss_dbg(sdata, "RX beacon SA=%pM BSSID=%pM TSF=0x%llx\n",
 		 mgmt->sa, mgmt->bssid,
-		 (unsigned long long)rx_timestamp,
+		 (unsigned long long)rx_timestamp);
+	ibss_dbg(sdata, "\tBCN=0x%llx diff=%lld @%lu\n",
 		 (unsigned long long)beacon_timestamp,
 		 (unsigned long long)(rx_timestamp - beacon_timestamp),
 		 jiffies);
@@ -1537,9 +1537,9 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata,
 
 	tx_last_beacon = drv_tx_last_beacon(local);
 
-	ibss_dbg(sdata,
-		 "RX ProbeReq SA=%pM DA=%pM BSSID=%pM (tx_last_beacon=%d)\n",
-		 mgmt->sa, mgmt->da, mgmt->bssid, tx_last_beacon);
+	ibss_dbg(sdata, "RX ProbeReq SA=%pM DA=%pM\n", mgmt->sa, mgmt->da);
+	ibss_dbg(sdata, "\tBSSID=%pM (tx_last_beacon=%d)\n",
+		 mgmt->bssid, tx_last_beacon);
 
 	if (!tx_last_beacon && is_multicast_ether_addr(mgmt->da))
 		return;

From 4331f4d5ada5684fc77fa16e3f6177f077c9e6ec Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sun, 2 Sep 2018 19:30:53 -0700
Subject: [PATCH 159/257] x86: Fix kernel-doc atomic.h warnings

Fix kernel-doc warnings in arch/x86/include/asm/atomic.h that are caused by
having a #define macro between the kernel-doc notation and the function
name.  Fixed by moving the #define macro to after the function
implementation.

Make the same change for atomic64_{32,64}.h for consistency even though
there were no kernel-doc warnings found in these header files, but there
would be if they were used in generation of documentation.

Fixes these kernel-doc warnings:

../arch/x86/include/asm/atomic.h:84: warning: Excess function parameter 'i' description in 'arch_atomic_sub_and_test'
../arch/x86/include/asm/atomic.h:84: warning: Excess function parameter 'v' description in 'arch_atomic_sub_and_test'
../arch/x86/include/asm/atomic.h:96: warning: Excess function parameter 'v' description in 'arch_atomic_inc'
../arch/x86/include/asm/atomic.h:109: warning: Excess function parameter 'v' description in 'arch_atomic_dec'
../arch/x86/include/asm/atomic.h:124: warning: Excess function parameter 'v' description in 'arch_atomic_dec_and_test'
../arch/x86/include/asm/atomic.h:138: warning: Excess function parameter 'v' description in 'arch_atomic_inc_and_test'
../arch/x86/include/asm/atomic.h:153: warning: Excess function parameter 'i' description in 'arch_atomic_add_negative'
../arch/x86/include/asm/atomic.h:153: warning: Excess function parameter 'v' description in 'arch_atomic_add_negative'

Fixes: 18cc1814d4e7 ("atomics/treewide: Make test ops optional")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mark Rutland <mark.rutland@arm.com>
Link: https://lkml.kernel.org/r/0a1e678d-c8c5-b32c-2640-ed4e94d399d2@infradead.org
---
 arch/x86/include/asm/atomic.h      | 12 ++++++------
 arch/x86/include/asm/atomic64_32.h |  8 ++++----
 arch/x86/include/asm/atomic64_64.h | 12 ++++++------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index b143717b92b34..ce84388e540c9 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -80,11 +80,11 @@ static __always_inline void arch_atomic_sub(int i, atomic_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-#define arch_atomic_sub_and_test arch_atomic_sub_and_test
 static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e);
 }
+#define arch_atomic_sub_and_test arch_atomic_sub_and_test
 
 /**
  * arch_atomic_inc - increment atomic variable
@@ -92,12 +92,12 @@ static __always_inline bool arch_atomic_sub_and_test(int i, atomic_t *v)
  *
  * Atomically increments @v by 1.
  */
-#define arch_atomic_inc arch_atomic_inc
 static __always_inline void arch_atomic_inc(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "incl %0"
 		     : "+m" (v->counter));
 }
+#define arch_atomic_inc arch_atomic_inc
 
 /**
  * arch_atomic_dec - decrement atomic variable
@@ -105,12 +105,12 @@ static __always_inline void arch_atomic_inc(atomic_t *v)
  *
  * Atomically decrements @v by 1.
  */
-#define arch_atomic_dec arch_atomic_dec
 static __always_inline void arch_atomic_dec(atomic_t *v)
 {
 	asm volatile(LOCK_PREFIX "decl %0"
 		     : "+m" (v->counter));
 }
+#define arch_atomic_dec arch_atomic_dec
 
 /**
  * arch_atomic_dec_and_test - decrement and test
@@ -120,11 +120,11 @@ static __always_inline void arch_atomic_dec(atomic_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-#define arch_atomic_dec_and_test arch_atomic_dec_and_test
 static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e);
 }
+#define arch_atomic_dec_and_test arch_atomic_dec_and_test
 
 /**
  * arch_atomic_inc_and_test - increment and test
@@ -134,11 +134,11 @@ static __always_inline bool arch_atomic_dec_and_test(atomic_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-#define arch_atomic_inc_and_test arch_atomic_inc_and_test
 static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e);
 }
+#define arch_atomic_inc_and_test arch_atomic_inc_and_test
 
 /**
  * arch_atomic_add_negative - add and test if negative
@@ -149,11 +149,11 @@ static __always_inline bool arch_atomic_inc_and_test(atomic_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-#define arch_atomic_add_negative arch_atomic_add_negative
 static __always_inline bool arch_atomic_add_negative(int i, atomic_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s);
 }
+#define arch_atomic_add_negative arch_atomic_add_negative
 
 /**
  * arch_atomic_add_return - add integer and return
diff --git a/arch/x86/include/asm/atomic64_32.h b/arch/x86/include/asm/atomic64_32.h
index ef959f02d0702..6a5b0ec460da8 100644
--- a/arch/x86/include/asm/atomic64_32.h
+++ b/arch/x86/include/asm/atomic64_32.h
@@ -205,12 +205,12 @@ static inline long long arch_atomic64_sub(long long i, atomic64_t *v)
  *
  * Atomically increments @v by 1.
  */
-#define arch_atomic64_inc arch_atomic64_inc
 static inline void arch_atomic64_inc(atomic64_t *v)
 {
 	__alternative_atomic64(inc, inc_return, /* no output */,
 			       "S" (v) : "memory", "eax", "ecx", "edx");
 }
+#define arch_atomic64_inc arch_atomic64_inc
 
 /**
  * arch_atomic64_dec - decrement atomic64 variable
@@ -218,12 +218,12 @@ static inline void arch_atomic64_inc(atomic64_t *v)
  *
  * Atomically decrements @v by 1.
  */
-#define arch_atomic64_dec arch_atomic64_dec
 static inline void arch_atomic64_dec(atomic64_t *v)
 {
 	__alternative_atomic64(dec, dec_return, /* no output */,
 			       "S" (v) : "memory", "eax", "ecx", "edx");
 }
+#define arch_atomic64_dec arch_atomic64_dec
 
 /**
  * arch_atomic64_add_unless - add unless the number is a given value
@@ -245,7 +245,6 @@ static inline int arch_atomic64_add_unless(atomic64_t *v, long long a,
 	return (int)a;
 }
 
-#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
 static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 {
 	int r;
@@ -253,8 +252,8 @@ static inline int arch_atomic64_inc_not_zero(atomic64_t *v)
 			     "S" (v) : "ecx", "edx", "memory");
 	return r;
 }
+#define arch_atomic64_inc_not_zero arch_atomic64_inc_not_zero
 
-#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
 {
 	long long r;
@@ -262,6 +261,7 @@ static inline long long arch_atomic64_dec_if_positive(atomic64_t *v)
 			     "S" (v) : "ecx", "memory");
 	return r;
 }
+#define arch_atomic64_dec_if_positive arch_atomic64_dec_if_positive
 
 #undef alternative_atomic64
 #undef __alternative_atomic64
diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h
index 4343d9b4f30e3..5f851d92eecd9 100644
--- a/arch/x86/include/asm/atomic64_64.h
+++ b/arch/x86/include/asm/atomic64_64.h
@@ -71,11 +71,11 @@ static inline void arch_atomic64_sub(long i, atomic64_t *v)
  * true if the result is zero, or false for all
  * other cases.
  */
-#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
 static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e);
 }
+#define arch_atomic64_sub_and_test arch_atomic64_sub_and_test
 
 /**
  * arch_atomic64_inc - increment atomic64 variable
@@ -83,13 +83,13 @@ static inline bool arch_atomic64_sub_and_test(long i, atomic64_t *v)
  *
  * Atomically increments @v by 1.
  */
-#define arch_atomic64_inc arch_atomic64_inc
 static __always_inline void arch_atomic64_inc(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "incq %0"
 		     : "=m" (v->counter)
 		     : "m" (v->counter));
 }
+#define arch_atomic64_inc arch_atomic64_inc
 
 /**
  * arch_atomic64_dec - decrement atomic64 variable
@@ -97,13 +97,13 @@ static __always_inline void arch_atomic64_inc(atomic64_t *v)
  *
  * Atomically decrements @v by 1.
  */
-#define arch_atomic64_dec arch_atomic64_dec
 static __always_inline void arch_atomic64_dec(atomic64_t *v)
 {
 	asm volatile(LOCK_PREFIX "decq %0"
 		     : "=m" (v->counter)
 		     : "m" (v->counter));
 }
+#define arch_atomic64_dec arch_atomic64_dec
 
 /**
  * arch_atomic64_dec_and_test - decrement and test
@@ -113,11 +113,11 @@ static __always_inline void arch_atomic64_dec(atomic64_t *v)
  * returns true if the result is 0, or false for all other
  * cases.
  */
-#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
 static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e);
 }
+#define arch_atomic64_dec_and_test arch_atomic64_dec_and_test
 
 /**
  * arch_atomic64_inc_and_test - increment and test
@@ -127,11 +127,11 @@ static inline bool arch_atomic64_dec_and_test(atomic64_t *v)
  * and returns true if the result is zero, or false for all
  * other cases.
  */
-#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
 static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
 {
 	GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e);
 }
+#define arch_atomic64_inc_and_test arch_atomic64_inc_and_test
 
 /**
  * arch_atomic64_add_negative - add and test if negative
@@ -142,11 +142,11 @@ static inline bool arch_atomic64_inc_and_test(atomic64_t *v)
  * if the result is negative, or false when
  * result is greater than or equal to zero.
  */
-#define arch_atomic64_add_negative arch_atomic64_add_negative
 static inline bool arch_atomic64_add_negative(long i, atomic64_t *v)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s);
 }
+#define arch_atomic64_add_negative arch_atomic64_add_negative
 
 /**
  * arch_atomic64_add_return - add and return

From c43c5e9f524ec914e7e202f9c5ab91779629ccc6 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 3 Sep 2018 10:15:33 +0200
Subject: [PATCH 160/257] timekeeping: Fix declaration of
 read_persistent_wall_and_boot_offset()

It is read_persistent_wall_and_boot_offset() and not
read_persistent_clock_and_boot_offset()

Fixes: 3eca993740b8eb40f51 ("timekeeping: Replace read_boot_clock64() with read_persistent_wall_and_boot_offset()")
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Pavel Tatashin <pasha.tatashin@oracle.com>
Link: https://lkml.kernel.org/r/20180903081533.34366-1-borntraeger@de.ibm.com
---
 include/linux/timekeeping.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 5d738804e3d62..a5a3cfc3c2fa7 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -258,8 +258,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot);
 extern int persistent_clock_is_local;
 
 extern void read_persistent_clock64(struct timespec64 *ts);
-void read_persistent_clock_and_boot_offset(struct timespec64 *wall_clock,
-					   struct timespec64 *boot_offset);
+void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock,
+					  struct timespec64 *boot_offset);
 extern int update_persistent_clock64(struct timespec64 now);
 
 /*

From 9bdda4e9cf2dcecb60a0683b10ffb8cd7e5f2f45 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Sat, 1 Sep 2018 09:40:01 +0300
Subject: [PATCH 161/257] fsnotify: fix ignore mask logic in fsnotify()

Commit 92183a42898d ("fsnotify: fix ignore mask logic in
send_to_group()") acknoledges the use case of ignoring an event on
an inode mark, because of an ignore mask on a mount mark of the same
group (i.e. I want to get all events on this file, except for the events
that came from that mount).

This change depends on correctly merging the inode marks and mount marks
group lists, so that the mount mark ignore mask would be tested in
send_to_group(). Alas, the merging of the lists did not take into
account the case where event in question is not in the mask of any of
the mount marks.

To fix this, completely remove the tests for inode and mount event masks
from the lists merging code.

Fixes: 92183a42898d ("fsnotify: fix ignore mask logic in send_to_group")
Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/notify/fsnotify.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index f174397b63a04..ababdbfab537e 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -351,16 +351,9 @@ int fsnotify(struct inode *to_tell, __u32 mask, const void *data, int data_is,
 
 	iter_info.srcu_idx = srcu_read_lock(&fsnotify_mark_srcu);
 
-	if ((mask & FS_MODIFY) ||
-	    (test_mask & to_tell->i_fsnotify_mask)) {
-		iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
-			fsnotify_first_mark(&to_tell->i_fsnotify_marks);
-	}
-
-	if (mnt && ((mask & FS_MODIFY) ||
-		    (test_mask & mnt->mnt_fsnotify_mask))) {
-		iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
-			fsnotify_first_mark(&to_tell->i_fsnotify_marks);
+	iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
+		fsnotify_first_mark(&to_tell->i_fsnotify_marks);
+	if (mnt) {
 		iter_info.marks[FSNOTIFY_OBJ_TYPE_VFSMOUNT] =
 			fsnotify_first_mark(&mnt->mnt_fsnotify_marks);
 	}

From 5a7b44a8df822e0667fc76ed7130252523993bda Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Mon, 3 Sep 2018 15:16:43 +0200
Subject: [PATCH 162/257] ALSA: rawmidi: Initialize allocated buffers

syzbot reported the uninitialized value exposure in certain situations
using virmidi loop.  It's likely a very small race at writing and
reading, and the influence is almost negligible.  But it's safer to
paper over this just by replacing the existing kvmalloc() with
kvzalloc().

Reported-by: syzbot+194dffdb8b22fc5d207a@syzkaller.appspotmail.com
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/core/rawmidi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c
index 69517e18ef07f..08d5662039e38 100644
--- a/sound/core/rawmidi.c
+++ b/sound/core/rawmidi.c
@@ -129,7 +129,7 @@ static int snd_rawmidi_runtime_create(struct snd_rawmidi_substream *substream)
 		runtime->avail = 0;
 	else
 		runtime->avail = runtime->buffer_size;
-	runtime->buffer = kvmalloc(runtime->buffer_size, GFP_KERNEL);
+	runtime->buffer = kvzalloc(runtime->buffer_size, GFP_KERNEL);
 	if (!runtime->buffer) {
 		kfree(runtime);
 		return -ENOMEM;
@@ -655,7 +655,7 @@ static int resize_runtime_buffer(struct snd_rawmidi_runtime *runtime,
 	if (params->avail_min < 1 || params->avail_min > params->buffer_size)
 		return -EINVAL;
 	if (params->buffer_size != runtime->buffer_size) {
-		newbuf = kvmalloc(params->buffer_size, GFP_KERNEL);
+		newbuf = kvzalloc(params->buffer_size, GFP_KERNEL);
 		if (!newbuf)
 			return -ENOMEM;
 		spin_lock_irq(&runtime->lock);

From edf4e7b7b9104b58fddfcd073bd7dcc1585d5326 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 1 Sep 2018 01:57:52 -0700
Subject: [PATCH 163/257] apparmor: fix bad debug check in
 apparmor_secid_to_secctx()

apparmor_secid_to_secctx() has a bad debug statement tripping on a
condition handle by the code.  When kconfig SECURITY_APPARMOR_DEBUG is
enabled the debug WARN_ON will trip when **secdata is NULL resulting
in the following trace.

------------[ cut here ]------------
AppArmor WARN apparmor_secid_to_secctx: ((!secdata)):
WARNING: CPU: 0 PID: 14826 at security/apparmor/secid.c:82 apparmor_secid_to_secctx+0x2b5/0x2f0 security/apparmor/secid.c:82
Kernel panic - not syncing: panic_on_warn set ...

CPU: 0 PID: 14826 Comm: syz-executor1 Not tainted 4.19.0-rc1+ #193
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x1c9/0x2b4 lib/dump_stack.c:113
 panic+0x238/0x4e7 kernel/panic.c:184
 __warn.cold.8+0x163/0x1ba kernel/panic.c:536
 report_bug+0x252/0x2d0 lib/bug.c:186
 fixup_bug arch/x86/kernel/traps.c:178 [inline]
 do_error_trap+0x1fc/0x4d0 arch/x86/kernel/traps.c:296
 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:316
 invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:993
RIP: 0010:apparmor_secid_to_secctx+0x2b5/0x2f0 security/apparmor/secid.c:82
Code: c7 c7 40 66 58 87 e8 6a 6d 0f fe 0f 0b e9 6c fe ff ff e8 3e aa 44 fe 48 c7 c6 80 67 58 87 48 c7 c7 a0 65 58 87 e8 4b 6d 0f fe <0f> 0b e9 3f fe ff ff 48 89 df e8 fc a7 83 fe e9 ed fe ff ff bb f4
RSP: 0018:ffff8801ba1bed10 EFLAGS: 00010286
RAX: 0000000000000000 RBX: ffff8801ba1beed0 RCX: ffffc9000227e000
RDX: 0000000000018482 RSI: ffffffff8163ac01 RDI: 0000000000000001
RBP: ffff8801ba1bed30 R08: ffff8801b80ec080 R09: ffffed003b603eca
R10: ffffed003b603eca R11: ffff8801db01f657 R12: 0000000000000001
R13: 0000000000000000 R14: 0000000000000000 R15: ffff8801ba1beed0
 security_secid_to_secctx+0x63/0xc0 security/security.c:1314
 ctnetlink_secctx_size net/netfilter/nf_conntrack_netlink.c:621 [inline]
 ctnetlink_nlmsg_size net/netfilter/nf_conntrack_netlink.c:659 [inline]
 ctnetlink_conntrack_event+0x303/0x1470 net/netfilter/nf_conntrack_netlink.c:706
 nf_conntrack_eventmask_report+0x55f/0x930 net/netfilter/nf_conntrack_ecache.c:151
 nf_conntrack_event_report include/net/netfilter/nf_conntrack_ecache.h:112 [inline]
 nf_ct_delete+0x33c/0x5d0 net/netfilter/nf_conntrack_core.c:601
 nf_ct_iterate_cleanup+0x48c/0x5e0 net/netfilter/nf_conntrack_core.c:1892
 nf_ct_iterate_cleanup_net+0x23c/0x2d0 net/netfilter/nf_conntrack_core.c:1974
 ctnetlink_flush_conntrack net/netfilter/nf_conntrack_netlink.c:1226 [inline]
 ctnetlink_del_conntrack+0x66c/0x850 net/netfilter/nf_conntrack_netlink.c:1258
 nfnetlink_rcv_msg+0xd88/0x1070 net/netfilter/nfnetlink.c:228
 netlink_rcv_skb+0x172/0x440 net/netlink/af_netlink.c:2454
 nfnetlink_rcv+0x1c0/0x4d0 net/netfilter/nfnetlink.c:560
 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline]
 netlink_unicast+0x5a0/0x760 net/netlink/af_netlink.c:1343
 netlink_sendmsg+0xa18/0xfc0 net/netlink/af_netlink.c:1908
 sock_sendmsg_nosec net/socket.c:621 [inline]
 sock_sendmsg+0xd5/0x120 net/socket.c:631
 ___sys_sendmsg+0x7fd/0x930 net/socket.c:2114
 __sys_sendmsg+0x11d/0x290 net/socket.c:2152
 __do_sys_sendmsg net/socket.c:2161 [inline]
 __se_sys_sendmsg net/socket.c:2159 [inline]
 __x64_sys_sendmsg+0x78/0xb0 net/socket.c:2159
 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x457089
Code: fd b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b4 fb ff c3 66 2e 0f 1f 84 00 00 00 00
RSP: 002b:00007f7bc6e03c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007f7bc6e046d4 RCX: 0000000000457089
RDX: 0000000000000000 RSI: 0000000020d65000 RDI: 0000000000000003
RBP: 00000000009300a0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 00000000ffffffff
R13: 00000000004d4588 R14: 00000000004c8d5c R15: 0000000000000000
Dumping ftrace buffer:
   (ftrace buffer empty)
Kernel Offset: disabled
Rebooting in 86400 seconds..

CC: <stable@vger.kernel.org> #4.18
Fixes: c092921219d2 ("apparmor: add support for mapping secids and using secctxes")
Reported-by: syzbot+21016130b0580a9de3b5@syzkaller.appspotmail.com
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/secid.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/security/apparmor/secid.c b/security/apparmor/secid.c
index f2f22d00db188..4ccec1bcf6f54 100644
--- a/security/apparmor/secid.c
+++ b/security/apparmor/secid.c
@@ -79,7 +79,6 @@ int apparmor_secid_to_secctx(u32 secid, char **secdata, u32 *seclen)
 	struct aa_label *label = aa_secid_to_label(secid);
 	int len;
 
-	AA_BUG(!secdata);
 	AA_BUG(!seclen);
 
 	if (!label)

From f7c50fa636f72490baceb1664ba64973137266f2 Mon Sep 17 00:00:00 2001
From: Keyon Jie <yang.jie@linux.intel.com>
Date: Mon, 3 Sep 2018 10:47:09 +0800
Subject: [PATCH 164/257] ALSA: hda: Fix several mismatch for register mask and
 value

E.g. for snd_hdac_ext_link_clear_stream_id(), we should set (1 << stream)
as mask, and 0 as value, here correct it and several similar mismatches.

And, here also remove unreadable register_mask usage for those mask value
updating.

Signed-off-by: Keyon Jie <yang.jie@linux.intel.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 sound/hda/ext/hdac_ext_stream.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c
index 1bd27576db98d..a835558ddbc9b 100644
--- a/sound/hda/ext/hdac_ext_stream.c
+++ b/sound/hda/ext/hdac_ext_stream.c
@@ -146,7 +146,8 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple);
  */
 void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream)
 {
-	snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, 0, AZX_PPLCCTL_RUN);
+	snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL,
+			 AZX_PPLCCTL_RUN, AZX_PPLCCTL_RUN);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_link_stream_start);
 
@@ -171,7 +172,8 @@ void snd_hdac_ext_link_stream_reset(struct hdac_ext_stream *stream)
 
 	snd_hdac_ext_link_stream_clear(stream);
 
-	snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL, 0, AZX_PPLCCTL_STRST);
+	snd_hdac_updatel(stream->pplc_addr, AZX_REG_PPLCCTL,
+			 AZX_PPLCCTL_STRST, AZX_PPLCCTL_STRST);
 	udelay(3);
 	timeout = 50;
 	do {
@@ -242,7 +244,7 @@ EXPORT_SYMBOL_GPL(snd_hdac_ext_link_set_stream_id);
 void snd_hdac_ext_link_clear_stream_id(struct hdac_ext_link *link,
 				 int stream)
 {
-	snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV, 0, (1 << stream));
+	snd_hdac_updatew(link->ml_addr, AZX_REG_ML_LOSIDV, (1 << stream), 0);
 }
 EXPORT_SYMBOL_GPL(snd_hdac_ext_link_clear_stream_id);
 
@@ -415,7 +417,6 @@ void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *bus,
 				 bool enable, int index)
 {
 	u32 mask = 0;
-	u32 register_mask = 0;
 
 	if (!bus->spbcap) {
 		dev_err(bus->dev, "Address of SPB capability is NULL\n");
@@ -424,12 +425,8 @@ void snd_hdac_ext_stream_spbcap_enable(struct hdac_bus *bus,
 
 	mask |= (1 << index);
 
-	register_mask = readl(bus->spbcap + AZX_REG_SPB_SPBFCCTL);
-
-	mask |= register_mask;
-
 	if (enable)
-		snd_hdac_updatel(bus->spbcap, AZX_REG_SPB_SPBFCCTL, 0, mask);
+		snd_hdac_updatel(bus->spbcap, AZX_REG_SPB_SPBFCCTL, mask, mask);
 	else
 		snd_hdac_updatel(bus->spbcap, AZX_REG_SPB_SPBFCCTL, mask, 0);
 }
@@ -503,7 +500,6 @@ void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus,
 				bool enable, int index)
 {
 	u32 mask = 0;
-	u32 register_mask = 0;
 
 	if (!bus->drsmcap) {
 		dev_err(bus->dev, "Address of DRSM capability is NULL\n");
@@ -512,12 +508,8 @@ void snd_hdac_ext_stream_drsm_enable(struct hdac_bus *bus,
 
 	mask |= (1 << index);
 
-	register_mask = readl(bus->drsmcap + AZX_REG_SPB_SPBFCCTL);
-
-	mask |= register_mask;
-
 	if (enable)
-		snd_hdac_updatel(bus->drsmcap, AZX_REG_DRSM_CTL, 0, mask);
+		snd_hdac_updatel(bus->drsmcap, AZX_REG_DRSM_CTL, mask, mask);
 	else
 		snd_hdac_updatel(bus->drsmcap, AZX_REG_DRSM_CTL, mask, 0);
 }

From 36feaac35405e932ad9c321d7a2db8a7e4a4d7ca Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Fri, 31 Aug 2018 16:52:01 +0800
Subject: [PATCH 165/257] ip6_tunnel: respect ttl inherit for ip6tnl

man ip-tunnel ttl section says:
0 is a special value meaning that packets inherit the TTL value.

IPv4 tunnel respect this in ip_tunnel_xmit(), but IPv6 tunnel has not
implement it yet. To make IPv6 behave consistently with IP tunnel,
add ipv6 tunnel inherit support.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 5df2a58d945cc..419960b0ba163 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1188,7 +1188,15 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield,
 		init_tel_txopt(&opt, encap_limit);
 		ipv6_push_frag_opts(skb, &opt.ops, &proto);
 	}
-	hop_limit = hop_limit ? : ip6_dst_hoplimit(dst);
+
+	if (hop_limit == 0) {
+		if (skb->protocol == htons(ETH_P_IP))
+			hop_limit = ip_hdr(skb)->ttl;
+		else if (skb->protocol == htons(ETH_P_IPV6))
+			hop_limit = ipv6_hdr(skb)->hop_limit;
+		else
+			hop_limit = ip6_dst_hoplimit(dst);
+	}
 
 	/* Calculate max headroom for all the headers and adjust
 	 * needed_headroom if necessary.

From 9fd0e09a4e86499639653243edfcb417a05c5c46 Mon Sep 17 00:00:00 2001
From: Anthony Wong <anthony.wong@ubuntu.com>
Date: Fri, 31 Aug 2018 20:06:42 +0800
Subject: [PATCH 166/257] r8169: add support for NCube 8168 network card

This card identifies itself as:
  Ethernet controller [0200]: NCube Device [10ff:8168] (rev 06)
  Subsystem: TP-LINK Technologies Co., Ltd. Device [7470:3468]

Adding a new entry to rtl8169_pci_tbl makes the card work.

Link: http://launchpad.net/bugs/1788730
Signed-off-by: Anthony Wong <anthony.wong@ubuntu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169.c | 1 +
 include/linux/pci_ids.h              | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index ac306797590ec..b08d51bf7a204 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -218,6 +218,7 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8161), 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8167), 0, 0, RTL_CFG_0 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8168), 0, 0, RTL_CFG_1 },
+	{ PCI_DEVICE(PCI_VENDOR_ID_NCUBE,	0x8168), 0, 0, RTL_CFG_1 },
 	{ PCI_DEVICE(PCI_VENDOR_ID_REALTEK,	0x8169), 0, 0, RTL_CFG_0 },
 	{ PCI_VENDOR_ID_DLINK,			0x4300,
 		PCI_VENDOR_ID_DLINK, 0x4b10,		 0, 0, RTL_CFG_1 },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 99d366cb0e9f5..d157983b84cf9 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -3084,4 +3084,6 @@
 
 #define PCI_VENDOR_ID_OCZ		0x1b85
 
+#define PCI_VENDOR_ID_NCUBE		0x10ff
+
 #endif /* _LINUX_PCI_IDS_H */

From c48300c92ad9f029f4dcbcf5d71ad880e3acf2fa Mon Sep 17 00:00:00 2001
From: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Date: Mon, 3 Sep 2018 20:59:13 +0300
Subject: [PATCH 167/257] vhost: fix VHOST_GET_BACKEND_FEATURES ioctl request
 definition

The _IOC_READ flag fits this ioctl request more because this request
actually only writes to, but doesn't read from userspace.
See NOTEs in include/uapi/asm-generic/ioctl.h for more information.

Fixes: 429711aec282 ("vhost: switch to use new message format")
Signed-off-by: Gleb Fotengauer-Malinovskiy <glebfm@altlinux.org>
Acked-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/vhost.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index b1e22c40c4b68..84c3de89696a1 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -176,7 +176,7 @@ struct vhost_memory {
 #define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
 
 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
-#define VHOST_GET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x26, __u64)
+#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
 
 /* VHOST_NET specific defines */
 

From 399334708b4f07b107094e5db4a390f0f25d2d4f Mon Sep 17 00:00:00 2001
From: Jan-Marek Glogowski <glogow@fbihome.de>
Date: Sat, 25 Aug 2018 15:10:35 -0400
Subject: [PATCH 168/257] drm/i915: Re-apply "Perform link quality check,
 unconditionally during long pulse"

This re-applies the workaround for "some DP sinks, [which] are a
little nuts" from commit 1a36147bb939 ("drm/i915: Perform link
quality check unconditionally during long pulse").
It makes the secondary AOC E2460P monitor connected via DP to an
acer Veriton N4640G usable again.

This hunk was dropped in commit c85d200e8321 ("drm/i915: Move SST
DP link retraining into the ->post_hotplug() hook")

Fixes: c85d200e8321 ("drm/i915: Move SST DP link retraining into the ->post_hotplug() hook")
[Cleaned up commit message, added stable cc]
Signed-off-by: Lyude Paul <lyude@redhat.com>
Signed-off-by: Jan-Marek Glogowski <glogow@fbihome.de>
Cc: stable@vger.kernel.org
Link: https://patchwork.freedesktop.org/patch/msgid/20180825191035.3945-1-lyude@redhat.com
(cherry picked from commit 3cf71bc9904d7ee4a25a822c5dcb54c7804ea388)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_dp.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index cd0f649b57a5b..1193202766a2c 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4160,18 +4160,6 @@ intel_dp_needs_link_retrain(struct intel_dp *intel_dp)
 	return !drm_dp_channel_eq_ok(link_status, intel_dp->lane_count);
 }
 
-/*
- * If display is now connected check links status,
- * there has been known issues of link loss triggering
- * long pulse.
- *
- * Some sinks (eg. ASUS PB287Q) seem to perform some
- * weird HPD ping pong during modesets. So we can apparently
- * end up with HPD going low during a modeset, and then
- * going back up soon after. And once that happens we must
- * retrain the link to get a picture. That's in case no
- * userspace component reacted to intermittent HPD dip.
- */
 int intel_dp_retrain_link(struct intel_encoder *encoder,
 			  struct drm_modeset_acquire_ctx *ctx)
 {
@@ -4661,7 +4649,8 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
 }
 
 static int
-intel_dp_long_pulse(struct intel_connector *connector)
+intel_dp_long_pulse(struct intel_connector *connector,
+		    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->base.dev);
 	struct intel_dp *intel_dp = intel_attached_dp(&connector->base);
@@ -4720,6 +4709,22 @@ intel_dp_long_pulse(struct intel_connector *connector)
 		 */
 		status = connector_status_disconnected;
 		goto out;
+	} else {
+		/*
+		 * If display is now connected check links status,
+		 * there has been known issues of link loss triggering
+		 * long pulse.
+		 *
+		 * Some sinks (eg. ASUS PB287Q) seem to perform some
+		 * weird HPD ping pong during modesets. So we can apparently
+		 * end up with HPD going low during a modeset, and then
+		 * going back up soon after. And once that happens we must
+		 * retrain the link to get a picture. That's in case no
+		 * userspace component reacted to intermittent HPD dip.
+		 */
+		struct intel_encoder *encoder = &dp_to_dig_port(intel_dp)->base;
+
+		intel_dp_retrain_link(encoder, ctx);
 	}
 
 	/*
@@ -4781,7 +4786,7 @@ intel_dp_detect(struct drm_connector *connector,
 				return ret;
 		}
 
-		status = intel_dp_long_pulse(intel_dp->attached_connector);
+		status = intel_dp_long_pulse(intel_dp->attached_connector, ctx);
 	}
 
 	intel_dp->detect_done = false;

From 4fe967912ee83048beb45a6b4f0f6774fddcfa0a Mon Sep 17 00:00:00 2001
From: Manasi Navare <manasi.d.navare@intel.com>
Date: Thu, 23 Aug 2018 18:48:07 -0700
Subject: [PATCH 169/257] drm/i915/dsc: Fix PPS register definition macros for
 2nd VDSC engine

This patch fixes the PPS4 and PPS5 register definition macros that were
resulting into an incorect MMIO address.

Fixes: 2efbb2f099fb ("i915/dp/dsc: Add DSC PPS register definitions")
Cc: Anusha Srivatsa <anusha.srivatsa@intel.com>
Signed-off-by: Manasi Navare <manasi.d.navare@intel.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Anusha Srivatsa <anusha.srivatsa@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180824014807.14681-1-manasi.d.navare@intel.com
(cherry picked from commit 5df52391ddbed869c7d67b00fbb013bd64334115)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/i915_reg.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 08ec7446282e7..9e63cd47b60f3 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -10422,7 +10422,7 @@ enum skl_power_gate {
 							   _ICL_DSC0_PICTURE_PARAMETER_SET_4_PB, \
 							   _ICL_DSC0_PICTURE_PARAMETER_SET_4_PC)
 #define ICL_DSC1_PICTURE_PARAMETER_SET_4(pipe)	_MMIO_PIPE((pipe) - PIPE_B, \
-							   _ICL_DSC0_PICTURE_PARAMETER_SET_4_PB, \
+							   _ICL_DSC1_PICTURE_PARAMETER_SET_4_PB, \
 							   _ICL_DSC1_PICTURE_PARAMETER_SET_4_PC)
 #define  DSC_INITIAL_DEC_DELAY(dec_delay)       ((dec_delay) << 16)
 #define  DSC_INITIAL_XMIT_DELAY(xmit_delay)     ((xmit_delay) << 0)
@@ -10437,7 +10437,7 @@ enum skl_power_gate {
 							   _ICL_DSC0_PICTURE_PARAMETER_SET_5_PB, \
 							   _ICL_DSC0_PICTURE_PARAMETER_SET_5_PC)
 #define ICL_DSC1_PICTURE_PARAMETER_SET_5(pipe)	_MMIO_PIPE((pipe) - PIPE_B, \
-							   _ICL_DSC1_PICTURE_PARAMETER_SET_5_PC, \
+							   _ICL_DSC1_PICTURE_PARAMETER_SET_5_PB, \
 							   _ICL_DSC1_PICTURE_PARAMETER_SET_5_PC)
 #define  DSC_SCALE_DEC_INTINT(scale_dec)	((scale_dec) << 16)
 #define  DSC_SCALE_INC_INT(scale_inc)		((scale_inc) << 0)

From 2b82435cb90bed2c5f8398730d964dd11602217c Mon Sep 17 00:00:00 2001
From: Imre Deak <imre.deak@intel.com>
Date: Fri, 31 Aug 2018 20:47:39 +0300
Subject: [PATCH 170/257] drm/i915/dp_mst: Fix enabling pipe clock for all
 streams
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit afb2c4437dae ("drm/i915/ddi: Push pipe clock enabling to encoders")
inadvertently stopped enabling the pipe clock for any DP-MST stream
after the first one. It also rearranged the pipe clock enabling wrt.
initial MST payload allocation step (which may or may not be a
problem, but it's contrary to the spec.).

Fix things by making the above commit truly a non-functional change.

Fixes: afb2c4437dae ("drm/i915/ddi: Push pipe clock enabling to encoders")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=107365
Reported-by: Lyude Paul <lyude@redhat.com>
Reported-by: dmummenschanz@web.de
Tested-by: dmummenschanz@web.de
Tested-by: Lyude Paul <lyude@redhat.com>
Cc: Lyude Paul <lyude@redhat.com>
Cc: dmummenschanz@web.de
Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Imre Deak <imre.deak@intel.com>
Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180831174739.30387-1-imre.deak@intel.com
(cherry picked from commit 2b5cf4ef541f1b2facaca58cae5e8e0b5f19ad4c)
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
---
 drivers/gpu/drm/i915/intel_ddi.c    | 17 +++++++++--------
 drivers/gpu/drm/i915/intel_dp_mst.c |  4 ++++
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index 8761513f3532c..c9af34861d9e3 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -2708,7 +2708,8 @@ static void intel_ddi_pre_enable_dp(struct intel_encoder *encoder,
 	if (port != PORT_A || INTEL_GEN(dev_priv) >= 9)
 		intel_dp_stop_link_train(intel_dp);
 
-	intel_ddi_enable_pipe_clock(crtc_state);
+	if (!is_mst)
+		intel_ddi_enable_pipe_clock(crtc_state);
 }
 
 static void intel_ddi_pre_enable_hdmi(struct intel_encoder *encoder,
@@ -2810,14 +2811,14 @@ static void intel_ddi_post_disable_dp(struct intel_encoder *encoder,
 	bool is_mst = intel_crtc_has_type(old_crtc_state,
 					  INTEL_OUTPUT_DP_MST);
 
-	intel_ddi_disable_pipe_clock(old_crtc_state);
-
-	/*
-	 * Power down sink before disabling the port, otherwise we end
-	 * up getting interrupts from the sink on detecting link loss.
-	 */
-	if (!is_mst)
+	if (!is_mst) {
+		intel_ddi_disable_pipe_clock(old_crtc_state);
+		/*
+		 * Power down sink before disabling the port, otherwise we end
+		 * up getting interrupts from the sink on detecting link loss.
+		 */
 		intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_OFF);
+	}
 
 	intel_disable_ddi_buf(encoder);
 
diff --git a/drivers/gpu/drm/i915/intel_dp_mst.c b/drivers/gpu/drm/i915/intel_dp_mst.c
index 7e3e01607643d..4ecd653756033 100644
--- a/drivers/gpu/drm/i915/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/intel_dp_mst.c
@@ -166,6 +166,8 @@ static void intel_mst_post_disable_dp(struct intel_encoder *encoder,
 	struct intel_connector *connector =
 		to_intel_connector(old_conn_state->connector);
 
+	intel_ddi_disable_pipe_clock(old_crtc_state);
+
 	/* this can fail */
 	drm_dp_check_act_status(&intel_dp->mst_mgr);
 	/* and this can also fail */
@@ -252,6 +254,8 @@ static void intel_mst_pre_enable_dp(struct intel_encoder *encoder,
 	I915_WRITE(DP_TP_STATUS(port), temp);
 
 	ret = drm_dp_update_payload_part1(&intel_dp->mst_mgr);
+
+	intel_ddi_enable_pipe_clock(pipe_config);
 }
 
 static void intel_mst_enable_dp(struct intel_encoder *encoder,

From c10bbfae3ae43fae1d77e16f05a73474acf514ff Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Mon, 3 Sep 2018 10:04:55 +0300
Subject: [PATCH 171/257] net: sched: null actions array pointer before
 releasing action

Currently, tcf_action_delete() nulls actions array pointer after putting
and deleting it. However, if tcf_idr_delete_index() returns an error,
pointer to action is not set to null. That results it being released second
time in error handling code of tca_action_gd().

Kasan error:

[  807.367755] ==================================================================
[  807.375844] BUG: KASAN: use-after-free in tc_setup_cb_call+0x14e/0x250
[  807.382763] Read of size 8 at addr ffff88033e636000 by task tc/2732

[  807.391289] CPU: 0 PID: 2732 Comm: tc Tainted: G        W         4.19.0-rc1+ #799
[  807.399542] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  807.407948] Call Trace:
[  807.410763]  dump_stack+0x92/0xeb
[  807.414456]  print_address_description+0x70/0x360
[  807.419549]  kasan_report+0x14d/0x300
[  807.423582]  ? tc_setup_cb_call+0x14e/0x250
[  807.428150]  tc_setup_cb_call+0x14e/0x250
[  807.432539]  ? nla_put+0x65/0xe0
[  807.436146]  fl_dump+0x394/0x3f0 [cls_flower]
[  807.440890]  ? fl_tmplt_dump+0x140/0x140 [cls_flower]
[  807.446327]  ? lock_downgrade+0x320/0x320
[  807.450702]  ? lock_acquire+0xe2/0x220
[  807.454819]  ? is_bpf_text_address+0x5/0x140
[  807.459475]  ? memcpy+0x34/0x50
[  807.462980]  ? nla_put+0x65/0xe0
[  807.466582]  tcf_fill_node+0x341/0x430
[  807.470717]  ? tcf_block_put+0xe0/0xe0
[  807.474859]  tcf_node_dump+0xdb/0xf0
[  807.478821]  fl_walk+0x8e/0x170 [cls_flower]
[  807.483474]  tcf_chain_dump+0x35a/0x4d0
[  807.487703]  ? tfilter_notify+0x170/0x170
[  807.492091]  ? tcf_fill_node+0x430/0x430
[  807.496411]  tc_dump_tfilter+0x362/0x3f0
[  807.500712]  ? tc_del_tfilter+0x850/0x850
[  807.505104]  ? kasan_unpoison_shadow+0x30/0x40
[  807.509940]  ? __mutex_unlock_slowpath+0xcf/0x410
[  807.515031]  netlink_dump+0x263/0x4f0
[  807.519077]  __netlink_dump_start+0x2a0/0x300
[  807.523817]  ? tc_del_tfilter+0x850/0x850
[  807.528198]  rtnetlink_rcv_msg+0x46a/0x6d0
[  807.532671]  ? rtnl_fdb_del+0x3f0/0x3f0
[  807.536878]  ? tc_del_tfilter+0x850/0x850
[  807.541280]  netlink_rcv_skb+0x18d/0x200
[  807.545570]  ? rtnl_fdb_del+0x3f0/0x3f0
[  807.549773]  ? netlink_ack+0x500/0x500
[  807.553913]  netlink_unicast+0x2d0/0x370
[  807.558212]  ? netlink_attachskb+0x340/0x340
[  807.562855]  ? _copy_from_iter_full+0xe9/0x3e0
[  807.567677]  ? import_iovec+0x11e/0x1c0
[  807.571890]  netlink_sendmsg+0x3b9/0x6a0
[  807.576192]  ? netlink_unicast+0x370/0x370
[  807.580684]  ? netlink_unicast+0x370/0x370
[  807.585154]  sock_sendmsg+0x6b/0x80
[  807.589015]  ___sys_sendmsg+0x4a1/0x520
[  807.593230]  ? copy_msghdr_from_user+0x210/0x210
[  807.598232]  ? do_wp_page+0x174/0x880
[  807.602276]  ? __handle_mm_fault+0x749/0x1c10
[  807.607021]  ? __handle_mm_fault+0x1046/0x1c10
[  807.611849]  ? __pmd_alloc+0x320/0x320
[  807.615973]  ? check_chain_key+0x140/0x1f0
[  807.620450]  ? check_chain_key+0x140/0x1f0
[  807.624929]  ? __fget_light+0xbc/0xd0
[  807.628970]  ? __sys_sendmsg+0xd7/0x150
[  807.633172]  __sys_sendmsg+0xd7/0x150
[  807.637201]  ? __ia32_sys_shutdown+0x30/0x30
[  807.641846]  ? up_read+0x53/0x90
[  807.645442]  ? __do_page_fault+0x484/0x780
[  807.649949]  ? do_syscall_64+0x1e/0x2c0
[  807.654164]  do_syscall_64+0x72/0x2c0
[  807.658198]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  807.663625] RIP: 0033:0x7f42e9870150
[  807.667568] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24
[  807.687328] RSP: 002b:00007ffdbf595b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  807.695564] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f42e9870150
[  807.703083] RDX: 0000000000000000 RSI: 00007ffdbf595b80 RDI: 0000000000000003
[  807.710605] RBP: 00007ffdbf599d90 R08: 0000000000679bc0 R09: 000000000000000f
[  807.718127] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdbf599d88
[  807.725651] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000

[  807.735048] Allocated by task 2687:
[  807.738902]  kasan_kmalloc+0xa0/0xd0
[  807.742852]  __kmalloc+0x118/0x2d0
[  807.746615]  tcf_idr_create+0x44/0x320
[  807.750738]  tcf_nat_init+0x41e/0x530 [act_nat]
[  807.755638]  tcf_action_init_1+0x4e0/0x650
[  807.760104]  tcf_action_init+0x1ce/0x2d0
[  807.764395]  tcf_exts_validate+0x1d8/0x200
[  807.768861]  fl_change+0x55a/0x26b4 [cls_flower]
[  807.773845]  tc_new_tfilter+0x748/0xa20
[  807.778051]  rtnetlink_rcv_msg+0x56a/0x6d0
[  807.782517]  netlink_rcv_skb+0x18d/0x200
[  807.786804]  netlink_unicast+0x2d0/0x370
[  807.791095]  netlink_sendmsg+0x3b9/0x6a0
[  807.795387]  sock_sendmsg+0x6b/0x80
[  807.799240]  ___sys_sendmsg+0x4a1/0x520
[  807.803445]  __sys_sendmsg+0xd7/0x150
[  807.807473]  do_syscall_64+0x72/0x2c0
[  807.811506]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

[  807.818776] Freed by task 2728:
[  807.822283]  __kasan_slab_free+0x122/0x180
[  807.826752]  kfree+0xf4/0x2f0
[  807.830080]  __tcf_action_put+0x5a/0xb0
[  807.834281]  tcf_action_put_many+0x46/0x70
[  807.838747]  tca_action_gd+0x232/0xc40
[  807.842862]  tc_ctl_action+0x215/0x230
[  807.846977]  rtnetlink_rcv_msg+0x56a/0x6d0
[  807.851444]  netlink_rcv_skb+0x18d/0x200
[  807.855731]  netlink_unicast+0x2d0/0x370
[  807.860021]  netlink_sendmsg+0x3b9/0x6a0
[  807.864312]  sock_sendmsg+0x6b/0x80
[  807.868166]  ___sys_sendmsg+0x4a1/0x520
[  807.872372]  __sys_sendmsg+0xd7/0x150
[  807.876401]  do_syscall_64+0x72/0x2c0
[  807.880431]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

[  807.887704] The buggy address belongs to the object at ffff88033e636000
                which belongs to the cache kmalloc-256 of size 256
[  807.900909] The buggy address is located 0 bytes inside of
                256-byte region [ffff88033e636000, ffff88033e636100)
[  807.913155] The buggy address belongs to the page:
[  807.918322] page:ffffea000cf98d80 count:1 mapcount:0 mapping:ffff88036f80ee00 index:0x0 compound_mapcount: 0
[  807.928831] flags: 0x5fff8000008100(slab|head)
[  807.933647] raw: 005fff8000008100 ffffea000db44f00 0000000400000004 ffff88036f80ee00
[  807.942050] raw: 0000000000000000 0000000080190019 00000001ffffffff 0000000000000000
[  807.950456] page dumped because: kasan: bad access detected

[  807.958240] Memory state around the buggy address:
[  807.963405]  ffff88033e635f00: fc fc fc fc fb fb fb fb fb fb fb fc fc fc fc fb
[  807.971288]  ffff88033e635f80: fb fb fb fb fb fb fc fc fc fc fc fc fc fc fc fc
[  807.979166] >ffff88033e636000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  807.994882]                    ^
[  807.998477]  ffff88033e636080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
[  808.006352]  ffff88033e636100: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
[  808.014230] ==================================================================
[  808.022108] Disabling lock debugging due to kernel taint

Fixes: edfaf94fa705 ("net_sched: improve and refactor tcf_action_put_many()")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 316c98bb87e4e..e12f8ef7baa43 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -1179,6 +1179,7 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
 		struct tcf_idrinfo *idrinfo = a->idrinfo;
 		u32 act_index = a->tcfa_index;
 
+		actions[i] = NULL;
 		if (tcf_action_put(a)) {
 			/* last reference, action was deleted concurrently */
 			module_put(ops->owner);
@@ -1190,7 +1191,6 @@ static int tcf_action_delete(struct net *net, struct tc_action *actions[])
 			if (ret < 0)
 				return ret;
 		}
-		actions[i] = NULL;
 	}
 	return 0;
 }

From bf68066fccb10fce6bbffdda24ee2ae314c9c5b2 Mon Sep 17 00:00:00 2001
From: Ivan Mikhaylov <ivan@de.ibm.com>
Date: Mon, 3 Sep 2018 10:26:28 +0300
Subject: [PATCH 172/257] net/ibm/emac: wrong emac_calc_base call was used by
 typo

__emac_calc_base_mr1 was used instead of __emac4_calc_base_mr1
by copy-paste mistake for emac4syn.

Fixes: 45d6e545505fd32edb812f085be7de45b6a5c0af ("net/ibm/emac: add 8192 rx/tx fifo size")
Signed-off-by: Ivan Mikhaylov <ivan@de.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ibm/emac/core.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 354c0982847b8..3726646863095 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -494,9 +494,6 @@ static u32 __emac_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_s
 	case 16384:
 		ret |= EMAC_MR1_RFS_16K;
 		break;
-	case 8192:
-		ret |= EMAC4_MR1_RFS_8K;
-		break;
 	case 4096:
 		ret |= EMAC_MR1_RFS_4K;
 		break;
@@ -537,6 +534,9 @@ static u32 __emac4_calc_base_mr1(struct emac_instance *dev, int tx_size, int rx_
 	case 16384:
 		ret |= EMAC4_MR1_RFS_16K;
 		break;
+	case 8192:
+		ret |= EMAC4_MR1_RFS_8K;
+		break;
 	case 4096:
 		ret |= EMAC4_MR1_RFS_4K;
 		break;

From af8a2b8ba7678b4695f9e854ba9abae1076beabe Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 3 Sep 2018 15:47:10 +0800
Subject: [PATCH 173/257] sctp: fix invalid reference to the index variable of
 the iterator

Now in sctp_apply_peer_addr_params(), if SPP_IPV6_FLOWLABEL flag is set
and trans is NULL, it would use trans as the index variable to traverse
transport_addr_list, then trans is set as the last transport of it.

Later, if SPP_DSCP flag is set, it would enter into the wrong branch as
trans is actually an invalid reference.

So fix it by using a new index variable to traverse transport_addr_list
for both SPP_DSCP and SPP_IPV6_FLOWLABEL flags process.

Fixes: 0b0dce7a36fb ("sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams")
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index aa76586a1a1c8..a0ccfa4b82205 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2663,14 +2663,15 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 					   SCTP_FLOWLABEL_VAL_MASK;
 			trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
 		} else if (asoc) {
-			list_for_each_entry(trans,
-					    &asoc->peer.transport_addr_list,
+			struct sctp_transport *t;
+
+			list_for_each_entry(t, &asoc->peer.transport_addr_list,
 					    transports) {
-				if (trans->ipaddr.sa.sa_family != AF_INET6)
+				if (t->ipaddr.sa.sa_family != AF_INET6)
 					continue;
-				trans->flowlabel = params->spp_ipv6_flowlabel &
-						   SCTP_FLOWLABEL_VAL_MASK;
-				trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+				t->flowlabel = params->spp_ipv6_flowlabel &
+					       SCTP_FLOWLABEL_VAL_MASK;
+				t->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
 			}
 			asoc->flowlabel = params->spp_ipv6_flowlabel &
 					  SCTP_FLOWLABEL_VAL_MASK;
@@ -2687,12 +2688,13 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 			trans->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
 			trans->dscp |= SCTP_DSCP_SET_MASK;
 		} else if (asoc) {
-			list_for_each_entry(trans,
-					    &asoc->peer.transport_addr_list,
+			struct sctp_transport *t;
+
+			list_for_each_entry(t, &asoc->peer.transport_addr_list,
 					    transports) {
-				trans->dscp = params->spp_dscp &
-					      SCTP_DSCP_VAL_MASK;
-				trans->dscp |= SCTP_DSCP_SET_MASK;
+				t->dscp = params->spp_dscp &
+					  SCTP_DSCP_VAL_MASK;
+				t->dscp |= SCTP_DSCP_SET_MASK;
 			}
 			asoc->dscp = params->spp_dscp & SCTP_DSCP_VAL_MASK;
 			asoc->dscp |= SCTP_DSCP_SET_MASK;

From 741880e1f2f59b20125dc480765d2546cec66080 Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 3 Sep 2018 15:47:11 +0800
Subject: [PATCH 174/257] sctp: not traverse asoc trans list if non-ipv6 trans
 exists for ipv6_flowlabel

When users set params.spp_address and get a trans, ipv6_flowlabel flag
should be applied into this trans. But even if this one is not an ipv6
trans, it should not go to apply it into all other transes of the asoc
but simply ignore it.

Fixes: 0b0dce7a36fb ("sctp: add spp_ipv6_flowlabel and spp_dscp for sctp_paddrparams")
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index a0ccfa4b82205..f73e9d38d5ba7 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2658,10 +2658,12 @@ static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params,
 	}
 
 	if (params->spp_flags & SPP_IPV6_FLOWLABEL) {
-		if (trans && trans->ipaddr.sa.sa_family == AF_INET6) {
-			trans->flowlabel = params->spp_ipv6_flowlabel &
-					   SCTP_FLOWLABEL_VAL_MASK;
-			trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+		if (trans) {
+			if (trans->ipaddr.sa.sa_family == AF_INET6) {
+				trans->flowlabel = params->spp_ipv6_flowlabel &
+						   SCTP_FLOWLABEL_VAL_MASK;
+				trans->flowlabel |= SCTP_FLOWLABEL_SET_MASK;
+			}
 		} else if (asoc) {
 			struct sctp_transport *t;
 

From 6b95c3e9697254dab0c8eafc6ab9d5e10d2eca4e Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 3 Sep 2018 04:23:17 -0400
Subject: [PATCH 175/257] bnxt_en: Fix firmware signaled resource change logic
 in open.

When the driver detects that resources have changed during open, it
should reset the rx and tx rings to 0.  This will properly setup the
init sequence to initialize the default rings again.  We also need
to signal the RDMA driver to stop and clear its interrupts.  We then
call the RoCE driver to restart if a new set of default rings is
successfully reserved.

Fixes: 25e1acd6b92b ("bnxt_en: Notify firmware about IF state changes.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8bb1e38b1681a..6a1baf375ac3b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6684,6 +6684,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 		hw_resc->resv_rx_rings = 0;
 		hw_resc->resv_hw_ring_grps = 0;
 		hw_resc->resv_vnics = 0;
+		bp->tx_nr_rings = 0;
+		bp->rx_nr_rings = 0;
 	}
 	return rc;
 }
@@ -8769,20 +8771,25 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp)
 	if (bp->tx_nr_rings)
 		return 0;
 
+	bnxt_ulp_irq_stop(bp);
+	bnxt_clear_int_mode(bp);
 	rc = bnxt_set_dflt_rings(bp, true);
 	if (rc) {
 		netdev_err(bp->dev, "Not enough rings available.\n");
-		return rc;
+		goto init_dflt_ring_err;
 	}
 	rc = bnxt_init_int_mode(bp);
 	if (rc)
-		return rc;
+		goto init_dflt_ring_err;
+
 	bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
 	if (bnxt_rfs_supported(bp) && bnxt_rfs_capable(bp)) {
 		bp->flags |= BNXT_FLAG_RFS;
 		bp->dev->features |= NETIF_F_NTUPLE;
 	}
-	return 0;
+init_dflt_ring_err:
+	bnxt_ulp_irq_restart(bp, rc);
+	return rc;
 }
 
 int bnxt_restore_pf_fw_resources(struct bnxt *bp)

From ad95c27bdb930105f3eea02621bda157caf2862d Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 3 Sep 2018 04:23:18 -0400
Subject: [PATCH 176/257] bnxt_en: Clean up unused functions.

Remove unused bnxt_subtract_ulp_resources().  Change
bnxt_get_max_func_irqs() to static since it is only locally used.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c     |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  1 -
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 15 ---------------
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h |  1 -
 4 files changed, 1 insertion(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6a1baf375ac3b..6472ce447f879 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5918,7 +5918,7 @@ void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max)
 	bp->hw_resc.max_cp_rings = max;
 }
 
-unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
+static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
 {
 	struct bnxt_hw_resc *hw_resc = &bp->hw_resc;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index fefa011320e0f..c4c77b9fa77b1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1482,7 +1482,6 @@ unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
 void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
 unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
 void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max);
-unsigned int bnxt_get_max_func_irqs(struct bnxt *bp);
 int bnxt_get_avail_msix(struct bnxt *bp, int num);
 int bnxt_reserve_rings(struct bnxt *bp);
 void bnxt_tx_disable(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index c37b2842f972c..deac73e8e0f74 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -220,21 +220,6 @@ int bnxt_get_ulp_msix_base(struct bnxt *bp)
 	return 0;
 }
 
-void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id)
-{
-	ASSERT_RTNL();
-	if (bnxt_ulp_registered(bp->edev, ulp_id)) {
-		struct bnxt_en_dev *edev = bp->edev;
-		unsigned int msix_req, max;
-
-		msix_req = edev->ulp_tbl[ulp_id].msix_requested;
-		max = bnxt_get_max_func_cp_rings(bp);
-		bnxt_set_max_func_cp_rings(bp, max - msix_req);
-		max = bnxt_get_max_func_stat_ctxs(bp);
-		bnxt_set_max_func_stat_ctxs(bp, max - 1);
-	}
-}
-
 static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id,
 			 struct bnxt_fw_msg *fw_msg)
 {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index df48ac71729f5..d9bea37cd211f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -90,7 +90,6 @@ static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev, int ulp_id)
 
 int bnxt_get_ulp_msix_num(struct bnxt *bp);
 int bnxt_get_ulp_msix_base(struct bnxt *bp);
-void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id);
 void bnxt_ulp_stop(struct bnxt *bp);
 void bnxt_ulp_start(struct bnxt *bp);
 void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);

From 00fe9c326d2027f2437dea38ef0e82f9d02d94c0 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 3 Sep 2018 04:23:19 -0400
Subject: [PATCH 177/257] bnxt_en: Do not adjust max_cp_rings by the ones used
 by RDMA.

Currently, the driver adjusts the bp->hw_resc.max_cp_rings by the number
of MSIX vectors used by RDMA.  There is one code path in open that needs
to check the true max_cp_rings including any used by RDMA.  This code
is now checking for the reduced max_cp_rings which will fail when the
number of cp rings is very small.

To fix this in a clean way, we don't adjust max_cp_rings anymore.
Instead, we add a helper bnxt_get_max_func_cp_rings_for_en() to get the
reduced max_cp_rings when appropriate.

Fixes: ec86f14ea506 ("bnxt_en: Add ULP calls to stop and restart IRQs.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c       | 7 ++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h       | 2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 7 ++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c   | 5 -----
 4 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 6472ce447f879..cecbb1d1f587f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5913,9 +5913,9 @@ unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp)
 	return bp->hw_resc.max_cp_rings;
 }
 
-void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max)
+unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp)
 {
-	bp->hw_resc.max_cp_rings = max;
+	return bp->hw_resc.max_cp_rings - bnxt_get_ulp_msix_num(bp);
 }
 
 static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
@@ -8631,7 +8631,8 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx,
 
 	*max_tx = hw_resc->max_tx_rings;
 	*max_rx = hw_resc->max_rx_rings;
-	*max_cp = min_t(int, hw_resc->max_irqs, hw_resc->max_cp_rings);
+	*max_cp = min_t(int, bnxt_get_max_func_cp_rings_for_en(bp),
+			hw_resc->max_irqs);
 	*max_cp = min_t(int, *max_cp, hw_resc->max_stat_ctxs);
 	max_ring_grps = hw_resc->max_hw_ring_grps;
 	if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) {
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index c4c77b9fa77b1..bde384630a75f 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1481,7 +1481,7 @@ int bnxt_hwrm_set_coal(struct bnxt *);
 unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
 void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
 unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
-void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max);
+unsigned int bnxt_get_max_func_cp_rings_for_en(struct bnxt *bp);
 int bnxt_get_avail_msix(struct bnxt *bp, int num);
 int bnxt_reserve_rings(struct bnxt *bp);
 void bnxt_tx_disable(struct bnxt *bp);
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 6d583bcd2a81b..fcd085a9853a9 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -451,7 +451,7 @@ static int bnxt_hwrm_func_vf_resc_cfg(struct bnxt *bp, int num_vfs)
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_VF_RESOURCE_CFG, -1, -1);
 
-	vf_cp_rings = hw_resc->max_cp_rings - bp->cp_nr_rings;
+	vf_cp_rings = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings;
 	vf_stat_ctx = hw_resc->max_stat_ctxs - bp->num_stat_ctxs;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		vf_rx_rings = hw_resc->max_rx_rings - bp->rx_nr_rings * 2;
@@ -549,7 +549,8 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
 	max_stat_ctxs = hw_resc->max_stat_ctxs;
 
 	/* Remaining rings are distributed equally amongs VF's for now */
-	vf_cp_rings = (hw_resc->max_cp_rings - bp->cp_nr_rings) / num_vfs;
+	vf_cp_rings = (bnxt_get_max_func_cp_rings_for_en(bp) -
+		       bp->cp_nr_rings) / num_vfs;
 	vf_stat_ctx = (max_stat_ctxs - bp->num_stat_ctxs) / num_vfs;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		vf_rx_rings = (hw_resc->max_rx_rings - bp->rx_nr_rings * 2) /
@@ -643,7 +644,7 @@ static int bnxt_sriov_enable(struct bnxt *bp, int *num_vfs)
 	 */
 	vfs_supported = *num_vfs;
 
-	avail_cp = hw_resc->max_cp_rings - bp->cp_nr_rings;
+	avail_cp = bnxt_get_max_func_cp_rings_for_en(bp) - bp->cp_nr_rings;
 	avail_stat = hw_resc->max_stat_ctxs - bp->num_stat_ctxs;
 	avail_cp = min_t(int, avail_cp, avail_stat);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index deac73e8e0f74..beee61292d5e5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -169,7 +169,6 @@ static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
 		edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
 	}
 	bnxt_fill_msix_vecs(bp, ent);
-	bnxt_set_max_func_cp_rings(bp, max_cp_rings - avail_msix);
 	edev->flags |= BNXT_EN_FLAG_MSIX_REQUESTED;
 	return avail_msix;
 }
@@ -178,7 +177,6 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 {
 	struct net_device *dev = edev->net;
 	struct bnxt *bp = netdev_priv(dev);
-	int max_cp_rings, msix_requested;
 
 	ASSERT_RTNL();
 	if (ulp_id != BNXT_ROCE_ULP)
@@ -187,9 +185,6 @@ static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
 	if (!(edev->flags & BNXT_EN_FLAG_MSIX_REQUESTED))
 		return 0;
 
-	max_cp_rings = bnxt_get_max_func_cp_rings(bp);
-	msix_requested = edev->ulp_tbl[ulp_id].msix_requested;
-	bnxt_set_max_func_cp_rings(bp, max_cp_rings + msix_requested);
 	edev->ulp_tbl[ulp_id].msix_requested = 0;
 	edev->flags &= ~BNXT_EN_FLAG_MSIX_REQUESTED;
 	if (netif_running(dev)) {

From 9cc1bf3928b31e515ed15477b3c7eb653d0b3b42 Mon Sep 17 00:00:00 2001
From: Zhenbo Gao <zhenbo.gao@windriver.com>
Date: Mon, 3 Sep 2018 16:36:45 +0800
Subject: [PATCH 178/257] tipc: correct spelling errors for struct
 tipc_bc_base's comment

Trivial fix for two spelling mistakes.

Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 9ee6cfea56dd0..d8026543bf4ce 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -51,12 +51,12 @@ const char tipc_bclink_name[] = "broadcast-link";
  * struct tipc_bc_base - base structure for keeping broadcast send state
  * @link: broadcast send link structure
  * @inputq: data input queue; will only carry SOCK_WAKEUP messages
- * @dest: array keeping number of reachable destinations per bearer
+ * @dests: array keeping number of reachable destinations per bearer
  * @primary_bearer: a bearer having links to all broadcast destinations, if any
  * @bcast_support: indicates if primary bearer, if any, supports broadcast
  * @rcast_support: indicates if all peer nodes support replicast
  * @rc_ratio: dest count as percentage of cluster size where send method changes
- * @bc_threshold: calculated drom rc_ratio; if dests > threshold use broadcast
+ * @bc_threshold: calculated from rc_ratio; if dests > threshold use broadcast
  */
 struct tipc_bc_base {
 	struct tipc_link *link;

From a484ef3442d2f05fa59edf4f6d14a8169d1b94a6 Mon Sep 17 00:00:00 2001
From: Zhenbo Gao <zhenbo.gao@windriver.com>
Date: Mon, 3 Sep 2018 16:36:46 +0800
Subject: [PATCH 179/257] tipc: correct spelling errors for
 tipc_topsrv_queue_evt() comments

tipc_conn_queue_evt -> tipc_topsrv_queue_evt
tipc_send_work -> tipc_conn_send_work
tipc_send_to_sock -> tipc_conn_send_to_sock

Signed-off-by: Zhenbo Gao <zhenbo.gao@windriver.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/topsrv.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index c8e34ef22c302..2627b5d812e90 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -313,8 +313,8 @@ static void tipc_conn_send_work(struct work_struct *work)
 	conn_put(con);
 }
 
-/* tipc_conn_queue_evt() - interrupt level call from a subscription instance
- * The queued work is launched into tipc_send_work()->tipc_send_to_sock()
+/* tipc_topsrv_queue_evt() - interrupt level call from a subscription instance
+ * The queued work is launched into tipc_conn_send_work()->tipc_conn_send_to_sock()
  */
 void tipc_topsrv_queue_evt(struct net *net, int conid,
 			   u32 event, struct tipc_event *evt)

From 1dfdf99106668679b0de5a62fd4f42c1a11c9445 Mon Sep 17 00:00:00 2001
From: Greentime Hu <greentime@andestech.com>
Date: Wed, 18 Jul 2018 09:54:55 +0800
Subject: [PATCH 180/257] nds32: fix logic for module

This bug is report by Dan Carpenter. We shall use ~loc_mask instead of
!loc_mask because we need to and(&) the bits of ~loc_mask.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Fixes: c9a4a8da6baa ("nds32: Loadable modules")
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/module.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/kernel/module.c b/arch/nds32/kernel/module.c
index 4167283d8293f..1e31829cbc2a7 100644
--- a/arch/nds32/kernel/module.c
+++ b/arch/nds32/kernel/module.c
@@ -40,7 +40,7 @@ void do_reloc16(unsigned int val, unsigned int *loc, unsigned int val_mask,
 
 	tmp2 = tmp & loc_mask;
 	if (partial_in_place) {
-		tmp &= (!loc_mask);
+		tmp &= (~loc_mask);
 		tmp =
 		    tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask);
 	} else {
@@ -70,7 +70,7 @@ void do_reloc32(unsigned int val, unsigned int *loc, unsigned int val_mask,
 
 	tmp2 = tmp & loc_mask;
 	if (partial_in_place) {
-		tmp &= (!loc_mask);
+		tmp &= (~loc_mask);
 		tmp =
 		    tmp2 | ((tmp + ((val & val_mask) >> val_shift)) & val_mask);
 	} else {

From 1944a50859ec2b570b42b459ac25d607fc7c31f0 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 7 Aug 2018 12:03:13 +0800
Subject: [PATCH 181/257] nds32: add NULL entry to the end of_device_id array

Make sure of_device_id tables are NULL terminated.
Found by coccinelle spatch "misc/of_table.cocci"

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/atl2c.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/nds32/kernel/atl2c.c b/arch/nds32/kernel/atl2c.c
index 0c6d031a1c4a9..0c5386e72098e 100644
--- a/arch/nds32/kernel/atl2c.c
+++ b/arch/nds32/kernel/atl2c.c
@@ -9,7 +9,8 @@
 
 void __iomem *atl2c_base;
 static const struct of_device_id atl2c_ids[] __initconst = {
-	{.compatible = "andestech,atl2c",}
+	{.compatible = "andestech,atl2c",},
+	{}
 };
 
 static int __init atl2c_of_init(void)

From c17df7960534357fb74074c2f514c831d4a9cf5a Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 13 Aug 2018 13:28:23 +0800
Subject: [PATCH 182/257] nds32: Fix empty call trace

The compiler predefined macro 'NDS32_ABI_2' had been removed, it should
use the '__NDS32_ABI_2' here.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index a6205fd4db521..f0e974347c26e 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -137,7 +137,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 		       !((unsigned long)base_reg & 0x3) &&
 		       ((unsigned long)base_reg >= TASK_SIZE)) {
 			unsigned long next_fp;
-#if !defined(NDS32_ABI_2)
+#if !defined(__NDS32_ABI_2)
 			ret_addr = base_reg[0];
 			next_fp = base_reg[1];
 #else

From 6cce95a6c7d288ac2126eee4b95df448b9015b84 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 13 Aug 2018 14:48:49 +0800
Subject: [PATCH 183/257] nds32: Fix get_user/put_user macro expand pointer
 problem

The pointer argument of macro need to be taken out once first, and then
use the new pointer in the macro body.

In kernel/trace/trace.c, get_user(ch, ubuf++) causes the unexpected
increment after expand the macro.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/include/asm/uaccess.h | 26 ++++++++++++++------------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 18a009f3804d5..3f771e0595e8f 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -78,8 +78,9 @@ static inline void set_fs(mm_segment_t fs)
 #define get_user(x,p)							\
 ({									\
 	long __e = -EFAULT;						\
-	if(likely(access_ok(VERIFY_READ,  p, sizeof(*p)))) {		\
-		__e = __get_user(x,p);					\
+	const __typeof__(*(p)) __user *__p = (p);			\
+	if(likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) {		\
+		__e = __get_user(x, __p);				\
 	} else								\
 		x = 0;							\
 	__e;								\
@@ -99,10 +100,10 @@ static inline void set_fs(mm_segment_t fs)
 
 #define __get_user_err(x,ptr,err)					\
 do {									\
-	unsigned long __gu_addr = (unsigned long)(ptr);			\
+	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	unsigned long __gu_val;						\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
+	__chk_user_ptr(__gu_addr);					\
+	switch (sizeof(*(__gu_addr))) {					\
 	case 1:								\
 		__get_user_asm("lbi",__gu_val,__gu_addr,err);		\
 		break;							\
@@ -119,7 +120,7 @@ do {									\
 		BUILD_BUG(); 						\
 		break;							\
 	}								\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__typeof__(*(__gu_addr)))__gu_val;			\
 } while (0)
 
 #define __get_user_asm(inst,x,addr,err)					\
@@ -169,8 +170,9 @@ do {									\
 #define put_user(x,p)							\
 ({									\
 	long __e = -EFAULT;						\
-	if(likely(access_ok(VERIFY_WRITE,  p, sizeof(*p)))) {		\
-		__e = __put_user(x,p);					\
+	__typeof__(*(p)) __user *__p = (p);				\
+	if(likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) {	\
+		__e = __put_user(x, __p);				\
 	}								\
 	__e;								\
 })
@@ -189,10 +191,10 @@ do {									\
 
 #define __put_user_err(x,ptr,err)					\
 do {									\
-	unsigned long __pu_addr = (unsigned long)(ptr);			\
-	__typeof__(*(ptr)) __pu_val = (x);				\
-	__chk_user_ptr(ptr);						\
-	switch (sizeof(*(ptr))) {					\
+	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
+	__typeof__(*(__pu_addr)) __pu_val = (x);			\
+	__chk_user_ptr(__pu_addr);					\
+	switch (sizeof(*(__pu_addr))) {					\
 	case 1:								\
 		__put_user_asm("sbi",__pu_val,__pu_addr,err);		\
 		break;							\

From 7ef39548df8cdb6406e3b4b7255e7f8cd3fe3b13 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 13 Aug 2018 15:08:52 +0800
Subject: [PATCH 184/257] nds32: Clean up the coding style

1. Adjust indentation.
2. Unify argument name of each macro.
3. Add space after comma in parameters list.
4. Add space after 'if' keyword.
5. Replace space by tab.
6. Change asm volatile to __asm__ __volatile__

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/include/asm/uaccess.h | 201 ++++++++++++++++---------------
 1 file changed, 103 insertions(+), 98 deletions(-)

diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index 3f771e0595e8f..e1a2b5b749b9e 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -38,7 +38,7 @@ struct exception_table_entry {
 extern int fixup_exception(struct pt_regs *regs);
 
 #define KERNEL_DS 	((mm_segment_t) { ~0UL })
-#define USER_DS     	((mm_segment_t) {TASK_SIZE - 1})
+#define USER_DS		((mm_segment_t) {TASK_SIZE - 1})
 
 #define get_ds()	(KERNEL_DS)
 #define get_fs()	(current_thread_info()->addr_limit)
@@ -49,11 +49,11 @@ static inline void set_fs(mm_segment_t fs)
 	current_thread_info()->addr_limit = fs;
 }
 
-#define segment_eq(a, b)    ((a) == (b))
+#define segment_eq(a, b)	((a) == (b))
 
 #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size))
 
-#define access_ok(type, addr, size)                 \
+#define access_ok(type, addr, size)	\
 	__range_ok((unsigned long)addr, (unsigned long)size)
 /*
  * Single-value transfer routines.  They automatically use the right
@@ -75,46 +75,48 @@ static inline void set_fs(mm_segment_t fs)
  * versions are void (ie, don't return a value as such).
  */
 
-#define get_user(x,p)							\
+#define get_user(x, ptr)						\
 ({									\
 	long __e = -EFAULT;						\
-	const __typeof__(*(p)) __user *__p = (p);			\
-	if(likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) {		\
+	const __typeof__(*(ptr)) __user *__p = (ptr);			\
+	if (likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) {	\
 		__e = __get_user(x, __p);				\
-	} else								\
-		x = 0;							\
+	} else {							\
+		(x) = 0;						\
+	}								\
 	__e;								\
 })
-#define __get_user(x,ptr)						\
+
+#define __get_user(x, ptr)						\
 ({									\
 	long __gu_err = 0;						\
-	__get_user_err((x),(ptr),__gu_err);				\
+	__get_user_err((x), (ptr), __gu_err);				\
 	__gu_err;							\
 })
 
-#define __get_user_error(x,ptr,err)					\
+#define __get_user_error(x, ptr, err)					\
 ({									\
-	__get_user_err((x),(ptr),err);					\
-	(void) 0;							\
+	__get_user_err((x), (ptr), err);				\
+	(void)0;							\
 })
 
-#define __get_user_err(x,ptr,err)					\
+#define __get_user_err(x, ptr, err)					\
 do {									\
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	unsigned long __gu_val;						\
 	__chk_user_ptr(__gu_addr);					\
 	switch (sizeof(*(__gu_addr))) {					\
 	case 1:								\
-		__get_user_asm("lbi",__gu_val,__gu_addr,err);		\
+		__get_user_asm("lbi", __gu_val, __gu_addr, (err));	\
 		break;							\
 	case 2:								\
-		__get_user_asm("lhi",__gu_val,__gu_addr,err);		\
+		__get_user_asm("lhi", __gu_val, __gu_addr, (err));	\
 		break;							\
 	case 4:								\
-		__get_user_asm("lwi",__gu_val,__gu_addr,err);		\
+		__get_user_asm("lwi", __gu_val, __gu_addr, (err));	\
 		break;							\
 	case 8:								\
-		__get_user_asm_dword(__gu_val,__gu_addr,err);		\
+		__get_user_asm_dword(__gu_val, __gu_addr, (err));	\
 		break;							\
 	default:							\
 		BUILD_BUG(); 						\
@@ -123,23 +125,23 @@ do {									\
 	(x) = (__typeof__(*(__gu_addr)))__gu_val;			\
 } while (0)
 
-#define __get_user_asm(inst,x,addr,err)					\
-	asm volatile(							\
-	"1:	"inst"	%1,[%2]\n"					\
-	"2:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"3:	move %0, %3\n"						\
-	"	move %1, #0\n"						\
-	"	b	2b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 3b\n"					\
-	"	.previous"						\
-	: "+r" (err), "=&r" (x)						\
-	: "r" (addr), "i" (-EFAULT)					\
-	: "cc")
+#define __get_user_asm(inst, x, addr, err)				\
+	__asm__ __volatile__ (						\
+		"1:	"inst"	%1,[%2]\n"				\
+		"2:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"3:	move %0, %3\n"					\
+		"	move %1, #0\n"					\
+		"	b	2b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 3b\n"				\
+		"	.previous"					\
+		: "+r" (err), "=&r" (x)					\
+		: "r" (addr), "i" (-EFAULT)				\
+		: "cc")
 
 #ifdef __NDS32_EB__
 #define __gu_reg_oper0 "%H1"
@@ -150,62 +152,64 @@ do {									\
 #endif
 
 #define __get_user_asm_dword(x, addr, err) 				\
-	asm volatile(			    				\
-	"\n1:\tlwi " __gu_reg_oper0 ",[%2]\n"				\
-	"\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n"				\
-	"3:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"4:	move	%0, %3\n"					\
-	"	b	3b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 4b\n"					\
-	"	.long	2b, 4b\n"					\
-	"	.previous"						\
-	: "+r"(err), "=&r"(x)                				\
-	: "r"(addr), "i"(-EFAULT) 					\
-	: "cc")
-#define put_user(x,p)							\
+	__asm__ __volatile__ (						\
+		"\n1:\tlwi " __gu_reg_oper0 ",[%2]\n"			\
+		"\n2:\tlwi " __gu_reg_oper1 ",[%2+4]\n"			\
+		"3:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"4:	move	%0, %3\n"				\
+		"	b	3b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 4b\n"				\
+		"	.long	2b, 4b\n"				\
+		"	.previous"					\
+		: "+r"(err), "=&r"(x)					\
+		: "r"(addr), "i"(-EFAULT)				\
+		: "cc")
+
+#define put_user(x, ptr)						\
 ({									\
 	long __e = -EFAULT;						\
-	__typeof__(*(p)) __user *__p = (p);				\
-	if(likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) {	\
+	__typeof__(*(ptr)) __user *__p = (ptr);				\
+	if (likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) {	\
 		__e = __put_user(x, __p);				\
 	}								\
 	__e;								\
 })
-#define __put_user(x,ptr)						\
+
+#define __put_user(x, ptr)						\
 ({									\
 	long __pu_err = 0;						\
-	__put_user_err((x),(ptr),__pu_err);				\
+	__put_user_err((x), (ptr), __pu_err);				\
 	__pu_err;							\
 })
 
-#define __put_user_error(x,ptr,err)					\
+#define __put_user_error(x, ptr, err)					\
 ({									\
-	__put_user_err((x),(ptr),err);					\
-	(void) 0;							\
+	__put_user_err((x), (ptr), err);				\
+	(void)0;							\
 })
 
-#define __put_user_err(x,ptr,err)					\
+#define __put_user_err(x, ptr, err)					\
 do {									\
 	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
 	__typeof__(*(__pu_addr)) __pu_val = (x);			\
 	__chk_user_ptr(__pu_addr);					\
 	switch (sizeof(*(__pu_addr))) {					\
 	case 1:								\
-		__put_user_asm("sbi",__pu_val,__pu_addr,err);		\
+		__put_user_asm("sbi", __pu_val, __pu_addr, (err));	\
 		break;							\
 	case 2: 							\
-		__put_user_asm("shi",__pu_val,__pu_addr,err);		\
+		__put_user_asm("shi", __pu_val, __pu_addr, (err));	\
 		break;							\
 	case 4: 							\
-		__put_user_asm("swi",__pu_val,__pu_addr,err);		\
+		__put_user_asm("swi", __pu_val, __pu_addr, (err));	\
 		break;							\
 	case 8:								\
-		__put_user_asm_dword(__pu_val,__pu_addr,err);		\
+		__put_user_asm_dword(__pu_val, __pu_addr, (err));	\
 		break;							\
 	default:							\
 		BUILD_BUG(); 						\
@@ -213,22 +217,22 @@ do {									\
 	}								\
 } while (0)
 
-#define __put_user_asm(inst,x,addr,err)					\
-	asm volatile(							\
-	"1:	"inst"	%1,[%2]\n"					\
-	"2:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"3:	move	%0, %3\n"					\
-	"	b	2b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 3b\n"					\
-	"	.previous"						\
-	: "+r" (err)							\
-	: "r" (x), "r" (addr), "i" (-EFAULT)				\
-	: "cc")
+#define __put_user_asm(inst, x, addr, err)				\
+	__asm__ __volatile__ (						\
+		"1:	"inst"	%1,[%2]\n"				\
+		"2:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"3:	move	%0, %3\n"				\
+		"	b	2b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 3b\n"				\
+		"	.previous"					\
+		: "+r" (err)						\
+		: "r" (x), "r" (addr), "i" (-EFAULT)			\
+		: "cc")
 
 #ifdef __NDS32_EB__
 #define __pu_reg_oper0 "%H2"
@@ -239,23 +243,24 @@ do {									\
 #endif
 
 #define __put_user_asm_dword(x, addr, err) 				\
-	asm volatile(			    				\
-	"\n1:\tswi " __pu_reg_oper0 ",[%1]\n"				\
-	"\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n"				\
-	"3:\n"								\
-	"	.section .fixup,\"ax\"\n"				\
-	"	.align	2\n"						\
-	"4:	move	%0, %3\n"					\
-	"	b	3b\n"						\
-	"	.previous\n"						\
-	"	.section __ex_table,\"a\"\n"				\
-	"	.align	3\n"						\
-	"	.long	1b, 4b\n"					\
-	"	.long	2b, 4b\n"					\
-	"	.previous"						\
-	: "+r"(err)                					\
-	: "r"(addr), "r"(x), "i"(-EFAULT) 				\
-	: "cc")
+	__asm__ __volatile__ (						\
+		"\n1:\tswi " __pu_reg_oper0 ",[%1]\n"			\
+		"\n2:\tswi " __pu_reg_oper1 ",[%1+4]\n"			\
+		"3:\n"							\
+		"	.section .fixup,\"ax\"\n"			\
+		"	.align	2\n"					\
+		"4:	move	%0, %3\n"				\
+		"	b	3b\n"					\
+		"	.previous\n"					\
+		"	.section __ex_table,\"a\"\n"			\
+		"	.align	3\n"					\
+		"	.long	1b, 4b\n"				\
+		"	.long	2b, 4b\n"				\
+		"	.previous"					\
+		: "+r"(err)						\
+		: "r"(addr), "r"(x), "i"(-EFAULT)			\
+		: "cc")
+
 extern unsigned long __arch_clear_user(void __user * addr, unsigned long n);
 extern long strncpy_from_user(char *dest, const char __user * src, long count);
 extern __must_check long strlen_user(const char __user * str);

From 487913ab18c215b06611c4c91c7e905fc0960eb8 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 13 Aug 2018 16:02:53 +0800
Subject: [PATCH 185/257] nds32: Extract the checking and getting pointer to a
 macro

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/include/asm/uaccess.h | 80 ++++++++++++++++----------------
 1 file changed, 41 insertions(+), 39 deletions(-)

diff --git a/arch/nds32/include/asm/uaccess.h b/arch/nds32/include/asm/uaccess.h
index e1a2b5b749b9e..362a32d9bd168 100644
--- a/arch/nds32/include/asm/uaccess.h
+++ b/arch/nds32/include/asm/uaccess.h
@@ -75,54 +75,54 @@ static inline void set_fs(mm_segment_t fs)
  * versions are void (ie, don't return a value as such).
  */
 
-#define get_user(x, ptr)						\
-({									\
-	long __e = -EFAULT;						\
-	const __typeof__(*(ptr)) __user *__p = (ptr);			\
-	if (likely(access_ok(VERIFY_READ, __p, sizeof(*__p)))) {	\
-		__e = __get_user(x, __p);				\
-	} else {							\
-		(x) = 0;						\
-	}								\
-	__e;								\
-})
+#define get_user	__get_user					\
 
 #define __get_user(x, ptr)						\
 ({									\
 	long __gu_err = 0;						\
-	__get_user_err((x), (ptr), __gu_err);				\
+	__get_user_check((x), (ptr), __gu_err);				\
 	__gu_err;							\
 })
 
 #define __get_user_error(x, ptr, err)					\
 ({									\
-	__get_user_err((x), (ptr), err);				\
+	__get_user_check((x), (ptr), (err));				\
 	(void)0;							\
 })
 
+#define __get_user_check(x, ptr, err)					\
+({									\
+	const __typeof__(*(ptr)) __user *__p = (ptr);			\
+	might_fault();							\
+	if (access_ok(VERIFY_READ, __p, sizeof(*__p))) {		\
+		__get_user_err((x), __p, (err));			\
+	} else {							\
+		(x) = 0; (err) = -EFAULT;				\
+	}								\
+})
+
 #define __get_user_err(x, ptr, err)					\
 do {									\
-	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);		\
 	unsigned long __gu_val;						\
-	__chk_user_ptr(__gu_addr);					\
-	switch (sizeof(*(__gu_addr))) {					\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__get_user_asm("lbi", __gu_val, __gu_addr, (err));	\
+		__get_user_asm("lbi", __gu_val, (ptr), (err));		\
 		break;							\
 	case 2:								\
-		__get_user_asm("lhi", __gu_val, __gu_addr, (err));	\
+		__get_user_asm("lhi", __gu_val, (ptr), (err));		\
 		break;							\
 	case 4:								\
-		__get_user_asm("lwi", __gu_val, __gu_addr, (err));	\
+		__get_user_asm("lwi", __gu_val, (ptr), (err));		\
 		break;							\
 	case 8:								\
-		__get_user_asm_dword(__gu_val, __gu_addr, (err));	\
+		__get_user_asm_dword(__gu_val, (ptr), (err));		\
 		break;							\
 	default:							\
 		BUILD_BUG(); 						\
 		break;							\
 	}								\
-	(x) = (__typeof__(*(__gu_addr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 } while (0)
 
 #define __get_user_asm(inst, x, addr, err)				\
@@ -170,15 +170,7 @@ do {									\
 		: "r"(addr), "i"(-EFAULT)				\
 		: "cc")
 
-#define put_user(x, ptr)						\
-({									\
-	long __e = -EFAULT;						\
-	__typeof__(*(ptr)) __user *__p = (ptr);				\
-	if (likely(access_ok(VERIFY_WRITE, __p, sizeof(*__p)))) {	\
-		__e = __put_user(x, __p);				\
-	}								\
-	__e;								\
-})
+#define put_user	__put_user					\
 
 #define __put_user(x, ptr)						\
 ({									\
@@ -189,27 +181,37 @@ do {									\
 
 #define __put_user_error(x, ptr, err)					\
 ({									\
-	__put_user_err((x), (ptr), err);				\
+	__put_user_err((x), (ptr), (err));				\
 	(void)0;							\
 })
 
+#define __put_user_check(x, ptr, err)					\
+({									\
+	__typeof__(*(ptr)) __user *__p = (ptr);				\
+	might_fault();							\
+	if (access_ok(VERIFY_WRITE, __p, sizeof(*__p))) {		\
+		__put_user_err((x), __p, (err));			\
+	} else	{							\
+		(err) = -EFAULT;					\
+	}								\
+})
+
 #define __put_user_err(x, ptr, err)					\
 do {									\
-	__typeof__(*(ptr)) __user *__pu_addr = (ptr);			\
-	__typeof__(*(__pu_addr)) __pu_val = (x);			\
-	__chk_user_ptr(__pu_addr);					\
-	switch (sizeof(*(__pu_addr))) {					\
+	__typeof__(*(ptr)) __pu_val = (x);				\
+	__chk_user_ptr(ptr);						\
+	switch (sizeof(*(ptr))) {					\
 	case 1:								\
-		__put_user_asm("sbi", __pu_val, __pu_addr, (err));	\
+		__put_user_asm("sbi", __pu_val, (ptr), (err));		\
 		break;							\
 	case 2: 							\
-		__put_user_asm("shi", __pu_val, __pu_addr, (err));	\
+		__put_user_asm("shi", __pu_val, (ptr), (err));		\
 		break;							\
 	case 4: 							\
-		__put_user_asm("swi", __pu_val, __pu_addr, (err));	\
+		__put_user_asm("swi", __pu_val, (ptr), (err));		\
 		break;							\
 	case 8:								\
-		__put_user_asm_dword(__pu_val, __pu_addr, (err));	\
+		__put_user_asm_dword(__pu_val, (ptr), (err));		\
 		break;							\
 	default:							\
 		BUILD_BUG(); 						\

From a18082575c664847d36c6ca030b09ce8d93aec2f Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Wed, 15 Aug 2018 10:45:59 +0800
Subject: [PATCH 186/257] nds32/ftrace: Support static function tracer

This patch support the static function tracer. On nds32 ABI, we need to
always push return address to stack for __builtin_return_address can
work correctly, otherwise, it will get the wrong value of $lp at leaf
function.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/Kconfig              |  1 +
 arch/nds32/Makefile             |  4 ++++
 arch/nds32/include/asm/ftrace.h | 20 ++++++++++++++++++++
 arch/nds32/kernel/Makefile      |  6 ++++++
 arch/nds32/kernel/ftrace.c      | 28 ++++++++++++++++++++++++++++
 5 files changed, 59 insertions(+)
 create mode 100644 arch/nds32/include/asm/ftrace.h
 create mode 100644 arch/nds32/kernel/ftrace.c

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 1d4248fa55e99..853497fe42666 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -40,6 +40,7 @@ config NDS32
 	select NO_IOPORT_MAP
 	select RTC_LIB
 	select THREAD_INFO_IN_TASK
+	select HAVE_FUNCTION_TRACER
 	help
 	  Andes(nds32) Linux support.
 
diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile
index 63f4f173e5f4b..3509fac104919 100644
--- a/arch/nds32/Makefile
+++ b/arch/nds32/Makefile
@@ -5,6 +5,10 @@ KBUILD_DEFCONFIG := defconfig
 
 comma = ,
 
+ifdef CONFIG_FUNCTION_TRACER
+arch-y += -malways-save-lp -mno-relax
+endif
+
 KBUILD_CFLAGS	+= $(call cc-option, -mno-sched-prolog-epilog)
 KBUILD_CFLAGS	+= -mcmodel=large
 
diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h
new file mode 100644
index 0000000000000..bac7657f576a9
--- /dev/null
+++ b/arch/nds32/include/asm/ftrace.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_NDS32_FTRACE_H
+#define __ASM_NDS32_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define HAVE_FUNCTION_GRAPH_FP_TEST
+
+#define MCOUNT_ADDR ((unsigned long)(_mcount))
+/* mcount call is composed of three instructions:
+ * sethi + ori + jral
+ */
+#define MCOUNT_INSN_SIZE 12
+
+extern void _mcount(unsigned long parent_ip);
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* __ASM_NDS32_FTRACE_H */
diff --git a/arch/nds32/kernel/Makefile b/arch/nds32/kernel/Makefile
index 42792743e8b95..27cded39fa662 100644
--- a/arch/nds32/kernel/Makefile
+++ b/arch/nds32/kernel/Makefile
@@ -21,3 +21,9 @@ extra-y := head.o vmlinux.lds
 
 
 obj-y				+= vdso/
+
+obj-$(CONFIG_FUNCTION_TRACER)   += ftrace.o
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+endif
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
new file mode 100644
index 0000000000000..563f64c070b3b
--- /dev/null
+++ b/arch/nds32/kernel/ftrace.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+extern void (*ftrace_trace_function)(unsigned long, unsigned long,
+				     struct ftrace_ops*, struct pt_regs*);
+
+noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
+				  struct ftrace_ops *op, struct pt_regs *regs)
+{
+	__asm__ ("");  /* avoid to optimize as pure function */
+}
+
+noinline void _mcount(unsigned long parent_ip)
+{
+	/* save all state by the compiler prologue */
+
+	unsigned long ip = (unsigned long)__builtin_return_address(0);
+
+	if (ftrace_trace_function != ftrace_stub)
+		ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip,
+				      NULL, NULL);
+
+	/* restore all state by the compiler epilogue */
+}
+EXPORT_SYMBOL(_mcount);

From 1e9b14c0d92b61a0979fd5ee24d5e7f080f11030 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Wed, 15 Aug 2018 10:53:04 +0800
Subject: [PATCH 187/257] nds32/ftrace: Support static function graph tracer

This patch contains implementation of static function graph tracer.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/Kconfig         |  1 +
 arch/nds32/kernel/ftrace.c | 69 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+)

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 853497fe42666..ea171a00327c4 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -41,6 +41,7 @@ config NDS32
 	select RTC_LIB
 	select THREAD_INFO_IN_TASK
 	select HAVE_FUNCTION_TRACER
+	select HAVE_FUNCTION_GRAPH_TRACER
 	help
 	  Andes(nds32) Linux support.
 
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 563f64c070b3b..707fce76522e6 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -6,6 +6,8 @@
 
 extern void (*ftrace_trace_function)(unsigned long, unsigned long,
 				     struct ftrace_ops*, struct pt_regs*);
+extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
+extern void ftrace_graph_caller(void);
 
 noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
 				  struct ftrace_ops *op, struct pt_regs *regs)
@@ -23,6 +25,73 @@ noinline void _mcount(unsigned long parent_ip)
 		ftrace_trace_function(ip - MCOUNT_INSN_SIZE, parent_ip,
 				      NULL, NULL);
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	if (ftrace_graph_return != (trace_func_graph_ret_t)ftrace_stub
+	    || ftrace_graph_entry != ftrace_graph_entry_stub)
+		ftrace_graph_caller();
+#endif
+
 	/* restore all state by the compiler epilogue */
 }
 EXPORT_SYMBOL(_mcount);
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+	struct ftrace_graph_ent trace;
+	unsigned long old;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+
+	trace.func = self_addr;
+	trace.depth = current->curr_ret_stack + 1;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace))
+		return;
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer, NULL);
+
+	if (err == -EBUSY)
+		return;
+
+	*parent = return_hooker;
+}
+
+noinline void ftrace_graph_caller(void)
+{
+	unsigned long *parent_ip =
+		(unsigned long *)(__builtin_frame_address(2) - 4);
+
+	unsigned long selfpc =
+		(unsigned long)(__builtin_return_address(1) - MCOUNT_INSN_SIZE);
+
+	unsigned long frame_pointer =
+		(unsigned long)__builtin_frame_address(3);
+
+	prepare_ftrace_return(parent_ip, selfpc, frame_pointer);
+}
+
+extern unsigned long ftrace_return_to_handler(unsigned long frame_pointer);
+void __naked return_to_handler(void)
+{
+	__asm__ __volatile__ (
+		/* save state needed by the ABI     */
+		"smw.adm $r0,[$sp],$r1,#0x0  \n\t"
+
+		/* get original return address      */
+		"move $r0, $fp               \n\t"
+		"bal ftrace_return_to_handler\n\t"
+		"move $lp, $r0               \n\t"
+
+		/* restore state nedded by the ABI  */
+		"lmw.bim $r0,[$sp],$r1,#0x0  \n\t");
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */

From fbf58a52ac088669dfa930e557d0303a9fbb7e17 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Wed, 15 Aug 2018 10:57:16 +0800
Subject: [PATCH 188/257] nds32/ftrace: Add RECORD_MCOUNT support

Recognize NDS32 object files in recordmcount.pl.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/Kconfig      | 1 +
 scripts/recordmcount.pl | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index ea171a00327c4..48d92171ea20e 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -42,6 +42,7 @@ config NDS32
 	select THREAD_INFO_IN_TASK
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FTRACE_MCOUNT_RECORD
 	help
 	  Andes(nds32) Linux support.
 
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index fe06e77c15eb7..f599031260d51 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -389,6 +389,9 @@ sub check_objcopy
     $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$";
     $type = ".quad";
     $alignment = 2;
+} elsif ($arch eq "nds32") {
+    $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_NDS32_HI20_RELA\\s+_mcount\$";
+    $alignment = 2;
 } else {
     die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD";
 }

From 6b1d6d2fba37129f690ee7e9164f225c55626cac Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Wed, 15 Aug 2018 11:00:08 +0800
Subject: [PATCH 189/257] nds32/ftrace: Support dynamic function tracer

This patch contains the implementation of dynamic function tracer.
The mcount call is composed of three instructions, so there are three
nop for enough placeholder.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/Kconfig              |   1 +
 arch/nds32/include/asm/ftrace.h |  26 +++++
 arch/nds32/kernel/ftrace.c      | 164 ++++++++++++++++++++++++++++++++
 3 files changed, 191 insertions(+)

diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index 48d92171ea20e..7068f341133d7 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -43,6 +43,7 @@ config NDS32
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_DYNAMIC_FTRACE
 	help
 	  Andes(nds32) Linux support.
 
diff --git a/arch/nds32/include/asm/ftrace.h b/arch/nds32/include/asm/ftrace.h
index bac7657f576a9..2f96cc96aa35c 100644
--- a/arch/nds32/include/asm/ftrace.h
+++ b/arch/nds32/include/asm/ftrace.h
@@ -15,6 +15,32 @@
 
 extern void _mcount(unsigned long parent_ip);
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define FTRACE_ADDR ((unsigned long)_ftrace_caller)
+
+#ifdef __NDS32_EL__
+#define INSN_NOP		0x09000040
+#define INSN_SIZE(insn)		(((insn & 0x00000080) == 0) ? 4 : 2)
+#define IS_SETHI(insn)		((insn & 0x000000fe) == 0x00000046)
+#define ENDIAN_CONVERT(insn)	be32_to_cpu(insn)
+#else /* __NDS32_EB__ */
+#define INSN_NOP		0x40000009
+#define INSN_SIZE(insn)		(((insn & 0x80000000) == 0) ? 4 : 2)
+#define IS_SETHI(insn)		((insn & 0xfe000000) == 0x46000000)
+#define ENDIAN_CONVERT(insn)	(insn)
+#endif
+
+extern void _ftrace_caller(unsigned long parent_ip);
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+struct dyn_arch_ftrace {
+};
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #endif /* __ASM_NDS32_FTRACE_H */
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 707fce76522e6..3ca676b75d976 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -4,6 +4,7 @@
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
 
+#ifndef CONFIG_DYNAMIC_FTRACE
 extern void (*ftrace_trace_function)(unsigned long, unsigned long,
 				     struct ftrace_ops*, struct pt_regs*);
 extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
@@ -35,6 +36,168 @@ noinline void _mcount(unsigned long parent_ip)
 }
 EXPORT_SYMBOL(_mcount);
 
+#else /* CONFIG_DYNAMIC_FTRACE */
+
+noinline void __naked ftrace_stub(unsigned long ip, unsigned long parent_ip,
+				  struct ftrace_ops *op, struct pt_regs *regs)
+{
+	__asm__ ("");  /* avoid to optimize as pure function */
+}
+
+noinline void __naked _mcount(unsigned long parent_ip)
+{
+	__asm__ ("");  /* avoid to optimize as pure function */
+}
+EXPORT_SYMBOL(_mcount);
+
+#define XSTR(s) STR(s)
+#define STR(s) #s
+void _ftrace_caller(unsigned long parent_ip)
+{
+	/* save all state needed by the compiler prologue */
+
+	/*
+	 * prepare arguments for real tracing function
+	 * first  arg : __builtin_return_address(0) - MCOUNT_INSN_SIZE
+	 * second arg : parent_ip
+	 */
+	__asm__ __volatile__ (
+		"move $r1, %0				   \n\t"
+		"addi $r0, %1, #-" XSTR(MCOUNT_INSN_SIZE) "\n\t"
+		:
+		: "r" (parent_ip), "r" (__builtin_return_address(0)));
+
+	/* a placeholder for the call to a real tracing function */
+	__asm__ __volatile__ (
+		"ftrace_call:		\n\t"
+		"nop			\n\t"
+		"nop			\n\t"
+		"nop			\n\t");
+
+	/* restore all state needed by the compiler epilogue */
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
+
+int ftrace_arch_code_modify_prepare(void)
+{
+	set_all_modules_text_rw();
+	return 0;
+}
+
+int ftrace_arch_code_modify_post_process(void)
+{
+	set_all_modules_text_ro();
+	return 0;
+}
+
+static unsigned long gen_sethi_insn(unsigned long addr)
+{
+	unsigned long opcode = 0x46000000;
+	unsigned long imm = addr >> 12;
+	unsigned long rt_num = 0xf << 20;
+
+	return ENDIAN_CONVERT(opcode | rt_num | imm);
+}
+
+static unsigned long gen_ori_insn(unsigned long addr)
+{
+	unsigned long opcode = 0x58000000;
+	unsigned long imm = addr & 0x0000fff;
+	unsigned long rt_num = 0xf << 20;
+	unsigned long ra_num = 0xf << 15;
+
+	return ENDIAN_CONVERT(opcode | rt_num | ra_num | imm);
+}
+
+static unsigned long gen_jral_insn(unsigned long addr)
+{
+	unsigned long opcode = 0x4a000001;
+	unsigned long rt_num = 0x1e << 20;
+	unsigned long rb_num = 0xf << 10;
+
+	return ENDIAN_CONVERT(opcode | rt_num | rb_num);
+}
+
+static void ftrace_gen_call_insn(unsigned long *call_insns,
+				 unsigned long addr)
+{
+	call_insns[0] = gen_sethi_insn(addr); /* sethi $r15, imm20u       */
+	call_insns[1] = gen_ori_insn(addr);   /* ori   $r15, $r15, imm15u */
+	call_insns[2] = gen_jral_insn(addr);  /* jral  $lp,  $r15         */
+}
+
+static int __ftrace_modify_code(unsigned long pc, unsigned long *old_insn,
+				unsigned long *new_insn, bool validate)
+{
+	unsigned long orig_insn[3];
+
+	if (validate) {
+		if (probe_kernel_read(orig_insn, (void *)pc, MCOUNT_INSN_SIZE))
+			return -EFAULT;
+		if (memcmp(orig_insn, old_insn, MCOUNT_INSN_SIZE))
+			return -EINVAL;
+	}
+
+	if (probe_kernel_write((void *)pc, new_insn, MCOUNT_INSN_SIZE))
+		return -EPERM;
+
+	return 0;
+}
+
+static int ftrace_modify_code(unsigned long pc, unsigned long *old_insn,
+			      unsigned long *new_insn, bool validate)
+{
+	int ret;
+
+	ret = __ftrace_modify_code(pc, old_insn, new_insn, validate);
+	if (ret)
+		return ret;
+
+	flush_icache_range(pc, pc + MCOUNT_INSN_SIZE);
+
+	return ret;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	unsigned long pc = (unsigned long)&ftrace_call;
+	unsigned long old_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long new_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	if (func != ftrace_stub)
+		ftrace_gen_call_insn(new_insn, (unsigned long)func);
+
+	return ftrace_modify_code(pc, old_insn, new_insn, false);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	ftrace_gen_call_insn(call_insn, addr);
+
+	return ftrace_modify_code(pc, nop_insn, call_insn, true);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+		    unsigned long addr)
+{
+	unsigned long pc = rec->ip;
+	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	ftrace_gen_call_insn(call_insn, addr);
+
+	return ftrace_modify_code(pc, call_insn, nop_insn, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 			   unsigned long frame_pointer)
@@ -94,4 +257,5 @@ void __naked return_to_handler(void)
 		/* restore state nedded by the ABI  */
 		"lmw.bim $r0,[$sp],$r1,#0x0  \n\t");
 }
+
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */

From 95cd2f7bce9aa712473bba1b5b3f4fdec148baee Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Wed, 15 Aug 2018 11:01:10 +0800
Subject: [PATCH 190/257] nds32/ftrace: Support dynamic function graph tracer

This patch contains the implementation of dynamic function graph tracer.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/ftrace.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index 3ca676b75d976..a646a83390520 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -74,6 +74,14 @@ void _ftrace_caller(unsigned long parent_ip)
 		"nop			\n\t"
 		"nop			\n\t");
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* a placeholder for the call to ftrace_graph_caller */
+	__asm__ __volatile__ (
+		"ftrace_graph_call:	\n\t"
+		"nop			\n\t"
+		"nop			\n\t"
+		"nop			\n\t");
+#endif
 	/* restore all state needed by the compiler epilogue */
 }
 
@@ -258,4 +266,32 @@ void __naked return_to_handler(void)
 		"lmw.bim $r0,[$sp],$r1,#0x0  \n\t");
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	unsigned long pc = (unsigned long)&ftrace_graph_call;
+	unsigned long nop_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+	unsigned long call_insn[3] = {INSN_NOP, INSN_NOP, INSN_NOP};
+
+	ftrace_gen_call_insn(call_insn, (unsigned long)ftrace_graph_caller);
+
+	if (enable)
+		return ftrace_modify_code(pc, nop_insn, call_insn, true);
+	else
+		return ftrace_modify_code(pc, call_insn, nop_insn, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */

From 1e377ae9b04aef4dc531fa4c5f81b65d440ebcba Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Wed, 15 Aug 2018 11:05:40 +0800
Subject: [PATCH 191/257] nds32/stack: Get real return address by using
 ftrace_graph_ret_addr

Function graph tracer has modified the return address to
'return_to_handler' on stack, and provide the 'ftrace_graph_ret_addr' to
get the real return address.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/stacktrace.c |  4 ++++
 arch/nds32/kernel/traps.c      | 30 ++++++------------------------
 2 files changed, 10 insertions(+), 24 deletions(-)

diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c
index 8b231e910ea68..36bc87003e83f 100644
--- a/arch/nds32/kernel/stacktrace.c
+++ b/arch/nds32/kernel/stacktrace.c
@@ -4,6 +4,7 @@
 #include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/stacktrace.h>
+#include <linux/ftrace.h>
 
 void save_stack_trace(struct stack_trace *trace)
 {
@@ -16,6 +17,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	unsigned long *fpn;
 	int skip = trace->skip;
 	int savesched;
+	int graph_idx = 0;
 
 	if (tsk == current) {
 		__asm__ __volatile__("\tori\t%0, $fp, #0\n":"=r"(fpn));
@@ -33,6 +35,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 		fpp = fpn[FP_OFFSET];
 		if (!__kernel_text_address(lpp))
 			break;
+		else
+			lpp = ftrace_graph_ret_addr(tsk, &graph_idx, lpp, NULL);
 
 		if (savesched || !in_sched_functions(lpp)) {
 			if (skip) {
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index f0e974347c26e..7684c8f597eda 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -8,6 +8,7 @@
 #include <linux/kdebug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/uaccess.h>
+#include <linux/ftrace.h>
 
 #include <asm/proc-fns.h>
 #include <asm/unistd.h>
@@ -94,28 +95,6 @@ static void dump_instr(struct pt_regs *regs)
 	set_fs(fs);
 }
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-#include <linux/ftrace.h>
-static void
-get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph)
-{
-	if (*addr == (unsigned long)return_to_handler) {
-		int index = tsk->curr_ret_stack;
-
-		if (tsk->ret_stack && index >= *graph) {
-			index -= *graph;
-			*addr = tsk->ret_stack[index].ret;
-			(*graph)++;
-		}
-	}
-}
-#else
-static inline void
-get_real_ret_addr(unsigned long *addr, struct task_struct *tsk, int *graph)
-{
-}
-#endif
-
 #define LOOP_TIMES (100)
 static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 {
@@ -126,7 +105,8 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 		while (!kstack_end(base_reg)) {
 			ret_addr = *base_reg++;
 			if (__kernel_text_address(ret_addr)) {
-				get_real_ret_addr(&ret_addr, tsk, &graph);
+				ret_addr = ftrace_graph_ret_addr(
+						tsk, &graph, ret_addr, NULL);
 				print_ip_sym(ret_addr);
 			}
 			if (--cnt < 0)
@@ -145,7 +125,9 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 			next_fp = base_reg[FP_OFFSET];
 #endif
 			if (__kernel_text_address(ret_addr)) {
-				get_real_ret_addr(&ret_addr, tsk, &graph);
+
+				ret_addr = ftrace_graph_ret_addr(
+						tsk, &graph, ret_addr, NULL);
 				print_ip_sym(ret_addr);
 			}
 			if (--cnt < 0)

From c5fdf7e00d490dc094a97d287e0fa27e253cca84 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 20 Aug 2018 09:40:08 +0800
Subject: [PATCH 192/257] nds32: Remove the deprecated ABI implementation

We are not using NDS32 ABI 2 for now, just remove the preprocessor
directives __NDS32_ABI_2.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/traps.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index 7684c8f597eda..f432310f3d02e 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -117,13 +117,8 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 		       !((unsigned long)base_reg & 0x3) &&
 		       ((unsigned long)base_reg >= TASK_SIZE)) {
 			unsigned long next_fp;
-#if !defined(__NDS32_ABI_2)
-			ret_addr = base_reg[0];
-			next_fp = base_reg[1];
-#else
 			ret_addr = base_reg[-1];
 			next_fp = base_reg[FP_OFFSET];
-#endif
 			if (__kernel_text_address(ret_addr)) {
 
 				ret_addr = ftrace_graph_ret_addr(

From 95f93ed7fe92c16f5346e477491d91e4fa8e92b8 Mon Sep 17 00:00:00 2001
From: Zong Li <zong@andestech.com>
Date: Mon, 20 Aug 2018 09:51:29 +0800
Subject: [PATCH 193/257] nds32: Add macro definition for offset of lp register
 on stack

Use macro to replace the magic number.

Signed-off-by: Zong Li <zong@andestech.com>
Acked-by: Greentime Hu <greentime@andestech.com>
Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/include/asm/nds32.h | 1 +
 arch/nds32/kernel/stacktrace.c | 2 +-
 arch/nds32/kernel/traps.c      | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/nds32.h b/arch/nds32/include/asm/nds32.h
index 19b19394a936c..68c38151c3e41 100644
--- a/arch/nds32/include/asm/nds32.h
+++ b/arch/nds32/include/asm/nds32.h
@@ -17,6 +17,7 @@
 #else
 #define FP_OFFSET (-2)
 #endif
+#define LP_OFFSET (-1)
 
 extern void __init early_trap_init(void);
 static inline void GIE_ENABLE(void)
diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c
index 36bc87003e83f..d974c0c1c65f3 100644
--- a/arch/nds32/kernel/stacktrace.c
+++ b/arch/nds32/kernel/stacktrace.c
@@ -31,7 +31,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 	       && (fpn >= (unsigned long *)TASK_SIZE)) {
 		unsigned long lpp, fpp;
 
-		lpp = fpn[-1];
+		lpp = fpn[LP_OFFSET];
 		fpp = fpn[FP_OFFSET];
 		if (!__kernel_text_address(lpp))
 			break;
diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index f432310f3d02e..b0b85b7ab0792 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -117,7 +117,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg)
 		       !((unsigned long)base_reg & 0x3) &&
 		       ((unsigned long)base_reg >= TASK_SIZE)) {
 			unsigned long next_fp;
-			ret_addr = base_reg[-1];
+			ret_addr = base_reg[LP_OFFSET];
 			next_fp = base_reg[FP_OFFSET];
 			if (__kernel_text_address(ret_addr)) {
 

From 487c4b2323b26cfb5fd4d77d3605a92c182b6288 Mon Sep 17 00:00:00 2001
From: Greentime Hu <greentime@andestech.com>
Date: Thu, 23 Aug 2018 14:47:43 +0800
Subject: [PATCH 194/257] nds32: Only print one page of stack when die to
 prevent printing too much information.

It may print too much information sometimes if the stack is wrong or
too big. This patch can limit the debug information in a page of stack.

Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/traps.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c
index b0b85b7ab0792..1496aab489988 100644
--- a/arch/nds32/kernel/traps.c
+++ b/arch/nds32/kernel/traps.c
@@ -173,11 +173,10 @@ void die(const char *str, struct pt_regs *regs, int err)
 	pr_emerg("CPU: %i\n", smp_processor_id());
 	show_regs(regs);
 	pr_emerg("Process %s (pid: %d, stack limit = 0x%p)\n",
-		 tsk->comm, tsk->pid, task_thread_info(tsk) + 1);
+		 tsk->comm, tsk->pid, end_of_stack(tsk));
 
 	if (!user_mode(regs) || in_interrupt()) {
-		dump_mem("Stack: ", regs->sp,
-			 THREAD_SIZE + (unsigned long)task_thread_info(tsk));
+		dump_mem("Stack: ", regs->sp, (regs->sp + PAGE_SIZE) & PAGE_MASK);
 		dump_instr(regs);
 		dump_stack();
 	}

From 0cde56e0280d70ce26b54d22131944c2fe584b38 Mon Sep 17 00:00:00 2001
From: Greentime Hu <greentime@andestech.com>
Date: Thu, 23 Aug 2018 15:05:46 +0800
Subject: [PATCH 195/257] nds32: Fix a kernel panic issue because of wrong
 frame pointer access.

It can make sure that trace_hardirqs_off/trace_hardirqs_on can get a correct
return address by frame pointer through __builtin_return_address() in this fix.

Unable to handle kernel paging request at virtual address fffffffc
pgd = 3c42e9cf
[fffffffc] *pgd=02a9c000

Internal error: Oops: 1 [#1]
Modules linked in:
CPU: 0
PC is at trace_hardirqs_off+0x78/0xec
LP is at common_exception_handler+0xda/0xf4
pc : [<b23ea5a4>]    lp : [<b2352eba>]    Tainted: G        W
sp : ada60ab0  fp : efcaff48  gp : 3a020490
r25: efcb0000  r24: 00000000
r23: 00000000  r22: 00000000  r21: 00000000  r20: 000700c1
r19: 000700ca  r18: 3a21b018  r17: 00000001  r16: 00000002
r15: 00000001  r14: 0000002a  r13: 3a00a804  r12: ada60ab0
r11: 3a113af8  r10: 3a01c530  r9 : 3a124404  r8 : 00120f9c
r7 : b2352eba  r6 : 00000000  r5 : 3a126b58  r4 : 00000000
r3 : 3a1726a8  r2 : b2921000  r1 : 00000000  r0 : 00000000
  IRQs off  Segment user
Process init (pid: 1, stack limit = 0x069d7f15)
Stack: (0xada60ab0 to 0xada61000)
Stack: 0aa0:                                     00000000 00000003 3a110000 0011f000
Stack: 0ac0: 00000005 00000000 00000000 00000000 ada60b10 3a01fe68 ada60b0c ada60b08
Stack: 0ae0: 00000000 ada60ab8 ada60b30 3a020550 00000000 00000001 3a11c2f8 3a01c6e8
Stack: 0b00: 3a01cb80 fffffba8 3a113af8 3a21b018 3a122c28 00003ec4 00000165 00000000
Stack: 0b20: 3a126aec 0000006c 00000000 00000001 3a01fe68 00000000 00000003 00000000
Stack: 0b40: 00000001 000003f8 3a020930 3a01c530 00000008 ada60c18 3a020490 3a003120
Stack: 0b60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0b80: 00000000 00000000 00000000 00000000 ffff8000 00000000 00000000 00000000
Stack: 0ba0: 00000000 00000001 3a020550 00000000 3a01d020 00000000 fffff000 fffff000
Stack: 0bc0: 00000000 00000000 00000000 00000000 ada60f2c 00000000 00000001 00000000
Stack: 0be0: 00000000 00000000 3a01fe68 fffffab0 00008034 00000008 3a0010cc 3a01fe68
Stack: 0c00: 00000000 00000000 00000001 ada60c88 3a020490 3a0139d4 0009dc6f 00000000
Stack: 0c20: 00000000 00000000 ada60fce fffff000 00000000 0000ebe0 3a020038 3a020550
Stack: 0c40: ada60f20 ada60c90 3a0007f0 3a0002a8 ada60c8c 00000000 00000000 ada60c88
Stack: 0c60: 3a020490 3a004570 00000000 00000000 ada60f20 3a0007f0 3a000000 00000000
Stack: 0c80: 3a020490 3a004850 00000000 3a013f24 3a000000 00000000 3a01ff44 00000000
Stack: 0ca0: 00000000 00000000 00000000 00000000 00000000 00000000 3a01ff84 3a01ff7c
Stack: 0cc0: 3a01ff4c 3a01ff5c 3a01ff64 3a01ff9c 3a01ffa4 3a01ffac 3a01ff6c 3a01ff74
Stack: 0ce0: 00000000 00000000 3a01ff44 00000000 00000000 00000000 00000000 00000000
Stack: 0d00: 3a01ff8c 00000000 00000000 3a01ff94 00000000 00000000 00000000 00000000
Stack: 0d20: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0d40: 3a01ffbc 3a01ffb4 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0d60: 00000000 00000000 00000000 00000000 00000000 3a01ffc4 00000000 00000000
Stack: 0d80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0da0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0dc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3a01ff54
Stack: 0de0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0e00: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0e20: 00000000 00000004 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0e40: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0e60: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0e80: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0ea0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
Stack: 0ec0: 00000000 00000000 00000000 00000000 ffffffff 00000000 00000000 00000000
Stack: 0ee0: 00000000 00000000 00000000 00000000 ada60f20 00000000 00000000 00000000
Stack: 0f00: 00000000 00000000 00000000 00000000 00000000 00000000 3a020490 3a000b24
Stack: 0f20: 00000001 ada60fde 00000000 ada60fe4 ada60feb 00000000 00000021 3a038000
Stack: 0f40: 00000010 0009dc6f 00000006 00001000 00000011 00000064 00000003 00008034
Stack: 0f60: 00000004 00000020 00000005 00000008 00000007 3a000000 00000008 00000000
Stack: 0f80: 00000009 0000ebe0 0000000b 00000000 0000000c 00000000 0000000d 00000000
Stack: 0fa0: 0000000e 00000000 00000017 00000000 00000019 ada60fce 0000001f ada60ff6
Stack: 0fc0: 00000000 00000000 00000000 b5010000 fa839914 23b5dd89 a2aea540 692fc82e
Stack: 0fe0: 0074696e 454d4f48 54002f3d 3d4d5245 756e696c 692f0078 0074696e 00000000
CPU: 0 PID: 1 Comm: init Tainted: G        W         4.18.0-00015-g1888b64a2558-dirty #112
Hardware name: andestech,ae3xx (DT)
Call Trace:
[<b27a8e34>] dump_stack+0x2c/0x38
[<b2354874>] die+0x128/0x18c
[<b2356f4c>] do_page_fault+0x3b8/0x4e0
[<b2352ed4>] ret_from_exception+0x0/0x10
[<b2352eba>] common_exception_handler+0xda/0xf4

Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/kernel/ex-entry.S |  2 +-
 arch/nds32/kernel/ex-exit.S  |  4 ++--
 arch/nds32/kernel/ftrace.c   | 12 ++++++++++++
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S
index b8ae4e9a6b93d..21a1440715669 100644
--- a/arch/nds32/kernel/ex-entry.S
+++ b/arch/nds32/kernel/ex-entry.S
@@ -118,7 +118,7 @@ common_exception_handler:
 	/* interrupt */
 2:
 #ifdef CONFIG_TRACE_IRQFLAGS
-	jal     trace_hardirqs_off
+	jal     __trace_hardirqs_off
 #endif
 	move	$r0, $sp
 	sethi	$lp, hi20(ret_from_intr)
diff --git a/arch/nds32/kernel/ex-exit.S b/arch/nds32/kernel/ex-exit.S
index 03e4f7788a188..f00af92f7e22f 100644
--- a/arch/nds32/kernel/ex-exit.S
+++ b/arch/nds32/kernel/ex-exit.S
@@ -138,8 +138,8 @@ no_work_pending:
 #ifdef CONFIG_TRACE_IRQFLAGS
 	lwi	$p0, [$sp+(#IPSW_OFFSET)]
 	andi	$p0, $p0, #0x1
-	la	$r10, trace_hardirqs_off
-	la	$r9, trace_hardirqs_on
+	la	$r10, __trace_hardirqs_off
+	la	$r9, __trace_hardirqs_on
 	cmovz	$r9, $p0, $r10
 	jral	$r9
 #endif
diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c
index a646a83390520..a0a9679ad5dee 100644
--- a/arch/nds32/kernel/ftrace.c
+++ b/arch/nds32/kernel/ftrace.c
@@ -295,3 +295,15 @@ int ftrace_disable_ftrace_graph_caller(void)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+noinline void __trace_hardirqs_off(void)
+{
+	trace_hardirqs_off();
+}
+noinline void __trace_hardirqs_on(void)
+{
+	trace_hardirqs_on();
+}
+#endif /* CONFIG_TRACE_IRQFLAGS */

From ec865393292f5ad8d52da20788b3685ebce44c48 Mon Sep 17 00:00:00 2001
From: Greentime Hu <greentime@andestech.com>
Date: Tue, 28 Aug 2018 16:07:39 +0800
Subject: [PATCH 196/257] nds32: fix build error because of wrong semicolon

It shall be removed in the define usage. We shall not put a semicolon there.

/kisskb/src/arch/nds32/include/asm/elf.h:126:29: error: expected '}' before ';' token
 #define ELF_DATA ELFDATA2LSB;
                             ^
/kisskb/src/fs/proc/kcore.c:318:17: note: in expansion of macro 'ELF_DATA'
     [EI_DATA] = ELF_DATA,
                 ^~~~~~~~
/kisskb/src/fs/proc/kcore.c:312:15: note: to match this '{'
    .e_ident = {
               ^
/kisskb/src/scripts/Makefile.build:307: recipe for target 'fs/proc/kcore.o' failed

Signed-off-by: Greentime Hu <greentime@andestech.com>
---
 arch/nds32/include/asm/elf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
index 56c4790588025..f5f9cf7e05440 100644
--- a/arch/nds32/include/asm/elf.h
+++ b/arch/nds32/include/asm/elf.h
@@ -121,9 +121,9 @@ struct elf32_hdr;
  */
 #define ELF_CLASS	ELFCLASS32
 #ifdef __NDS32_EB__
-#define ELF_DATA	ELFDATA2MSB;
+#define ELF_DATA	ELFDATA2MSB
 #else
-#define ELF_DATA	ELFDATA2LSB;
+#define ELF_DATA	ELFDATA2LSB
 #endif
 #define ELF_ARCH	EM_NDS32
 #define USE_ELF_CORE_DUMP

From a11bdb1a6b782ee97587f92fae798efc78c31093 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 30 Aug 2018 10:13:55 +0200
Subject: [PATCH 197/257] KVM: s390: Fix pfmf and conditional skey emulation

We should not return with a lock.
We also have to increase the address when we do page clearing.

Fixes: bd096f644319 ("KVM: s390: Add skey emulation fault handling")
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Message-Id: <20180830081355.59234-1-frankja@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/priv.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index d68f10441a164..8679bd74d337a 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -280,9 +280,11 @@ static int handle_iske(struct kvm_vcpu *vcpu)
 			goto retry;
 		}
 	}
-	if (rc)
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	up_read(&current->mm->mmap_sem);
+	if (rc == -EFAULT)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (rc < 0)
+		return rc;
 	vcpu->run->s.regs.gprs[reg1] &= ~0xff;
 	vcpu->run->s.regs.gprs[reg1] |= key;
 	return 0;
@@ -324,9 +326,11 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
 			goto retry;
 		}
 	}
-	if (rc < 0)
-		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	up_read(&current->mm->mmap_sem);
+	if (rc == -EFAULT)
+		return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+	if (rc < 0)
+		return rc;
 	kvm_s390_set_psw_cc(vcpu, rc);
 	return 0;
 }
@@ -390,12 +394,12 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 					      FAULT_FLAG_WRITE, &unlocked);
 			rc = !rc ? -EAGAIN : rc;
 		}
+		up_read(&current->mm->mmap_sem);
 		if (rc == -EFAULT)
 			return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
-		up_read(&current->mm->mmap_sem);
-		if (rc >= 0)
-			start += PAGE_SIZE;
+		if (rc < 0)
+			return rc;
+		start += PAGE_SIZE;
 	}
 
 	if (m3 & (SSKE_MC | SSKE_MR)) {
@@ -1002,13 +1006,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 						      FAULT_FLAG_WRITE, &unlocked);
 				rc = !rc ? -EAGAIN : rc;
 			}
+			up_read(&current->mm->mmap_sem);
 			if (rc == -EFAULT)
 				return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-
-			up_read(&current->mm->mmap_sem);
-			if (rc >= 0)
-				start += PAGE_SIZE;
+			if (rc == -EAGAIN)
+				continue;
+			if (rc < 0)
+				return rc;
 		}
+		start += PAGE_SIZE;
 	}
 	if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 		if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {

From 204c97245612b6c255edf4e21e24d417c4a0c008 Mon Sep 17 00:00:00 2001
From: Pierre Morel <pmorel@linux.ibm.com>
Date: Thu, 23 Aug 2018 12:25:54 +0200
Subject: [PATCH 198/257] KVM: s390: vsie: copy wrapping keys to right place

Copy the key mask to the right offset inside the shadow CRYCB

Fixes: bbeaa58b3 ("KVM: s390: vsie: support aes dea wrapping keys")
Signed-off-by: Pierre Morel <pmorel@linux.ibm.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Cc: stable@vger.kernel.org # v4.8+
Message-Id: <1535019956-23539-2-git-send-email-pmorel@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/vsie.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 63844b95c22c9..a2b28cd1e3fed 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -173,7 +173,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 		return set_validity_icpt(scb_s, 0x0039U);
 
 	/* copy only the wrapping keys */
-	if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
+	if (read_guest_real(vcpu, crycb_addr + 72,
+			    vsie_page->crycb.dea_wrapping_key_mask, 56))
 		return set_validity_icpt(scb_s, 0x0035U);
 
 	scb_s->ecb3 |= ecb3_flags;

From df88f3181f10565c6e3a89eb6f0f9e6afaaf15f1 Mon Sep 17 00:00:00 2001
From: Janosch Frank <frankja@linux.ibm.com>
Date: Thu, 30 Aug 2018 16:14:18 +0200
Subject: [PATCH 199/257] KVM: s390: Properly lock mm context
 allow_gmap_hpage_1m setting

We have to do down_write on the mm semaphore to set a bitfield in the
mm context.

Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
Fixes: a4499382 ("KVM: s390: Add huge page enablement control")
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/include/asm/mmu.h | 8 +++++++-
 arch/s390/kvm/kvm-s390.c    | 2 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index f31a15044c24a..a8418e1379eb7 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -16,7 +16,13 @@ typedef struct {
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
-	/* The mmu context allocates 4K page tables. */
+	/*
+	 * The following bitfields need a down_write on the mm
+	 * semaphore when they are written to. As they are only
+	 * written once, they can be read without a lock.
+	 *
+	 * The mmu context allocates 4K page tables.
+	 */
 	unsigned int alloc_pgste:1;
 	/* The mmu context uses extended page tables. */
 	unsigned int has_pgste:1;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 91ad4a9425c0b..f69333fd2fa38 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -695,7 +695,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 			r = -EINVAL;
 		else {
 			r = 0;
+			down_write(&kvm->mm->mmap_sem);
 			kvm->mm->context.allow_gmap_hpage_1m = 1;
+			up_write(&kvm->mm->mmap_sem);
 			/*
 			 * We might have to create fake 4k page
 			 * tables. To avoid that the hardware works on

From 851a15114895c5bce163a6f2d57e0aa4658a1be4 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@linux.intel.com>
Date: Mon, 3 Sep 2018 11:24:57 +0300
Subject: [PATCH 200/257] i2c: i801: fix DNV's SMBCTRL register offset

DNV's iTCO is slightly different with SMBCTRL sitting at a different
offset when compared to all other devices. Let's fix so that we can
properly use iTCO watchdog.

Fixes: 84d7f2ebd70d ("i2c: i801: Add support for Intel DNV")
Cc: <stable@vger.kernel.org> # v4.4+
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 drivers/i2c/busses/i2c-i801.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 04b60a349d7ed..c91e145ef5a56 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -140,6 +140,7 @@
 
 #define SBREG_BAR		0x10
 #define SBREG_SMBCTRL		0xc6000c
+#define SBREG_SMBCTRL_DNV	0xcf000c
 
 /* Host status bits for SMBPCISTS */
 #define SMBPCISTS_INTS		BIT(3)
@@ -1399,7 +1400,11 @@ static void i801_add_tco(struct i801_priv *priv)
 	spin_unlock(&p2sb_spinlock);
 
 	res = &tco_res[ICH_RES_MEM_OFF];
-	res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL;
+	if (pci_dev->device == PCI_DEVICE_ID_INTEL_DNV_SMBUS)
+		res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL_DNV;
+	else
+		res->start = (resource_size_t)base64_addr + SBREG_SMBCTRL;
+
 	res->end = res->start + 3;
 	res->flags = IORESOURCE_MEM;
 

From bc811f05d77f47059c197a98b6ad242eb03999cb Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Tue, 4 Sep 2018 11:52:34 -0600
Subject: [PATCH 201/257] nbd: don't allow invalid blocksize settings

syzbot reports a divide-by-zero off the NBD_SET_BLKSIZE ioctl.
We need proper validation of the input here. Not just if it's
zero, but also if the value is a power-of-2 and in a valid
range. Add that.

Cc: stable@vger.kernel.org
Reported-by: syzbot <syzbot+25dbecbec1e62c6b0dd4@syzkaller.appspotmail.com>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/nbd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 3863c00372bb9..14a51254c3db7 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1239,6 +1239,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 	case NBD_SET_SOCK:
 		return nbd_add_socket(nbd, arg, false);
 	case NBD_SET_BLKSIZE:
+		if (!arg || !is_power_of_2(arg) || arg < 512 ||
+		    arg > PAGE_SIZE)
+			return -EINVAL;
 		nbd_size_set(nbd, arg,
 			     div_s64(config->bytesize, arg));
 		return 0;

From 639505d4397b8c654a8e2616f9cb70ece40c83f9 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 3 Sep 2018 18:06:24 +0300
Subject: [PATCH 202/257] net/mlx5: Fix SQ offset in QPs with small RQ

Correct the formula for calculating the RQ page remainder,
which should be in byte granularity.  The result will be
non-zero only for RQs smaller than PAGE_SIZE, as an RQ size
is a power of 2.

Divide this by the SQ stride (MLX5_SEND_WQE_BB) to get the
SQ offset in strides granularity.

Fixes: d7037ad73daa ("net/mlx5: Fix QP fragmented buffer allocation")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/wq.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 86478a6b99c50..c8c315eb51280 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -139,14 +139,15 @@ int mlx5_wq_qp_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		      struct mlx5_wq_ctrl *wq_ctrl)
 {
 	u32 sq_strides_offset;
+	u32 rq_pg_remainder;
 	int err;
 
 	mlx5_fill_fbc(MLX5_GET(qpc, qpc, log_rq_stride) + 4,
 		      MLX5_GET(qpc, qpc, log_rq_size),
 		      &wq->rq.fbc);
 
-	sq_strides_offset =
-		((wq->rq.fbc.frag_sz_m1 + 1) % PAGE_SIZE) / MLX5_SEND_WQE_BB;
+	rq_pg_remainder   = mlx5_wq_cyc_get_byte_size(&wq->rq) % PAGE_SIZE;
+	sq_strides_offset = rq_pg_remainder / MLX5_SEND_WQE_BB;
 
 	mlx5_fill_fbc_offset(ilog2(MLX5_SEND_WQE_BB),
 			     MLX5_GET(qpc, qpc, log_sq_size),

From 6d784f1625ea68783cc1fb17de8f6cd3e1660c3f Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Mon, 3 Sep 2018 11:08:15 -0700
Subject: [PATCH 203/257] act_ife: fix a potential use-after-free

Immediately after module_put(), user could delete this
module, so e->ops could be already freed before we call
e->ops->release().

Fix this by moving module_put() after ops->release().

Fixes: ef6980b6becb ("introduce IFE action")
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ife.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 196430aefe87a..fc412769a1bef 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -400,7 +400,6 @@ static void _tcf_ife_cleanup(struct tc_action *a)
 	struct tcf_meta_info *e, *n;
 
 	list_for_each_entry_safe(e, n, &ife->metalist, metalist) {
-		module_put(e->ops->owner);
 		list_del(&e->metalist);
 		if (e->metaval) {
 			if (e->ops->release)
@@ -408,6 +407,7 @@ static void _tcf_ife_cleanup(struct tc_action *a)
 			else
 				kfree(e->metaval);
 		}
+		module_put(e->ops->owner);
 		kfree(e);
 	}
 }

From 84cb8eb26cb9ce3c79928094962a475a9d850a53 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 4 Sep 2018 00:44:42 +0300
Subject: [PATCH 204/257] net: sched: action_ife: take reference to meta module

Recent refactoring of add_metainfo() caused use_all_metadata() to add
metainfo to ife action metalist without taking reference to module. This
causes warning in module_put called from ife action cleanup function.

Implement add_metainfo_and_get_ops() function that returns with reference
to module taken if metainfo was added successfully, and call it from
use_all_metadata(), instead of calling __add_metainfo() directly.

Example warning:

[  646.344393] WARNING: CPU: 1 PID: 2278 at kernel/module.c:1139 module_put+0x1cb/0x230
[  646.352437] Modules linked in: act_meta_skbtcindex act_meta_mark act_meta_skbprio act_ife ife veth nfsv3 nfs fscache xt_CHECKSUM iptable_mangle ipt_MASQUERADE iptable_nat nf_nat_ipv4 nf_nat xt_conntrack nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c tun ebtable_filter ebtables ip6table_filter ip6_tables bridge stp llc mlx5_ib ib_uverbs ib_core intel_rapl sb_edac x86_pkg_temp_thermal mlx5_core coretemp kvm_intel kvm nfsd igb irqbypass crct10dif_pclmul devlink crc32_pclmul mei_me joydev ses crc32c_intel enclosure auth_rpcgss i2c_algo_bit ioatdma ptp mei pps_core ghash_clmulni_intel iTCO_wdt iTCO_vendor_support pcspkr dca ipmi_ssif lpc_ich target_core_mod i2c_i801 ipmi_si ipmi_devintf pcc_cpufreq wmi ipmi_msghandler nfs_acl lockd acpi_pad acpi_power_meter grace sunrpc mpt3sas raid_class scsi_transport_sas
[  646.425631] CPU: 1 PID: 2278 Comm: tc Not tainted 4.19.0-rc1+ #799
[  646.432187] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  646.440595] RIP: 0010:module_put+0x1cb/0x230
[  646.445238] Code: f3 66 94 02 e8 26 ff fa ff 85 c0 74 11 0f b6 1d 51 30 94 02 80 fb 01 77 60 83 e3 01 74 13 65 ff 0d 3a 83 db 73 e9 2b ff ff ff <0f> 0b e9 00 ff ff ff e8 59 01 fb ff 85 c0 75 e4 48 c7 c2 20 62 6b
[  646.464997] RSP: 0018:ffff880354d37068 EFLAGS: 00010286
[  646.470599] RAX: 0000000000000000 RBX: ffffffffc0a52518 RCX: ffffffff8c2668db
[  646.478118] RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffffffffc0a52518
[  646.485641] RBP: ffffffffc0a52180 R08: fffffbfff814a4a4 R09: fffffbfff814a4a3
[  646.493164] R10: ffffffffc0a5251b R11: fffffbfff814a4a4 R12: 1ffff1006a9a6e0d
[  646.500687] R13: 00000000ffffffff R14: ffff880362bab890 R15: dead000000000100
[  646.508213] FS:  00007f4164c99800(0000) GS:ffff88036fe40000(0000) knlGS:0000000000000000
[  646.516961] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  646.523080] CR2: 00007f41638b8420 CR3: 0000000351df0004 CR4: 00000000001606e0
[  646.530595] Call Trace:
[  646.533408]  ? find_symbol_in_section+0x260/0x260
[  646.538509]  tcf_ife_cleanup+0x11b/0x200 [act_ife]
[  646.543695]  tcf_action_cleanup+0x29/0xa0
[  646.548078]  __tcf_action_put+0x5a/0xb0
[  646.552289]  ? nla_put+0x65/0xe0
[  646.555889]  __tcf_idr_release+0x48/0x60
[  646.560187]  tcf_generic_walker+0x448/0x6b0
[  646.564764]  ? tcf_action_dump_1+0x450/0x450
[  646.569411]  ? __lock_is_held+0x84/0x110
[  646.573720]  ? tcf_ife_walker+0x10c/0x20f [act_ife]
[  646.578982]  tca_action_gd+0x972/0xc40
[  646.583129]  ? tca_get_fill.constprop.17+0x250/0x250
[  646.588471]  ? mark_lock+0xcf/0x980
[  646.592324]  ? check_chain_key+0x140/0x1f0
[  646.596832]  ? debug_show_all_locks+0x240/0x240
[  646.601839]  ? memset+0x1f/0x40
[  646.605350]  ? nla_parse+0xca/0x1a0
[  646.609217]  tc_ctl_action+0x215/0x230
[  646.613339]  ? tcf_action_add+0x220/0x220
[  646.617748]  rtnetlink_rcv_msg+0x56a/0x6d0
[  646.622227]  ? rtnl_fdb_del+0x3f0/0x3f0
[  646.626466]  netlink_rcv_skb+0x18d/0x200
[  646.630752]  ? rtnl_fdb_del+0x3f0/0x3f0
[  646.634959]  ? netlink_ack+0x500/0x500
[  646.639106]  netlink_unicast+0x2d0/0x370
[  646.643409]  ? netlink_attachskb+0x340/0x340
[  646.648050]  ? _copy_from_iter_full+0xe9/0x3e0
[  646.652870]  ? import_iovec+0x11e/0x1c0
[  646.657083]  netlink_sendmsg+0x3b9/0x6a0
[  646.661388]  ? netlink_unicast+0x370/0x370
[  646.665877]  ? netlink_unicast+0x370/0x370
[  646.670351]  sock_sendmsg+0x6b/0x80
[  646.674212]  ___sys_sendmsg+0x4a1/0x520
[  646.678443]  ? copy_msghdr_from_user+0x210/0x210
[  646.683463]  ? lock_downgrade+0x320/0x320
[  646.687849]  ? debug_show_all_locks+0x240/0x240
[  646.692760]  ? do_raw_spin_unlock+0xa2/0x130
[  646.697418]  ? _raw_spin_unlock+0x24/0x30
[  646.701798]  ? __handle_mm_fault+0x1819/0x1c10
[  646.706619]  ? __pmd_alloc+0x320/0x320
[  646.710738]  ? debug_show_all_locks+0x240/0x240
[  646.715649]  ? restore_nameidata+0x7b/0xa0
[  646.720117]  ? check_chain_key+0x140/0x1f0
[  646.724590]  ? check_chain_key+0x140/0x1f0
[  646.729070]  ? __fget_light+0xbc/0xd0
[  646.733121]  ? __sys_sendmsg+0xd7/0x150
[  646.737329]  __sys_sendmsg+0xd7/0x150
[  646.741359]  ? __ia32_sys_shutdown+0x30/0x30
[  646.746003]  ? up_read+0x53/0x90
[  646.749601]  ? __do_page_fault+0x484/0x780
[  646.754105]  ? do_syscall_64+0x1e/0x2c0
[  646.758320]  do_syscall_64+0x72/0x2c0
[  646.762353]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  646.767776] RIP: 0033:0x7f4163872150
[  646.771713] Code: 8b 15 3c 7d 2b 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb cd 66 0f 1f 44 00 00 83 3d b9 d5 2b 00 00 75 10 b8 2e 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 be cd 00 00 48 89 04 24
[  646.791474] RSP: 002b:00007ffdef7d6b58 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  646.799721] RAX: ffffffffffffffda RBX: 0000000000000024 RCX: 00007f4163872150
[  646.807240] RDX: 0000000000000000 RSI: 00007ffdef7d6bd0 RDI: 0000000000000003
[  646.814760] RBP: 000000005b8b9482 R08: 0000000000000001 R09: 0000000000000000
[  646.822286] R10: 00000000000005e7 R11: 0000000000000246 R12: 00007ffdef7dad20
[  646.829807] R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000679bc0
[  646.837360] irq event stamp: 6083
[  646.841043] hardirqs last  enabled at (6081): [<ffffffff8c220a7d>] __call_rcu+0x17d/0x500
[  646.849882] hardirqs last disabled at (6083): [<ffffffff8c004f06>] trace_hardirqs_off_thunk+0x1a/0x1c
[  646.859775] softirqs last  enabled at (5968): [<ffffffff8d4004a1>] __do_softirq+0x4a1/0x6ee
[  646.868784] softirqs last disabled at (6082): [<ffffffffc0a78759>] tcf_ife_cleanup+0x39/0x200 [act_ife]
[  646.878845] ---[ end trace b1b8c12ffe51e657 ]---

Fixes: 5ffe57da29b3 ("act_ife: fix a potential deadlock")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ife.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index fc412769a1bef..06a3d48018782 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -326,6 +326,20 @@ static int __add_metainfo(const struct tcf_meta_ops *ops,
 	return ret;
 }
 
+static int add_metainfo_and_get_ops(const struct tcf_meta_ops *ops,
+				    struct tcf_ife_info *ife, u32 metaid,
+				    bool exists)
+{
+	int ret;
+
+	if (!try_module_get(ops->owner))
+		return -ENOENT;
+	ret = __add_metainfo(ops, ife, metaid, NULL, 0, true, exists);
+	if (ret)
+		module_put(ops->owner);
+	return ret;
+}
+
 static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
 			int len, bool exists)
 {
@@ -349,7 +363,7 @@ static int use_all_metadata(struct tcf_ife_info *ife, bool exists)
 
 	read_lock(&ife_mod_lock);
 	list_for_each_entry(o, &ifeoplist, list) {
-		rc = __add_metainfo(o, ife, o->metaid, NULL, 0, true, exists);
+		rc = add_metainfo_and_get_ops(o, ife, o->metaid, exists);
 		if (rc == 0)
 			installed += 1;
 	}

From a33710bdb6b284f8f1e24f1119d167037b374ebb Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 4 Sep 2018 04:23:56 +0200
Subject: [PATCH 205/257] net: phy: sfp: Handle unimplemented hwmon limits and
 alarms

Not all SFPs implement the registers containing sensor limits and
alarms. Luckily, there is a bit indicating if they are implemented or
not. Add checking for this bit, when deciding if the hwmon attributes
should be visible.

Fixes: 1323061a018a ("net: phy: sfp: Add HWMON support for module sensors")
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/sfp.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c
index 4637d980310e1..52fffb98fde9a 100644
--- a/drivers/net/phy/sfp.c
+++ b/drivers/net/phy/sfp.c
@@ -398,7 +398,6 @@ static umode_t sfp_hwmon_is_visible(const void *data,
 	switch (type) {
 	case hwmon_temp:
 		switch (attr) {
-		case hwmon_temp_input:
 		case hwmon_temp_min_alarm:
 		case hwmon_temp_max_alarm:
 		case hwmon_temp_lcrit_alarm:
@@ -407,13 +406,16 @@ static umode_t sfp_hwmon_is_visible(const void *data,
 		case hwmon_temp_max:
 		case hwmon_temp_lcrit:
 		case hwmon_temp_crit:
+			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
+				return 0;
+			/* fall through */
+		case hwmon_temp_input:
 			return 0444;
 		default:
 			return 0;
 		}
 	case hwmon_in:
 		switch (attr) {
-		case hwmon_in_input:
 		case hwmon_in_min_alarm:
 		case hwmon_in_max_alarm:
 		case hwmon_in_lcrit_alarm:
@@ -422,13 +424,16 @@ static umode_t sfp_hwmon_is_visible(const void *data,
 		case hwmon_in_max:
 		case hwmon_in_lcrit:
 		case hwmon_in_crit:
+			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
+				return 0;
+			/* fall through */
+		case hwmon_in_input:
 			return 0444;
 		default:
 			return 0;
 		}
 	case hwmon_curr:
 		switch (attr) {
-		case hwmon_curr_input:
 		case hwmon_curr_min_alarm:
 		case hwmon_curr_max_alarm:
 		case hwmon_curr_lcrit_alarm:
@@ -437,6 +442,10 @@ static umode_t sfp_hwmon_is_visible(const void *data,
 		case hwmon_curr_max:
 		case hwmon_curr_lcrit:
 		case hwmon_curr_crit:
+			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
+				return 0;
+			/* fall through */
+		case hwmon_curr_input:
 			return 0444;
 		default:
 			return 0;
@@ -452,7 +461,6 @@ static umode_t sfp_hwmon_is_visible(const void *data,
 		    channel == 1)
 			return 0;
 		switch (attr) {
-		case hwmon_power_input:
 		case hwmon_power_min_alarm:
 		case hwmon_power_max_alarm:
 		case hwmon_power_lcrit_alarm:
@@ -461,6 +469,10 @@ static umode_t sfp_hwmon_is_visible(const void *data,
 		case hwmon_power_max:
 		case hwmon_power_lcrit:
 		case hwmon_power_crit:
+			if (!(sfp->id.ext.enhopts & SFP_ENHOPTS_ALARMWARN))
+				return 0;
+			/* fall through */
+		case hwmon_power_input:
 			return 0444;
 		default:
 			return 0;

From 2820a708d5a321342bef34e459fdc8679c30e20f Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Mon, 30 Jul 2018 19:26:34 +0300
Subject: [PATCH 206/257] ARC: dma [IOC] Enable per device io coherency

So far the IOC treatment was global on ARC, being turned on (or off)
for all devices in the system. With this patch, this can now be done
per device using the "dma-coherent" DT property; IOW with this patch
we can use both HW-coherent and regular DMA peripherals simultaneously.

The changes involved are too many so enlisting the summary below:

1. common code calls ARC arch_setup_dma_ops() per device.

2. For coherent dma (IOC) it plugs in generic @dma_direct_ops which
   doesn't need any arch specific backend: No need for any explicit
   cache flushes or MMU mappings to provide for uncached access

   - dma_(map|sync)_single* return early as corresponding dma ops callbacks
     are NULL in generic code.
     So arch_sync_dma_*() -> dma_cache_*() need not handle the coherent
     dma case, hence drop ARC __dma_cache_*_ioc() which were no-op anyways

3. For noncoherent dma (non IOC) generic @dma_noncoherent_ops is used
   which in turns calls ARC specific routines

   - arch_dma_alloc() no longer checks for @ioc_enable since this is
     called only for !IOC case.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
[vgupta: rewrote changelog]
---
 arch/arc/include/asm/dma-mapping.h | 13 +++++++
 arch/arc/mm/cache.c                | 23 +++++--------
 arch/arc/mm/dma.c                  | 54 ++++++++++++++++--------------
 3 files changed, 50 insertions(+), 40 deletions(-)
 create mode 100644 arch/arc/include/asm/dma-mapping.h

diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h
new file mode 100644
index 0000000000000..c946c0a83e76e
--- /dev/null
+++ b/arch/arc/include/asm/dma-mapping.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier:  GPL-2.0
+// (C) 2018 Synopsys, Inc. (www.synopsys.com)
+
+#ifndef ASM_ARC_DMA_MAPPING_H
+#define ASM_ARC_DMA_MAPPING_H
+
+#include <asm-generic/dma-mapping.h>
+
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent);
+#define arch_setup_dma_ops arch_setup_dma_ops
+
+#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 25c631942500f..2d389cab46ba0 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -65,7 +65,7 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len)
 
 	n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n",
 		       perip_base,
-		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency "));
+		       IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency (per-device) "));
 
 	return buf;
 }
@@ -896,15 +896,6 @@ static void __dma_cache_wback_slc(phys_addr_t start, unsigned long sz)
 	slc_op(start, sz, OP_FLUSH);
 }
 
-/*
- * DMA ops for systems with IOC
- * IOC hardware snoops all DMA traffic keeping the caches consistent with
- * memory - eliding need for any explicit cache maintenance of DMA buffers
- */
-static void __dma_cache_wback_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_inv_ioc(phys_addr_t start, unsigned long sz) {}
-static void __dma_cache_wback_ioc(phys_addr_t start, unsigned long sz) {}
-
 /*
  * Exported DMA API
  */
@@ -1264,11 +1255,7 @@ void __init arc_cache_init_master(void)
 	if (is_isa_arcv2() && ioc_enable)
 		arc_ioc_setup();
 
-	if (is_isa_arcv2() && ioc_enable) {
-		__dma_cache_wback_inv = __dma_cache_wback_inv_ioc;
-		__dma_cache_inv = __dma_cache_inv_ioc;
-		__dma_cache_wback = __dma_cache_wback_ioc;
-	} else if (is_isa_arcv2() && l2_line_sz && slc_enable) {
+	if (is_isa_arcv2() && l2_line_sz && slc_enable) {
 		__dma_cache_wback_inv = __dma_cache_wback_inv_slc;
 		__dma_cache_inv = __dma_cache_inv_slc;
 		__dma_cache_wback = __dma_cache_wback_slc;
@@ -1277,6 +1264,12 @@ void __init arc_cache_init_master(void)
 		__dma_cache_inv = __dma_cache_inv_l1;
 		__dma_cache_wback = __dma_cache_wback_l1;
 	}
+	/*
+	 * In case of IOC (say IOC+SLC case), pointers above could still be set
+	 * but end up not being relevant as the first function in chain is not
+	 * called at all for @dma_direct_ops
+	 *     arch_sync_dma_for_cpu() -> dma_cache_*() -> __dma_cache_*()
+	 */
 }
 
 void __ref arc_cache_init(void)
diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index ec47e6079f5d0..c0b49399225dd 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -6,20 +6,17 @@
  * published by the Free Software Foundation.
  */
 
-/*
- * DMA Coherent API Notes
- *
- * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is
- * implemented by accessing it using a kernel virtual address, with
- * Cache bit off in the TLB entry.
- *
- * The default DMA address == Phy address which is 0x8000_0000 based.
- */
-
 #include <linux/dma-noncoherent.h>
 #include <asm/cache.h>
 #include <asm/cacheflush.h>
 
+/*
+ * ARCH specific callbacks for generic noncoherent DMA ops (dma/noncoherent.c)
+ *  - hardware IOC not available (or "dma-coherent" not set for device in DT)
+ *  - But still handle both coherent and non-coherent requests from caller
+ *
+ * For DMA coherent hardware (IOC) generic code suffices
+ */
 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		gfp_t gfp, unsigned long attrs)
 {
@@ -33,19 +30,7 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	if (!page)
 		return NULL;
 
-	/*
-	 * IOC relies on all data (even coherent DMA data) being in cache
-	 * Thus allocate normal cached memory
-	 *
-	 * The gains with IOC are two pronged:
-	 *   -For streaming data, elides need for cache maintenance, saving
-	 *    cycles in flush code, and bus bandwidth as all the lines of a
-	 *    buffer need to be flushed out to memory
-	 *   -For coherent data, Read/Write to buffers terminate early in cache
-	 *   (vs. always going to memory - thus are faster)
-	 */
-	if ((is_isa_arcv2() && ioc_enable) ||
-	    (attrs & DMA_ATTR_NON_CONSISTENT))
+	if (attrs & DMA_ATTR_NON_CONSISTENT)
 		need_coh = 0;
 
 	/*
@@ -95,8 +80,7 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 	struct page *page = virt_to_page(paddr);
 	int is_non_coh = 1;
 
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) ||
-			(is_isa_arcv2() && ioc_enable);
+	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
 
 	if (PageHighMem(page) || !is_non_coh)
 		iounmap((void __force __iomem *)vaddr);
@@ -185,3 +169,23 @@ void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
 		break;
 	}
 }
+
+/*
+ * Plug in coherent or noncoherent dma ops
+ */
+void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+			const struct iommu_ops *iommu, bool coherent)
+{
+	/*
+	 * IOC hardware snoops all DMA traffic keeping the caches consistent
+	 * with memory - eliding need for any explicit cache maintenance of
+	 * DMA buffers - so we can use dma_direct cache ops.
+	 */
+	if (is_isa_arcv2() && ioc_enable && coherent) {
+		set_dma_ops(dev, &dma_direct_ops);
+		dev_info(dev, "use dma_direct_ops cache ops\n");
+	} else {
+		set_dma_ops(dev, &dma_noncoherent_ops);
+		dev_info(dev, "use dma_noncoherent_ops cache ops\n");
+	}
+}

From 2b720e99a1297417b979bf4810a0f01d27133c48 Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Mon, 30 Jul 2018 19:26:35 +0300
Subject: [PATCH 207/257] ARC: IOC: panic if both IOC and ZONE_HIGHMEM enabled

Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/cache.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 2d389cab46ba0..f2701c13a66b2 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -1144,6 +1144,19 @@ noinline void __init arc_ioc_setup(void)
 {
 	unsigned int ioc_base, mem_sz;
 
+	/*
+	 * As for today we don't support both IOC and ZONE_HIGHMEM enabled
+	 * simultaneously. This happens because as of today IOC aperture covers
+	 * only ZONE_NORMAL (low mem) and any dma transactions outside this
+	 * region won't be HW coherent.
+	 * If we want to use both IOC and ZONE_HIGHMEM we can use
+	 * bounce_buffer to handle dma transactions to HIGHMEM.
+	 * Also it is possible to modify dma_direct cache ops or increase IOC
+	 * aperture size if we are planning to use HIGHMEM without PAE.
+	 */
+	if (IS_ENABLED(CONFIG_HIGHMEM))
+		panic("IOC and HIGHMEM can't be used simultaneously");
+
 	/* Flush + invalidate + disable L1 dcache */
 	__dc_disable();
 

From dd45210b6dd4f1512eafcc41774154ebb762360f Mon Sep 17 00:00:00 2001
From: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Date: Mon, 30 Jul 2018 19:26:36 +0300
Subject: [PATCH 208/257] ARC: don't check for HIGHMEM pages in arch_dma_alloc

__GFP_HIGHMEM flag is cleared by upper layer functions
(in include/linux/dma-mapping.h) so we'll never get a
__GFP_HIGHMEM flag in arch_dma_alloc gfp argument.
That's why alloc_pages will never return highmem page
here.

Get rid of highmem pages handling and cleanup arch_dma_alloc
and arch_dma_free functions.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/mm/dma.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c
index c0b49399225dd..c75d5c3470e35 100644
--- a/arch/arc/mm/dma.c
+++ b/arch/arc/mm/dma.c
@@ -24,30 +24,29 @@ void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	struct page *page;
 	phys_addr_t paddr;
 	void *kvaddr;
-	int need_coh = 1, need_kvaddr = 0;
+	bool need_coh = !(attrs & DMA_ATTR_NON_CONSISTENT);
+
+	/*
+	 * __GFP_HIGHMEM flag is cleared by upper layer functions
+	 * (in include/linux/dma-mapping.h) so we should never get a
+	 * __GFP_HIGHMEM here.
+	 */
+	BUG_ON(gfp & __GFP_HIGHMEM);
 
 	page = alloc_pages(gfp, order);
 	if (!page)
 		return NULL;
 
-	if (attrs & DMA_ATTR_NON_CONSISTENT)
-		need_coh = 0;
-
-	/*
-	 * - A coherent buffer needs MMU mapping to enforce non-cachability
-	 * - A highmem page needs a virtual handle (hence MMU mapping)
-	 *   independent of cachability
-	 */
-	if (PageHighMem(page) || need_coh)
-		need_kvaddr = 1;
-
 	/* This is linear addr (0x8000_0000 based) */
 	paddr = page_to_phys(page);
 
 	*dma_handle = paddr;
 
-	/* This is kernel Virtual address (0x7000_0000 based) */
-	if (need_kvaddr) {
+	/*
+	 * A coherent buffer needs MMU mapping to enforce non-cachability.
+	 * kvaddr is kernel Virtual address (0x7000_0000 based).
+	 */
+	if (need_coh) {
 		kvaddr = ioremap_nocache(paddr, size);
 		if (kvaddr == NULL) {
 			__free_pages(page, order);
@@ -78,11 +77,8 @@ void arch_dma_free(struct device *dev, size_t size, void *vaddr,
 {
 	phys_addr_t paddr = dma_handle;
 	struct page *page = virt_to_page(paddr);
-	int is_non_coh = 1;
-
-	is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT);
 
-	if (PageHighMem(page) || !is_non_coh)
+	if (!(attrs & DMA_ATTR_NON_CONSISTENT))
 		iounmap((void __force __iomem *)vaddr);
 
 	__free_pages(page, get_order(size));

From 3100dab2aa09dc6e082956e306fc9f81b3cc0f7a Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Tue, 4 Sep 2018 15:45:34 -0700
Subject: [PATCH 209/257] mm: memcontrol: print proper OOM header when no
 eligible victim left

When the memcg OOM killer runs out of killable tasks, it currently
prints a WARN with no further OOM context.  This has caused some user
confusion.

Warnings indicate a kernel problem.  In a reported case, however, the
situation was triggered by a nonsensical memcg configuration (hard limit
set to 0).  But without any VM context this wasn't obvious from the
report, and it took some back and forth on the mailing list to identify
what is actually a trivial issue.

Handle this OOM condition like we handle it in the global OOM killer:
dump the full OOM context and tell the user we ran out of tasks.

This way the user can identify misconfigurations easily by themselves
and rectify the problem - without having to go through the hassle of
running into an obscure but unsettling warning, finding the appropriate
kernel mailing list and waiting for a kernel developer to remote-analyze
that the memcg configuration caused this.

If users cannot make sense of why the OOM killer was triggered or why it
failed, they will still report it to the mailing list, we know that from
experience.  So in case there is an actual kernel bug causing this,
kernel developers will very likely hear about it.

Link: http://lkml.kernel.org/r/20180821160406.22578-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memcontrol.c |  2 --
 mm/oom_kill.c   | 13 ++++++++++---
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4ead5a4817de3..e79cb59552d9b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1701,8 +1701,6 @@ static enum oom_status mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int
 	if (mem_cgroup_out_of_memory(memcg, mask, order))
 		return OOM_SUCCESS;
 
-	WARN(1,"Memory cgroup charge failed because of no reclaimable memory! "
-		"This looks like a misconfiguration or a kernel bug.");
 	return OOM_FAILED;
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b5b25e4dcbbb7..95fbbc46f68f3 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1103,10 +1103,17 @@ bool out_of_memory(struct oom_control *oc)
 	}
 
 	select_bad_process(oc);
-	/* Found nothing?!?! Either we hang forever, or we panic. */
-	if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
+	/* Found nothing?!?! */
+	if (!oc->chosen) {
 		dump_header(oc, NULL);
-		panic("Out of memory and no killable processes...\n");
+		pr_warn("Out of memory and no killable processes...\n");
+		/*
+		 * If we got here due to an actual allocation at the
+		 * system level, we cannot survive this and will enter
+		 * an endless loop in the allocator. Bail out now.
+		 */
+		if (!is_sysrq_oom(oc) && !is_memcg_oom(oc))
+			panic("System is deadlocked on memory\n");
 	}
 	if (oc->chosen && oc->chosen != (void *)-1UL)
 		oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" :

From 79cc81057eef7ad846588976296ab0f266c1a7a5 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Date: Tue, 4 Sep 2018 15:45:37 -0700
Subject: [PATCH 210/257] mm, oom: fix missing tlb_finish_mmu() in
 __oom_reap_task_mm().

Commit 93065ac753e4 ("mm, oom: distinguish blockable mode for mmu
notifiers") has added an ability to skip over vmas with blockable mmu
notifiers. This however didn't call tlb_finish_mmu as it should.

As a result inc_tlb_flush_pending has been called without its pairing
dec_tlb_flush_pending and all callers mm_tlb_flush_pending would flush
even though this is not really needed.  This alone is not harmful and it
seems there shouldn't be any such callers for oom victims at all but
there is no real reason to skip tlb_finish_mmu on early skip either so
call it.

[mhocko@suse.com: new changelog]
Link: http://lkml.kernel.org/r/b752d1d5-81ad-7a35-2394-7870641be51c@i-love.sakura.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/oom_kill.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 95fbbc46f68f3..f10aa5360616e 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -522,6 +522,7 @@ bool __oom_reap_task_mm(struct mm_struct *mm)
 
 			tlb_gather_mmu(&tlb, mm, start, end);
 			if (mmu_notifier_invalidate_range_start_nonblock(mm, start, end)) {
+				tlb_finish_mmu(&tlb, start, end);
 				ret = false;
 				continue;
 			}

From 1ed0cc5a01a4d868d9907ce96468c4b4c6709556 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Tue, 4 Sep 2018 15:45:41 -0700
Subject: [PATCH 211/257] mm: respect arch_dup_mmap() return value

Commit d70f2a14b72a ("include/linux/sched/mm.h: uninline mmdrop_async(),
etc") ignored the return value of arch_dup_mmap(). As a result, on x86,
a failure to duplicate the LDT (e.g. due to memory allocation error)
would leave the duplicated memory mapping in an inconsistent state.

Fix by using the return value, as it was before the change.

Link: http://lkml.kernel.org/r/20180823051229.211856-1-namit@vmware.com
Fixes: d70f2a14b72a4 ("include/linux/sched/mm.h: uninline mmdrop_async(), etc")
Signed-off-by: Nadav Amit <namit@vmware.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>

Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 kernel/fork.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index d896e9ca38b0c..f0b58479534f0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -550,8 +550,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
 			goto out;
 	}
 	/* a new mm has just been created */
-	arch_dup_mmap(oldmm, mm);
-	retval = 0;
+	retval = arch_dup_mmap(oldmm, mm);
 out:
 	up_write(&mm->mmap_sem);
 	flush_tlb_mm(oldmm);

From b353756b2b71915e81ed41239292306622d08c9f Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Tue, 4 Sep 2018 15:45:44 -0700
Subject: [PATCH 212/257] kmemleak: always register debugfs file

If kmemleak built in to the kernel, but is disabled by default, the
debugfs file is never registered.  Because of this, it is not possible
to find out if the kernel is built with kmemleak support by checking for
the presence of this file.  To allow this, always register the file.

After this patch, if the file doesn't exist, kmemleak is not available
in the kernel.  If writing "scan" or any other value than "clear" to
this file results in EBUSY, then kmemleak is available but is disabled
by default and can be activated via the kernel command line.

Catalin: "that's also consistent with a late disabling of kmemleak when
the debugfs entry sticks around."

Link: http://lkml.kernel.org/r/20180824131220.19176-1-vincent.whitchurch@axis.com
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/kmemleak.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 9a085d525bbce..17dd883198aea 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -2097,6 +2097,11 @@ static int __init kmemleak_late_init(void)
 
 	kmemleak_initialized = 1;
 
+	dentry = debugfs_create_file("kmemleak", 0644, NULL, NULL,
+				     &kmemleak_fops);
+	if (!dentry)
+		pr_warn("Failed to create the debugfs kmemleak file\n");
+
 	if (kmemleak_error) {
 		/*
 		 * Some error occurred and kmemleak was disabled. There is a
@@ -2108,10 +2113,6 @@ static int __init kmemleak_late_init(void)
 		return -ENOMEM;
 	}
 
-	dentry = debugfs_create_file("kmemleak", 0644, NULL, NULL,
-				     &kmemleak_fops);
-	if (!dentry)
-		pr_warn("Failed to create the debugfs kmemleak file\n");
 	mutex_lock(&scan_mutex);
 	start_scan_thread();
 	mutex_unlock(&scan_mutex);

From 904506562e0856f2535d876407d087c9459d345b Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 4 Sep 2018 15:45:48 -0700
Subject: [PATCH 213/257] tools/vm/slabinfo.c: fix sign-compare warning

Currently we get the following compiler warning:

    slabinfo.c:854:22: warning: comparison between signed and unsigned integer expressions [-Wsign-compare]
       if (s->object_size < min_objsize)
                          ^

due to the mismatch of signed/unsigned comparison.  ->object_size and
->slab_size are never expected to be negative, so let's define them as
unsigned int.

[n-horiguchi@ah.jp.nec.com: convert everything - none of these can be negative]
  Link: http://lkml.kernel.org/r/20180826234947.GA9787@hori1.linux.bs1.fc.nec.co.jp
Link: http://lkml.kernel.org/r/1535103134-20239-1-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/vm/slabinfo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index f82c2eaa859d1..334b16db0ebbe 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -30,8 +30,8 @@ struct slabinfo {
 	int alias;
 	int refs;
 	int aliases, align, cache_dma, cpu_slabs, destroy_by_rcu;
-	int hwcache_align, object_size, objs_per_slab;
-	int sanity_checks, slab_size, store_user, trace;
+	unsigned int hwcache_align, object_size, objs_per_slab;
+	unsigned int sanity_checks, slab_size, store_user, trace;
 	int order, poison, reclaim_account, red_zone;
 	unsigned long partial, objects, slabs, objects_partial, objects_total;
 	unsigned long alloc_fastpath, alloc_slowpath;

From 7ab660f8baecfe26c1c267fa8e64d2073feae2bb Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Date: Tue, 4 Sep 2018 15:45:51 -0700
Subject: [PATCH 214/257] tools/vm/page-types.c: fix "defined but not used"
 warning

debugfs_known_mountpoints[] is not used any more, so let's remove it.

Link: http://lkml.kernel.org/r/1535102651-19418-1-git-send-email-n-horiguchi@ah.jp.nec.com
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 tools/vm/page-types.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 30cb0a0713fff..37908a83ddc27 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -159,12 +159,6 @@ static const char * const page_flag_names[] = {
 };
 
 
-static const char * const debugfs_known_mountpoints[] = {
-	"/sys/kernel/debug",
-	"/debug",
-	0,
-};
-
 /*
  * data structures
  */

From 04b8e946075d4582093e84f54dc1a004b227794d Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 4 Sep 2018 15:45:55 -0700
Subject: [PATCH 215/257] mm/util.c: improve kvfree() kerneldoc

Scooped from an email from Matthew.

Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/util.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mm/util.c b/mm/util.c
index d2890a4073321..9e3ebd2ef65f8 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -435,11 +435,14 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
 EXPORT_SYMBOL(kvmalloc_node);
 
 /**
- * kvfree - free memory allocated with kvmalloc
- * @addr: pointer returned by kvmalloc
+ * kvfree() - Free memory.
+ * @addr: Pointer to allocated memory.
  *
- * If the memory is allocated from vmalloc area it is freed with vfree().
- * Otherwise kfree() is used.
+ * kvfree frees memory allocated by any of vmalloc(), kmalloc() or kvmalloc().
+ * It is slightly more efficient to use kfree() or vfree() if you are certain
+ * that you know which one to use.
+ *
+ * Context: Any context except NMI.
  */
 void kvfree(const void *addr)
 {

From 464c7ffbcb164b2e5cebfa406b7fc6cdb7945344 Mon Sep 17 00:00:00 2001
From: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Date: Tue, 4 Sep 2018 15:45:59 -0700
Subject: [PATCH 216/257] mm/hugetlb: filter out hugetlb pages if HUGEPAGE
 migration is not supported.

When scanning for movable pages, filter out Hugetlb pages if hugepage
migration is not supported.  Without this we hit infinte loop in
__offline_pages() where we do

	pfn = scan_movable_pages(start_pfn, end_pfn);
	if (pfn) { /* We have movable pages */
		ret = do_migrate_range(pfn, end_pfn);
		goto repeat;
	}

Fix this by checking hugepage_migration_supported both in
has_unmovable_pages which is the primary backoff mechanism for page
offlining and for consistency reasons also into scan_movable_pages
because it doesn't make any sense to return a pfn to non-migrateable
huge page.

This issue was revealed by, but not caused by 72b39cfc4d75 ("mm,
memory_hotplug: do not fail offlining too early").

Link: http://lkml.kernel.org/r/20180824063314.21981-1-aneesh.kumar@linux.ibm.com
Fixes: 72b39cfc4d75 ("mm, memory_hotplug: do not fail offlining too early")
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Reported-by: Haren Myneni <haren@linux.vnet.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/memory_hotplug.c | 3 ++-
 mm/page_alloc.c     | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9eea6e809a4e9..38d94b703e9d4 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1333,7 +1333,8 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 			if (__PageMovable(page))
 				return pfn;
 			if (PageHuge(page)) {
-				if (page_huge_active(page))
+				if (hugepage_migration_supported(page_hstate(page)) &&
+				    page_huge_active(page))
 					return pfn;
 				else
 					pfn = round_up(pfn + 1,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 05e983f42316a..89d2a2ab3fe68 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7708,6 +7708,10 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
 		 * handle each tail page individually in migration.
 		 */
 		if (PageHuge(page)) {
+
+			if (!hugepage_migration_supported(page_hstate(page)))
+				goto unmovable;
+
 			iter = round_up(iter + 1, 1<<compound_order(page)) - 1;
 			continue;
 		}

From 9c21dae291d1b7763b749dfaef3fb911286c0d98 Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dbueso@suse.de>
Date: Tue, 4 Sep 2018 15:46:02 -0700
Subject: [PATCH 217/257] ipc/shm: properly return EIDRM in shm_lock()

When getting rid of the general ipc_lock(), this was missed furthermore,
making the comment around the ipc object validity check bogus.  Under
EIDRM conditions, callers will in turn not see the error and continue
with the operation.

Link: http://lkml.kernel.org/r/20180824030920.GD3677@linux-r8p5
Link: http://lkml.kernel.org/r/20180823024051.GC13343@shao2-debian
Fixes: 82061c57ce9 ("ipc: drop ipc_lock()")
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Reported-by: kernel test robot <rong.a.chen@intel.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 ipc/shm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ipc/shm.c b/ipc/shm.c
index b0eb3757ab895..4cd402e4cfeb6 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -199,6 +199,7 @@ static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
 	}
 
 	ipc_unlock_object(ipcp);
+	ipcp = ERR_PTR(-EIDRM);
 err:
 	rcu_read_unlock();
 	/*

From 328b5f417a4ac929e20d98b989a2babac66c8520 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 4 Sep 2018 15:46:06 -0700
Subject: [PATCH 218/257] checkpatch: add optional static const to blank line
 declarations test

Using a static const struct definition as part of a series of
declarations produces a false positive "Missing a blank line after
declarations" for code like:

  WARNING: Missing a blank line after declarations
  #710: FILE: drivers/gpu/drm/tidss/tidss_scale_coefs.c:137:
  +       int inc;
  +       static const struct {

So fix it.

Link: http://lkml.kernel.org/r/5905126e70b0ed1781e49265fd5c49c5090d0223.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Reported-by: Jyri Sarha <jsarha@ti.com>
Cc: "Valkeinen, Tomi" <tomi.valkeinen@ti.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index 5219280bf7ff3..b4caee6e269cf 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -3311,7 +3311,7 @@ sub process {
 			# known declaration macros
 		      $sline =~ /^\+\s+$declaration_macros/ ||
 			# start of struct or union or enum
-		      $sline =~ /^\+\s+(?:union|struct|enum|typedef)\b/ ||
+		      $sline =~ /^\+\s+(?:static\s+)?(?:const\s+)?(?:union|struct|enum|typedef)\b/ ||
 			# start or end of block or continuation of declaration
 		      $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
 			# bitfield continuation

From 4e8346d0be889c7ab5eb2d3deedc18111d741e99 Mon Sep 17 00:00:00 2001
From: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Date: Tue, 4 Sep 2018 15:46:09 -0700
Subject: [PATCH 219/257] memory_hotplug: fix kernel_panic on offline page
 processing

Within show_valid_zones() the function test_pages_in_a_zone() should be
called for online memory blocks only.

Otherwise it might lead to the VM_BUG_ON due to uninitialized struct
pages (when CONFIG_DEBUG_VM_PGFLAGS kernel option is set):

 page dumped because: VM_BUG_ON_PAGE(PagePoisoned(p))
 ------------[ cut here ]------------
 Call Trace:
 ([<000000000038f91e>] test_pages_in_a_zone+0xe6/0x168)
  [<0000000000923472>] show_valid_zones+0x5a/0x1a8
  [<0000000000900284>] dev_attr_show+0x3c/0x78
  [<000000000046f6f0>] sysfs_kf_seq_show+0xd0/0x150
  [<00000000003ef662>] seq_read+0x212/0x4b8
  [<00000000003bf202>] __vfs_read+0x3a/0x178
  [<00000000003bf3ca>] vfs_read+0x8a/0x148
  [<00000000003bfa3a>] ksys_read+0x62/0xb8
  [<0000000000bc2220>] system_call+0xdc/0x2d8

That VM_BUG_ON was triggered by the page poisoning introduced in
mm/sparse.c with the git commit d0dc12e86b31 ("mm/memory_hotplug:
optimize memory hotplug").

With the same commit the new 'nid' field has been added to the struct
memory_block in order to store and later on derive the node id for
offline pages (instead of accessing struct page which might be
uninitialized).  But one reference to nid in show_valid_zones() function
has been overlooked.  Fixed with current commit.  Also, nr_pages will
not be used any more after test_pages_in_a_zone() call, do not update
it.

Link: http://lkml.kernel.org/r/20180828090539.41491-1-zaslonko@linux.ibm.com
Fixes: d0dc12e86b31 ("mm/memory_hotplug: optimize memory hotplug")
Signed-off-by: Mikhail Zaslonko <zaslonko@linux.ibm.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Pavel Tatashin <pavel.tatashin@microsoft.com>
Cc: <stable@vger.kernel.org>	[4.17+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/memory.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c8a1cb0b61361..817320c7c4c1b 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -416,26 +416,24 @@ static ssize_t show_valid_zones(struct device *dev,
 	struct zone *default_zone;
 	int nid;
 
-	/*
-	 * The block contains more than one zone can not be offlined.
-	 * This can happen e.g. for ZONE_DMA and ZONE_DMA32
-	 */
-	if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, &valid_start_pfn, &valid_end_pfn))
-		return sprintf(buf, "none\n");
-
-	start_pfn = valid_start_pfn;
-	nr_pages = valid_end_pfn - start_pfn;
-
 	/*
 	 * Check the existing zone. Make sure that we do that only on the
 	 * online nodes otherwise the page_zone is not reliable
 	 */
 	if (mem->state == MEM_ONLINE) {
+		/*
+		 * The block contains more than one zone can not be offlined.
+		 * This can happen e.g. for ZONE_DMA and ZONE_DMA32
+		 */
+		if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages,
+					  &valid_start_pfn, &valid_end_pfn))
+			return sprintf(buf, "none\n");
+		start_pfn = valid_start_pfn;
 		strcat(buf, page_zone(pfn_to_page(start_pfn))->name);
 		goto out;
 	}
 
-	nid = pfn_to_nid(start_pfn);
+	nid = mem->nid;
 	default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages);
 	strcat(buf, default_zone->name);
 

From 8a2336e549d385bb0b46880435b411df8d8200e8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Tue, 4 Sep 2018 15:46:13 -0700
Subject: [PATCH 220/257] uapi/linux/keyctl.h: don't use C++ reserved keyword
 as a struct member name

Since this header is in "include/uapi/linux/", apparently people want to
use it in userspace programs -- even in C++ ones.  However, the header
uses a C++ reserved keyword ("private"), so change that to "dh_private"
instead to allow the header file to be used in C++ userspace.

Fixes https://bugzilla.kernel.org/show_bug.cgi?id=191051
Link: http://lkml.kernel.org/r/0db6c314-1ef4-9bfa-1baa-7214dd2ee061@infradead.org
Fixes: ddbb41148724 ("KEYS: Add KEYCTL_DH_COMPUTE command")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: David Howells <dhowells@redhat.com>
Cc: James Morris <jmorris@namei.org>
Cc: "Serge E. Hallyn" <serge@hallyn.com>
Cc: Mat Martineau <mathew.j.martineau@linux.intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/keyctl.h | 2 +-
 security/keys/dh.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h
index 7b8c9e19bad1c..910cc4334b215 100644
--- a/include/uapi/linux/keyctl.h
+++ b/include/uapi/linux/keyctl.h
@@ -65,7 +65,7 @@
 
 /* keyctl structures */
 struct keyctl_dh_params {
-	__s32 private;
+	__s32 dh_private;
 	__s32 prime;
 	__s32 base;
 };
diff --git a/security/keys/dh.c b/security/keys/dh.c
index 711e89d8c4153..3b602a1e27fa2 100644
--- a/security/keys/dh.c
+++ b/security/keys/dh.c
@@ -300,7 +300,7 @@ long __keyctl_dh_compute(struct keyctl_dh_params __user *params,
 	}
 	dh_inputs.g_size = dlen;
 
-	dlen = dh_data_from_key(pcopy.private, &dh_inputs.key);
+	dlen = dh_data_from_key(pcopy.dh_private, &dh_inputs.key);
 	if (dlen < 0) {
 		ret = dlen;
 		goto out2;

From 62ec0d8c4f332dedf19d6fad15ddea639044d5fe Mon Sep 17 00:00:00 2001
From: Dave Jiang <dave.jiang@intel.com>
Date: Tue, 4 Sep 2018 15:46:16 -0700
Subject: [PATCH 221/257] mm: fix BUG_ON() in vmf_insert_pfn_pud() from
 VM_MIXEDMAP removal

It looks like I missed the PUD path when doing VM_MIXEDMAP removal.
This can be triggered by:
1. Boot with memmap=4G!8G
2. build ndctl with destructive flag on
3. make TESTS=device-dax check

[  +0.000675] kernel BUG at mm/huge_memory.c:824!

Applying the same change that was applied to vmf_insert_pfn_pmd() in the
original patch.

Link: http://lkml.kernel.org/r/153565957352.35524.1005746906902065126.stgit@djiang5-desk3.ch.intel.com
Fixes: e1fb4a08649 ("dax: remove VM_MIXEDMAP for fsdax and device dax")
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Acked-by: Jeff Moyer <jmoyer@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 mm/huge_memory.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index c3bc7e9c9a2ac..533f9b00147d2 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -821,11 +821,11 @@ vm_fault_t vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr,
 	 * but we need to be consistent with PTEs and architectures that
 	 * can't support a 'special' bit.
 	 */
-	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)));
+	BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) &&
+			!pfn_t_devmap(pfn));
 	BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) ==
 						(VM_PFNMAP|VM_MIXEDMAP));
 	BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags));
-	BUG_ON(!pfn_t_devmap(pfn));
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return VM_FAULT_SIGBUS;

From c5967e989f1fe702e75d7405b9251ec7e490d847 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Tue, 4 Sep 2018 15:46:20 -0700
Subject: [PATCH 222/257] checkpatch: add __ro_after_init to known $Attribute

__ro_after_init is a specific __attribute__ that checkpatch does currently
not understand.

Add it to the known $Attribute types so that code that uses variables
declared with __ro_after_init are not thought to be a modifier type.

This appears as a defect in checkpatch output of code like:

static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
[...]
       if (trust_cpu && arch_init) {

where checkpatch reports:

ERROR: space prohibited after that '&&' (ctx:WxW)
	if (trust_cpu && arch_init) {

Link: http://lkml.kernel.org/r/0fa8a2cb83ade4c525e18261ecf6cfede3015983.camel@perches.com
Signed-off-by: Joe Perches <joe@perches.com>
Reported-by: Kees Cook <keescook@chromium.org>
Tested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 scripts/checkpatch.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b4caee6e269cf..161b0224d6ae9 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -380,6 +380,7 @@ sub hash_show_words {
 			__noclone|
 			__deprecated|
 			__read_mostly|
+			__ro_after_init|
 			__kprobes|
 			$InitAttribute|
 			____cacheline_aligned|

From 4c5d114ea04d5b6c7009d46895ec26109aa654f3 Mon Sep 17 00:00:00 2001
From: Thibaut Sautereau <thibaut@sautereau.fr>
Date: Tue, 4 Sep 2018 15:46:23 -0700
Subject: [PATCH 223/257] lib/Kconfig.debug: fix three typos in help text

Fix three typos in CONFIG_WARN_ALL_UNSEEDED_RANDOM help text.

Link: http://lkml.kernel.org/r/20180830194505.4778-1-thibaut@sautereau.fr
Signed-off-by: Thibaut Sautereau <thibaut@sautereau.fr>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 613316724c6af..4966c4fbe7f73 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1277,13 +1277,13 @@ config WARN_ALL_UNSEEDED_RANDOM
 	  time.  This is really bad from a security perspective, and
 	  so architecture maintainers really need to do what they can
 	  to get the CRNG seeded sooner after the system is booted.
-	  However, since users can not do anything actionble to
+	  However, since users cannot do anything actionable to
 	  address this, by default the kernel will issue only a single
 	  warning for the first use of unseeded randomness.
 
 	  Say Y here if you want to receive warnings for all uses of
 	  unseeded randomness.  This will be of use primarily for
-	  those developers interersted in improving the security of
+	  those developers interested in improving the security of
 	  Linux kernels running on their architecture (or
 	  subarchitecture).
 

From 36bdac1e674debd2714cb3e80eaa18266c2426e4 Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Tue, 4 Sep 2018 15:46:26 -0700
Subject: [PATCH 224/257] drivers/dax/device.c: convert variable to vm_fault_t
 type

As part of 226ab561075f ("device-dax: Convert to vmf_insert_mixed and
vm_fault_t") in 4.19-rc1, 'rc' was not converted to vm_fault_t.  Now
converted.

Link: http://lkml.kernel.org/r/20180830153813.GA26059@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ross Zwisler <zwisler@kernel.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/dax/device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 6fd46083e6295..bbe4d72ca105b 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -392,7 +392,8 @@ static vm_fault_t dev_dax_huge_fault(struct vm_fault *vmf,
 {
 	struct file *filp = vmf->vma->vm_file;
 	unsigned long fault_size;
-	int rc, id;
+	vm_fault_t rc = VM_FAULT_SIGBUS;
+	int id;
 	pfn_t pfn;
 	struct dev_dax *dev_dax = filp->private_data;
 

From ae98043f5f7fa45b65084f70e3ada3209873ebb4 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Tue, 4 Sep 2018 15:46:30 -0700
Subject: [PATCH 225/257] nilfs2: convert to SPDX license tags

Remove the verbose license text from NILFS2 files and replace them with
SPDX tags.  This does not change the license of any of the code.

Link: http://lkml.kernel.org/r/1535624528-5982-1-git-send-email-konishi.ryusuke@lab.ntt.co.jp
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nilfs2/alloc.c     | 11 +----------
 fs/nilfs2/alloc.h     | 11 +----------
 fs/nilfs2/bmap.c      | 11 +----------
 fs/nilfs2/bmap.h      | 11 +----------
 fs/nilfs2/btnode.c    | 11 +----------
 fs/nilfs2/btnode.h    | 11 +----------
 fs/nilfs2/btree.c     | 11 +----------
 fs/nilfs2/btree.h     | 11 +----------
 fs/nilfs2/cpfile.c    | 11 +----------
 fs/nilfs2/cpfile.h    | 11 +----------
 fs/nilfs2/dat.c       | 11 +----------
 fs/nilfs2/dat.h       | 11 +----------
 fs/nilfs2/dir.c       | 11 +----------
 fs/nilfs2/direct.c    | 11 +----------
 fs/nilfs2/direct.h    | 11 +----------
 fs/nilfs2/file.c      | 11 +----------
 fs/nilfs2/gcinode.c   | 11 +----------
 fs/nilfs2/ifile.c     | 11 +----------
 fs/nilfs2/ifile.h     | 11 +----------
 fs/nilfs2/inode.c     | 11 +----------
 fs/nilfs2/ioctl.c     | 11 +----------
 fs/nilfs2/mdt.c       | 11 +----------
 fs/nilfs2/mdt.h       | 11 +----------
 fs/nilfs2/namei.c     | 11 +----------
 fs/nilfs2/nilfs.h     | 11 +----------
 fs/nilfs2/page.c      | 11 +----------
 fs/nilfs2/page.h      | 11 +----------
 fs/nilfs2/recovery.c  | 11 +----------
 fs/nilfs2/segbuf.c    | 11 +----------
 fs/nilfs2/segbuf.h    | 11 +----------
 fs/nilfs2/segment.c   | 11 +----------
 fs/nilfs2/segment.h   | 11 +----------
 fs/nilfs2/sufile.c    | 11 +----------
 fs/nilfs2/sufile.h    | 11 +----------
 fs/nilfs2/super.c     | 11 +----------
 fs/nilfs2/sysfs.c     | 11 +----------
 fs/nilfs2/sysfs.h     | 11 +----------
 fs/nilfs2/the_nilfs.c | 11 +----------
 fs/nilfs2/the_nilfs.h | 11 +----------
 39 files changed, 39 insertions(+), 390 deletions(-)

diff --git a/fs/nilfs2/alloc.c b/fs/nilfs2/alloc.c
index 03b8ba933eb2a..235b959fc2b3a 100644
--- a/fs/nilfs2/alloc.c
+++ b/fs/nilfs2/alloc.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * alloc.c - NILFS dat/inode allocator
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Originally written by Koji Sato.
  * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji.
  */
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index 05149e606a78a..0303c3968cee0 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * alloc.h - persistent object (dat entry/disk inode) allocator/deallocator
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Originally written by Koji Sato.
  * Two allocators were unified by Ryusuke Konishi and Amagai Yoshiji.
  */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 01fb1831ca250..fb5a9a8a13cf7 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * bmap.c - NILFS block mapping.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index 2b6ffbe5997a2..2c63858e81c9c 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * bmap.h - NILFS block mapping.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index dec98cab729dd..ebb24a314f431 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * btnode.c - NILFS B-tree node cache
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Originally written by Seiji Kihara.
  * Fully revised by Ryusuke Konishi for stabilization and simplification.
  *
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 4e8aaa1aeb65d..0f88dbc9bcb3e 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * btnode.h - NILFS B-tree node cache
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Seiji Kihara.
  * Revised by Ryusuke Konishi.
  */
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index 16a7a67a11c9e..23e043eca237b 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * btree.c - NILFS B-tree.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/btree.h b/fs/nilfs2/btree.h
index 2184e47fa4bf6..d1421b646ce46 100644
--- a/fs/nilfs2/btree.h
+++ b/fs/nilfs2/btree.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * btree.h - NILFS B-tree.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index a15a1601e931d..8d41311b5db4b 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * cpfile.c - NILFS checkpoint file.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h
index 6eca972f9673c..6336222df24a8 100644
--- a/fs/nilfs2/cpfile.h
+++ b/fs/nilfs2/cpfile.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * cpfile.h - NILFS checkpoint file.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index dffedb2f88179..6f4066636be9a 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * dat.c - NILFS disk address translation.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h
index 57dc6cf466d02..b17ee34580ae6 100644
--- a/fs/nilfs2/dat.h
+++ b/fs/nilfs2/dat.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * dat.h - NILFS disk address translation.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 582831ab3eb95..81394e22d0a09 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * dir.c - NILFS directory entry operations
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Modified for NILFS by Amagai Yoshiji.
  */
 /*
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index 96e3ed0d9652b..533e24ea3a88d 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * direct.c - NILFS direct block pointer.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/direct.h b/fs/nilfs2/direct.h
index cfe85e848bba1..ec9a23c77994e 100644
--- a/fs/nilfs2/direct.h
+++ b/fs/nilfs2/direct.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * direct.h - NILFS direct block pointer.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 7da0fac71dc26..64bc81363c6cc 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * file.c - NILFS regular file handling primitives including fsync().
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Amagai Yoshiji and Ryusuke Konishi.
  */
 
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 853a831dcde08..aa3c328ee189c 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * gcinode.c - dummy inodes to buffer blocks for garbage collection
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Seiji Kihara, Amagai Yoshiji, and Ryusuke Konishi.
  * Revised by Ryusuke Konishi.
  *
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index b8fa45c20c63f..4140d232cadc0 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * ifile.c - NILFS inode file
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Amagai Yoshiji.
  * Revised by Ryusuke Konishi.
  *
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h
index 188b94fe0ec5f..a1e1e5711a054 100644
--- a/fs/nilfs2/ifile.h
+++ b/fs/nilfs2/ifile.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * ifile.h - NILFS inode file
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Amagai Yoshiji.
  * Revised by Ryusuke Konishi.
  *
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6a612d832e7de..671085512e0fd 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * inode.c - NILFS inode operations.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 1d2c3d7711feb..9b96d79eea6c8 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * ioctl.c - NILFS ioctl operations.
  *
  * Copyright (C) 2007, 2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index c6bc1033e7d2c..700870a92bc4a 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * mdt.c - meta data file for NILFS
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  */
 
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index 3f67f3932097b..e77aea4bb921c 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * mdt.h - NILFS meta data file prototype and definitions
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  */
 
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index dd52d3f82e8d6..9fe6d4ab74f01 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * namei.c - NILFS pathname lookup operations.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Modified for NILFS by Amagai Yoshiji and Ryusuke Konishi.
  */
 /*
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 33f8c8fc96e8e..a2f247b6a209e 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * nilfs.h - NILFS local header file.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato and Ryusuke Konishi.
  */
 
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 4cb850a6f1c2c..329a056b73b17 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * page.c - buffer/page management specific to NILFS
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi and Seiji Kihara.
  */
 
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
index f3687c958fa84..62b9bb469e92f 100644
--- a/fs/nilfs2/page.h
+++ b/fs/nilfs2/page.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * page.h - buffer/page management specific to NILFS
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi and Seiji Kihara.
  */
 
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 5139efed18882..140b663e91c7f 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * recovery.c - NILFS recovery logic
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  */
 
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 68cb9e4740b4e..20c479b5e41b8 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * segbuf.c - NILFS segment buffer
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index 10e16935fff65..9bea1bd59041e 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * segbuf.h - NILFS Segment buffer prototypes and definitions
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 0953635e7d48e..445eef41bfaf0 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * segment.c - NILFS segment constructor.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 04634e3e3d583..f5cf5308f3fca 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * segment.h - NILFS Segment constructor prototypes and definitions
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index c7fa139d50e82..bf3f8f05c89b3 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * sufile.c - NILFS segment usage file.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  * Revised by Ryusuke Konishi.
  */
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h
index 673a891350f49..c4e2c7a7add1d 100644
--- a/fs/nilfs2/sufile.h
+++ b/fs/nilfs2/sufile.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * sufile.h - NILFS segment usage file.
  *
  * Copyright (C) 2006-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Koji Sato.
  */
 
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 1b9067cf45112..26290aa1023f3 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * super.c - NILFS module and super block management.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  */
 /*
diff --git a/fs/nilfs2/sysfs.c b/fs/nilfs2/sysfs.c
index 4b25837e77245..e60be7bb55b0b 100644
--- a/fs/nilfs2/sysfs.c
+++ b/fs/nilfs2/sysfs.c
@@ -1,19 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * sysfs.c - sysfs support implementation.
  *
  * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
  * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
  */
 
diff --git a/fs/nilfs2/sysfs.h b/fs/nilfs2/sysfs.h
index 648cedf9c06ec..d001eb862daec 100644
--- a/fs/nilfs2/sysfs.h
+++ b/fs/nilfs2/sysfs.h
@@ -1,19 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * sysfs.h - sysfs support declarations.
  *
  * Copyright (C) 2005-2014 Nippon Telegraph and Telephone Corporation.
  * Copyright (C) 2014 HGST, Inc., a Western Digital Company.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Vyacheslav Dubeyko <Vyacheslav.Dubeyko@hgst.com>
  */
 
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 1a85317e83f0f..484785cdf96e2 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -1,18 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0+
 /*
  * the_nilfs.c - the_nilfs shared structure.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 36da1779f9766..380a543c5b19b 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -1,18 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
 /*
  * the_nilfs.h - the_nilfs shared structure.
  *
  * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
  *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  * Written by Ryusuke Konishi.
  *
  */

From 3350139c0ff3c95724b784f7109987d533cb3ecd Mon Sep 17 00:00:00 2001
From: Greentime Hu <greentime@andestech.com>
Date: Tue, 4 Sep 2018 14:25:57 +0800
Subject: [PATCH 226/257] nds32: linker script: GCOV kernel may refers data in
 __exit

This patch is used to fix nds32 allmodconfig/allyesconfig build error
because GCOV kernel embeds counters in the kernel for each line
and a part of that embed in __exit text. So we need to keep the
EXIT_TEXT and EXIT_DATA  if CONFIG_GCOV_KERNEL=y.

Link: https://lkml.org/lkml/2018/9/1/125
Signed-off-by: Greentime Hu <greentime@andestech.com>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 arch/nds32/kernel/vmlinux.lds.S | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S
index 288313b886efa..9e90f30a181d7 100644
--- a/arch/nds32/kernel/vmlinux.lds.S
+++ b/arch/nds32/kernel/vmlinux.lds.S
@@ -13,14 +13,26 @@ OUTPUT_ARCH(nds32)
 ENTRY(_stext_lma)
 jiffies = jiffies_64;
 
+#if defined(CONFIG_GCOV_KERNEL)
+#define NDS32_EXIT_KEEP(x)	x
+#else
+#define NDS32_EXIT_KEEP(x)
+#endif
+
 SECTIONS
 {
 	_stext_lma = TEXTADDR - LOAD_OFFSET;
 	. = TEXTADDR;
 	__init_begin = .;
 	HEAD_TEXT_SECTION
+	.exit.text : {
+		NDS32_EXIT_KEEP(EXIT_TEXT)
+	}
 	INIT_TEXT_SECTION(PAGE_SIZE)
 	INIT_DATA_SECTION(16)
+	.exit.data : {
+		NDS32_EXIT_KEEP(EXIT_DATA)
+	}
 	PERCPU_SECTION(L1_CACHE_BYTES)
 	__init_end = .;
 

From 865e63b04e9b2a658d7f26bd13a71dcd964a9118 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Tue, 4 Sep 2018 16:26:11 -0400
Subject: [PATCH 227/257] tracing: Add back in rcu_irq_enter/exit_irqson() for
 rcuidle tracepoints

Borislav reported the following splat:

 =============================
 WARNING: suspicious RCU usage
 4.19.0-rc1+ #1 Not tainted
 -----------------------------
 ./include/linux/rcupdate.h:631 rcu_read_lock() used illegally while idle!
 other info that might help us debug this:

 RCU used illegally from idle CPU!
 rcu_scheduler_active = 2, debug_locks = 1
 RCU used illegally from extended quiescent state!
 1 lock held by swapper/0/0:
  #0: 000000004557ee0e (rcu_read_lock){....}, at: perf_event_output_forward+0x0/0x130

 stack backtrace:
 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.19.0-rc1+ #1
 Hardware name: LENOVO 2320CTO/2320CTO, BIOS G2ET86WW (2.06 ) 11/13/2012
 Call Trace:
  dump_stack+0x85/0xcb
  perf_event_output_forward+0xf6/0x130
  __perf_event_overflow+0x52/0xe0
  perf_swevent_overflow+0x91/0xb0
  perf_tp_event+0x11a/0x350
  ? find_held_lock+0x2d/0x90
  ? __lock_acquire+0x2ce/0x1350
  ? __lock_acquire+0x2ce/0x1350
  ? retint_kernel+0x2d/0x2d
  ? find_held_lock+0x2d/0x90
  ? tick_nohz_get_sleep_length+0x83/0xb0
  ? perf_trace_cpu+0xbb/0xd0
  ? perf_trace_buf_alloc+0x5a/0xa0
  perf_trace_cpu+0xbb/0xd0
  cpuidle_enter_state+0x185/0x340
  do_idle+0x1eb/0x260
  cpu_startup_entry+0x5f/0x70
  start_kernel+0x49b/0x4a6
  secondary_startup_64+0xa4/0xb0

This is due to the tracepoints moving to SRCU usage which does not require
RCU to be "watching". But perf uses these tracepoints with RCU and expects
it to be. Hence, we still need to add in the rcu_irq_enter/exit_irqson()
calls for "rcuidle" tracepoints. This is a temporary fix until we have SRCU
working in NMI context, and then perf can be converted to use that instead
of normal RCU.

Link: http://lkml.kernel.org/r/20180904162611.6a120068@gandalf.local.home

Cc: x86-ml <x86@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Reported-by: Borislav Petkov <bp@alien8.de>
Tested-by: Borislav Petkov <bp@alien8.de>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Fixes: e6753f23d961d ("tracepoint: Make rcuidle tracepoint callers use SRCU")
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 include/linux/tracepoint.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 7f2e16e76ac47..041f7e56a2894 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -158,8 +158,10 @@ extern void syscall_unregfunc(void);
 		 * For rcuidle callers, use srcu since sched-rcu	\
 		 * doesn't work from the idle path.			\
 		 */							\
-		if (rcuidle)						\
+		if (rcuidle) {						\
 			idx = srcu_read_lock_notrace(&tracepoint_srcu);	\
+			rcu_irq_enter_irqson();				\
+		}							\
 									\
 		it_func_ptr = rcu_dereference_raw((tp)->funcs);		\
 									\
@@ -171,8 +173,10 @@ extern void syscall_unregfunc(void);
 			} while ((++it_func_ptr)->func);		\
 		}							\
 									\
-		if (rcuidle)						\
+		if (rcuidle) {						\
+			rcu_irq_exit_irqson();				\
 			srcu_read_unlock_notrace(&tracepoint_srcu, idx);\
+		}							\
 									\
 		preempt_enable_notrace();				\
 	} while (0)

From d07f05fb86439c41dd6967c94be3ba3837b21567 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Sat, 21 Jul 2018 00:02:12 +0100
Subject: [PATCH 228/257] hwmon: rpi: add module alias to raspberrypi-hwmon

The raspberrypi-hwmon driver doesn't automatically load, although it does work
when loaded, by adding the alias it auto loads as expected when built as a
module. Tested on RPi2/RPi3 on 32 bit kernel and RPi3B+ on aarch64 with
Fedora 28 and a patched 4.18 RC kernel.

Fixes: 3c493c885cf ("hwmon: Add support for RPi voltage sensor")
Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
CC: Stefan Wahren <stefan.wahren@i2se.com>
CC: Eric Anholt <eric@anholt.net>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
Reviewed-by: Eric Anholt <eric@anholt.net>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/hwmon/raspberrypi-hwmon.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwmon/raspberrypi-hwmon.c b/drivers/hwmon/raspberrypi-hwmon.c
index fb4e4a6bb1f63..be5ba46908953 100644
--- a/drivers/hwmon/raspberrypi-hwmon.c
+++ b/drivers/hwmon/raspberrypi-hwmon.c
@@ -164,3 +164,4 @@ module_platform_driver(rpi_hwmon_driver);
 MODULE_AUTHOR("Stefan Wahren <stefan.wahren@i2se.com>");
 MODULE_DESCRIPTION("Raspberry Pi voltage sensor driver");
 MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:raspberrypi-hwmon");

From 8b2ded1c94c06f841f8c1612bcfa33c85012a36b Mon Sep 17 00:00:00 2001
From: Mikulas Patocka <mpatocka@redhat.com>
Date: Wed, 5 Sep 2018 16:14:36 -0600
Subject: [PATCH 229/257] block: don't warn when doing fsync on read-only
 devices

It is possible to call fsync on a read-only handle (for example, fsck.ext2
does it when doing read-only check), and this call results in kernel
warning.

The patch b089cfd95d32 ("block: don't warn for flush on read-only device")
attempted to disable the warning, but it is buggy and it doesn't
(op_is_flush tests flags, but bio_op strips off the flags).

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Fixes: 721c7fc701c7 ("block: fail op_is_write() requests to read-only partitions")
Cc: stable@vger.kernel.org	# 4.18
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index dee56c282efb0..4dbc93f43b382 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2163,9 +2163,12 @@ static inline bool bio_check_ro(struct bio *bio, struct hd_struct *part)
 {
 	const int op = bio_op(bio);
 
-	if (part->policy && (op_is_write(op) && !op_is_flush(op))) {
+	if (part->policy && op_is_write(op)) {
 		char b[BDEVNAME_SIZE];
 
+		if (op_is_flush(bio->bi_opf) && !bio_sectors(bio))
+			return false;
+
 		WARN_ONCE(1,
 		       "generic_make_request: Trying to write "
 			"to read-only block-device %s (partno %d)\n",

From 5d128fbd8b20f8a48cb13c3eced789d1f9573ecd Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Tue, 4 Sep 2018 14:55:26 +0200
Subject: [PATCH 230/257] ACPI / bus: Only call dmi_check_system() on X86

Calling dmi_check_system() early only works on X86. Other
architectures initialize the DMI subsystem later so it's not
ready yet when ACPI itself gets initialized.

In the best case it results in a useless call to a function which
will do nothing. But depending on the dmi implementation, it could
also result in warnings. Best is to not call the function when it
can't work and isn't needed.

Additionally, if anyone ever needs to add non-x86 quirks, it would
surprisingly not work, so document the limitation to avoid confusion.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Fixes: cce4f632db20 (ACPI: fix early DSDT dmi check warnings on ia64)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 292088fcc6245..d2e29a19890d1 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -35,11 +35,11 @@
 #include <linux/delay.h>
 #ifdef CONFIG_X86
 #include <asm/mpspec.h>
+#include <linux/dmi.h>
 #endif
 #include <linux/acpi_iort.h>
 #include <linux/pci.h>
 #include <acpi/apei.h>
-#include <linux/dmi.h>
 #include <linux/suspend.h>
 
 #include "internal.h"
@@ -82,10 +82,6 @@ static const struct dmi_system_id dsdt_dmi_table[] __initconst = {
 	},
 	{}
 };
-#else
-static const struct dmi_system_id dsdt_dmi_table[] __initconst = {
-	{}
-};
 #endif
 
 /* --------------------------------------------------------------------------
@@ -1033,11 +1029,16 @@ void __init acpi_early_init(void)
 
 	acpi_permanent_mmap = true;
 
+#ifdef CONFIG_X86
 	/*
 	 * If the machine falls into the DMI check table,
-	 * DSDT will be copied to memory
+	 * DSDT will be copied to memory.
+	 * Note that calling dmi_check_system() here on other architectures
+	 * would not be OK because only x86 initializes dmi early enough.
+	 * Thankfully only x86 systems need such quirks for now.
 	 */
 	dmi_check_system(dsdt_dmi_table);
+#endif
 
 	status = acpi_reallocate_root_table();
 	if (ACPI_FAILURE(status)) {

From f11fc4bc669b8622510c1039499f5a9d24248fec Mon Sep 17 00:00:00 2001
From: Zhang Rui <rui.zhang@intel.com>
Date: Mon, 3 Sep 2018 10:00:07 +0800
Subject: [PATCH 231/257] ACPI / LPSS: Force LPSS quirks on boot

Commit 12864ff8545f (ACPI / LPSS: Avoid PM quirks on suspend and resume
from hibernation) bypasses lpss quirks for S3 and S4, by setting a flag
for S3/S4 in acpi_lpss_suspend(), and check that flag in
acpi_lpss_resume().

But this overlooks the boot case where acpi_lpss_resume() may get called
without a corresponding acpi_lpss_suspend() having been called.

Thus force setting the flag during boot.

Fixes: 12864ff8545f (ACPI / LPSS: Avoid PM quirks on suspend and resume from hibernation)
Link: https://bugzilla.kernel.org/show_bug.cgi?id=200989
Reported-and-tested-by: William Lieurance <william.lieurance@namikoda.com>
Signed-off-by: Zhang Rui <rui.zhang@intel.com>
Cc: 4.15+ <stable@vger.kernel.org> # 4.15+: 12864ff8545f (ACPI / LPSS: Avoid ...)
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpss.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c
index 9706613eecf9e..bf64cfa30febf 100644
--- a/drivers/acpi/acpi_lpss.c
+++ b/drivers/acpi/acpi_lpss.c
@@ -879,7 +879,7 @@ static void acpi_lpss_dismiss(struct device *dev)
 #define LPSS_GPIODEF0_DMA_LLP		BIT(13)
 
 static DEFINE_MUTEX(lpss_iosf_mutex);
-static bool lpss_iosf_d3_entered;
+static bool lpss_iosf_d3_entered = true;
 
 static void lpss_iosf_enter_d3_state(void)
 {

From 17f6bac2249356c795339e03a0742cd79be3cab8 Mon Sep 17 00:00:00 2001
From: Chuanhua Lei <chuanhua.lei@linux.intel.com>
Date: Thu, 6 Sep 2018 18:03:23 +0800
Subject: [PATCH 232/257] x86/tsc: Prevent result truncation on 32bit

Loops per jiffy is calculated by multiplying tsc_khz with 1e3 and then
dividing it by HZ.

Both tsc_khz and the temporary variable holding the multiplication result
are of type unsigned long, so on 32bit the result is truncated to the lower
32bit.

Use u64 as type for the temporary variable and cast tsc_khz to it before
multiplying.

[ tglx: Massaged changelog and removed pointless braces ]

Fixes: cf7a63ef4e02 ("x86/tsc: Calibrate tsc only once")
Signed-off-by: Chuanhua Lei <chuanhua.lei@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: yixin.zhu@linux.intel.com
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Len Brown <len.brown@intel.com>
Cc: Pavel Tatashin <pasha.tatashin@microsoft.com>
Cc: Rajvi Jingar <rajvi.jingar@intel.com>
Cc: Dou Liyang <douly.fnst@cn.fujitsu.com>
Link: https://lkml.kernel.org/r/1536228203-18701-1-git-send-email-chuanhua.lei@linux.intel.com
---
 arch/x86/kernel/tsc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 1463468ba9a0a..6490f618e0969 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1415,7 +1415,7 @@ static bool __init determine_cpu_tsc_frequencies(bool early)
 
 static unsigned long __init get_loops_per_jiffy(void)
 {
-	unsigned long lpj = tsc_khz * KHZ;
+	u64 lpj = (u64)tsc_khz * KHZ;
 
 	do_div(lpj, HZ);
 	return lpj;

From 9fe6299dde587788f245e9f7a5a1b296fad4e8c7 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 31 Aug 2018 21:41:51 +0200
Subject: [PATCH 233/257] x86/process: Don't mix user/kernel regs in 64bit
 __show_regs()

When the kernel.print-fatal-signals sysctl has been enabled, a simple
userspace crash will cause the kernel to write a crash dump that contains,
among other things, the kernel gsbase into dmesg.

As suggested by Andy, limit output to pt_regs, FS_BASE and KERNEL_GS_BASE
in this case.

This also moves the bitness-specific logic from show_regs() into
process_{32,64}.c.

Fixes: 45807a1df9f5 ("vdso: print fatal signals")
Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180831194151.123586-1-jannh@google.com
---
 arch/x86/include/asm/kdebug.h | 12 +++++++++++-
 arch/x86/kernel/dumpstack.c   | 11 +++--------
 arch/x86/kernel/process_32.c  |  4 ++--
 arch/x86/kernel/process_64.c  | 12 ++++++++++--
 4 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 395c9631e000a..75f1e35e7c153 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -22,10 +22,20 @@ enum die_val {
 	DIE_NMIUNKNOWN,
 };
 
+enum show_regs_mode {
+	SHOW_REGS_SHORT,
+	/*
+	 * For when userspace crashed, but we don't think it's our fault, and
+	 * therefore don't print kernel registers.
+	 */
+	SHOW_REGS_USER,
+	SHOW_REGS_ALL
+};
+
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
 extern void show_stack_regs(struct pt_regs *regs);
-extern void __show_regs(struct pt_regs *regs, int all);
+extern void __show_regs(struct pt_regs *regs, enum show_regs_mode);
 extern void show_iret_regs(struct pt_regs *regs);
 extern unsigned long oops_begin(void);
 extern void oops_end(unsigned long, struct pt_regs *, int signr);
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index f56895106ccf6..2b5886401e5f4 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -146,7 +146,7 @@ static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
 	 * they can be printed in the right context.
 	 */
 	if (!partial && on_stack(info, regs, sizeof(*regs))) {
-		__show_regs(regs, 0);
+		__show_regs(regs, SHOW_REGS_SHORT);
 
 	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
 				       IRET_FRAME_SIZE)) {
@@ -344,7 +344,7 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr)
 	oops_exit();
 
 	/* Executive summary in case the oops scrolled away */
-	__show_regs(&exec_summary_regs, true);
+	__show_regs(&exec_summary_regs, SHOW_REGS_ALL);
 
 	if (!signr)
 		return;
@@ -407,14 +407,9 @@ void die(const char *str, struct pt_regs *regs, long err)
 
 void show_regs(struct pt_regs *regs)
 {
-	bool all = true;
-
 	show_regs_print_info(KERN_DEFAULT);
 
-	if (IS_ENABLED(CONFIG_X86_32))
-		all = !user_mode(regs);
-
-	__show_regs(regs, all);
+	__show_regs(regs, user_mode(regs) ? SHOW_REGS_USER : SHOW_REGS_ALL);
 
 	/*
 	 * When in-kernel, we also print out the stack at the time of the fault..
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2924fd447e617..5046a3c9dec2f 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -59,7 +59,7 @@
 #include <asm/intel_rdt_sched.h>
 #include <asm/proto.h>
 
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
 	unsigned long d0, d1, d2, d3, d6, d7;
@@ -85,7 +85,7 @@ void __show_regs(struct pt_regs *regs, int all)
 	printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n",
 	       (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags);
 
-	if (!all)
+	if (mode != SHOW_REGS_ALL)
 		return;
 
 	cr0 = read_cr0();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index a451bc374b9b3..ea5ea850348da 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -62,7 +62,7 @@
 __visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
 
 /* Prints also some state that isn't saved in the pt_regs */
-void __show_regs(struct pt_regs *regs, int all)
+void __show_regs(struct pt_regs *regs, enum show_regs_mode mode)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
 	unsigned long d0, d1, d2, d3, d6, d7;
@@ -87,9 +87,17 @@ void __show_regs(struct pt_regs *regs, int all)
 	printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
 	       regs->r13, regs->r14, regs->r15);
 
-	if (!all)
+	if (mode == SHOW_REGS_SHORT)
 		return;
 
+	if (mode == SHOW_REGS_USER) {
+		rdmsrl(MSR_FS_BASE, fs);
+		rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
+		printk(KERN_DEFAULT "FS:  %016lx GS:  %016lx\n",
+		       fs, shadowgs);
+		return;
+	}
+
 	asm("movl %%ds,%0" : "=r" (ds));
 	asm("movl %%cs,%0" : "=r" (cs));
 	asm("movl %%es,%0" : "=r" (es));

From f8b7530aa0a1def79c93101216b5b17cf408a70a Mon Sep 17 00:00:00 2001
From: Neeraj Upadhyay <neeraju@codeaurora.org>
Date: Wed, 5 Sep 2018 11:22:07 +0530
Subject: [PATCH 234/257] cpu/hotplug: Adjust misplaced smb() in
 cpuhp_thread_fun()

The smp_mb() in cpuhp_thread_fun() is misplaced. It needs to be after the
load of st->should_run to prevent reordering of the later load/stores
w.r.t. the load of st->should_run.

Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core")
Signed-off-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infraded.org>
Cc: josh@joshtriplett.org
Cc: peterz@infradead.org
Cc: jiangshanlai@gmail.com
Cc: dzickus@redhat.com
Cc: brendan.jackman@arm.com
Cc: malat@debian.org
Cc: mojha@codeaurora.org
Cc: sramana@codeaurora.org
Cc: linux-arm-msm@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/1536126727-11629-1-git-send-email-neeraju@codeaurora.org
---
 kernel/cpu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index aa7fe85ad62e8..eb4041f78073d 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -607,15 +607,15 @@ static void cpuhp_thread_fun(unsigned int cpu)
 	bool bringup = st->bringup;
 	enum cpuhp_state state;
 
+	if (WARN_ON_ONCE(!st->should_run))
+		return;
+
 	/*
 	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
 	 * that if we see ->should_run we also see the rest of the state.
 	 */
 	smp_mb();
 
-	if (WARN_ON_ONCE(!st->should_run))
-		return;
-
 	cpuhp_lock_acquire(bringup);
 
 	if (st->single) {

From 69fa6eb7d6a64801ea261025cce9723d9442d773 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 6 Sep 2018 15:21:38 +0200
Subject: [PATCH 235/257] cpu/hotplug: Prevent state corruption on error
 rollback

When a teardown callback fails, the CPU hotplug code brings the CPU back to
the previous state. The previous state becomes the new target state. The
rollback happens in undo_cpu_down() which increments the state
unconditionally even if the state is already the same as the target.

As a consequence the next CPU hotplug operation will start at the wrong
state. This is easily to observe when __cpu_disable() fails.

Prevent the unconditional undo by checking the state vs. target before
incrementing state and fix up the consequently wrong conditional in the
unplug code which handles the failure of the final CPU take down on the
control CPU side.

Fixes: 4dddfb5faa61 ("smp/hotplug: Rewrite AP state machine core")
Reported-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Tested-by: Neeraj Upadhyay <neeraju@codeaurora.org>
Cc: josh@joshtriplett.org
Cc: peterz@infradead.org
Cc: jiangshanlai@gmail.com
Cc: dzickus@redhat.com
Cc: brendan.jackman@arm.com
Cc: malat@debian.org
Cc: sramana@codeaurora.org
Cc: linux-arm-msm@vger.kernel.org
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1809051419580.1416@nanos.tec.linutronix.de

----
---
 kernel/cpu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/cpu.c b/kernel/cpu.c
index eb4041f78073d..0097acec1c717 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -916,7 +916,8 @@ static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
 		ret = cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL);
 		if (ret) {
 			st->target = prev_state;
-			undo_cpu_down(cpu, st);
+			if (st->state < prev_state)
+				undo_cpu_down(cpu, st);
 			break;
 		}
 	}
@@ -969,7 +970,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
 	 * to do the further cleanups.
 	 */
 	ret = cpuhp_down_callbacks(cpu, st, target);
-	if (ret && st->state > CPUHP_TEARDOWN_CPU && st->state < prev_state) {
+	if (ret && st->state == CPUHP_TEARDOWN_CPU && st->state < prev_state) {
 		cpuhp_reset_state(st, prev_state);
 		__cpuhp_kick_ap(st);
 	}

From 8aaff15168cfbc7c8980fdb0e8a585f1afe56ec0 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Fri, 24 Aug 2018 15:32:43 +0200
Subject: [PATCH 236/257] ceph: avoid a use-after-free in
 ceph_destroy_options()

syzbot reported a use-after-free in ceph_destroy_options(), called from
ceph_mount().  The problem was that create_fs_client() consumed the opt
pointer on some errors, but not on all of them.  Make sure it always
consumes both libceph and ceph options.

Reported-by: syzbot+8ab6f1042021b4eed062@syzkaller.appspotmail.com
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: "Yan, Zheng" <zyan@redhat.com>
---
 fs/ceph/super.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 43ca3b763875d..eab1359d05532 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -602,6 +602,8 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
 
 /*
  * create a new fs client
+ *
+ * Success or not, this function consumes @fsopt and @opt.
  */
 static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 					struct ceph_options *opt)
@@ -609,17 +611,20 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	struct ceph_fs_client *fsc;
 	int page_count;
 	size_t size;
-	int err = -ENOMEM;
+	int err;
 
 	fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
-	if (!fsc)
-		return ERR_PTR(-ENOMEM);
+	if (!fsc) {
+		err = -ENOMEM;
+		goto fail;
+	}
 
 	fsc->client = ceph_create_client(opt, fsc);
 	if (IS_ERR(fsc->client)) {
 		err = PTR_ERR(fsc->client);
 		goto fail;
 	}
+	opt = NULL; /* fsc->client now owns this */
 
 	fsc->client->extra_mon_dispatch = extra_mon_dispatch;
 	fsc->client->osdc.abort_on_full = true;
@@ -677,6 +682,9 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
 	ceph_destroy_client(fsc->client);
 fail:
 	kfree(fsc);
+	if (opt)
+		ceph_destroy_options(opt);
+	destroy_mount_options(fsopt);
 	return ERR_PTR(err);
 }
 
@@ -1042,8 +1050,6 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
 	fsc = create_fs_client(fsopt, opt);
 	if (IS_ERR(fsc)) {
 		res = ERR_CAST(fsc);
-		destroy_mount_options(fsopt);
-		ceph_destroy_options(opt);
 		goto out_final;
 	}
 

From eb3b2d6be4b5e1612827b986cca241c5d104fc41 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 22 Aug 2018 17:11:27 +0200
Subject: [PATCH 237/257] rbd: factor out get_parent_info()

In preparation for the new parent_get and parent_overlap_get class
methods, factor out the fetching and decoding of parent data.

As a side effect, we now decode all four fields in the "no parent"
case.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 134 ++++++++++++++++++++++++++++----------------
 1 file changed, 86 insertions(+), 48 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 7915f3b03736e..bec5a50c98907 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4584,47 +4584,95 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
 						&rbd_dev->header.features);
 }
 
+struct parent_image_info {
+	u64		pool_id;
+	const char	*image_id;
+	u64		snap_id;
+
+	u64		overlap;
+};
+
+/*
+ * The caller is responsible for @pii.
+ */
+static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
+				    struct page *req_page,
+				    struct page *reply_page,
+				    struct parent_image_info *pii)
+{
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	size_t reply_len = PAGE_SIZE;
+	void *p, *end;
+	int ret;
+
+	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+			     "rbd", "get_parent", CEPH_OSD_FLAG_READ,
+			     req_page, sizeof(u64), reply_page, &reply_len);
+	if (ret)
+		return ret;
+
+	p = page_address(reply_page);
+	end = p + reply_len;
+	ceph_decode_64_safe(&p, end, pii->pool_id, e_inval);
+	pii->image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
+	if (IS_ERR(pii->image_id)) {
+		ret = PTR_ERR(pii->image_id);
+		pii->image_id = NULL;
+		return ret;
+	}
+	ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
+	ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int get_parent_info(struct rbd_device *rbd_dev,
+			   struct parent_image_info *pii)
+{
+	struct page *req_page, *reply_page;
+	void *p;
+	int ret;
+
+	req_page = alloc_page(GFP_KERNEL);
+	if (!req_page)
+		return -ENOMEM;
+
+	reply_page = alloc_page(GFP_KERNEL);
+	if (!reply_page) {
+		__free_page(req_page);
+		return -ENOMEM;
+	}
+
+	p = page_address(req_page);
+	ceph_encode_64(&p, rbd_dev->spec->snap_id);
+	ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page, pii);
+
+	__free_page(req_page);
+	__free_page(reply_page);
+	return ret;
+}
+
 static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 {
 	struct rbd_spec *parent_spec;
-	size_t size;
-	void *reply_buf = NULL;
-	__le64 snapid;
-	void *p;
-	void *end;
-	u64 pool_id;
-	char *image_id;
-	u64 snap_id;
-	u64 overlap;
+	struct parent_image_info pii = { 0 };
 	int ret;
 
 	parent_spec = rbd_spec_alloc();
 	if (!parent_spec)
 		return -ENOMEM;
 
-	size = sizeof (__le64) +				/* pool_id */
-		sizeof (__le32) + RBD_IMAGE_ID_LEN_MAX +	/* image_id */
-		sizeof (__le64) +				/* snap_id */
-		sizeof (__le64);				/* overlap */
-	reply_buf = kmalloc(size, GFP_KERNEL);
-	if (!reply_buf) {
-		ret = -ENOMEM;
+	ret = get_parent_info(rbd_dev, &pii);
+	if (ret)
 		goto out_err;
-	}
 
-	snapid = cpu_to_le64(rbd_dev->spec->snap_id);
-	ret = rbd_obj_method_sync(rbd_dev, &rbd_dev->header_oid,
-				  &rbd_dev->header_oloc, "get_parent",
-				  &snapid, sizeof(snapid), reply_buf, size);
-	dout("%s: rbd_obj_method_sync returned %d\n", __func__, ret);
-	if (ret < 0)
-		goto out_err;
+	dout("%s pool_id %llu image_id %s snap_id %llu overlap %llu\n",
+	     __func__, pii.pool_id, pii.image_id, pii.snap_id, pii.overlap);
 
-	p = reply_buf;
-	end = reply_buf + ret;
-	ret = -ERANGE;
-	ceph_decode_64_safe(&p, end, pool_id, out_err);
-	if (pool_id == CEPH_NOPOOL) {
+	if (pii.pool_id == CEPH_NOPOOL) {
 		/*
 		 * Either the parent never existed, or we have
 		 * record of it but the image got flattened so it no
@@ -4647,19 +4695,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	/* The ceph file layout needs to fit pool id in 32 bits */
 
 	ret = -EIO;
-	if (pool_id > (u64)U32_MAX) {
+	if (pii.pool_id > (u64)U32_MAX) {
 		rbd_warn(NULL, "parent pool id too large (%llu > %u)",
-			(unsigned long long)pool_id, U32_MAX);
-		goto out_err;
-	}
-
-	image_id = ceph_extract_encoded_string(&p, end, NULL, GFP_KERNEL);
-	if (IS_ERR(image_id)) {
-		ret = PTR_ERR(image_id);
+			(unsigned long long)pii.pool_id, U32_MAX);
 		goto out_err;
 	}
-	ceph_decode_64_safe(&p, end, snap_id, out_err);
-	ceph_decode_64_safe(&p, end, overlap, out_err);
 
 	/*
 	 * The parent won't change (except when the clone is
@@ -4667,9 +4707,10 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	 * record the parent spec we have not already done so.
 	 */
 	if (!rbd_dev->parent_spec) {
-		parent_spec->pool_id = pool_id;
-		parent_spec->image_id = image_id;
-		parent_spec->snap_id = snap_id;
+		parent_spec->pool_id = pii.pool_id;
+		parent_spec->image_id = pii.image_id;
+		pii.image_id = NULL;
+		parent_spec->snap_id = pii.snap_id;
 
 		/* TODO: support cloning across namespaces */
 		if (rbd_dev->spec->pool_ns) {
@@ -4683,15 +4724,13 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 
 		rbd_dev->parent_spec = parent_spec;
 		parent_spec = NULL;	/* rbd_dev now owns this */
-	} else {
-		kfree(image_id);
 	}
 
 	/*
 	 * We always update the parent overlap.  If it's zero we issue
 	 * a warning, as we will proceed as if there was no parent.
 	 */
-	if (!overlap) {
+	if (!pii.overlap) {
 		if (parent_spec) {
 			/* refresh, careful to warn just once */
 			if (rbd_dev->parent_overlap)
@@ -4702,14 +4741,13 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 			rbd_warn(rbd_dev, "clone is standalone (overlap 0)");
 		}
 	}
-	rbd_dev->parent_overlap = overlap;
+	rbd_dev->parent_overlap = pii.overlap;
 
 out:
 	ret = 0;
 out_err:
-	kfree(reply_buf);
+	kfree(pii.image_id);
 	rbd_spec_put(parent_spec);
-
 	return ret;
 }
 

From e92c0eaf754310f9f31e9229a3f7274a67478f82 Mon Sep 17 00:00:00 2001
From: Ilya Dryomov <idryomov@gmail.com>
Date: Wed, 22 Aug 2018 17:26:10 +0200
Subject: [PATCH 238/257] rbd: support cloning across namespaces

If parent_get class method is not supported by the OSDs, fall back to
the legacy class method and assume that the parent is in the default
(i.e. "") namespace.  The "use the child's image namespace" workaround
is no longer needed because creating images within namespaces will
require parent_get aware OSDs.

Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Jason Dillaman <dillaman@redhat.com>
---
 drivers/block/rbd.c | 111 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 97 insertions(+), 14 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bec5a50c98907..73ed5f3a862df 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -4207,11 +4207,13 @@ static ssize_t rbd_parent_show(struct device *dev,
 
 		count += sprintf(&buf[count], "%s"
 			    "pool_id %llu\npool_name %s\n"
+			    "pool_ns %s\n"
 			    "image_id %s\nimage_name %s\n"
 			    "snap_id %llu\nsnap_name %s\n"
 			    "overlap %llu\n",
 			    !count ? "" : "\n", /* first? */
 			    spec->pool_id, spec->pool_name,
+			    spec->pool_ns ?: "",
 			    spec->image_id, spec->image_name ?: "(unknown)",
 			    spec->snap_id, spec->snap_name,
 			    rbd_dev->parent_overlap);
@@ -4586,12 +4588,89 @@ static int rbd_dev_v2_features(struct rbd_device *rbd_dev)
 
 struct parent_image_info {
 	u64		pool_id;
+	const char	*pool_ns;
 	const char	*image_id;
 	u64		snap_id;
 
+	bool		has_overlap;
 	u64		overlap;
 };
 
+/*
+ * The caller is responsible for @pii.
+ */
+static int decode_parent_image_spec(void **p, void *end,
+				    struct parent_image_info *pii)
+{
+	u8 struct_v;
+	u32 struct_len;
+	int ret;
+
+	ret = ceph_start_decoding(p, end, 1, "ParentImageSpec",
+				  &struct_v, &struct_len);
+	if (ret)
+		return ret;
+
+	ceph_decode_64_safe(p, end, pii->pool_id, e_inval);
+	pii->pool_ns = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
+	if (IS_ERR(pii->pool_ns)) {
+		ret = PTR_ERR(pii->pool_ns);
+		pii->pool_ns = NULL;
+		return ret;
+	}
+	pii->image_id = ceph_extract_encoded_string(p, end, NULL, GFP_KERNEL);
+	if (IS_ERR(pii->image_id)) {
+		ret = PTR_ERR(pii->image_id);
+		pii->image_id = NULL;
+		return ret;
+	}
+	ceph_decode_64_safe(p, end, pii->snap_id, e_inval);
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
+static int __get_parent_info(struct rbd_device *rbd_dev,
+			     struct page *req_page,
+			     struct page *reply_page,
+			     struct parent_image_info *pii)
+{
+	struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+	size_t reply_len = PAGE_SIZE;
+	void *p, *end;
+	int ret;
+
+	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+			     "rbd", "parent_get", CEPH_OSD_FLAG_READ,
+			     req_page, sizeof(u64), reply_page, &reply_len);
+	if (ret)
+		return ret == -EOPNOTSUPP ? 1 : ret;
+
+	p = page_address(reply_page);
+	end = p + reply_len;
+	ret = decode_parent_image_spec(&p, end, pii);
+	if (ret)
+		return ret;
+
+	ret = ceph_osdc_call(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+			     "rbd", "parent_overlap_get", CEPH_OSD_FLAG_READ,
+			     req_page, sizeof(u64), reply_page, &reply_len);
+	if (ret)
+		return ret;
+
+	p = page_address(reply_page);
+	end = p + reply_len;
+	ceph_decode_8_safe(&p, end, pii->has_overlap, e_inval);
+	if (pii->has_overlap)
+		ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
+
+	return 0;
+
+e_inval:
+	return -EINVAL;
+}
+
 /*
  * The caller is responsible for @pii.
  */
@@ -4621,6 +4700,7 @@ static int __get_parent_info_legacy(struct rbd_device *rbd_dev,
 		return ret;
 	}
 	ceph_decode_64_safe(&p, end, pii->snap_id, e_inval);
+	pii->has_overlap = true;
 	ceph_decode_64_safe(&p, end, pii->overlap, e_inval);
 
 	return 0;
@@ -4648,7 +4728,10 @@ static int get_parent_info(struct rbd_device *rbd_dev,
 
 	p = page_address(req_page);
 	ceph_encode_64(&p, rbd_dev->spec->snap_id);
-	ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page, pii);
+	ret = __get_parent_info(rbd_dev, req_page, reply_page, pii);
+	if (ret > 0)
+		ret = __get_parent_info_legacy(rbd_dev, req_page, reply_page,
+					       pii);
 
 	__free_page(req_page);
 	__free_page(reply_page);
@@ -4669,10 +4752,11 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	if (ret)
 		goto out_err;
 
-	dout("%s pool_id %llu image_id %s snap_id %llu overlap %llu\n",
-	     __func__, pii.pool_id, pii.image_id, pii.snap_id, pii.overlap);
+	dout("%s pool_id %llu pool_ns %s image_id %s snap_id %llu has_overlap %d overlap %llu\n",
+	     __func__, pii.pool_id, pii.pool_ns, pii.image_id, pii.snap_id,
+	     pii.has_overlap, pii.overlap);
 
-	if (pii.pool_id == CEPH_NOPOOL) {
+	if (pii.pool_id == CEPH_NOPOOL || !pii.has_overlap) {
 		/*
 		 * Either the parent never existed, or we have
 		 * record of it but the image got flattened so it no
@@ -4681,6 +4765,10 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 		 * overlap to 0.  The effect of this is that all new
 		 * requests will be treated as if the image had no
 		 * parent.
+		 *
+		 * If !pii.has_overlap, the parent image spec is not
+		 * applicable.  It's there to avoid duplication in each
+		 * snapshot record.
 		 */
 		if (rbd_dev->parent_overlap) {
 			rbd_dev->parent_overlap = 0;
@@ -4708,20 +4796,14 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 	 */
 	if (!rbd_dev->parent_spec) {
 		parent_spec->pool_id = pii.pool_id;
+		if (pii.pool_ns && *pii.pool_ns) {
+			parent_spec->pool_ns = pii.pool_ns;
+			pii.pool_ns = NULL;
+		}
 		parent_spec->image_id = pii.image_id;
 		pii.image_id = NULL;
 		parent_spec->snap_id = pii.snap_id;
 
-		/* TODO: support cloning across namespaces */
-		if (rbd_dev->spec->pool_ns) {
-			parent_spec->pool_ns = kstrdup(rbd_dev->spec->pool_ns,
-						       GFP_KERNEL);
-			if (!parent_spec->pool_ns) {
-				ret = -ENOMEM;
-				goto out_err;
-			}
-		}
-
 		rbd_dev->parent_spec = parent_spec;
 		parent_spec = NULL;	/* rbd_dev now owns this */
 	}
@@ -4746,6 +4828,7 @@ static int rbd_dev_v2_parent_info(struct rbd_device *rbd_dev)
 out:
 	ret = 0;
 out_err:
+	kfree(pii.pool_ns);
 	kfree(pii.image_id);
 	rbd_spec_put(parent_spec);
 	return ret;

From d1c392c9e2a301f38998a353f467f76414e38725 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 5 Sep 2018 16:29:49 -0400
Subject: [PATCH 239/257] printk/tracing: Do not trace printk_nmi_enter()

I hit the following splat in my tests:

------------[ cut here ]------------
IRQs not enabled as expected
WARNING: CPU: 3 PID: 0 at kernel/time/tick-sched.c:982 tick_nohz_idle_enter+0x44/0x8c
Modules linked in: ip6t_REJECT nf_reject_ipv6 ip6table_filter ip6_tables ipv6
CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.19.0-rc2-test+ #2
Hardware name: MSI MS-7823/CSM-H87M-G43 (MS-7823), BIOS V1.6 02/22/2014
EIP: tick_nohz_idle_enter+0x44/0x8c
Code: ec 05 00 00 00 75 26 83 b8 c0 05 00 00 00 75 1d 80 3d d0 36 3e c1 00
75 14 68 94 63 12 c1 c6 05 d0 36 3e c1 01 e8 04 ee f8 ff <0f> 0b 58 fa bb a0
e5 66 c1 e8 25 0f 04 00 64 03 1d 28 31 52 c1 8b
EAX: 0000001c EBX: f26e7f8c ECX: 00000006 EDX: 00000007
ESI: f26dd1c0 EDI: 00000000 EBP: f26e7f40 ESP: f26e7f38
DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010296
CR0: 80050033 CR2: 0813c6b0 CR3: 2f342000 CR4: 001406f0
Call Trace:
 do_idle+0x33/0x202
 cpu_startup_entry+0x61/0x63
 start_secondary+0x18e/0x1ed
 startup_32_smp+0x164/0x168
irq event stamp: 18773830
hardirqs last  enabled at (18773829): [<c040150c>] trace_hardirqs_on_thunk+0xc/0x10
hardirqs last disabled at (18773830): [<c040151c>] trace_hardirqs_off_thunk+0xc/0x10
softirqs last  enabled at (18773824): [<c0ddaa6f>] __do_softirq+0x25f/0x2bf
softirqs last disabled at (18773767): [<c0416bbe>] call_on_stack+0x45/0x4b
---[ end trace b7c64aa79e17954a ]---

After a bit of debugging, I found what was happening. This would trigger
when performing "perf" with a high NMI interrupt rate, while enabling and
disabling function tracer. Ftrace uses breakpoints to convert the nops at
the start of functions to calls to the function trampolines. The breakpoint
traps disable interrupts and this makes calls into lockdep via the
trace_hardirqs_off_thunk in the entry.S code. What happens is the following:

  do_idle {

    [interrupts enabled]

    <interrupt> [interrupts disabled]
	TRACE_IRQS_OFF [lockdep says irqs off]
	[...]
	TRACE_IRQS_IRET
	    test if pt_regs say return to interrupts enabled [yes]
	    TRACE_IRQS_ON [lockdep says irqs are on]

	    <nmi>
		nmi_enter() {
		    printk_nmi_enter() [traced by ftrace]
		    [ hit ftrace breakpoint ]
		    <breakpoint exception>
			TRACE_IRQS_OFF [lockdep says irqs off]
			[...]
			TRACE_IRQS_IRET [return from breakpoint]
			   test if pt_regs say interrupts enabled [no]
			   [iret back to interrupt]
	   [iret back to code]

    tick_nohz_idle_enter() {

	lockdep_assert_irqs_enabled() [lockdep say no!]

Although interrupts are indeed enabled, lockdep thinks it is not, and since
we now do asserts via lockdep, it gives a false warning. The issue here is
that printk_nmi_enter() is called before lockdep_off(), which disables
lockdep (for this reason) in NMIs. By simply not allowing ftrace to see
printk_nmi_enter() (via notrace annotation) we keep lockdep from getting
confused.

Cc: stable@vger.kernel.org
Fixes: 42a0bb3f71383 ("printk/nmi: generic solution for safe printk in NMI")
Acked-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
Acked-by: Petr Mladek <pmladek@suse.com>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 kernel/printk/printk_safe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c
index a0a74c533e4b7..0913b4d385de3 100644
--- a/kernel/printk/printk_safe.c
+++ b/kernel/printk/printk_safe.c
@@ -306,12 +306,12 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args)
 	return printk_safe_log_store(s, fmt, args);
 }
 
-void printk_nmi_enter(void)
+void notrace printk_nmi_enter(void)
 {
 	this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK);
 }
 
-void printk_nmi_exit(void)
+void notrace printk_nmi_exit(void)
 {
 	this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK);
 }

From 96d529bac562574600eda85726fcfa3eef6dde8e Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Thu, 6 Sep 2018 16:10:39 +0100
Subject: [PATCH 240/257] firmware: arm_scmi: fix divide by zero when
 sustained_perf_level is zero

Firmware can provide zero as values for sustained performance level and
corresponding sustained frequency in kHz in order to hide the actual
frequencies and provide only abstract values. It may endup with divide
by zero scenario resulting in kernel panic.

Let's set the multiplication factor to one if either one or both of them
(sustained_perf_level and sustained_freq) are set to zero.

Fixes: a9e3fbfaa0ff ("firmware: arm_scmi: add initial support for performance protocol")
Reported-by: Ionela Voinescu <ionela.voinescu@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 drivers/firmware/arm_scmi/perf.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/arm_scmi/perf.c b/drivers/firmware/arm_scmi/perf.c
index 721e6c57beae5..64342944d9175 100644
--- a/drivers/firmware/arm_scmi/perf.c
+++ b/drivers/firmware/arm_scmi/perf.c
@@ -166,7 +166,13 @@ scmi_perf_domain_attributes_get(const struct scmi_handle *handle, u32 domain,
 					le32_to_cpu(attr->sustained_freq_khz);
 		dom_info->sustained_perf_level =
 					le32_to_cpu(attr->sustained_perf_level);
-		dom_info->mult_factor =	(dom_info->sustained_freq_khz * 1000) /
+		if (!dom_info->sustained_freq_khz ||
+		    !dom_info->sustained_perf_level)
+			/* CPUFreq converts to kHz, hence default 1000 */
+			dom_info->mult_factor =	1000;
+		else
+			dom_info->mult_factor =
+					(dom_info->sustained_freq_khz * 1000) /
 					dom_info->sustained_perf_level;
 		memcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE);
 	}

From fac880c7d074fdfca874114b5c47b36aa034e4ee Mon Sep 17 00:00:00 2001
From: Mark Rutland <mark.rutland@arm.com>
Date: Wed, 5 Sep 2018 17:38:57 +0100
Subject: [PATCH 241/257] arm64: fix erroneous warnings in page freeing
 functions

In pmd_free_pte_page() and pud_free_pmd_page() we try to warn if they
hit a present non-table entry. In both cases we'll warn for non-present
entries, as the VM_WARN_ON() only checks the entry is not a table entry.

This has been observed to result in warnings when booting a v4.19-rc2
kernel under qemu.

Fix this by bailing out earlier for non-present entries.

Fixes: ec28bb9c9b0826d7 ("arm64: Implement page table free interfaces")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Will Deacon <will.deacon@arm.com>
---
 arch/arm64/mm/mmu.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 65f86271f02bc..8080c9f489c3e 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -985,8 +985,9 @@ int pmd_free_pte_page(pmd_t *pmdp, unsigned long addr)
 
 	pmd = READ_ONCE(*pmdp);
 
-	/* No-op for empty entry and WARN_ON for valid entry */
-	if (!pmd_present(pmd) || !pmd_table(pmd)) {
+	if (!pmd_present(pmd))
+		return 1;
+	if (!pmd_table(pmd)) {
 		VM_WARN_ON(!pmd_table(pmd));
 		return 1;
 	}
@@ -1007,8 +1008,9 @@ int pud_free_pmd_page(pud_t *pudp, unsigned long addr)
 
 	pud = READ_ONCE(*pudp);
 
-	/* No-op for empty entry and WARN_ON for valid entry */
-	if (!pud_present(pud) || !pud_table(pud)) {
+	if (!pud_present(pud))
+		return 1;
+	if (!pud_table(pud)) {
 		VM_WARN_ON(!pud_table(pud));
 		return 1;
 	}

From 6b45a2b1c0bc2aec84d1c56a1976ca9c8a621ecb Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 6 Sep 2018 14:12:19 +0200
Subject: [PATCH 242/257] memory: ti-aemif: fix a potential NULL-pointer
 dereference

Platform data pointer may be NULL. We check it everywhere but in one
place. Fix it.

Fixes: 8af70cd2ca50 ("memory: aemif: add support for board files")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Cc: stable@vger.kernel.org
Signed-off-by: Olof Johansson <olof@lixom.net>
---
 drivers/memory/ti-aemif.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/memory/ti-aemif.c b/drivers/memory/ti-aemif.c
index 31112f622b884..475e5b3790edb 100644
--- a/drivers/memory/ti-aemif.c
+++ b/drivers/memory/ti-aemif.c
@@ -411,7 +411,7 @@ static int aemif_probe(struct platform_device *pdev)
 			if (ret < 0)
 				goto error;
 		}
-	} else {
+	} else if (pdata) {
 		for (i = 0; i < pdata->num_sub_devices; i++) {
 			pdata->sub_devices[i].dev.parent = dev;
 			ret = platform_device_register(&pdata->sub_devices[i]);

From d5274b3cd6a814ccb2f56d81ee87cbbf51bd4cf7 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Date: Thu, 6 Sep 2018 11:05:44 +0300
Subject: [PATCH 243/257] block: bfq: swap puts in bfqg_and_blkg_put

Fix trivial use-after-free. This could be last reference to bfqg.

Fixes: 8f9bebc33dd7 ("block, bfq: access and cache blkg data only when safe")
Acked-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 58c6efa9f9a99..9fe5952d117d5 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -275,9 +275,9 @@ static void bfqg_and_blkg_get(struct bfq_group *bfqg)
 
 void bfqg_and_blkg_put(struct bfq_group *bfqg)
 {
-	bfqg_put(bfqg);
-
 	blkg_put(bfqg_to_blkg(bfqg));
+
+	bfqg_put(bfqg);
 }
 
 /* @stats = 0 */

From 4cb205c0c50f613e2de91f0eb19d5247ed003e89 Mon Sep 17 00:00:00 2001
From: Jia He <jia.he@hxt-semitech.com>
Date: Tue, 28 Aug 2018 12:53:26 +0800
Subject: [PATCH 244/257] irqchip/gic-v3-its: Cap lpi_id_bits to reduce memory
 footprint

Commit fe8e93504ce8 ("irqchip/gic-v3-its: Use full range of LPIs"), removes
the cap for lpi_id_bits, which causes the following warning to trigger on a
QDF2400 server:

 WARNING: CPU: 0 PID: 0 at mm/page_alloc.c:4066  __alloc_pages_nodemask
 ...
 Call trace:
  __alloc_pages_nodemask+0x2d8/0x1188
  alloc_pages_current+0x8c/0xd8
  its_allocate_prop_table+0x5c/0xb8
  its_init+0x220/0x3c0
  gic_init_bases+0x250/0x380
  gic_acpi_init+0x16c/0x2a4

In its_alloc_lpi_tables(), lpi_id_bits is 24 in QDF2400. The allocation in
allocate_prop_table() tries therefore to allocate 16M (order 12 if
pagesize=4k), which triggers the warning.

As said by MarcL

 Capping lpi_id_bits at 16 (which is what we had before) is plenty,
 will save a some memory, and gives some margin before we need to push
 it up again.

Bring the upper limit of lpi_id_bits back to prevent

Fixes: fe8e93504ce8 ("irqchip/gic-v3-its: Use full range of LPIs")
Suggested-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Jia He <jia.he@hxt-semitech.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Tested-by: Olof Johansson <olof@lixom.net>
Cc: Jason Cooper <jason@lakedaemon.net>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lkml.kernel.org/r/1535432006-2304-1-git-send-email-jia.he@hxt-semitech.com
---
 drivers/irqchip/irq-gic-v3-its.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 316a57530f6d1..c2df341ff6faf 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -1439,6 +1439,7 @@ static struct irq_chip its_irq_chip = {
  * The consequence of the above is that allocation is cost is low, but
  * freeing is expensive. We assumes that freeing rarely occurs.
  */
+#define ITS_MAX_LPI_NRBITS	16 /* 64K LPIs */
 
 static DEFINE_MUTEX(lpi_range_lock);
 static LIST_HEAD(lpi_range_list);
@@ -1625,7 +1626,8 @@ static int __init its_alloc_lpi_tables(void)
 {
 	phys_addr_t paddr;
 
-	lpi_id_bits = GICD_TYPER_ID_BITS(gic_rdists->gicd_typer);
+	lpi_id_bits = min_t(u32, GICD_TYPER_ID_BITS(gic_rdists->gicd_typer),
+				ITS_MAX_LPI_NRBITS);
 	gic_rdists->prop_page = its_allocate_prop_table(GFP_NOWAIT);
 	if (!gic_rdists->prop_page) {
 		pr_err("Failed to allocate PROPBASE\n");

From ae7304c3ea28a3ba47a7a8312c76c654ef24967e Mon Sep 17 00:00:00 2001
From: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
Date: Mon, 3 Sep 2018 15:11:11 +0530
Subject: [PATCH 245/257] i2c: xiic: Make the start and the byte count write
 atomic

Disable interrupts while configuring the transfer and enable them back.

We have below as the programming sequence
1. start and slave address
2. byte count and stop

In some customer platform there was a lot of interrupts between 1 and 2
and after slave address (around 7 clock cyles) if 2 is not executed
then the transaction is nacked.

To fix this case make the 2 writes atomic.

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@xilinx.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
[wsa: added a newline for better readability]
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Cc: stable@kernel.org
---
 drivers/i2c/busses/i2c-xiic.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index 9a71e50d21f1f..0c51c0ffdda9d 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -532,6 +532,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c)
 {
 	u8 rx_watermark;
 	struct i2c_msg *msg = i2c->rx_msg = i2c->tx_msg;
+	unsigned long flags;
 
 	/* Clear and enable Rx full interrupt. */
 	xiic_irq_clr_en(i2c, XIIC_INTR_RX_FULL_MASK | XIIC_INTR_TX_ERROR_MASK);
@@ -547,6 +548,7 @@ static void xiic_start_recv(struct xiic_i2c *i2c)
 		rx_watermark = IIC_RX_FIFO_DEPTH;
 	xiic_setreg8(i2c, XIIC_RFD_REG_OFFSET, rx_watermark - 1);
 
+	local_irq_save(flags);
 	if (!(msg->flags & I2C_M_NOSTART))
 		/* write the address */
 		xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
@@ -556,6 +558,8 @@ static void xiic_start_recv(struct xiic_i2c *i2c)
 
 	xiic_setreg16(i2c, XIIC_DTR_REG_OFFSET,
 		msg->len | ((i2c->nmsgs == 1) ? XIIC_TX_DYN_STOP_MASK : 0));
+	local_irq_restore(flags);
+
 	if (i2c->nmsgs == 1)
 		/* very last, enable bus not busy as well */
 		xiic_irq_clr_en(i2c, XIIC_INTR_BNB_MASK);

From e2c631ba75a7e727e8db0a9d30a06bfd434adb3a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 5 Sep 2018 10:41:58 +0200
Subject: [PATCH 246/257] clocksource: Revert "Remove kthread"

I turns out that the silly spawn kthread from worker was actually needed.

clocksource_watchdog_kthread() cannot be called directly from
clocksource_watchdog_work(), because clocksource_select() calls
timekeeping_notify() which uses stop_machine(). One cannot use
stop_machine() from a workqueue() due lock inversions wrt CPU hotplug.

Revert the patch but add a comment that explain why we jump through such
apparently silly hoops.

Fixes: 7197e77abcb6 ("clocksource: Remove kthread")
Reported-by: Siegfried Metz <frame@mailbox.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Tested-by: Kevin Shanahan <kevin@shanahan.id.au>
Tested-by: viktor_jaegerskuepper@freenet.de
Tested-by: Siegfried Metz <frame@mailbox.org>
Cc: rafael.j.wysocki@intel.com
Cc: len.brown@intel.com
Cc: diego.viola@gmail.com
Cc: rui.zhang@intel.com
Cc: bjorn.andersson@linaro.org
Link: https://lkml.kernel.org/r/20180905084158.GR24124@hirez.programming.kicks-ass.net
---
 kernel/time/clocksource.c | 40 +++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index f74fb00d80644..0e6e97a01942d 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -133,19 +133,40 @@ static void inline clocksource_watchdog_unlock(unsigned long *flags)
 	spin_unlock_irqrestore(&watchdog_lock, *flags);
 }
 
+static int clocksource_watchdog_kthread(void *data);
+static void __clocksource_change_rating(struct clocksource *cs, int rating);
+
 /*
  * Interval: 0.5sec Threshold: 0.0625s
  */
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
+static void clocksource_watchdog_work(struct work_struct *work)
+{
+	/*
+	 * We cannot directly run clocksource_watchdog_kthread() here, because
+	 * clocksource_select() calls timekeeping_notify() which uses
+	 * stop_machine(). One cannot use stop_machine() from a workqueue() due
+	 * lock inversions wrt CPU hotplug.
+	 *
+	 * Also, we only ever run this work once or twice during the lifetime
+	 * of the kernel, so there is no point in creating a more permanent
+	 * kthread for this.
+	 *
+	 * If kthread_run fails the next watchdog scan over the
+	 * watchdog_list will find the unstable clock again.
+	 */
+	kthread_run(clocksource_watchdog_kthread, NULL, "kwatchdog");
+}
+
 static void __clocksource_unstable(struct clocksource *cs)
 {
 	cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
 	cs->flags |= CLOCK_SOURCE_UNSTABLE;
 
 	/*
-	 * If the clocksource is registered clocksource_watchdog_work() will
+	 * If the clocksource is registered clocksource_watchdog_kthread() will
 	 * re-rate and re-select.
 	 */
 	if (list_empty(&cs->list)) {
@@ -156,7 +177,7 @@ static void __clocksource_unstable(struct clocksource *cs)
 	if (cs->mark_unstable)
 		cs->mark_unstable(cs);
 
-	/* kick clocksource_watchdog_work() */
+	/* kick clocksource_watchdog_kthread() */
 	if (finished_booting)
 		schedule_work(&watchdog_work);
 }
@@ -166,7 +187,7 @@ static void __clocksource_unstable(struct clocksource *cs)
  * @cs:		clocksource to be marked unstable
  *
  * This function is called by the x86 TSC code to mark clocksources as unstable;
- * it defers demotion and re-selection to a work.
+ * it defers demotion and re-selection to a kthread.
  */
 void clocksource_mark_unstable(struct clocksource *cs)
 {
@@ -391,9 +412,7 @@ static void clocksource_dequeue_watchdog(struct clocksource *cs)
 	}
 }
 
-static void __clocksource_change_rating(struct clocksource *cs, int rating);
-
-static int __clocksource_watchdog_work(void)
+static int __clocksource_watchdog_kthread(void)
 {
 	struct clocksource *cs, *tmp;
 	unsigned long flags;
@@ -418,12 +437,13 @@ static int __clocksource_watchdog_work(void)
 	return select;
 }
 
-static void clocksource_watchdog_work(struct work_struct *work)
+static int clocksource_watchdog_kthread(void *data)
 {
 	mutex_lock(&clocksource_mutex);
-	if (__clocksource_watchdog_work())
+	if (__clocksource_watchdog_kthread())
 		clocksource_select();
 	mutex_unlock(&clocksource_mutex);
+	return 0;
 }
 
 static bool clocksource_is_watchdog(struct clocksource *cs)
@@ -442,7 +462,7 @@ static void clocksource_enqueue_watchdog(struct clocksource *cs)
 static void clocksource_select_watchdog(bool fallback) { }
 static inline void clocksource_dequeue_watchdog(struct clocksource *cs) { }
 static inline void clocksource_resume_watchdog(void) { }
-static inline int __clocksource_watchdog_work(void) { return 0; }
+static inline int __clocksource_watchdog_kthread(void) { return 0; }
 static bool clocksource_is_watchdog(struct clocksource *cs) { return false; }
 void clocksource_mark_unstable(struct clocksource *cs) { }
 
@@ -810,7 +830,7 @@ static int __init clocksource_done_booting(void)
 	/*
 	 * Run the watchdog first to eliminate unstable clock sources
 	 */
-	__clocksource_watchdog_work();
+	__clocksource_watchdog_kthread();
 	clocksource_select();
 	mutex_unlock(&clocksource_mutex);
 	return 0;

From da4dfaf8428d9f71e2ac4f736bacb81adab36504 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@xilinx.com>
Date: Fri, 7 Sep 2018 08:02:05 +0200
Subject: [PATCH 247/257] i2c: xiic: Record xilinx i2c with Zynq fragment

Include xilinx soft i2c controller to Zynq fragment to make clear who is
responsible for it.

Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9ad052aeac39b..d870cb57c887a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2311,6 +2311,7 @@ F:	drivers/clocksource/cadence_ttc_timer.c
 F:	drivers/i2c/busses/i2c-cadence.c
 F:	drivers/mmc/host/sdhci-of-arasan.c
 F:	drivers/edac/synopsys_edac.c
+F:	drivers/i2c/busses/i2c-xiic.c
 
 ARM64 PORT (AARCH64 ARCHITECTURE)
 M:	Catalin Marinas <catalin.marinas@arm.com>

From 694556d54f354d3fe43bb2e61fd6103cca2638a4 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 23 Aug 2018 09:58:27 +0100
Subject: [PATCH 248/257] KVM: arm/arm64: Clean dcache to PoC when changing PTE
 due to CoW

When triggering a CoW, we unmap the RO page via an MMU notifier
(invalidate_range_start), and then populate the new PTE using another
one (change_pte). In the meantime, we'll have copied the old page
into the new one.

The problem is that the data for the new page is sitting in the
cache, and should the guest have an uncached mapping to that page
(or its MMU off), following accesses will bypass the cache.

In a way, this is similar to what happens on a translation fault:
We need to clean the page to the PoC before mapping it. So let's just
do that.

This fixes a KVM unit test regression observed on a HiSilicon platform,
and subsequently reproduced on Seattle.

Fixes: a9c0e12ebee5 ("KVM: arm/arm64: Only clean the dcache on translation fault")
Cc: stable@vger.kernel.org # v4.16+
Reported-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 virt/kvm/arm/mmu.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 91aaf73b00df8..111a660be3bee 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1860,13 +1860,20 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
 	unsigned long end = hva + PAGE_SIZE;
+	kvm_pfn_t pfn = pte_pfn(pte);
 	pte_t stage2_pte;
 
 	if (!kvm->arch.pgd)
 		return;
 
 	trace_kvm_set_spte_hva(hva);
-	stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
+
+	/*
+	 * We've moved a page around, probably through CoW, so let's treat it
+	 * just like a translation fault and clean the cache to the PoC.
+	 */
+	clean_dcache_guest_page(pfn, PAGE_SIZE);
+	stage2_pte = pfn_pte(pfn, PAGE_S2);
 	handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
 }
 

From 7d14919c0d475a795c0127631ac8ecb2b0f31831 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 23 Aug 2018 11:51:43 +0100
Subject: [PATCH 249/257] arm64: KVM: Only force FPEXC32_EL2.EN if trapping
 FPSIMD

If trapping FPSIMD in the context of an AArch32 guest, it is critical
to set FPEXC32_EL2.EN to 1 so that the trapping is taken to EL2 and
not EL1.

Conversely, it is just as critical *not* to set FPEXC32_EL2.EN to 1
if we're not going to trap FPSIMD, as we then corrupt the existing
VFP state.

Moving the call to __activate_traps_fpsimd32 to the point where we
know for sure that we are going to trap ensures that we don't set that
bit spuriously.

Fixes: e6b673b741ea ("KVM: arm64: Optimise FPSIMD handling to reduce guest/host thrashing")
Cc: stable@vger.kernel.org # v4.18
Cc: Dave Martin <dave.martin@arm.com>
Reported-by: Alexander Graf <agraf@suse.de>
Tested-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 arch/arm64/kvm/hyp/switch.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index d496ef5798596..ca46153d79154 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
 	val &= ~CPACR_EL1_ZEN;
-	if (!update_fp_enabled(vcpu))
+	if (!update_fp_enabled(vcpu)) {
 		val &= ~CPACR_EL1_FPEN;
+		__activate_traps_fpsimd32(vcpu);
+	}
 
 	write_sysreg(val, cpacr_el1);
 
@@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
 
 	val = CPTR_EL2_DEFAULT;
 	val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
-	if (!update_fp_enabled(vcpu))
+	if (!update_fp_enabled(vcpu)) {
 		val |= CPTR_EL2_TFP;
+		__activate_traps_fpsimd32(vcpu);
+	}
 
 	write_sysreg(val, cptr_el2);
 }
@@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
 	if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
 		write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
 
-	__activate_traps_fpsimd32(vcpu);
 	if (has_vhe())
 		activate_traps_vhe(vcpu);
 	else

From a35381e10dc46dd75e65e4b3832d9a0005d48d44 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Thu, 23 Aug 2018 10:18:14 +0100
Subject: [PATCH 250/257] KVM: Remove obsolete kvm_unmap_hva notifier backend

kvm_unmap_hva is long gone, and we only have kvm_unmap_hva_range to
deal with. Drop the now obsolete code.

Fixes: fb1522e099f0 ("KVM: update to new mmu_notifier semantic v2")
Cc: James Hogan <jhogan@kernel.org>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 arch/arm/include/asm/kvm_host.h   |  1 -
 arch/arm64/include/asm/kvm_host.h |  1 -
 arch/mips/include/asm/kvm_host.h  |  1 -
 arch/mips/kvm/mmu.c               | 10 ----------
 arch/x86/include/asm/kvm_host.h   |  1 -
 arch/x86/kvm/mmu.c                |  5 -----
 virt/kvm/arm/mmu.c                | 12 ------------
 virt/kvm/arm/trace.h              | 15 ---------------
 8 files changed, 46 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 79906cecb091e..3ad482d2f1eb9 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -223,7 +223,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 			      struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index f26055f2306e1..8e6d46df38aa0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -357,7 +357,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
 			      struct kvm_vcpu_events *events);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index a9af1d2dcd699..2c1c53d121793 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -931,7 +931,6 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
 						   bool write);
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index ee64db0327933..d8dcdb3504059 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -512,16 +512,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
 	return 1;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	unsigned long end = hva + PAGE_SIZE;
-
-	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-
-	kvm_mips_callbacks->flush_shadow_all(kvm);
-	return 0;
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00ddb0c9e612a..e6a33420b8712 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1450,7 +1450,6 @@ asmlinkage void kvm_spurious_fault(void);
 	____kvm_handle_fault_on_reboot(insn, "")
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a282321329b51..d440154e8938c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
 {
 	return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c
index 111a660be3bee..ed162a6c57c59 100644
--- a/virt/kvm/arm/mmu.c
+++ b/virt/kvm/arm/mmu.c
@@ -1817,18 +1817,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat
 	return 0;
 }
 
-int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
-{
-	unsigned long end = hva + PAGE_SIZE;
-
-	if (!kvm->arch.pgd)
-		return 0;
-
-	trace_kvm_unmap_hva(hva);
-	handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
-	return 0;
-}
-
 int kvm_unmap_hva_range(struct kvm *kvm,
 			unsigned long start, unsigned long end)
 {
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
index e53b596f483b9..57b3edebbb404 100644
--- a/virt/kvm/arm/trace.h
+++ b/virt/kvm/arm/trace.h
@@ -134,21 +134,6 @@ TRACE_EVENT(kvm_mmio_emulate,
 		  __entry->vcpu_pc, __entry->instr, __entry->cpsr)
 );
 
-TRACE_EVENT(kvm_unmap_hva,
-	TP_PROTO(unsigned long hva),
-	TP_ARGS(hva),
-
-	TP_STRUCT__entry(
-		__field(	unsigned long,	hva		)
-	),
-
-	TP_fast_assign(
-		__entry->hva		= hva;
-	),
-
-	TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
-);
-
 TRACE_EVENT(kvm_unmap_hva_range,
 	TP_PROTO(unsigned long start, unsigned long end),
 	TP_ARGS(start, end),

From df3190e22016abf74ef67c9691e9fa1012a66bd5 Mon Sep 17 00:00:00 2001
From: Steven Price <steven.price@arm.com>
Date: Mon, 13 Aug 2018 17:04:53 +0100
Subject: [PATCH 251/257] arm64: KVM: Remove pgd_lock

The lock has never been used and the page tables are protected by
mmu_lock in struct kvm.

Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@arm.com>
---
 arch/arm64/include/asm/kvm_host.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8e6d46df38aa0..3d6d7336f8712 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -61,8 +61,7 @@ struct kvm_arch {
 	u64    vmid_gen;
 	u32    vmid;
 
-	/* 1-level 2nd stage table and lock */
-	spinlock_t pgd_lock;
+	/* 1-level 2nd stage table, protected by kvm->mmu_lock */
 	pgd_t *pgd;
 
 	/* VTTBR value associated with above pgd and vmid */

From b5861e5cf2fcf83031ea3e26b0a69d887adf7d21 Mon Sep 17 00:00:00 2001
From: Liran Alon <liran.alon@oracle.com>
Date: Mon, 3 Sep 2018 15:20:22 +0300
Subject: [PATCH 252/257] KVM: nVMX: Fix loss of pending IRQ/NMI before
 entering L2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Consider the case L1 had a IRQ/NMI event until it executed
VMLAUNCH/VMRESUME which wasn't delivered because it was disallowed
(e.g. interrupts disabled). When L1 executes VMLAUNCH/VMRESUME,
L0 needs to evaluate if this pending event should cause an exit from
L2 to L1 or delivered directly to L2 (e.g. In case L1 don't intercept
EXTERNAL_INTERRUPT).

Usually this would be handled by L0 requesting a IRQ/NMI window
by setting VMCS accordingly. However, this setting was done on
VMCS01 and now VMCS02 is active instead. Thus, when L1 executes
VMLAUNCH/VMRESUME we force L0 to perform pending event evaluation by
requesting a KVM_REQ_EVENT.

Note that above scenario exists when L1 KVM is about to enter L2 but
requests an "immediate-exit". As in this case, L1 will
disable-interrupts and then send a self-IPI before entering L2.

Reviewed-by: Nikita Leshchenko <nikita.leshchenko@oracle.com>
Co-developed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Liran Alon <liran.alon@oracle.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/vmx.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f910d33858d95..533a327372c87 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	bool from_vmentry = !!exit_qual;
 	u32 dummy_exit_qual;
+	u32 vmcs01_cpu_exec_ctrl;
 	int r = 0;
 
+	vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+
 	enter_guest_mode(vcpu);
 
 	if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@@ -12574,6 +12577,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
 		kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
 	}
 
+	/*
+	 * If L1 had a pending IRQ/NMI until it executed
+	 * VMLAUNCH/VMRESUME which wasn't delivered because it was
+	 * disallowed (e.g. interrupts disabled), L0 needs to
+	 * evaluate if this pending event should cause an exit from L2
+	 * to L1 or delivered directly to L2 (e.g. In case L1 don't
+	 * intercept EXTERNAL_INTERRUPT).
+	 *
+	 * Usually this would be handled by L0 requesting a
+	 * IRQ/NMI window by setting VMCS accordingly. However,
+	 * this setting was done on VMCS01 and now VMCS02 is active
+	 * instead. Thus, we force L0 to perform pending event
+	 * evaluation by requesting a KVM_REQ_EVENT.
+	 */
+	if (vmcs01_cpu_exec_ctrl &
+		(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+	}
+
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet

From bdf7ffc89922a52a4f08a12f7421ea24bb7626a0 Mon Sep 17 00:00:00 2001
From: Wanpeng Li <wanpengli@tencent.com>
Date: Thu, 30 Aug 2018 10:03:30 +0800
Subject: [PATCH 253/257] KVM: LAPIC: Fix pv ipis out-of-bounds access
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Dan Carpenter reported that the untrusted data returns from kvm_register_read()
results in the following static checker warning:
  arch/x86/kvm/lapic.c:576 kvm_pv_send_ipi()
  error: buffer underflow 'map->phys_map' 's32min-s32max'

KVM guest can easily trigger this by executing the following assembly sequence
in Ring0:

mov $10, %rax
mov $0xFFFFFFFF, %rbx
mov $0xFFFFFFFF, %rdx
mov $0, %rsi
vmcall

As this will cause KVM to execute the following code-path:
vmx_handle_exit() -> handle_vmcall() -> kvm_emulate_hypercall() -> kvm_pv_send_ipi()
which will reach out-of-bounds access.

This patch fixes it by adding a check to kvm_pv_send_ipi() against map->max_apic_id,
ignoring destinations that are not present and delivering the rest. We also check
whether or not map->phys_map[min + i] is NULL since the max_apic_id is set to the
max apic id, some phys_map maybe NULL when apic id is sparse, especially kvm
unconditionally set max_apic_id to 255 to reserve enough space for any xAPIC ID.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Liran Alon <liran.alon@oracle.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Liran Alon <liran.alon@oracle.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
[Add second "if (min > map->max_apic_id)" to complete the fix. -Radim]
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/lapic.c            | 27 ++++++++++++++++++++-------
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3ad10f634d4c3..8e90488c3d568 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1455,7 +1455,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
-    		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit);
 
 u64 kvm_get_arch_capabilities(void);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cefba28c864a..17c0472c5b344 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 }
 
 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
-    		    unsigned long ipi_bitmap_high, int min,
+		    unsigned long ipi_bitmap_high, u32 min,
 		    unsigned long icr, int op_64_bit)
 {
 	int i;
@@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
 	rcu_read_lock();
 	map = rcu_dereference(kvm->arch.apic_map);
 
+	if (min > map->max_apic_id)
+		goto out;
 	/* Bits above cluster_size are masked in the caller.  */
-	for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
-		vcpu = map->phys_map[min + i]->vcpu;
-		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+	for_each_set_bit(i, &ipi_bitmap_low,
+		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+		if (map->phys_map[min + i]) {
+			vcpu = map->phys_map[min + i]->vcpu;
+			count += kvm_apic_set_irq(vcpu, &irq, NULL);
+		}
 	}
 
 	min += cluster_size;
-	for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
-		vcpu = map->phys_map[min + i]->vcpu;
-		count += kvm_apic_set_irq(vcpu, &irq, NULL);
+
+	if (min > map->max_apic_id)
+		goto out;
+
+	for_each_set_bit(i, &ipi_bitmap_high,
+		min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
+		if (map->phys_map[min + i]) {
+			vcpu = map->phys_map[min + i]->vcpu;
+			count += kvm_apic_set_irq(vcpu, &irq, NULL);
+		}
 	}
 
+out:
 	rcu_read_unlock();
 	return count;
 }

From ecfe951f0c1b169ea4b7dd6f3a404dfedd795bc2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 7 Sep 2018 23:55:17 +0100
Subject: [PATCH 254/257] afs: Fix cell specification to permit an empty
 address list

Fix the cell specification mechanism to allow cells to be pre-created
without having to specify at least one address (the addresses will be
upcalled for).

This allows the cell information preload service to avoid the need to issue
loads of DNS lookups during boot to get the addresses for each cell (500+
lookups for the 'standard' cell list[*]).  The lookups can be done later as
each cell is accessed through the filesystem.

Also remove the print statement that prints a line every time a new cell is
added.

[*] There are 144 cells in the list.  Each cell is first looked up for an
    SRV record, and if that fails, for an AFSDB record.  These get a list
    of server names, each of which then has to be looked up to get the
    addresses for that server.  E.g.:

	dig srv _afs3-vlserver._udp.grand.central.org

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/afs/proc.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 0c3285c8db95b..476dcbb79713d 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -98,13 +98,13 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
 		goto inval;
 
 	args = strchr(name, ' ');
-	if (!args)
-		goto inval;
-	do {
-		*args++ = 0;
-	} while(*args == ' ');
-	if (!*args)
-		goto inval;
+	if (args) {
+		do {
+			*args++ = 0;
+		} while(*args == ' ');
+		if (!*args)
+			goto inval;
+	}
 
 	/* determine command to perform */
 	_debug("cmd=%s name=%s args=%s", buf, name, args);
@@ -120,7 +120,6 @@ static int afs_proc_cells_write(struct file *file, char *buf, size_t size)
 
 		if (test_and_set_bit(AFS_CELL_FL_NO_GC, &cell->flags))
 			afs_put_cell(net, cell);
-		printk("kAFS: Added new cell '%s'\n", name);
 	} else {
 		goto inval;
 	}

From 47b7360ce563e18c524ce92b55fb4da72b3b3578 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sat, 8 Sep 2018 12:07:26 +0200
Subject: [PATCH 255/257] x86/apic/vector: Make error return value negative

activate_managed() returns EINVAL instead of -EINVAL in case of
error. While this is unlikely to happen, the positive return value would
cause further malfunction at the call site.

Fixes: 2db1f959d9dc ("x86/vector: Handle managed interrupts proper")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
---
 arch/x86/kernel/apic/vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
index 9f148e3d45b4f..7654febd51027 100644
--- a/arch/x86/kernel/apic/vector.c
+++ b/arch/x86/kernel/apic/vector.c
@@ -413,7 +413,7 @@ static int activate_managed(struct irq_data *irqd)
 	if (WARN_ON_ONCE(cpumask_empty(vector_searchmask))) {
 		/* Something in the core code broke! Survive gracefully */
 		pr_err("Managed startup for irq %u, but no CPU\n", irqd->irq);
-		return EINVAL;
+		return -EINVAL;
 	}
 
 	ret = assign_managed_vector(irqd, vector_searchmask);

From 9bc4f28af75a91aea0ae383f50b0a430c4509303 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Sun, 2 Sep 2018 11:14:50 -0700
Subject: [PATCH 256/257] x86/mm: Use WRITE_ONCE() when setting PTEs

When page-table entries are set, the compiler might optimize their
assignment by using multiple instructions to set the PTE. This might
turn into a security hazard if the user somehow manages to use the
interim PTE. L1TF does not make our lives easier, making even an interim
non-present PTE a security hazard.

Using WRITE_ONCE() to set PTEs and friends should prevent this potential
security hazard.

I skimmed the differences in the binary with and without this patch. The
differences are (obviously) greater when CONFIG_PARAVIRT=n as more
code optimizations are possible. For better and worse, the impact on the
binary with this patch is pretty small. Skimming the code did not cause
anything to jump out as a security hazard, but it seems that at least
move_soft_dirty_pte() caused set_pte_at() to use multiple writes.

Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Sean Christopherson <sean.j.christopherson@intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: stable@vger.kernel.org
Link: https://lkml.kernel.org/r/20180902181451.80520-1-namit@vmware.com
---
 arch/x86/include/asm/pgtable.h    |  2 +-
 arch/x86/include/asm/pgtable_64.h | 20 ++++++++++----------
 arch/x86/mm/pgtable.c             |  8 ++++----
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index e4ffa565a69f0..690c0307afed0 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1195,7 +1195,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 		return xchg(pmdp, pmd);
 	} else {
 		pmd_t old = *pmdp;
-		*pmdp = pmd;
+		WRITE_ONCE(*pmdp, pmd);
 		return old;
 	}
 }
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index f773d5e6c8cc4..ce2b59047cb83 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -55,15 +55,15 @@ struct mm_struct;
 void set_pte_vaddr_p4d(p4d_t *p4d_page, unsigned long vaddr, pte_t new_pte);
 void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
 
-static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
-				    pte_t *ptep)
+static inline void native_set_pte(pte_t *ptep, pte_t pte)
 {
-	*ptep = native_make_pte(0);
+	WRITE_ONCE(*ptep, pte);
 }
 
-static inline void native_set_pte(pte_t *ptep, pte_t pte)
+static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep)
 {
-	*ptep = pte;
+	native_set_pte(ptep, native_make_pte(0));
 }
 
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -73,7 +73,7 @@ static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-	*pmdp = pmd;
+	WRITE_ONCE(*pmdp, pmd);
 }
 
 static inline void native_pmd_clear(pmd_t *pmd)
@@ -109,7 +109,7 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp)
 
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
 {
-	*pudp = pud;
+	WRITE_ONCE(*pudp, pud);
 }
 
 static inline void native_pud_clear(pud_t *pud)
@@ -137,13 +137,13 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 	pgd_t pgd;
 
 	if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
-		*p4dp = p4d;
+		WRITE_ONCE(*p4dp, p4d);
 		return;
 	}
 
 	pgd = native_make_pgd(native_p4d_val(p4d));
 	pgd = pti_set_user_pgtbl((pgd_t *)p4dp, pgd);
-	*p4dp = native_make_p4d(native_pgd_val(pgd));
+	WRITE_ONCE(*p4dp, native_make_p4d(native_pgd_val(pgd)));
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -153,7 +153,7 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
-	*pgdp = pti_set_user_pgtbl(pgdp, pgd);
+	WRITE_ONCE(*pgdp, pti_set_user_pgtbl(pgdp, pgd));
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index e848a48117856..ae394552fb945 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -269,7 +269,7 @@ static void mop_up_one_pmd(struct mm_struct *mm, pgd_t *pgdp)
 	if (pgd_val(pgd) != 0) {
 		pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
 
-		*pgdp = native_make_pgd(0);
+		pgd_clear(pgdp);
 
 		paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
 		pmd_free(mm, pmd);
@@ -494,7 +494,7 @@ int ptep_set_access_flags(struct vm_area_struct *vma,
 	int changed = !pte_same(*ptep, entry);
 
 	if (changed && dirty)
-		*ptep = entry;
+		set_pte(ptep, entry);
 
 	return changed;
 }
@@ -509,7 +509,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma,
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
 	if (changed && dirty) {
-		*pmdp = entry;
+		set_pmd(pmdp, entry);
 		/*
 		 * We had a write-protection fault here and changed the pmd
 		 * to to more permissive. No need to flush the TLB for that,
@@ -529,7 +529,7 @@ int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address,
 	VM_BUG_ON(address & ~HPAGE_PUD_MASK);
 
 	if (changed && dirty) {
-		*pudp = entry;
+		set_pud(pudp, entry);
 		/*
 		 * We had a write-protection fault here and changed the pud
 		 * to to more permissive. No need to flush the TLB for that,

From f0b0d88a825149ef3b06656886bc211c71dcb852 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 6 Sep 2018 16:37:24 -0700
Subject: [PATCH 257/257] kbuild: modules_install: warn when missing System.map
 file

If there is no System.map file for "make modules_install",
scripts/depmod.sh will silently exit with success, having done
nothing.  Since this is an unexpected situation, change it to
report a Warning for the missing file.  The behavior is not
changed except for the Warning message.

The (previous) silent success and new Warning can be reproduced
by:
$ make mrproper; make defconfig
$ make modules; make modules_install

and since System.map is produced by "make vmlinux", the steps
above omit producing the System.map file.

Reported-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
---
 scripts/depmod.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/depmod.sh b/scripts/depmod.sh
index e5f0aad75b964..e083bcae343f3 100755
--- a/scripts/depmod.sh
+++ b/scripts/depmod.sh
@@ -11,6 +11,7 @@ DEPMOD=$1
 KERNELRELEASE=$2
 
 if ! test -r System.map ; then
+	echo "Warning: modules_install: missing 'System.map' file. Skipping depmod." >&2
 	exit 0
 fi